about summary refs log tree commit diff
path: root/common/patterns.js
diff options
context:
space:
mode:
Diffstat (limited to 'common/patterns.js')
-rw-r--r-- common/patterns.js 75
1 files changed, 46 insertions, 29 deletions
diff --git a/common/patterns.js b/common/patterns.js
index e198482..7d28dfe 100644
--- a/common/patterns.js
+++ b/common/patterns.js
@@ -41,50 +41,67 @@
* proprietary program, I am not going to enforce this in court.
*/
-const MAX_URL_PATH_LEN = 12;
-const MAX_URL_PATH_CHARS = 255;
-const MAX_DOMAIN_LEN = 7;
-const MAX_DOMAIN_CHARS = 100;
+const MAX = {
+ URL_PATH_LEN: 12,
+ URL_PATH_CHARS: 255,
+ DOMAIN_LEN: 7,
+ DOMAIN_CHARS: 100
+};
const proto_regex = /^(\w+):\/\/(.*)$/;
const user_re = "[^/?#@]+@"
-const domain_re = "[^/?#]+";
+const domain_re = "[.*a-zA-Z0-9-]+";
const path_re = "[^?#]*";
const query_re = "\\??[^#]*";
const http_regex = new RegExp(`^(${domain_re})(${path_re})(${query_re}).*`);
-const file_regex = new RegExp(`^(${path_re}).*`);
+const file_regex = new RegExp(`^(/${path_re}).*`);
const ftp_regex = new RegExp(`^(${user_re})?(${domain_re})(${path_re}).*`);
-function deconstruct_url(url)
+function match_or_throw(regex, string, error_msg)
{
- const proto_match = proto_regex.exec(url);
- if (proto_match === null)
- return undefined;
+ const match = regex.exec(string);
+ if (match === null)
+ throw error_msg;
+ return match;
+}
+
+function deconstruct_url(url, use_limits=true)
+{
+ const max = MAX;
+ if (!use_limits) {
+ for (key in MAX)
+ max[key] = Infinity;
+ }
+
+ const matcher = (re, str) => match_or_throw(re, str, `bad url '${url}'`)
+
+ const proto_match = matcher(proto_regex, url);
const deco = {proto: proto_match[1]};
if (deco.proto === "file") {
- deco.path = file_regex.exec(proto_match[2])[1];
+ deco.path = matcher(file_regex, proto_match[2])[1];
} else if (deco.proto === "ftp") {
- [deco.domain, deco.path] = ftp_regex.exec(proto_match[2]).slice(2, 4);
+ [deco.domain, deco.path] =
+ matcher(ftp_regex, proto_match[2]).slice(2, 4);
+ } else if (deco.proto === "http" || deco.proto === "https") {
+ [deco.domain, deco.path, deco.query] =
+ matcher(http_regex, proto_match[2]).slice(1, 4);
+ deco.domain = deco.domain.toLowerCase();
} else {
- const http_match = http_regex.exec(proto_match[2]);
- if (!http_match)
- return undefined;
- [deco.domain, deco.path, deco.query] = http_match.slice(1, 4);
+ throw `unsupported protocol in url '${url}'`;
}
- const leading_dash = deco.path[0] === "/";
- deco.trailing_dash = deco.path[deco.path.length - 1] === "/";
+ deco.trailing_slash = deco.path[deco.path.length - 1] === "/";
if (deco.domain) {
- if (deco.domain.length > MAX_DOMAIN_CHARS) {
+ if (deco.domain.length > max.DOMAIN_CHARS) {
const idx = deco.domain.indexOf(".", deco.domain.length -
- MAX_DOMAIN_CHARS);
+ max.DOMAIN_CHARS);
if (idx === -1)
deco.domain = [];
else
@@ -93,7 +110,7 @@ function deconstruct_url(url)
deco.domain_truncated = true;
}
- if (deco.path.length > MAX_URL_PATH_CHARS) {
+ if (deco.path.length > max.URL_PATH_CHARS) {
deco.path = deco.path.substring(0, deco.path.lastIndexOf("/"));
deco.path_truncated = true;
}
@@ -101,16 +118,14 @@ function deconstruct_url(url)
if (typeof deco.domain === "string") {
deco.domain = deco.domain.split(".");
- if (deco.domain.splice(0, deco.domain.length - MAX_DOMAIN_LEN).length
+ if (deco.domain.splice(0, deco.domain.length - max.DOMAIN_LEN).length
> 0)
deco.domain_truncated = true;
}
deco.path = deco.path.split("/").filter(s => s !== "");
- if (deco.domain && deco.path.splice(MAX_URL_PATH_LEN).length > 0)
+ if (deco.domain && deco.path.splice(max.URL_PATH_LEN).length > 0)
deco.path_truncated = true;
- if (leading_dash || deco.path.length === 0)
- deco.path.unshift("");
return deco;
}
@@ -132,13 +147,14 @@ function* each_domain_pattern(deco)
function* each_path_pattern(deco)
{
- for (let slice = deco.path.length; slice > 0; slice--) {
- const path_part = deco.path.slice(0, slice).join("/");
+ for (let slice = deco.path.length; slice >= 0; slice--) {
+ const path_part = ["", ...deco.path.slice(0, slice)].join("/");
const path_wildcards = [];
if (slice === deco.path.length && !deco.path_truncated) {
- if (deco.trailing_dash)
+ if (deco.trailing_slash)
yield path_part + "/";
- yield path_part;
+ if (slice > 0 || deco.proto !== "file")
+ yield path_part;
}
if (slice === deco.path.length - 1 && !deco.path_truncated &&
deco.path[slice] !== "*")
@@ -171,5 +187,6 @@ function* each_url_pattern(url)
/*
* EXPORTS_START
* EXPORT each_url_pattern
+ * EXPORT deconstruct_url
* EXPORTS_END
*/