diff options
Diffstat (limited to 'common/patterns.js')
-rw-r--r-- | common/patterns.js | 75 |
1 files changed, 46 insertions, 29 deletions
diff --git a/common/patterns.js b/common/patterns.js index e198482..7d28dfe 100644 --- a/common/patterns.js +++ b/common/patterns.js @@ -41,50 +41,67 @@ * proprietary program, I am not going to enforce this in court. */ -const MAX_URL_PATH_LEN = 12; -const MAX_URL_PATH_CHARS = 255; -const MAX_DOMAIN_LEN = 7; -const MAX_DOMAIN_CHARS = 100; +const MAX = { + URL_PATH_LEN: 12, + URL_PATH_CHARS: 255, + DOMAIN_LEN: 7, + DOMAIN_CHARS: 100 +}; const proto_regex = /^(\w+):\/\/(.*)$/; const user_re = "[^/?#@]+@" -const domain_re = "[^/?#]+"; +const domain_re = "[.*a-zA-Z0-9-]+"; const path_re = "[^?#]*"; const query_re = "\\??[^#]*"; const http_regex = new RegExp(`^(${domain_re})(${path_re})(${query_re}).*`); -const file_regex = new RegExp(`^(${path_re}).*`); +const file_regex = new RegExp(`^(/${path_re}).*`); const ftp_regex = new RegExp(`^(${user_re})?(${domain_re})(${path_re}).*`); -function deconstruct_url(url) +function match_or_throw(regex, string, error_msg) { - const proto_match = proto_regex.exec(url); - if (proto_match === null) - return undefined; + const match = regex.exec(string); + if (match === null) + throw error_msg; + return match; +} + +function deconstruct_url(url, use_limits=true) +{ + const max = MAX; + if (!use_limits) { + for (key in MAX) + max[key] = Infinity; + } + + const matcher = (re, str) => match_or_throw(re, str, `bad url '${url}'`) + + const proto_match = matcher(proto_regex, url); const deco = {proto: proto_match[1]}; if (deco.proto === "file") { - deco.path = file_regex.exec(proto_match[2])[1]; + deco.path = matcher(file_regex, proto_match[2])[1]; } else if (deco.proto === "ftp") { - [deco.domain, deco.path] = ftp_regex.exec(proto_match[2]).slice(2, 4); + [deco.domain, deco.path] = + matcher(ftp_regex, proto_match[2]).slice(2, 4); + } else if (deco.proto === "http" || deco.proto === "https") { + [deco.domain, deco.path, deco.query] = + matcher(http_regex, proto_match[2]).slice(1, 4); + deco.domain = deco.domain.toLowerCase(); } else { - const http_match = http_regex.exec(proto_match[2]); - if (!http_match) - return undefined; - [deco.domain, deco.path, deco.query] = http_match.slice(1, 4); + throw `unsupported protocol in url '${url}'`; } - const leading_dash = deco.path[0] === "/"; - deco.trailing_dash = deco.path[deco.path.length - 1] === "/"; + deco.trailing_slash = deco.path[deco.path.length - 1] === "/"; if (deco.domain) { - if (deco.domain.length > MAX_DOMAIN_CHARS) { + if (deco.domain.length > max.DOMAIN_CHARS) { const idx = deco.domain.indexOf(".", deco.domain.length - - MAX_DOMAIN_CHARS); + max.DOMAIN_CHARS); if (idx === -1) deco.domain = []; else @@ -93,7 +110,7 @@ function deconstruct_url(url) deco.domain_truncated = true; } - if (deco.path.length > MAX_URL_PATH_CHARS) { + if (deco.path.length > max.URL_PATH_CHARS) { deco.path = deco.path.substring(0, deco.path.lastIndexOf("/")); deco.path_truncated = true; } @@ -101,16 +118,14 @@ function deconstruct_url(url) if (typeof deco.domain === "string") { deco.domain = deco.domain.split("."); - if (deco.domain.splice(0, deco.domain.length - MAX_DOMAIN_LEN).length + if (deco.domain.splice(0, deco.domain.length - max.DOMAIN_LEN).length > 0) deco.domain_truncated = true; } deco.path = deco.path.split("/").filter(s => s !== ""); - if (deco.domain && deco.path.splice(MAX_URL_PATH_LEN).length > 0) + if (deco.domain && deco.path.splice(max.URL_PATH_LEN).length > 0) deco.path_truncated = true; - if (leading_dash || deco.path.length === 0) - deco.path.unshift(""); return deco; } @@ -132,13 +147,14 @@ function* each_domain_pattern(deco) function* each_path_pattern(deco) { - for (let slice = deco.path.length; slice > 0; slice--) { - const path_part = deco.path.slice(0, slice).join("/"); + for (let slice = deco.path.length; slice >= 0; slice--) { + const path_part = ["", ...deco.path.slice(0, slice)].join("/"); const path_wildcards = []; if (slice === deco.path.length && !deco.path_truncated) { - if (deco.trailing_dash) + if (deco.trailing_slash) yield path_part + "/"; - yield path_part; + if (slice > 0 || deco.proto !== "file") + yield path_part; } if (slice === deco.path.length - 1 && !deco.path_truncated && deco.path[slice] !== "*") @@ -171,5 +187,6 @@ function* each_url_pattern(url) /* * EXPORTS_START * EXPORT each_url_pattern + * EXPORT deconstruct_url * EXPORTS_END */ |