about | summary | refs | log | tree | commit | diff
path: root/common
diff options
context:
space:
mode:
Diffstat (limited to 'common')
-rw-r--r-- common/patterns.js 141
1 files changed, 47 insertions, 94 deletions
diff --git a/common/patterns.js b/common/patterns.js
index ebb55ab..ae29fcd 100644
--- a/common/patterns.js
+++ b/common/patterns.js
@@ -5,6 +5,11 @@
* Redistribution terms are gathered in the `copyright' file.
*/
+const MAX_URL_PATH_LEN = 12;
+const MAX_URL_PATH_CHARS = 255;
+const MAX_DOMAIN_LEN = 7;
+const MAX_DOMAIN_CHARS = 100;
+
const proto_regex = /^(\w+):\/\/(.*)$/;
const user_re = "[^/?#@]+@"
@@ -37,103 +42,51 @@ function deconstruct_url(url)
[deco.domain, deco.path, deco.query] = http_match.slice(1, 4);
}
- if (deco.domain)
- deco.domain = deco.domain.split(".");
-
const leading_dash = deco.path[0] === "/";
deco.trailing_dash = deco.path[deco.path.length - 1] === "/";
- deco.path = deco.path.split("/").filter(s => s !== "");
- if (leading_dash || deco.path.length === 0)
- deco.path.unshift("");
- return deco;
-}
+ if (deco.domain) {
+ if (deco.domain.length > MAX_DOMAIN_CHARS) {
+ const idx = deco.domain.indexOf(".", deco.domain.length -
+ MAX_DOMAIN_CHARS);
+ if (idx === -1)
+ deco.domain = [];
+ else
+ deco.domain = deco.domain.substring(idx + 1);
-/* Be sane: both arguments should be arrays of length >= 2 */
-function domain_matches(url_domain, pattern_domain)
-{
- const length_difference = url_domain.length - pattern_domain.length;
-
- for (let i = 1; i <= url_domain.length; i++) {
- const url_part = url_domain[url_domain.length - i];
- const pattern_part = pattern_domain[pattern_domain.length - i];
-
- if (pattern_domain.length === i) {
- if (pattern_part === "*")
- return length_difference === 0;
- if (pattern_part === "**")
- return length_difference > 0;
- if (pattern_part === "***")
- return true;
- return length_difference === 0 && pattern_part === url_part;
+ deco.domain_truncated = true;
}
- if (pattern_part !== url_part)
- return false;
- }
-
- return pattern_domain.length === url_domain.length + 1 &&
- pattern_domain[0] === "***";
-}
-
-function path_matches(url_path, url_trailing_dash,
- pattern_path, pattern_trailing_dash)
-{
- const dashes_ok = !(pattern_trailing_dash && !url_trailing_dash);
-
- if (pattern_path.length === 0)
- return url_path.length === 0 && dashes_ok;
-
- const length_difference = url_path.length - pattern_path.length;
-
- for (let i = 0; i < url_path.length; i++) {
- if (pattern_path.length === i + 1) {
- if (pattern_path[i] === "*")
- return length_difference === 0;
- if (pattern_path[i] === "**") {
- return length_difference > 0 ||
- (url_path[i] === "**" && dashes_ok);
- }
- if (pattern_path[i] === "***")
- return length_difference >= 0;
- return length_difference === 0 &&
- pattern_path[i] === url_path[i] && dashes_ok;
+ if (deco.path.length > MAX_URL_PATH_CHARS) {
+ deco.path = deco.path.substring(0, deco.path.lastIndexOf("/"));
+ deco.path_truncated = true;
}
-
- if (pattern_path[i] !== url_path[i])
- return false;
}
- return false;
-}
-
-function url_matches(url, pattern)
-{
- const url_deco = deconstruct_url(url);
- const pattern_deco = deconstruct_url(pattern);
-
- if (url_deco === undefined || pattern_deco === undefined) {
- console.log(`bad comparison: ${url} and ${pattern}`);
- return false
+ if (typeof deco.domain === "string") {
+ deco.domain = deco.domain.split(".");
+ if (deco.domain.splice(0, deco.domain.length - MAX_DOMAIN_LEN).length
+ > 0)
+ deco.domain_truncated = true;
}
- return pattern_deco.proto === url_deco.proto &&
- !(pattern_deco.proto === "file" && pattern_deco.trailing_dash) &&
- !!url_deco.domain === !!pattern_deco.domain &&
- (!url_deco.domain ||
- domain_matches(url_deco.domain, pattern_deco.domain)) &&
- path_matches(url_deco.path, url_deco.trailing_dash,
- pattern_deco.path, pattern_deco.trailing_dash);
+ deco.path = deco.path.split("/").filter(s => s !== "");
+ if (deco.domain && deco.path.splice(MAX_URL_PATH_LEN).length > 0)
+ deco.path_truncated = true;
+ if (leading_dash || deco.path.length === 0)
+ deco.path.unshift("");
+
+ return deco;
}
-function* each_domain_pattern(domain_segments)
+function* each_domain_pattern(deco)
{
- for (let slice = 0; slice < domain_segments.length; slice++) {
- const domain_part = domain_segments.slice(slice).join(".");
+ for (let slice = 0; slice < deco.domain.length - 1; slice++) {
+ const domain_part = deco.domain.slice(slice).join(".");
const domain_wildcards = [];
- if (slice === 0)
+ if (slice === 0 && !deco.domain_truncated)
yield domain_part;
- if (slice === 1)
+ if (slice === 1 && !deco.domain_truncated)
yield "*." + domain_part;
if (slice > 1)
yield "**." + domain_part;
@@ -141,22 +94,23 @@ function* each_domain_pattern(domain_segments)
}
}
-function* each_path_pattern(path_segments, trailing_dash)
+function* each_path_pattern(deco)
{
- for (let slice = path_segments.length; slice > 0; slice--) {
- const path_part = path_segments.slice(0, slice).join("/");
+ for (let slice = deco.path.length; slice > 0; slice--) {
+ const path_part = deco.path.slice(0, slice).join("/");
const path_wildcards = [];
- if (slice === path_segments.length) {
- if (trailing_dash)
+ if (slice === deco.path.length && !deco.path_truncated) {
+ if (deco.trailing_dash)
yield path_part + "/";
yield path_part;
}
- if (slice === path_segments.length - 1 && path_segments[slice] !== "*")
+ if (slice === deco.path.length - 1 && !deco.path_truncated &&
+ deco.path[slice] !== "*")
yield path_part + "/*";
- if (slice < path_segments.length - 1)
+ if (slice < deco.path.length - 1)
yield path_part + "/**";
- if (slice < path_segments.length - 1 ||
- path_segments[path_segments.length - 1] !== "***")
+ if (slice !== deco.path.length - 1 || deco.path_truncated ||
+ deco.path[slice] !== "***")
yield path_part + "/***";
}
}
@@ -167,20 +121,19 @@ function* each_url_pattern(url)
const deco = deconstruct_url(url);
if (deco === undefined) {
- console.log("bad url format", url);
+ console.error("bad url format", url);
return false;
}
- const all_domains = deco.domain ? each_domain_pattern(deco.domain) : [""];
+ const all_domains = deco.domain ? each_domain_pattern(deco) : [""];
for (const domain of all_domains) {
- for (const path of each_path_pattern(deco.path, deco.trailing_dash))
+ for (const path of each_path_pattern(deco))
yield `${deco.proto}://${domain}${path}`;
}
}
/*
* EXPORTS_START
- * EXPORT url_matches
* EXPORT each_url_pattern
* EXPORTS_END
*/