diff options
Diffstat (limited to 'common')
-rw-r--r-- | common/patterns.js | 141 |
1 files changed, 47 insertions, 94 deletions
diff --git a/common/patterns.js b/common/patterns.js index ebb55ab..ae29fcd 100644 --- a/common/patterns.js +++ b/common/patterns.js @@ -5,6 +5,11 @@ * Redistribution terms are gathered in the `copyright' file. */ +const MAX_URL_PATH_LEN = 12; +const MAX_URL_PATH_CHARS = 255; +const MAX_DOMAIN_LEN = 7; +const MAX_DOMAIN_CHARS = 100; + const proto_regex = /^(\w+):\/\/(.*)$/; const user_re = "[^/?#@]+@" @@ -37,103 +42,51 @@ function deconstruct_url(url) [deco.domain, deco.path, deco.query] = http_match.slice(1, 4); } - if (deco.domain) - deco.domain = deco.domain.split("."); - const leading_dash = deco.path[0] === "/"; deco.trailing_dash = deco.path[deco.path.length - 1] === "/"; - deco.path = deco.path.split("/").filter(s => s !== ""); - if (leading_dash || deco.path.length === 0) - deco.path.unshift(""); - return deco; -} + if (deco.domain) { + if (deco.domain.length > MAX_DOMAIN_CHARS) { + const idx = deco.domain.indexOf(".", deco.domain.length - + MAX_DOMAIN_CHARS); + if (idx === -1) + deco.domain = []; + else + deco.domain = deco.domain.substring(idx + 1); -/* Be sane: both arguments should be arrays of length >= 2 */ -function domain_matches(url_domain, pattern_domain) -{ - const length_difference = url_domain.length - pattern_domain.length; - - for (let i = 1; i <= url_domain.length; i++) { - const url_part = url_domain[url_domain.length - i]; - const pattern_part = pattern_domain[pattern_domain.length - i]; - - if (pattern_domain.length === i) { - if (pattern_part === "*") - return length_difference === 0; - if (pattern_part === "**") - return length_difference > 0; - if (pattern_part === "***") - return true; - return length_difference === 0 && pattern_part === url_part; + deco.domain_truncated = true; } - if (pattern_part !== url_part) - return false; - } - - return pattern_domain.length === url_domain.length + 1 && - pattern_domain[0] === "***"; -} - -function path_matches(url_path, url_trailing_dash, - pattern_path, pattern_trailing_dash) -{ - const dashes_ok = !(pattern_trailing_dash && !url_trailing_dash); - - if (pattern_path.length === 0) - return url_path.length === 0 && dashes_ok; - - const length_difference = url_path.length - pattern_path.length; - - for (let i = 0; i < url_path.length; i++) { - if (pattern_path.length === i + 1) { - if (pattern_path[i] === "*") - return length_difference === 0; - if (pattern_path[i] === "**") { - return length_difference > 0 || - (url_path[i] === "**" && dashes_ok); - } - if (pattern_path[i] === "***") - return length_difference >= 0; - return length_difference === 0 && - pattern_path[i] === url_path[i] && dashes_ok; + if (deco.path.length > MAX_URL_PATH_CHARS) { + deco.path = deco.path.substring(0, deco.path.lastIndexOf("/")); + deco.path_truncated = true; } - - if (pattern_path[i] !== url_path[i]) - return false; } - return false; -} - -function url_matches(url, pattern) -{ - const url_deco = deconstruct_url(url); - const pattern_deco = deconstruct_url(pattern); - - if (url_deco === undefined || pattern_deco === undefined) { - console.log(`bad comparison: ${url} and ${pattern}`); - return false + if (typeof deco.domain === "string") { + deco.domain = deco.domain.split("."); + if (deco.domain.splice(0, deco.domain.length - MAX_DOMAIN_LEN).length + > 0) + deco.domain_truncated = true; } - return pattern_deco.proto === url_deco.proto && - !(pattern_deco.proto === "file" && pattern_deco.trailing_dash) && - !!url_deco.domain === !!pattern_deco.domain && - (!url_deco.domain || - domain_matches(url_deco.domain, pattern_deco.domain)) && - path_matches(url_deco.path, url_deco.trailing_dash, - pattern_deco.path, pattern_deco.trailing_dash); + deco.path = deco.path.split("/").filter(s => s !== ""); + if (deco.domain && deco.path.splice(MAX_URL_PATH_LEN).length > 0) + deco.path_truncated = true; + if (leading_dash || deco.path.length === 0) + deco.path.unshift(""); + + return deco; } -function* each_domain_pattern(domain_segments) +function* each_domain_pattern(deco) { - for (let slice = 0; slice < domain_segments.length; slice++) { - const domain_part = domain_segments.slice(slice).join("."); + for (let slice = 0; slice < deco.domain.length - 1; slice++) { + const domain_part = deco.domain.slice(slice).join("."); const domain_wildcards = []; - if (slice === 0) + if (slice === 0 && !deco.domain_truncated) yield domain_part; - if (slice === 1) + if (slice === 1 && !deco.domain_truncated) yield "*." + domain_part; if (slice > 1) yield "**." + domain_part; @@ -141,22 +94,23 @@ function* each_domain_pattern(domain_segments) } } -function* each_path_pattern(path_segments, trailing_dash) +function* each_path_pattern(deco) { - for (let slice = path_segments.length; slice > 0; slice--) { - const path_part = path_segments.slice(0, slice).join("/"); + for (let slice = deco.path.length; slice > 0; slice--) { + const path_part = deco.path.slice(0, slice).join("/"); const path_wildcards = []; - if (slice === path_segments.length) { - if (trailing_dash) + if (slice === deco.path.length && !deco.path_truncated) { + if (deco.trailing_dash) yield path_part + "/"; yield path_part; } - if (slice === path_segments.length - 1 && path_segments[slice] !== "*") + if (slice === deco.path.length - 1 && !deco.path_truncated && + deco.path[slice] !== "*") yield path_part + "/*"; - if (slice < path_segments.length - 1) + if (slice < deco.path.length - 1) yield path_part + "/**"; - if (slice < path_segments.length - 1 || - path_segments[path_segments.length - 1] !== "***") + if (slice !== deco.path.length - 1 || deco.path_truncated || + deco.path[slice] !== "***") yield path_part + "/***"; } } @@ -167,20 +121,19 @@ function* each_url_pattern(url) const deco = deconstruct_url(url); if (deco === undefined) { - console.log("bad url format", url); + console.error("bad url format", url); return false; } - const all_domains = deco.domain ? each_domain_pattern(deco.domain) : [""]; + const all_domains = deco.domain ? each_domain_pattern(deco) : [""]; for (const domain of all_domains) { - for (const path of each_path_pattern(deco.path, deco.trailing_dash)) + for (const path of each_path_pattern(deco)) yield `${deco.proto}://${domain}${path}`; } } /* * EXPORTS_START - * EXPORT url_matches * EXPORT each_url_pattern * EXPORTS_END */ |