/**
 * Hydrilla/Lernette operations on page url patterns
 *
 * Copyright (C) 2021 Wojtek Kosior
 * Redistribution terms are gathered in the `copyright' file.
 */

/*
 * Pieces of the regular expression used to split a URL (or a URL pattern)
 * into protocol, domain, path and query. Anything from the first `#' on is
 * matched but not captured.
 */
const proto_re = "[a-zA-Z]*://";
const domain_re = "[^/?#]+";
const segments_re = "/[^?#]*";
const query_re = "\\?[^#]*";

/*
 * Kept on a single line on purpose: the pattern must not contain any
 * whitespace, and backslash-newline continuations inside a template literal
 * silently turn into literal spaces if the line breaks get lost.
 */
const url_regex = new RegExp(
    `^(${proto_re})(${domain_re})(${segments_re})?(${query_re})?#?.*$`
);

/*
 * Split `url' into its components.
 *
 * Returns undefined when `url' does not match url_regex. Otherwise returns
 * an object with:
 *   proto              - protocol including the "://" part,
 *   domain             - array of dot-separated domain labels,
 *   path               - array of non-empty path segments, always preceded
 *                        by one empty string (so that join("/") yields a
 *                        path starting with a slash),
 *   query              - query string including the leading "?", or
 *                        undefined when there is none,
 *   path_trailing_dash - true when the path part ends with "/".
 */
function deconstruct_url(url)
{
    const regex_match = url_regex.exec(url);
    if (regex_match === null)
        return undefined;

    const [, proto, domain_string, path_string, query] = regex_match;

    const domain = domain_string.split(".");

    const path_trailing_dash =
          path_string !== undefined && path_string.endsWith("/");

    /* Repeated slashes produce empty segments; those are dropped. */
    const path = (path_string || "").split("/").filter(s => s !== "");
    path.unshift("");

    return {proto, domain, path, query, path_trailing_dash};
}

/*
 * Tell whether the domain of a URL matches the domain of a pattern. Labels
 * are compared right-to-left; only the leftmost pattern label is treated as
 * a wildcard:
 *   "*"   - URL has exactly as many labels as the pattern,
 *   "**"  - URL has more labels than the pattern,
 *   "***" - URL has at least as many labels as the pattern, or exactly one
 *           label fewer (the wildcard then stands for nothing).
 *
 * Be sane: both arguments should be arrays of length >= 2
 */
function domain_matches(url_domain, pattern_domain)
{
    const length_difference = url_domain.length - pattern_domain.length;

    for (let i = 1; i <= url_domain.length; i++) {
        const url_part = url_domain[url_domain.length - i];
        const pattern_part = pattern_domain[pattern_domain.length - i];

        /* Reached the leftmost label of the pattern. */
        if (pattern_domain.length === i) {
            if (pattern_part === "*")
                return length_difference === 0;
            if (pattern_part === "**")
                return length_difference > 0;
            if (pattern_part === "***")
                return true;
            return length_difference === 0 && pattern_part === url_part;
        }

        if (pattern_part !== url_part)
            return false;
    }

    /*
     * All URL labels got consumed; the only pattern that can still match is
     * one with a single extra leading "***" label.
     */
    return pattern_domain.length === url_domain.length + 1 &&
        pattern_domain[0] === "***";
}

/*
 * Tell whether the path of a URL matches the path of a pattern. Both paths
 * are segment arrays as produced by deconstruct_url(). Segments are compared
 * left-to-right; only the last pattern segment is treated as a wildcard:
 *   "*"   - URL has exactly as many segments as the pattern,
 *   "**"  - URL has more segments than the pattern (or the URL segment at
 *           that position is literally "**"),
 *   "***" - URL has at least as many segments as the pattern, or exactly
 *           one segment fewer (the wildcard then stands for nothing).
 *
 * A pattern with a trailing slash only matches literally (or through the
 * literal "**" case) when the URL has a trailing slash as well.
 */
function path_matches(url_path, url_trailing_dash,
                      pattern_path, pattern_trailing_dash)
{
    const dashes_ok = !(pattern_trailing_dash && !url_trailing_dash);

    if (pattern_path.length === 0)
        return url_path.length === 0 && dashes_ok;

    const length_difference = url_path.length - pattern_path.length;

    for (let i = 0; i < url_path.length; i++) {
        /* Reached the last segment of the pattern. */
        if (pattern_path.length === i + 1) {
            if (pattern_path[i] === "*")
                return length_difference === 0;
            if (pattern_path[i] === "**") {
                return length_difference > 0 ||
                    (url_path[i] === "**" && dashes_ok);
            }
            if (pattern_path[i] === "***")
                return length_difference >= 0;
            return length_difference === 0 &&
                pattern_path[i] === url_path[i] && dashes_ok;
        }

        if (pattern_path[i] !== url_path[i])
            return false;
    }

    /*
     * All URL segments got consumed. Mirror domain_matches(): a single extra
     * trailing "***" segment is allowed to stand for zero segments. Without
     * this, the "<full path>/***" candidates produced by
     * for_each_possible_pattern() would never match the URL they were
     * generated from.
     */
    return length_difference === -1 &&
        pattern_path[pattern_path.length - 1] === "***";
}

/*
 * Tell whether `url' is matched by `pattern'. Protocols have to be
 * identical; domains and paths are compared with domain_matches() and
 * path_matches(). Query strings are not taken into account. When either
 * argument cannot be parsed, a message is logged and false is returned.
 */
function url_matches(url, pattern)
{
    const url_deco = deconstruct_url(url);
    const pattern_deco = deconstruct_url(pattern);

    if (url_deco === undefined || pattern_deco === undefined) {
        console.log(`bad comparison: ${url} and ${pattern}`);
        return false;
    }

    if (pattern_deco.proto !== url_deco.proto)
        return false;

    return domain_matches(url_deco.domain, pattern_deco.domain) &&
        path_matches(url_deco.path, url_deco.path_trailing_dash,
                     pattern_deco.path, pattern_deco.path_trailing_dash);
}

/*
 * Call callback for every candidate pattern derived from url, starting with
 * the ones that keep the most of the url's domain and path. Return when
 * there are no more patterns or callback returns false. When url cannot be
 * parsed, a message is logged and callback is never called.
 */
function for_each_possible_pattern(url, callback)
{
    const deco = deconstruct_url(url);

    if (deco === undefined) {
        console.log("bad url format", url);
        return;
    }

    for (let d_slice = 0; d_slice < deco.domain.length; d_slice++) {
        /* The rightmost labels that are kept literally. */
        const domain_part = deco.domain.slice(d_slice).join(".");

        const domain_wildcards = [];
        if (d_slice === 0)
            domain_wildcards.push("");
        if (d_slice === 1)
            domain_wildcards.push("*.");
        /*
         * NOTE(review): "**." is already emitted when just one label was
         * dropped, whereas domain_matches() only accepts "**" in place of
         * two or more labels - confirm which one is intended.
         */
        if (d_slice > 0)
            domain_wildcards.push("**.");
        domain_wildcards.push("***.");

        for (const domain_wildcard of domain_wildcards) {
            const domain_pattern = domain_wildcard + domain_part;

            for (let s_slice = deco.path.length; s_slice > 0; s_slice--) {
                /*
                 * deco.path[0] is "", so this is either "" or a path that
                 * starts with a slash.
                 */
                const path_part = deco.path.slice(0, s_slice).join("/");

                const path_wildcards = [];
                if (s_slice === deco.path.length) {
                    if (deco.path_trailing_dash)
                        path_wildcards.push("/");
                    path_wildcards.push("");
                }
                /*
                 * Skip the wildcard when the dropped segment is literally
                 * "*" - the resulting pattern was already produced above.
                 */
                if (s_slice === deco.path.length - 1 &&
                    deco.path[s_slice] !== "*")
                    path_wildcards.push("/*");
                /*
                 * NOTE(review): as with domains, "/**" is emitted when only
                 * one segment was dropped, which path_matches() does not
                 * accept - confirm which one is intended.
                 */
                if (s_slice < deco.path.length &&
                    (deco.path[s_slice] !== "**" ||
                     s_slice < deco.path.length - 1))
                    path_wildcards.push("/**");
                /*
                 * NOTE(review): this condition is always true (for
                 * s_slice === deco.path.length the indexed segment is
                 * undefined); presumably the second operand was meant to be
                 * `s_slice < deco.path.length - 1'. Left as is.
                 */
                if (deco.path[s_slice] !== "***" ||
                    s_slice < deco.path.length)
                    path_wildcards.push("/***");

                for (const path_wildcard of path_wildcards) {
                    const path_pattern = path_part + path_wildcard;

                    const pattern = deco.proto + domain_pattern + path_pattern;

                    if (callback(pattern) === false)
                        return;
                }
            }
        }
    }
}

/*
 * Return an array with all patterns for_each_possible_pattern() produces
 * for url, in the same order. The array is empty when url cannot be parsed.
 */
function possible_patterns(url)
{
    const patterns = [];
    /*
     * Array.prototype.push must not be passed directly - it would get called
     * without its `this'. The arrow function returns the new array length,
     * which is never `false', so iteration is not cut short.
     */
    for_each_possible_pattern(url, pattern => patterns.push(pattern));

    return patterns;
}

/*
 * EXPORTS_START
 * EXPORT url_matches
 * EXPORT for_each_possible_pattern
 * EXPORT possible_patterns
 * EXPORTS_END
 */