From 538376341e9a50ebd350897fe26f43c433f0ee06 Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Fri, 27 Aug 2021 10:01:32 +0200 Subject: enable whitelisting of `file://' protocol\n\nThis commit additionally also changes the semantics of triple asterisk wildcard in URL path. --- common/misc.js | 12 ++-- common/patterns.js | 160 ++++++++++++++++++++++------------------------- common/settings_query.js | 27 ++++---- 3 files changed, 94 insertions(+), 105 deletions(-) (limited to 'common') diff --git a/common/misc.js b/common/misc.js index d6b9662..fd70f62 100644 --- a/common/misc.js +++ b/common/misc.js @@ -84,11 +84,13 @@ function open_in_settings(prefix, name) window.open(url, "_blank"); } -/* Check if url corresponds to a browser's special page */ -function is_privileged_url(url) -{ - return !!/^(chrome(-extension)?|moz-extension):\/\/|^about:/i.exec(url); -} +/* + * Check if url corresponds to a browser's special page (or a directory index in + * case of `file://' protocol). + */ +const privileged_reg = + /^(chrome(-extension)?|moz-extension):\/\/|^about:|^file:\/\/.*\/$/; +const is_privileged_url = url => privileged_reg.test(url); /* Parse a CSP header */ function parse_csp(csp) { diff --git a/common/patterns.js b/common/patterns.js index be7c650..0a322b0 100644 --- a/common/patterns.js +++ b/common/patterns.js @@ -5,35 +5,41 @@ * Redistribution terms are gathered in the `copyright' file. */ -const proto_re = "[a-zA-Z]*:\/\/"; +const proto_regex = /^(\w+):\/\/(.*)$/; + const domain_re = "[^/?#]+"; -const segments_re = "/[^?#]*"; -const query_re = "\\?[^#]*"; - -const url_regex = new RegExp(`\ -^\ -(${proto_re})\ -(${domain_re})\ -(${segments_re})?\ -(${query_re})?\ -#?.*\$\ -`); +const path_re = "[^?#]*"; +const query_re = "\\??[^#]*"; + +const http_regex = new RegExp(`^(${domain_re})(${path_re})(${query_re}).*`); + +const file_regex = new RegExp(`^(${path_re}).*`); function deconstruct_url(url) { - const regex_match = url_regex.exec(url); - if (regex_match === null) + const proto_match = proto_regex.exec(url); + if (proto_match === null) return undefined; - let [_, proto, domain, path, query] = regex_match; + const deco = {proto: proto_match[1]}; - domain = domain.split("."); - let path_trailing_dash = - path && path[path.length - 1] === "/"; - path = (path || "").split("/").filter(s => s !== ""); - path.unshift(""); + if (deco.proto === "file") { + deco.path = file_regex.exec(proto_match[2])[1]; + } else { + const http_match = http_regex.exec(proto_match[2]); + if (!http_match) + return undefined; + [deco.domain, deco.path, deco.query] = http_match.slice(1, 4); + deco.domain = deco.domain.split("."); + } - return {proto, domain, path, query, path_trailing_dash}; + const leading_dash = deco.path[0] === "/"; + deco.trailing_dash = deco.path[deco.path.length - 1] === "/"; + deco.path = deco.path.split("/").filter(s => s !== ""); + if (leading_dash || deco.path.length === 0) + deco.path.unshift(""); + + return deco; } /* Be sane: both arguments should be arrays of length >= 2 */ @@ -104,84 +110,70 @@ function url_matches(url, pattern) return false } - if (pattern_deco.proto !== url_deco.proto) - return false; - - return domain_matches(url_deco.domain, pattern_deco.domain) && - path_matches(url_deco.path, url_deco.path_trailing_dash, - pattern_deco.path, pattern_deco.path_trailing_dash); + return pattern_deco.proto === url_deco.proto && + !(pattern_deco.proto === "file" && pattern_deco.trailing_dash) && + !!url_deco.domain === !!pattern_deco.domain && + (!url_deco.domain || + domain_matches(url_deco.domain, pattern_deco.domain)) && + path_matches(url_deco.path, url_deco.trailing_dash, + pattern_deco.path, pattern_deco.trailing_dash); } -/* - * Call callback for every possible pattern that matches url. Return when there - * are no more patterns or callback returns false. - */ -function for_each_possible_pattern(url, callback) +function* each_domain_pattern(domain_segments) { - const deco = deconstruct_url(url); - - if (deco === undefined) { - console.log("bad url format", url); - return; + for (let slice = 0; slice < domain_segments.length; slice++) { + const domain_part = domain_segments.slice(slice).join("."); + const domain_wildcards = []; + if (slice === 0) + yield domain_part; + if (slice === 1) + yield "*." + domain_part; + if (slice > 1) + yield "**." + domain_part; + yield "***." + domain_part; } +} - for (let d_slice = 0; d_slice < deco.domain.length; d_slice++) { - const domain_part = deco.domain.slice(d_slice).join("."); - const domain_wildcards = []; - if (d_slice === 0) - domain_wildcards.push(""); - if (d_slice === 1) - domain_wildcards.push("*."); - if (d_slice > 0) - domain_wildcards.push("**."); - domain_wildcards.push("***."); - - for (const domain_wildcard of domain_wildcards) { - const domain_pattern = domain_wildcard + domain_part; - - for (let s_slice = deco.path.length; s_slice > 0; s_slice--) { - const path_part = deco.path.slice(0, s_slice).join("/"); - const path_wildcards = []; - if (s_slice === deco.path.length) { - if (deco.path_trailing_dash) - path_wildcards.push("/"); - path_wildcards.push(""); - } - if (s_slice === deco.path.length - 1 && - deco.path[s_slice] !== "*") - path_wildcards.push("/*"); - if (s_slice < deco.path.length && - (deco.path[s_slice] !== "**" || - s_slice < deco.path.length - 1)) - path_wildcards.push("/**"); - if (deco.path[s_slice] !== "***" || s_slice < deco.path.length) - path_wildcards.push("/***"); - - for (const path_wildcard of path_wildcards) { - const path_pattern = path_part + path_wildcard; - - const pattern = deco.proto + domain_pattern + path_pattern; - - if (callback(pattern) === false) - return; - } - } +function* each_path_pattern(path_segments, trailing_dash) +{ + for (let slice = path_segments.length; slice > 0; slice--) { + const path_part = path_segments.slice(0, slice).join("/"); + const path_wildcards = []; + if (slice === path_segments.length) { + if (trailing_dash) + yield path_part + "/"; + yield path_part; } + if (slice === path_segments.length - 1 && path_segments[slice] !== "*") + yield path_part + "/*"; + if (slice < path_segments.length - 1) + yield path_part + "/**"; + if (slice < path_segments.length - 1 || + path_segments[path_segments.length - 1] !== "***") + yield path_part + "/***"; } } -function possible_patterns(url) +/* Generate every possible pattern that matches url. */ +function* each_url_pattern(url) { - const patterns = []; - for_each_possible_pattern(url, patterns.push); + const deco = deconstruct_url(url); - return patterns; + if (deco === undefined) { + console.log("bad url format", url); + return false; + } + + const all_domains = deco.domain ? each_domain_pattern(deco.domain) : [""]; + for (const domain of all_domains) { + for (const path of each_path_pattern(deco.path, deco.trailing_dash)) + yield `${deco.proto}://${domain}${path}`; + } } /* * EXPORTS_START * EXPORT url_matches - * EXPORT for_each_possible_pattern - * EXPORT possible_patterns + * EXPORT each_url_pattern * EXPORTS_END */ diff --git a/common/settings_query.js b/common/settings_query.js index e85ae63..b54e580 100644 --- a/common/settings_query.js +++ b/common/settings_query.js @@ -8,30 +8,25 @@ /* * IMPORTS_START * IMPORT TYPE_PREFIX - * IMPORT for_each_possible_pattern + * IMPORT each_url_pattern * IMPORTS_END */ -function check_pattern(storage, pattern, multiple, matched) -{ - const settings = storage.get(TYPE_PREFIX.PAGE, pattern); - - if (settings === undefined) - return; - - matched.push([pattern, settings]); - - if (!multiple) - return false; -} - function query(storage, url, multiple) { const matched = []; const cb = p => check_pattern(storage, p, multiple, matched); - for_each_possible_pattern(url, cb); + for (const pattern of each_url_pattern(url)) { + const result = [pattern, storage.get(TYPE_PREFIX.PAGE, pattern)]; + if (result[1] === undefined) + continue; + + if (!multiple) + return result; + matched.push(result); + } - return multiple ? matched : (matched[0] || [undefined, undefined]); + return multiple ? matched : [undefined, undefined]; } function query_best(storage, url) -- cgit v1.2.3