From 538376341e9a50ebd350897fe26f43c433f0ee06 Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Fri, 27 Aug 2021 10:01:32 +0200 Subject: enable whitelisting of `file://' protocol\n\nThis commit also changes the semantics of triple asterisk wildcard in URL path. --- common/misc.js | 12 ++-- common/patterns.js | 160 ++++++++++++++++++++++------------------------- common/settings_query.js | 27 ++++---- content/freezer.js | 1 + content/main.js | 86 ++++++++++++++++++++++--- content/page_actions.js | 22 ++++--- html/display-panel.js | 5 +- 7 files changed, 190 insertions(+), 123 deletions(-) diff --git a/common/misc.js b/common/misc.js index d6b9662..fd70f62 100644 --- a/common/misc.js +++ b/common/misc.js @@ -84,11 +84,13 @@ function open_in_settings(prefix, name) window.open(url, "_blank"); } -/* Check if url corresponds to a browser's special page */ -function is_privileged_url(url) -{ - return !!/^(chrome(-extension)?|moz-extension):\/\/|^about:/i.exec(url); -} +/* + * Check if url corresponds to a browser's special page (or a directory index in + * case of `file://' protocol). + */ +const privileged_reg = + /^(chrome(-extension)?|moz-extension):\/\/|^about:|^file:\/\/.*\/$/; +const is_privileged_url = url => privileged_reg.test(url); /* Parse a CSP header */ function parse_csp(csp) { diff --git a/common/patterns.js b/common/patterns.js index be7c650..0a322b0 100644 --- a/common/patterns.js +++ b/common/patterns.js @@ -5,35 +5,41 @@ * Redistribution terms are gathered in the `copyright' file. 
*/ -const proto_re = "[a-zA-Z]*:\/\/"; +const proto_regex = /^(\w+):\/\/(.*)$/; + const domain_re = "[^/?#]+"; -const segments_re = "/[^?#]*"; -const query_re = "\\?[^#]*"; - -const url_regex = new RegExp(`\ -^\ -(${proto_re})\ -(${domain_re})\ -(${segments_re})?\ -(${query_re})?\ -#?.*\$\ -`); +const path_re = "[^?#]*"; +const query_re = "\\??[^#]*"; + +const http_regex = new RegExp(`^(${domain_re})(${path_re})(${query_re}).*`); + +const file_regex = new RegExp(`^(${path_re}).*`); function deconstruct_url(url) { - const regex_match = url_regex.exec(url); - if (regex_match === null) + const proto_match = proto_regex.exec(url); + if (proto_match === null) return undefined; - let [_, proto, domain, path, query] = regex_match; + const deco = {proto: proto_match[1]}; - domain = domain.split("."); - let path_trailing_dash = - path && path[path.length - 1] === "/"; - path = (path || "").split("/").filter(s => s !== ""); - path.unshift(""); + if (deco.proto === "file") { + deco.path = file_regex.exec(proto_match[2])[1]; + } else { + const http_match = http_regex.exec(proto_match[2]); + if (!http_match) + return undefined; + [deco.domain, deco.path, deco.query] = http_match.slice(1, 4); + deco.domain = deco.domain.split("."); + } - return {proto, domain, path, query, path_trailing_dash}; + const leading_dash = deco.path[0] === "/"; + deco.trailing_dash = deco.path[deco.path.length - 1] === "/"; + deco.path = deco.path.split("/").filter(s => s !== ""); + if (leading_dash || deco.path.length === 0) + deco.path.unshift(""); + + return deco; } /* Be sane: both arguments should be arrays of length >= 2 */ @@ -104,84 +110,70 @@ function url_matches(url, pattern) return false } - if (pattern_deco.proto !== url_deco.proto) - return false; - - return domain_matches(url_deco.domain, pattern_deco.domain) && - path_matches(url_deco.path, url_deco.path_trailing_dash, - pattern_deco.path, pattern_deco.path_trailing_dash); + return pattern_deco.proto === url_deco.proto && + 
!(pattern_deco.proto === "file" && pattern_deco.trailing_dash) && + !!url_deco.domain === !!pattern_deco.domain && + (!url_deco.domain || + domain_matches(url_deco.domain, pattern_deco.domain)) && + path_matches(url_deco.path, url_deco.trailing_dash, + pattern_deco.path, pattern_deco.trailing_dash); } -/* - * Call callback for every possible pattern that matches url. Return when there - * are no more patterns or callback returns false. - */ -function for_each_possible_pattern(url, callback) +function* each_domain_pattern(domain_segments) { - const deco = deconstruct_url(url); - - if (deco === undefined) { - console.log("bad url format", url); - return; + for (let slice = 0; slice < domain_segments.length; slice++) { + const domain_part = domain_segments.slice(slice).join("."); + const domain_wildcards = []; + if (slice === 0) + yield domain_part; + if (slice === 1) + yield "*." + domain_part; + if (slice > 1) + yield "**." + domain_part; + yield "***." + domain_part; } +} - for (let d_slice = 0; d_slice < deco.domain.length; d_slice++) { - const domain_part = deco.domain.slice(d_slice).join("."); - const domain_wildcards = []; - if (d_slice === 0) - domain_wildcards.push(""); - if (d_slice === 1) - domain_wildcards.push("*."); - if (d_slice > 0) - domain_wildcards.push("**."); - domain_wildcards.push("***."); - - for (const domain_wildcard of domain_wildcards) { - const domain_pattern = domain_wildcard + domain_part; - - for (let s_slice = deco.path.length; s_slice > 0; s_slice--) { - const path_part = deco.path.slice(0, s_slice).join("/"); - const path_wildcards = []; - if (s_slice === deco.path.length) { - if (deco.path_trailing_dash) - path_wildcards.push("/"); - path_wildcards.push(""); - } - if (s_slice === deco.path.length - 1 && - deco.path[s_slice] !== "*") - path_wildcards.push("/*"); - if (s_slice < deco.path.length && - (deco.path[s_slice] !== "**" || - s_slice < deco.path.length - 1)) - path_wildcards.push("/**"); - if (deco.path[s_slice] !== "***" || 
s_slice < deco.path.length) - path_wildcards.push("/***"); - - for (const path_wildcard of path_wildcards) { - const path_pattern = path_part + path_wildcard; - - const pattern = deco.proto + domain_pattern + path_pattern; - - if (callback(pattern) === false) - return; - } - } +function* each_path_pattern(path_segments, trailing_dash) +{ + for (let slice = path_segments.length; slice > 0; slice--) { + const path_part = path_segments.slice(0, slice).join("/"); + const path_wildcards = []; + if (slice === path_segments.length) { + if (trailing_dash) + yield path_part + "/"; + yield path_part; } + if (slice === path_segments.length - 1 && path_segments[slice] !== "*") + yield path_part + "/*"; + if (slice < path_segments.length - 1) + yield path_part + "/**"; + if (slice < path_segments.length - 1 || + path_segments[path_segments.length - 1] !== "***") + yield path_part + "/***"; } } -function possible_patterns(url) +/* Generate every possible pattern that matches url. */ +function* each_url_pattern(url) { - const patterns = []; - for_each_possible_pattern(url, patterns.push); + const deco = deconstruct_url(url); - return patterns; + if (deco === undefined) { + console.log("bad url format", url); + return false; + } + + const all_domains = deco.domain ? 
each_domain_pattern(deco.domain) : [""]; + for (const domain of all_domains) { + for (const path of each_path_pattern(deco.path, deco.trailing_dash)) + yield `${deco.proto}://${domain}${path}`; + } } /* * EXPORTS_START * EXPORT url_matches - * EXPORT for_each_possible_pattern - * EXPORT possible_patterns + * EXPORT each_url_pattern * EXPORTS_END */ diff --git a/common/settings_query.js b/common/settings_query.js index e85ae63..b54e580 100644 --- a/common/settings_query.js +++ b/common/settings_query.js @@ -8,30 +8,25 @@ /* * IMPORTS_START * IMPORT TYPE_PREFIX - * IMPORT for_each_possible_pattern + * IMPORT each_url_pattern * IMPORTS_END */ -function check_pattern(storage, pattern, multiple, matched) -{ - const settings = storage.get(TYPE_PREFIX.PAGE, pattern); - - if (settings === undefined) - return; - - matched.push([pattern, settings]); - - if (!multiple) - return false; -} - function query(storage, url, multiple) { const matched = []; const cb = p => check_pattern(storage, p, multiple, matched); - for_each_possible_pattern(url, cb); + for (const pattern of each_url_pattern(url)) { + const result = [pattern, storage.get(TYPE_PREFIX.PAGE, pattern)]; + if (result[1] === undefined) + continue; + + if (!multiple) + return result; + matched.push(result); + } - return multiple ? matched : (matched[0] || [undefined, undefined]); + return multiple ? 
matched : [undefined, undefined]; } function query_best(storage, url) diff --git a/content/freezer.js b/content/freezer.js index 9dbc95e..0ea362e 100644 --- a/content/freezer.js +++ b/content/freezer.js @@ -49,6 +49,7 @@ function mozilla_suppress_scripts(e) { console.log('Script suppressor has detached.'); return; } + console.log("script event", e); if (e.isTrusted && !e.target._hachette_payload) { e.preventDefault(); console.log('Suppressed script', e.target); diff --git a/content/main.js b/content/main.js index 984b3cb..06d3bf1 100644 --- a/content/main.js +++ b/content/main.js @@ -10,6 +10,7 @@ * IMPORTS_START * IMPORT handle_page_actions * IMPORT extract_signed + * IMPORT sign_data * IMPORT gen_nonce * IMPORT is_privileged_url * IMPORT mozilla_suppress_scripts @@ -31,13 +32,13 @@ function accept_node(node, parent) parent.hachette_corresponding.appendChild(clone); } -if (!is_privileged_url(document.URL)) { - /* Signature valid for half an hour. */ - const min_time = new Date().getTime() - 1800 * 1000; +function extract_cookie_policy(cookie, min_time) +{ let best_result = {time: -1}; let policy = null; const extracted_signatures = []; - for (const match of document.cookie.matchAll(/hachette-(\w*)=([^;]*)/g)) { + + for (const match of cookie.matchAll(/hachette-(\w*)=([^;]*)/g)) { const new_result = extract_signed(...match.slice(1, 3)); if (new_result.fail) continue; @@ -56,17 +57,84 @@ if (!is_privileged_url(document.URL)) { policy = new_policy; } + return [policy, extracted_signatures]; +} + +function extract_url_policy(url, min_time) +{ + const [base_url, payload, anchor] = + /^([^#]*)#?([^#]*)(#?.*)$/.exec(url).splice(1, 4); + + const match = /^hachette_([^_]+)_(.*)$/.exec(payload); + if (!match) + return [null, url]; + + const result = extract_signed(...match.slice(1, 3)); + if (result.fail) + return [null, url]; + + const original_url = base_url + anchor; + const policy = result.time < min_time ? 
null : + JSON.parse(decodeURIComponent(result.data)); + + return [policy.url === original_url ? policy : null, original_url]; +} + +function employ_nonhttp_policy(policy) +{ + if (!policy.allow) + return; + + policy.nonce = gen_nonce(); + const [base_url, target] = /^([^#]*)(#?.*)$/.exec(policy.url).slice(1, 3); + const encoded_policy = encodeURIComponent(JSON.stringify(policy)); + const payload = "hachette_" + + sign_data(encoded_policy, new Date().getTime()).join("_"); + const resulting_url = `${base_url}#${payload}${target}`; + location.href = resulting_url; + location.reload(); +} + +if (!is_privileged_url(document.URL)) { + let policy_received_callback = () => undefined; + let policy; + + /* Signature valid for half an hour. */ + const min_time = new Date().getTime() - 1800 * 1000; + + if (/^https?:/.test(document.URL)) { + let signatures; + [policy, signatures] = extract_cookie_policy(document.cookie, min_time); + for (const signature of signatures) + document.cookie = `hachette-${signature}=; Max-Age=-1;`; + } else { + const scheme = /^([^:]*)/.exec(document.URL)[1]; + const known_scheme = ["file"].includes(scheme); + + if (!known_scheme) + console.warn(`Unknown url scheme: \`${scheme}'!`); + + let original_url; + [policy, original_url] = extract_url_policy(document.URL, min_time); + history.replaceState(null, "", original_url); + + if (known_scheme && !policy) + policy_received_callback = employ_nonhttp_policy; + } + if (!policy) { - console.warn("WARNING! 
Using default policy!!!"); + console.warn("Using default policy!"); policy = {allow: false, nonce: gen_nonce()}; } - for (const signature of extracted_signatures) - document.cookie = `hachette-${signature}=; Max-Age=-1;`; - - handle_page_actions(policy.nonce); + handle_page_actions(policy.nonce, policy_received_callback); if (!policy.allow) { + if (is_mozilla) { + const script = document.querySelector("script"); + if (script) + script.textContent = "throw 'blocked';\n" + script.textContent; + } const old_html = document.documentElement; const new_html = document.createElement("html"); old_html.replaceWith(new_html); diff --git a/content/page_actions.js b/content/page_actions.js index aff56b8..6a6b3a0 100644 --- a/content/page_actions.js +++ b/content/page_actions.js @@ -14,10 +14,13 @@ * IMPORTS_END */ -var port; -var loaded = false; -var scripts_awaiting = []; -var nonce; +let policy_received_callback; +/* Snapshot url early because document.URL can be changed by other code. */ +let url; +let port; +let loaded = false; +let scripts_awaiting = []; +let nonce; function handle_message(message) { @@ -31,8 +34,10 @@ function handle_message(message) scripts_awaiting.push(script_text); } } - if (action === "settings") + if (action === "settings") { report_settings(data); + policy_received_callback({url, allow: !!data[1] && data[1].allow}); + } } function document_loaded(event) @@ -56,11 +61,14 @@ function add_script(script_text) report_script(script_text); } -function handle_page_actions(script_nonce) { +function handle_page_actions(script_nonce, policy_received_cb) { + policy_received_callback = policy_received_cb; + url = document.URL; + document.addEventListener("DOMContentLoaded", document_loaded); port = browser.runtime.connect({name : CONNECTION_TYPE.PAGE_ACTIONS}); port.onMessage.addListener(handle_message); - port.postMessage({url: document.URL}); + port.postMessage({url}); nonce = script_nonce; } diff --git a/html/display-panel.js b/html/display-panel.js index 
2539ded..bc190ac 100644 --- a/html/display-panel.js +++ b/html/display-panel.js @@ -20,7 +20,7 @@ * IMPORT TYPE_PREFIX * IMPORT nice_name * IMPORT open_in_settings - * IMPORT for_each_possible_pattern + * IMPORT each_url_pattern * IMPORT by_id * IMPORT clone_template * IMPORTS_END @@ -127,7 +127,8 @@ function handle_page_change(change) function populate_possible_patterns_list(url) { - for_each_possible_pattern(url, add_pattern_to_list); + for (const pattern of each_url_pattern(url)) + add_pattern_to_list(pattern); for (const [pattern, settings] of query_all(storage, url)) { set_pattern_li_button_text(ensure_pattern_exists(pattern), -- cgit v1.2.3