diff options
-rw-r--r-- | background/policy_injector.js | 23 | ||||
-rw-r--r-- | common/misc.js | 27 | ||||
-rw-r--r-- | content/main.js | 99 | ||||
-rw-r--r-- | content/sanitize_document.js | 209 |
4 files changed, 256 insertions, 102 deletions
diff --git a/background/policy_injector.js b/background/policy_injector.js index 702f879..3398b53 100644 --- a/background/policy_injector.js +++ b/background/policy_injector.js @@ -18,19 +18,12 @@ * IMPORT query_best * IMPORT sanitize_csp_header * IMPORT csp_rule + * IMPORT is_csp_header_name * IMPORTS_END */ var storage; -const csp_header_names = new Set([ - "content-security-policy", - "x-webkit-csp", - "x-content-security-policy" -]); - -const report_only = "content-security-policy-report-only"; - function headers_inject(details) { const url = details.url; @@ -40,7 +33,6 @@ function headers_inject(details) const [pattern, settings] = query_best(storage, url); const allow = !!(settings && settings.allow); const nonce = gen_nonce(); - const rule = `'nonce-${nonce}'`; let orig_csp_headers; let old_signature; @@ -70,20 +62,19 @@ function headers_inject(details) } orig_csp_headers = orig_csp_headers || - headers.filter(h => csp_header_names.has(h.name.toLowerCase())); - headers = headers.filter(h => !csp_header_names.has(h.name.toLowerCase())); + headers.filter(h => is_csp_header_name(h.name)); - /* Remove headers that only snitch on us */ - if (!allow) - headers = headers.filter(h => h.name.toLowerCase() !== report_only); + /* When blocking remove report-only CSP headers that snitch on us. */ + headers = headers.filter(h => !is_csp_header_name(h.name, !allow)); if (old_signature) headers = headers.filter(h => h.name.search(old_signature) === -1); - const sanitizer = h => sanitize_csp_header(h, rule, allow); + const policy_object = {allow, nonce, url}; + const sanitizer = h => sanitize_csp_header(h, policy_object); headers.push(...orig_csp_headers.map(sanitizer)); - const policy = encodeURIComponent(JSON.stringify({allow, nonce, url})); + const policy = encodeURIComponent(JSON.stringify(policy_object)); const policy_signature = sign_data(policy, new Date()); const later_30sec = new Date(new Date().getTime() + 30000).toGMTString(); headers.push({ diff --git a/common/misc.js b/common/misc.js index 6e825d6..8894d60 100644 --- a/common/misc.js +++ b/common/misc.js @@ -78,6 +78,23 @@ function csp_rule(nonce) return `script-src ${rule}; script-src-elem ${rule}; script-src-attr 'none'; prefetch-src 'none';`; } +/* Check if some HTTP header might define CSP rules. */ +const csp_header_names = new Set([ + "content-security-policy", + "x-webkit-csp", + "x-content-security-policy" +]); + +const report_only_header_name = "content-security-policy-report-only"; + +function is_csp_header_name(string, include_report_only) +{ + string = string && string.toLowerCase() || ""; + + return (include_report_only && string === report_only_header_name) || + csp_header_names.has(string); +} + /* * Print item together with type, e.g. * nice_name("s", "hello") → "hello (script)" @@ -127,11 +144,12 @@ function parse_csp(csp) { } /* Make CSP headers do our bidding, not interfere */ -function sanitize_csp_header(header, rule, allow) +function sanitize_csp_header(header, policy) { + const rule = `'nonce-${policy.nonce}'`; const csp = parse_csp(header.value); - if (!allow) { + if (!policy.allow) { /* No snitching */ delete csp['report-to']; delete csp['report-uri']; @@ -153,11 +171,11 @@ function sanitize_csp_header(header, rule, allow) else csp['script-src-elem'] = [rule]; - const new_policy = Object.entries(csp).map( + const new_csp = Object.entries(csp).map( i => `${i[0]} ${i[1].join(' ')};` ); - return {name: header.name, value: new_policy.join('')}; + return {name: header.name, value: new_csp.join('')}; } /* Regexes and objest to use as/in schemas for parse_json_with_schema(). */ @@ -178,6 +196,7 @@ const matchers = { * EXPORT extract_signed * EXPORT sign_data * EXPORT csp_rule + * EXPORT is_csp_header_name * EXPORT nice_name * EXPORT open_in_settings * EXPORT is_privileged_url diff --git a/content/main.js b/content/main.js index 164ebe3..441636c 100644 --- a/content/main.js +++ b/content/main.js @@ -11,87 +11,24 @@ * IMPORT handle_page_actions * IMPORT extract_signed * IMPORT gen_nonce - * IMPORT csp_rule * IMPORT is_privileged_url - * IMPORT sanitize_attributes * IMPORT mozilla_suppress_scripts * IMPORT is_chrome * IMPORT is_mozilla * IMPORT start_activity_info_server + * IMPORT modify_on_the_fly * IMPORTS_END */ -/* - * Due to some technical limitations the chosen method of whitelisting sites - * is to smuggle whitelist indicator in page's url as a "magical" string - * after '#'. Right now this is only supplemental in HTTP(s) pages where - * blocking of native scripts also happens through CSP header injection but is - * necessary for protocols like ftp:// and file://. - * - * The code that actually injects the magical string into ftp:// and file:// - * urls has not yet been added to the extension. - */ - -var nonce = undefined; - -function handle_mutation(mutations, observer) -{ - if (document.readyState === 'complete') { - console.log("mutation handling complete"); - observer.disconnect(); - return; - } - for (const mutation of mutations) { - for (const node of mutation.addedNodes) - block_node(node); - } -} - -function block_nodes_recursively(node) -{ - block_node(node); - for (const child of node.children) - block_nodes_recursively(child); -} - -function block_node(node) +function accept_node(node, parent) { + const clone = document.importNode(node, false); + node.hachette_corresponding = clone; /* - * Modifying <script> element doesn't always prevent its execution in some - * Mozilla browsers. This is Chromium-specific code. + * TODO: Stop page's own issues like "Error parsing a meta element's + * content:" from appearing as extension's errors. */ - if (node.tagName === "SCRIPT") { - block_script(node); - return; - } - - sanitize_attributes(node); - - if (node.tagName === "HEAD") - inject_csp(node); -} - -function block_script(node) -{ - /* - * Disabling scripts this way allows them to still be relatively - * easily accessed in case they contain some useful data. - */ - if (node.hasAttribute("type")) - node.setAttribute("blocked-type", node.getAttribute("type")); - node.setAttribute("type", "application/json"); -} - -function inject_csp(head) -{ - let meta = document.createElement("meta"); - meta.setAttribute("http-equiv", "Content-Security-Policy"); - meta.setAttribute("content", csp_rule(nonce)); - - if (head.firstElementChild === null) - head.appendChild(meta); - else - head.insertBefore(meta, head.firstElementChild); + parent.hachette_corresponding.appendChild(clone); } if (!is_privileged_url(document.URL)) { @@ -110,20 +47,18 @@ if (!is_privileged_url(document.URL)) { handle_page_actions(policy.nonce); - if (!policy.allow) { - block_nodes_recursively(document.documentElement); + if (!policy.allow && is_mozilla) + addEventListener('beforescriptexecute', mozilla_suppress_scripts, true); - if (is_chrome) { - var observer = new MutationObserver(handle_mutation); - observer.observe(document.documentElement, { - attributes: true, - childList: true, - subtree: true - }); - } + if (!policy.allow && is_chrome) { + const old_html = document.documentElement; + const new_html = document.createElement("html"); + old_html.replaceWith(new_html); + old_html.hachette_corresponding = new_html; - if (is_mozilla) - addEventListener('beforescriptexecute', mozilla_suppress_scripts, true); + const modify_end = + modify_on_the_fly(old_html, policy, {node_eater: accept_node}); + document.addEventListener("DOMContentLoaded", modify_end); } start_activity_info_server(); diff --git a/content/sanitize_document.js b/content/sanitize_document.js new file mode 100644 index 0000000..1533526 --- /dev/null +++ b/content/sanitize_document.js @@ -0,0 +1,209 @@ +/** + * Hachette modify HTML document as it loads and reconstruct HTML code from it + * + * Copyright (C) 2021 Wojtek Kosior + * Redistribution terms are gathered in the `copyright' file. + */ + +/* + * IMPORTS_START + * IMPORT gen_nonce + * IMPORT csp_rule + * IMPORT is_csp_header_name + * IMPORT sanitize_csp_header + * IMPORT sanitize_attributes + * IMPORTS_END + */ + +/* + * Functions that sanitize elements. The script blocking measures are, when + * possible, going to be applied together with CSP rules injected using + * webRequest. + */ + +const blocked = "blocked"; + +function block_attribute(node, attr) +{ + /* + * Disabling attributed this way allows them to still be relatively + * easily accessed in case they contain some useful data. + */ + + const construct_name = [attr]; + while (node.hasAttribute(construct_name.join(""))) + construct_name.unshift(blocked); + + while (construct_name.length > 1) { + construct_name.shift(); + const name = construct_name.join(""); + node.setAttribute(`${blocked}-${name}`, node.getAttribute(name)); + } + + node.removeAttribute(attr); +} + +function sanitize_script(script, policy) +{ + if (policy.allow) + return; + + block_attribute(script, "type"); + script.setAttribute("type", "application/json"); +} + +function inject_csp(head, policy) +{ + if (policy.allow) + return; + + const meta = document.createElement("meta"); + meta.setAttribute("http-equiv", "Content-Security-Policy"); + meta.setAttribute("content", csp_rule(policy.nonce)); + meta.hachette_ignore = true; + head.prepend(meta); +} + +function sanitize_http_equiv_csp_rule(meta, policy) +{ + const http_equiv = meta.getAttribute("http-equiv"); + + if (!is_csp_header_name(http_equiv, !policy.allow)) + return; + + if (policy.allow || is_csp_header_name(http_equiv, false)) { + let value = meta.getAttribute("content"); + block_attribute(meta, "content"); + if (value) { + value = sanitize_csp_header({value}, policy).value; + meta.setAttribute("content", value); + } + return; + } + + block_attribute(meta, "http-equiv"); +} + +function sanitize_node(node, policy) +{ + if (node.tagName === "SCRIPT") + sanitize_script(node, policy); + + if (node.tagName === "HEAD") + inject_csp(node, policy); + + if (node.tagName === "META") + sanitize_http_equiv_csp_rule(node, policy); + + if (!policy.allow) + sanitize_attributes(node, policy); +} + +const serializer = new XMLSerializer(); + +function start_node(node, data) +{ + if (!data.writer) + return; + + node.hachette_started = true; + const clone = node.cloneNode(false); + clone.textContent = data.uniq; + data.writer(data.uniq_reg.exec(clone.outerHTML)[1]); +} + +function finish_node(node, data) +{ + const nodes_to_process = [node]; + + while (true) { + node = nodes_to_process.pop(); + if (!node) + break; + + nodes_to_process.push(node, node.hachette_last_added); + } + + while (nodes_to_process.length > 0) { + const node = nodes_to_process.pop(); + node.remove(); + + if (!data.writer) + continue; + + if (node.hachette_started) { + node.textContent = data.uniq; + data.writer(data.uniq_reg.exec(node.outerHTML)[2]); + continue; + } + + data.writer(node.outerHTML || serializer.serializeToString(node)); + } +} + +/* + * Important! Due to some weirdness node.parentElement is not alway correct + * under Chromium. Track node relations manually. + */ +function handle_added_node(node, true_parent, data) +{ + if (node.hachette_ignore || true_parent.hachette_ignore) + return; + + if (!true_parent.hachette_started) + start_node(true_parent, data) + + sanitize_node(node, data.policy); + + if (data.node_eater) + data.node_eater(node, true_parent); + + finish_node(true_parent.hachette_last_added, data); + + true_parent.hachette_last_added = node; +} + +function handle_mutation(mutations, data) +{ + /* + * Chromium: for an unknown reason mutation.target is not always the same as + * node.parentElement. The former is the correct one. + */ + for (const mutation of mutations) { + for (const node of mutation.addedNodes) + handle_added_node(node, mutation.target, data); + } +} + +function finish_processing(data) +{ + handle_mutation(data.observer.takeRecords(), data); + finish_node(data.html_element, data); + data.observer.disconnect(); +} + +function modify_on_the_fly(html_element, policy, consumers) +{ + const uniq = gen_nonce(); + const uniq_reg = new RegExp(`^(.*)${uniq}(.*)$`); + const data = {policy, html_element, uniq, uniq_reg, ...consumers}; + + start_node(data.html_element, data); + + var observer = new MutationObserver(m => handle_mutation(m, data)); + observer.observe(data.html_element, { + attributes: true, + childList: true, + subtree: true + }); + + data.observer = observer; + + return () => finish_processing(data); +} + +/* + * EXPORTS_START + * EXPORT modify_on_the_fly + * EXPORTS_END + */ |