diff options
Diffstat (limited to 'content/main.js')
-rw-r--r-- | content/main.js | 228 |
1 files changed, 156 insertions, 72 deletions
diff --git a/content/main.js b/content/main.js index 3ebf093..cec9943 100644 --- a/content/main.js +++ b/content/main.js @@ -1,5 +1,7 @@ /** - * Hachette main content script run in all frames + * This file is part of Haketilo. + * + * Function: Main content script that runs in all frames. * * Copyright (C) 2021 Wojtek Kosior * Copyright (C) 2021 jahoti @@ -13,23 +15,27 @@ * IMPORT sign_data * IMPORT gen_nonce * IMPORT is_privileged_url - * IMPORT mozilla_suppress_scripts * IMPORT is_chrome * IMPORT is_mozilla * IMPORT start_activity_info_server * IMPORT make_csp_rule - * IMPORT is_csp_header_name - * IMPORT sanitize_csp_header + * IMPORT csp_header_regex * IMPORTS_END */ +document.content_loaded = document.readyState === "complete"; +const wait_loaded = e => e.content_loaded ? Promise.resolve() : + new Promise(c => e.addEventListener("DOMContentLoaded", c, {once: true})); + +wait_loaded(document).then(() => document.content_loaded = true); + function extract_cookie_policy(cookie, min_time) { let best_result = {time: -1}; let policy = null; const extracted_signatures = []; - for (const match of cookie.matchAll(/hachette-(\w*)=([^;]*)/g)) { + for (const match of cookie.matchAll(/haketilo-(\w*)=([^;]*)/g)) { const new_result = extract_signed(...match.slice(1, 3)); if (new_result.fail) continue; @@ -56,7 +62,7 @@ function extract_url_policy(url, min_time) const [base_url, payload, anchor] = /^([^#]*)#?([^#]*)(#?.*)$/.exec(url).splice(1, 4); - const match = /^hachette_([^_]+)_(.*)$/.exec(payload); + const match = /^haketilo_([^_]+)_(.*)$/.exec(payload); if (!match) return [null, url]; @@ -79,7 +85,7 @@ function employ_nonhttp_policy(policy) policy.nonce = gen_nonce(); const [base_url, target] = /^([^#]*)(#?.*)$/.exec(policy.url).slice(1, 3); const encoded_policy = encodeURIComponent(JSON.stringify(policy)); - const payload = "hachette_" + + const payload = "haketilo_" + sign_data(encoded_policy, new Date().getTime()).join("_"); const resulting_url = `${base_url}#${payload}${target}`; location.href = resulting_url; @@ -87,18 +93,17 @@ function employ_nonhttp_policy(policy) } /* + * In the case of HTML documents: * 1. When injecting some payload we need to sanitize <meta> CSP tags before * they reach the document. * 2. Only <meta> tags inside <head> are considered valid by the browser and * need to be considered. * 3. We want to detach <html> from document, wait until its <head> completes * loading, sanitize it and re-attach <html>. - * 4. Browsers are eager to add <meta>'s that appear after `</head>' but before - * `<body>'. Due to this behavior the `DOMContentLoaded' event is considered - * unreliable (although it could still work properly, it is just problematic - * to verify). - * 5. We shall wait for anything to appear in or after <body> and take that as - * a sign <head> has _really_ finished loading. + * 4. We shall wait for anything to appear in or after <body> and take that as + * a sign <head> has finished loading. + * 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also + * be a sign that <head> is fully loaded. */ function make_body_start_observer(DOM_element, waiting) @@ -124,20 +129,23 @@ function try_body_started(waiting) function finish_waiting(waiting) { + if (waiting.finished) + return; + waiting.finished = true; waiting.observers.forEach(observer => observer.disconnect()); - waiting.doc.removeEventListener("DOMContentLoaded", waiting.loaded_cb); setTimeout(waiting.callback, 0); } function _wait_for_head(doc, detached_html, callback) { const waiting = {doc, detached_html, callback, observers: []}; + if (try_body_started(waiting)) return; waiting.observers = [make_body_start_observer(detached_html, waiting)]; - waiting.loaded_cb = () => finish_waiting(waiting); - doc.addEventListener("DOMContentLoaded", waiting.loaded_cb); + + wait_loaded(doc).then(() => finish_waiting(waiting)); } function wait_for_head(doc, detached_html) @@ -147,105 +155,176 @@ function wait_for_head(doc, detached_html) const blocked_str = "blocked"; -function block_attribute(node, attr) +function block_attribute(node, attr, ns=null) { + const [hasa, geta, seta, rema] = ["has", "get", "set", "remove"] + .map(m => (n, ...args) => typeof ns === "string" ? + n[`${m}AttributeNS`](ns, ...args) : n[`${m}Attribute`](...args)); /* - * Disabling attributes this way allows them to still be relatively - * easily accessed in case they contain some useful data. + * Disabling attributes by prepending `-blocked' allows them to still be + * relatively easily accessed in case they contain some useful data. */ const construct_name = [attr]; - while (node.hasAttribute(construct_name.join(""))) + while (hasa(node, construct_name.join(""))) construct_name.unshift(blocked_str); while (construct_name.length > 1) { construct_name.shift(); const name = construct_name.join(""); - node.setAttribute(`${blocked_str}-${name}`, node.getAttribute(name)); + seta(node, `${blocked_str}-${name}`, geta(node, name)); } - node.removeAttribute(attr); + rema(node, attr); } -function sanitize_meta(meta, policy) +/* + * Used to disable `<script>'s and `<meta>'s that have not yet been added to + * live DOM (doesn't work for those already added). + */ +function sanitize_meta(meta) { - const http_equiv = meta.getAttribute("http-equiv"); - const value = meta.content; - - if (!value || !is_csp_header_name(http_equiv, true)) - return; - - block_attribute(meta, "content"); + if (csp_header_regex.test(meta.httpEquiv) && meta.content) + block_attribute(meta, "content"); } function sanitize_script(script) { - script.hachette_blocked_type = script.type; + script.haketilo_blocked_type = script.getAttribute("type"); script.type = "text/plain"; } /* - * Executed after script has been connected to the DOM, when it is no longer - * eligible for being executed by the browser + * Executed after `<script>' has been connected to the DOM, when it is no longer + * eligible for being executed by the browser. */ -function desanitize_script(script, policy) +function desanitize_script(script) { - script.setAttribute("type", script.hachette_blocked_type); + script.setAttribute("type", script.haketilo_blocked_type); - if (script.hachette_blocked_type === undefined) + if ([null, undefined].includes(script.haketilo_blocked_type)) script.removeAttribute("type"); - delete script.hachette_blocked_type; + delete script.haketilo_blocked_type; +} + +const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)/i; +function sanitize_urls(element) +{ + for (const attr of [...element.attributes || []] + .filter(attr => /^(href|src|data)$/i.test(attr.localName)) + .filter(attr => bad_url_reg.test(attr.value))) + block_attribute(element, attr.localName, attr.namespaceURI); +} + +function start_data_urls_sanitizing(doc) +{ + doc.querySelectorAll("*[href], *[src], *[data]").forEach(sanitize_urls); + if (!doc.content_loaded) { + const mutation_handler = m => m.addedNodes.forEach(sanitize_urls); + const mo = new MutationObserver(ms => ms.forEach(mutation_handler)); + mo.observe(doc, {childList: true, subtree: true}); + wait_loaded(doc).then(() => mo.disconnect()); + } +} + +/* + * Normally, we block scripts with CSP. However, Mozilla does optimizations that + * cause part of the DOM to be loaded when our content scripts get to run. Thus, + * before the CSP rules we inject (for non-HTTP pages) become effective, we need + * to somehow block the execution of `<script>'s and intrinsics that were + * already there. Additionally, some browsers (IceCat 60) seem to have problems + * applying this CSP to non-inline `<scripts>' in certain scenarios. + */ +function prevent_script_execution(event) +{ + if (!event.target.haketilo_payload) + event.preventDefault(); } -function apply_hachette_csp_rules(doc, policy) +function mozilla_initial_block(doc) { - const meta = doc.createElement("meta"); - meta.setAttribute("http-equiv", "Content-Security-Policy"); - meta.setAttribute("content", make_csp_rule(policy)); - doc.head.append(meta); - /* CSP is already in effect, we can remove the <meta> now. */ - meta.remove(); + doc.addEventListener("beforescriptexecute", prevent_script_execution); + + for (const elem of doc.querySelectorAll("*")) { + [...elem.attributes].map(attr => attr.localName) + .filter(attr => /^on/.test(attr) && elem.wrappedJSObject[attr]) + .forEach(attr => elem.wrappedJSObject[attr] = null); + } } +/* + * Here we block all scripts of a document which might be either and + * HTMLDocument or an XMLDocument. Modifying an XML document might disrupt + * Mozilla's XML preview. This is an unfortunate thing we have to accept for + * now. XML documents *have to* be sanitized as well because they might + * contain `<script>' tags (or on* attributes) with namespace declared as + * "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows + * javascript execution. + */ async function sanitize_document(doc, policy) { /* + * Blocking of scripts that are in the DOM from the beginning. Needed for + * Mozilla. + */ + if (is_mozilla) + mozilla_initial_block(doc); + + /* * Ensure our CSP rules are employed from the beginning. This CSP injection * method is, when possible, going to be applied together with CSP rules * injected using webRequest. + * Using elements namespaced as HTML makes this CSP injection also work for + * non-HTML documents. */ - const has_own_head = doc.head; - if (!has_own_head) - doc.documentElement.prepend(doc.createElement("head")); - - apply_hachette_csp_rules(doc, policy); - - /* Probably not needed, but...: proceed with DOM in its initial state. */ - if (!has_own_head) - doc.head.remove(); + const html = new DOMParser().parseFromString(`<html><head><meta \ +http-equiv="Content-Security-Policy" content="${make_csp_rule(policy)}"\ +/></head><body>Loading...</body></html>`, "text/html").documentElement; /* - * <html> node gets hijacked now, to be re-attached after <head> is loaded + * Root node gets hijacked now, to be re-attached after <head> is loaded * and sanitized. */ - const old_html = doc.documentElement; - const new_html = doc.createElement("html"); - old_html.replaceWith(new_html); + const root = doc.documentElement; + root.replaceWith(html); - await wait_for_head(doc, old_html); + /* + * When we don't inject payload, we neither block document's CSP `<meta>' + * tags nor wait for `<head>' to be parsed. + */ + if (policy.has_payload) { + await wait_for_head(doc, root); - for (const meta of old_html.querySelectorAll("head meta")) - sanitize_meta(meta, policy); + root.querySelectorAll("head meta") + .forEach(m => sanitize_meta(m, policy)); + } - if (!policy.allow) - for (const script of old_html.querySelectorAll("script")) - sanitize_script(script, policy); + root.querySelectorAll("script").forEach(s => sanitize_script(s, policy)); + html.replaceWith(root); + root.querySelectorAll("script").forEach(s => desanitize_script(s, policy)); - new_html.replaceWith(old_html); + start_data_urls_sanitizing(doc); +} - if (!policy.allow) - for (const script of old_html.querySelectorAll("script")) - desanitize_script(script, policy); +async function disable_service_workers() +{ + if (!navigator.serviceWorker) + return; + + const registrations = await navigator.serviceWorker.getRegistrations(); + if (registrations.length === 0) + return; + + console.warn("Service Workers detected on this page! Unregistering and reloading"); + + try { + await Promise.all(registrations.map(r => r.unregister())); + } finally { + location.reload(); + } + + /* Never actually return! */ + return new Promise(() => 0); } if (!is_privileged_url(document.URL)) { @@ -259,7 +338,7 @@ if (!is_privileged_url(document.URL)) { let signatures; [policy, signatures] = extract_cookie_policy(document.cookie, min_time); for (const signature of signatures) - document.cookie = `hachette-${signature}=; Max-Age=-1;`; + document.cookie = `haketilo-${signature}=; Max-Age=-1;`; } else { const scheme = /^([^:]*)/.exec(document.URL)[1]; const known_scheme = ["file", "ftp"].includes(scheme); @@ -276,14 +355,19 @@ if (!is_privileged_url(document.URL)) { } if (!policy) { - console.warn("Using fallback policy!"); + console.debug("Using fallback policy!"); policy = {allow: false, nonce: gen_nonce()}; } + if (!(document instanceof HTMLDocument)) + policy.has_payload = false; + + console.debug("current policy", policy); + const doc_ready = Promise.all([ - (policy.allow && !policy.has_payload) ? Promise.resolve : sanitize_document(document, policy), - new Promise(cb => document.addEventListener("DOMContentLoaded", - cb, {once: true})) + policy.allow ? Promise.resolve() : sanitize_document(document, policy), + policy.allow ? Promise.resolve() : disable_service_workers(), + wait_loaded(document) ]); handle_page_actions(policy.nonce, policy_received_callback, doc_ready); |