diff options
Diffstat (limited to 'content')
-rw-r--r-- | content/activity_info_server.js | 17 | ||||
-rw-r--r-- | content/freezer.js | 63 | ||||
-rw-r--r-- | content/main.js | 339 | ||||
-rw-r--r-- | content/page_actions.js | 50 | ||||
-rw-r--r-- | content/repo_query.js | 5 |
5 files changed, 313 insertions, 161 deletions
diff --git a/content/activity_info_server.js b/content/activity_info_server.js index 81a25fb..aa92b75 100644 --- a/content/activity_info_server.js +++ b/content/activity_info_server.js @@ -1,7 +1,8 @@ /** - * part of Hachette - * Informing about activities performed by content script (script injection, - * script blocking). + * This file is part of Haketilo. + * + * Function: Informing the popup about what happens in the content script + * (script injection, script blocking, etc.). * * Copyright (C) 2021 Wojtek Kosior * Redistribution terms are gathered in the `copyright' file. @@ -41,7 +42,14 @@ function report_script(script_data) function report_settings(settings) { - report_activity("settings", settings); + const settings_clone = {}; + Object.assign(settings_clone, settings) + report_activity("settings", settings_clone); +} + +function report_document_type(is_html) +{ + report_activity("is_html", is_html); } function report_repo_query_action(update, port) @@ -91,5 +99,6 @@ function start_activity_info_server() * EXPORT start_activity_info_server * EXPORT report_script * EXPORT report_settings + * EXPORT report_document_type * EXPORTS_END */ diff --git a/content/freezer.js b/content/freezer.js deleted file mode 100644 index 9dbc95e..0000000 --- a/content/freezer.js +++ /dev/null @@ -1,63 +0,0 @@ -/** - * Helper functions for blocking scripts in pages, based off NoScript's lib/DocumentFreezer.js - * - * Copyright (C) 2005-2021 Giorgio Maone - https://maone.net - * Copyright (C) 2021 jahoti - * Redistribution terms are gathered in the `copyright' file. - */ - -const loaderAttributes = ["href", "src", "data"]; -const jsOrDataUrlRx = /^(?:data:(?:[^,;]*ml|unknown-content-type)|javascript:)/i; - -function sanitize_attributes(element) { - if (element._frozen) - return; - let fa = []; - let loaders = []; - let attributes = element.attributes || []; - - for (let a of attributes) { - let name = a.localName.toLowerCase(); - if (loaderAttributes.includes(name)) - if (jsOrDataUrlRx.test(a.value)) - loaders.push(a); - - else if (name.startsWith("on")) { - console.debug("Removing", a, element.outerHTML); - fa.push(a.cloneNode()); - a.value = ""; - element[name] = null; - } - } - if (loaders.length) { - for (let a of loaders) { - fa.push(a.cloneNode()); - a.value = "javascript://frozen"; - } - if ("contentWindow" in element) - element.replaceWith(element = element.cloneNode(true)); - - } - if (fa.length) - element._frozenAttributes = fa; - element._frozen = true; -} - -function mozilla_suppress_scripts(e) { - if (document.readyState === 'complete') { - removeEventListener('beforescriptexecute', blockExecute, true); - console.log('Script suppressor has detached.'); - return; - } - if (e.isTrusted && !e.target._hachette_payload) { - e.preventDefault(); - console.log('Suppressed script', e.target); - } -}; - -/* - * EXPORTS_START - * EXPORT mozilla_suppress_scripts - * EXPORT sanitize_attributes - * EXPORTS_END - */ diff --git a/content/main.js b/content/main.js index 9ed557c..ce1ff7a 100644 --- a/content/main.js +++ b/content/main.js @@ -1,5 +1,7 @@ /** - * Hachette main content script run in all frames + * This file is part of Haketilo. + * + * Function: Main content script that runs in all frames. * * Copyright (C) 2021 Wojtek Kosior * Copyright (C) 2021 jahoti @@ -9,123 +11,310 @@ /* * IMPORTS_START * IMPORT handle_page_actions - * IMPORT url_extract_target - * IMPORT gen_unique * IMPORT gen_nonce - * IMPORT csp_rule * IMPORT is_privileged_url - * IMPORT sanitize_attributes - * IMPORT mozilla_suppress_scripts + * IMPORT browser * IMPORT is_chrome * IMPORT is_mozilla * IMPORT start_activity_info_server + * IMPORT make_csp_rule + * IMPORT csp_header_regex + * IMPORT report_settings * IMPORTS_END */ +document.content_loaded = document.readyState === "complete"; +const wait_loaded = e => e.content_loaded ? Promise.resolve() : + new Promise(c => e.addEventListener("DOMContentLoaded", c, {once: true})); + +wait_loaded(document).then(() => document.content_loaded = true); + /* - * Due to some technical limitations the chosen method of whitelisting sites - * is to smuggle whitelist indicator in page's url as a "magical" string - * after '#'. Right now this is only supplemental in HTTP(s) pages where - * blocking of native scripts also happens through CSP header injection but is - * necessary for protocols like ftp:// and file://. - * - * The code that actually injects the magical string into ftp:// and file:// - * urls has not yet been added to the extension. + * In the case of HTML documents: + * 1. When injecting some payload we need to sanitize <meta> CSP tags before + * they reach the document. + * 2. Only <meta> tags inside <head> are considered valid by the browser and + * need to be considered. + * 3. We want to detach <html> from document, wait until its <head> completes + * loading, sanitize it and re-attach <html>. + * 4. We shall wait for anything to appear in or after <body> and take that as + * a sign <head> has finished loading. + * 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also + * be a sign that <head> is fully loaded. */ -var nonce = undefined; +function make_body_start_observer(DOM_element, waiting) +{ + const observer = new MutationObserver(() => try_body_started(waiting)); + observer.observe(DOM_element, {childList: true}); + return observer; +} -function handle_mutation(mutations, observer) +function try_body_started(waiting) { - if (document.readyState === 'complete') { - console.log("mutation handling complete"); - observer.disconnect(); - return; - } - for (const mutation of mutations) { - for (const node of mutation.addedNodes) - block_node(node); + const body = waiting.detached_html.querySelector("body"); + + if ((body && (body.firstChild || body.nextSibling)) || + waiting.doc.documentElement.nextSibling) { + finish_waiting(waiting); + return true; } + + if (body && waiting.observers.length < 2) + waiting.observers.push(make_body_start_observer(body, waiting)); } -function block_nodes_recursively(node) +function finish_waiting(waiting) { - block_node(node); - for (const child of node.children) - block_nodes_recursively(child); + if (waiting.finished) + return; + waiting.finished = true; + waiting.observers.forEach(observer => observer.disconnect()); + setTimeout(waiting.callback, 0); } -function block_node(node) +function _wait_for_head(doc, detached_html, callback) { + const waiting = {doc, detached_html, callback, observers: []}; + + if (try_body_started(waiting)) + return; + + waiting.observers = [make_body_start_observer(detached_html, waiting)]; + + wait_loaded(doc).then(() => finish_waiting(waiting)); +} + +function wait_for_head(doc, detached_html) +{ + return new Promise(cb => _wait_for_head(doc, detached_html, cb)); +} + +const blocked_str = "blocked"; + +function block_attribute(node, attr, ns=null) +{ + const [hasa, geta, seta, rema] = ["has", "get", "set", "remove"] + .map(m => (n, ...args) => typeof ns === "string" ? + n[`${m}AttributeNS`](ns, ...args) : n[`${m}Attribute`](...args)); /* - * Modifying <script> element doesn't always prevent its execution in some - * Mozilla browsers. This is Chromium-specific code. + * Disabling attributes by prepending `-blocked' allows them to still be + * relatively easily accessed in case they contain some useful data. */ - if (node.tagName === "SCRIPT") { - block_script(node); - return; + const construct_name = [attr]; + while (hasa(node, construct_name.join(""))) + construct_name.unshift(blocked_str); + + while (construct_name.length > 1) { + construct_name.shift(); + const name = construct_name.join(""); + seta(node, `${blocked_str}-${name}`, geta(node, name)); + } + + rema(node, attr); +} + +/* + * Used to disable `<script>'s and `<meta>'s that have not yet been added to + * live DOM (doesn't work for those already added). + */ +function sanitize_meta(meta) +{ + if (csp_header_regex.test(meta.httpEquiv) && meta.content) + block_attribute(meta, "content"); +} + +function sanitize_script(script) +{ + script.haketilo_blocked_type = script.getAttribute("type"); + script.type = "text/plain"; +} + +/* + * Executed after `<script>' has been connected to the DOM, when it is no longer + * eligible for being executed by the browser. + */ +function desanitize_script(script) +{ + script.setAttribute("type", script.haketilo_blocked_type); + + if ([null, undefined].includes(script.haketilo_blocked_type)) + script.removeAttribute("type"); + + delete script.haketilo_blocked_type; +} + +const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)/i; +function sanitize_urls(element) +{ + for (const attr of [...element.attributes || []] + .filter(attr => /^(href|src|data)$/i.test(attr.localName)) + .filter(attr => bad_url_reg.test(attr.value))) + block_attribute(element, attr.localName, attr.namespaceURI); +} + +function start_data_urls_sanitizing(doc) +{ + doc.querySelectorAll("*[href], *[src], *[data]").forEach(sanitize_urls); + if (!doc.content_loaded) { + const mutation_handler = m => m.addedNodes.forEach(sanitize_urls); + const mo = new MutationObserver(ms => ms.forEach(mutation_handler)); + mo.observe(doc, {childList: true, subtree: true}); + wait_loaded(doc).then(() => mo.disconnect()); } +} + +/* + * Normally, we block scripts with CSP. However, Mozilla does optimizations that + * cause part of the DOM to be loaded when our content scripts get to run. Thus, + * before the CSP rules we inject (for non-HTTP pages) become effective, we need + * to somehow block the execution of `<script>'s and intrinsics that were + * already there. Additionally, some browsers (IceCat 60) seem to have problems + * applying this CSP to non-inline `<scripts>' in certain scenarios. + */ +function prevent_script_execution(event) +{ + if (!event.target.haketilo_payload) + event.preventDefault(); +} - sanitize_attributes(node); +function mozilla_initial_block(doc) +{ + doc.addEventListener("beforescriptexecute", prevent_script_execution); - if (node.tagName === "HEAD") - inject_csp(node); + for (const elem of doc.querySelectorAll("*")) { + [...elem.attributes].map(attr => attr.localName) + .filter(attr => /^on/.test(attr) && elem.wrappedJSObject[attr]) + .forEach(attr => elem.wrappedJSObject[attr] = null); + } } -function block_script(node) +/* + * Here we block all scripts of a document which might be either and + * HTMLDocument or an XMLDocument. Modifying an XML document might disrupt + * Mozilla's XML preview. This is an unfortunate thing we have to accept for + * now. XML documents *have to* be sanitized as well because they might + * contain `<script>' tags (or on* attributes) with namespace declared as + * "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows + * javascript execution. + */ +async function sanitize_document(doc, policy) { /* - * Disabling scripts this way allows them to still be relatively - * easily accessed in case they contain some useful data. + * Blocking of scripts that are in the DOM from the beginning. Needed for + * Mozilla. */ - if (node.hasAttribute("type")) - node.setAttribute("blocked-type", node.getAttribute("type")); - node.setAttribute("type", "application/json"); + if (is_mozilla) + mozilla_initial_block(doc); + + /* + * Ensure our CSP rules are employed from the beginning. This CSP injection + * method is, when possible, going to be applied together with CSP rules + * injected using webRequest. + * Using elements namespaced as HTML makes this CSP injection also work for + * non-HTML documents. + */ + const html = new DOMParser().parseFromString(`<html><head><meta \ +http-equiv="Content-Security-Policy" content="${make_csp_rule(policy)}"\ +/></head><body>Loading...</body></html>`, "text/html").documentElement; + + /* + * Root node gets hijacked now, to be re-attached after <head> is loaded + * and sanitized. + */ + const root = doc.documentElement; + root.replaceWith(html); + + /* + * When we don't inject payload, we neither block document's CSP `<meta>' + * tags nor wait for `<head>' to be parsed. + */ + if (policy.has_payload) { + await wait_for_head(doc, root); + + root.querySelectorAll("head meta") + .forEach(m => sanitize_meta(m, policy)); + } + + root.querySelectorAll("script").forEach(s => sanitize_script(s, policy)); + html.replaceWith(root); + root.querySelectorAll("script").forEach(s => desanitize_script(s, policy)); + + start_data_urls_sanitizing(doc); } -function inject_csp(head) +async function _disable_service_workers() { - console.log('injecting CSP'); + if (!navigator.serviceWorker) + return; - let meta = document.createElement("meta"); - meta.setAttribute("http-equiv", "Content-Security-Policy"); - meta.setAttribute("content", csp_rule(nonce)); + const registrations = await navigator.serviceWorker.getRegistrations(); + if (registrations.length === 0) + return; + + console.warn("Service Workers detected on this page! Unregistering and reloading."); - if (head.firstElementChild === null) - head.appendChild(meta); - else - head.insertBefore(meta, head.firstElementChild); + try { + await Promise.all(registrations.map(r => r.unregister())); + } finally { + location.reload(); + } + + /* Never actually return! */ + return new Promise(() => 0); } -if (!is_privileged_url(document.URL)) { - const targets = url_extract_target(document.URL); - if (targets.policy) { - if (targets.target2) - window.location.href = targets.base_url + targets.target2; - else - history.replaceState(null, "", targets.base_url); +/* + * Trying to use servce workers APIs might result in exceptions, for example + * when in a non-HTML document. Because of this, we wrap the function that does + * the actual work in a try {} block. + */ +async function disable_service_workers() +{ + try { + await _disable_service_workers() + } catch (e) { + console.debug("Exception thrown during an attempt to detect and disable service workers.", e); } +} - const policy = targets.current ? targets.policy : {}; +function synchronously_get_policy(url) +{ + const encoded_url = encodeURIComponent(url); + const request_url = `${browser.runtime.getURL("dummy")}?url=${encoded_url}`; - nonce = policy.nonce || gen_nonce(); - handle_page_actions(nonce); + try { + var xhttp = new XMLHttpRequest(); + xhttp.open("GET", request_url, false); + xhttp.send(); + } catch(e) { + console.error("Failure to synchronously fetch policy for url.", e); + return {allow: false}; + } - if (!policy.allow) { - block_nodes_recursively(document.documentElement); + const policy = /^[^?]*\?settings=(.*)$/.exec(xhttp.responseURL)[1]; + return JSON.parse(decodeURIComponent(policy)); +} - if (is_chrome) { - var observer = new MutationObserver(handle_mutation); - observer.observe(document.documentElement, { - attributes: true, - childList: true, - subtree: true - }); - } +if (!is_privileged_url(document.URL)) { + const policy = synchronously_get_policy(document.URL); - if (is_mozilla) - addEventListener('beforescriptexecute', mozilla_suppress_scripts, true); - } + if (!(document instanceof HTMLDocument)) + delete policy.payload; + + console.debug("current policy", policy); + + report_settings(policy); + + policy.nonce = gen_nonce(); + + const doc_ready = Promise.all([ + policy.allow ? Promise.resolve() : sanitize_document(document, policy), + policy.allow ? Promise.resolve() : disable_service_workers(), + wait_loaded(document) + ]); + + handle_page_actions(policy, doc_ready); start_activity_info_server(); } diff --git a/content/page_actions.js b/content/page_actions.js index aff56b8..845e452 100644 --- a/content/page_actions.js +++ b/content/page_actions.js @@ -1,5 +1,7 @@ /** - * Hachette handling of page actions in content scripts + * This file is part of Haketilo. + * + * Function: Handle page actions in a content script. * * Copyright (C) 2021 Wojtek Kosior * Redistribution terms are gathered in the `copyright' file. @@ -10,14 +12,17 @@ * IMPORT CONNECTION_TYPE * IMPORT browser * IMPORT report_script - * IMPORT report_settings + * IMPORT report_document_type * IMPORTS_END */ -var port; -var loaded = false; -var scripts_awaiting = []; -var nonce; +let policy; +/* Snapshot url and content type early; these can be changed by other code. */ +let url; +let is_html; +let port; +let loaded = false; +let scripts_awaiting = []; function handle_message(message) { @@ -31,11 +36,12 @@ function handle_message(message) scripts_awaiting.push(script_text); } } - if (action === "settings") - report_settings(data); + else { + console.error(`Bad page action '${action}'.`); + } } -function document_loaded(event) +function document_ready(event) { loaded = true; @@ -47,22 +53,32 @@ function document_loaded(event) function add_script(script_text) { + if (!is_html) + return; + let script = document.createElement("script"); script.textContent = script_text; - script.setAttribute("nonce", nonce); - script._hachette_payload = true; + script.setAttribute("nonce", policy.nonce); + script.haketilo_payload = true; document.body.appendChild(script); report_script(script_text); } -function handle_page_actions(script_nonce) { - document.addEventListener("DOMContentLoaded", document_loaded); - port = browser.runtime.connect({name : CONNECTION_TYPE.PAGE_ACTIONS}); - port.onMessage.addListener(handle_message); - port.postMessage({url: document.URL}); +function handle_page_actions(_policy, doc_ready_promise) { + policy = _policy; - nonce = script_nonce; + url = document.URL; + is_html = document instanceof HTMLDocument; + report_document_type(is_html); + + doc_ready_promise.then(document_ready); + + if (policy.payload) { + port = browser.runtime.connect({name : CONNECTION_TYPE.PAGE_ACTIONS}); + port.onMessage.addListener(handle_message); + port.postMessage({payload: policy.payload}); + } } /* diff --git a/content/repo_query.js b/content/repo_query.js index 3708108..637282c 100644 --- a/content/repo_query.js +++ b/content/repo_query.js @@ -1,6 +1,7 @@ /** - * part of Hachette - * Getting available content for site from remote repositories. + * This file is part of Haketilo. + * + * Function: Getting available content for site from remote repositories. * * Copyright (C) 2021 Wojtek Kosior * Redistribution terms are gathered in the `copyright' file. |