diff options
Diffstat (limited to 'content/policy_enforcing.js')
-rw-r--r-- | content/policy_enforcing.js | 326 |
1 files changed, 326 insertions, 0 deletions
diff --git a/content/policy_enforcing.js b/content/policy_enforcing.js new file mode 100644 index 0000000..25c8b6b --- /dev/null +++ b/content/policy_enforcing.js @@ -0,0 +1,326 @@ +/** + * This file is part of Haketilo. + * + * Function: Enforcing script blocking rules on a given page, working from a + * content script. + * + * Copyright (C) 2021,2022 Wojtek Kosior + * Copyright (C) 2021 jahoti + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * As additional permission under GNU GPL version 3 section 7, you + * may distribute forms of that code without the copy of the GNU + * GPL normally required by section 4, provided you include this + * license notice and, in case of non-source distribution, a URL + * through which recipients can access the Corresponding Source. + * If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not + * obligated to do so. If you do not wish to do so, delete this + * exception statement from your version. + * + * As a special exception to the GPL, any HTML file which merely + * makes function calls to this code, and for that purpose + * includes it by reference shall be deemed a separate work for + * copyright law purposes. If you modify this code, you may extend + * this exception to your version of the code, but you are not + * obligated to do so. If you do not wish to do so, delete this + * exception statement from your version. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + * + * I, Wojtek Kosior, thereby promise not to sue for violation of this file's + * license. Although I request that you do not make use of this code in a + * proprietary program, I am not going to enforce this in court. + */ + +#FROM common/misc.js IMPORT gen_nonce + +document.content_loaded = document.readyState === "complete"; +const wait_loaded = e => e.content_loaded ? Promise.resolve() : + new Promise(c => e.addEventListener("DOMContentLoaded", c, {once: true})); + +wait_loaded(document).then(() => document.content_loaded = true); + +/* + * In the case of HTML documents: + * 1. When injecting some payload we need to sanitize <meta> CSP tags before + * they reach the document. + * 2. Only <meta> tags inside <head> are considered valid by the browser and + * need to be considered. + * 3. We want to detach <html> from document, wait until its <head> completes + * loading, sanitize it and re-attach <html>. + * 4. We shall wait for anything to appear in or after <body> and take that as + * a sign <head> has finished loading. + * 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also + * be a sign that <head> is fully loaded. + */ + +function make_body_start_observer(DOM_element, waiting) { + const observer = new MutationObserver(() => try_body_started(waiting)); + observer.observe(DOM_element, {childList: true}); + return observer; +} + +function try_body_started(waiting) { + const body = waiting.detached_html.querySelector("body"); + + if ((body && (body.firstChild || body.nextSibling)) || + waiting.doc.documentElement.nextSibling) { + finish_waiting(waiting); + return true; + } + + if (body && waiting.observers.length < 2) + waiting.observers.push(make_body_start_observer(body, waiting)); +} + +function finish_waiting(waiting) { + if (waiting.finished) + return; + waiting.finished = true; + waiting.observers.forEach(observer => observer.disconnect()); + setTimeout(waiting.callback, 0); +} + +function _wait_for_head(doc, detached_html, callback) { + const waiting = {doc, detached_html, callback, observers: []}; + + if (try_body_started(waiting)) + return; + + waiting.observers = [make_body_start_observer(detached_html, waiting)]; + + wait_loaded(doc).then(() => finish_waiting(waiting)); +} + +function wait_for_head(doc, detached_html) { + return new Promise(cb => _wait_for_head(doc, detached_html, cb)); +} + +const blocked_str = "blocked"; + +function block_attribute(node, attr, ns=null, replace_with="") { + const [hasa, geta, seta, rema] = ["has", "get", "set", "remove"] + .map(m => (n, ...args) => typeof ns === "string" ? + n[`${m}AttributeNS`](ns, ...args) : n[`${m}Attribute`](...args)); + /* + * Disabling attributes by prepending `blocked-' allows them to still be + * relatively easily accessed in case they contain some useful data. + */ + const construct_name = [attr]; + while (hasa(node, construct_name.join(""))) + construct_name.unshift(blocked_str); + + while (construct_name.length > 1) { + construct_name.shift(); + const name = construct_name.join(""); + seta(node, `${blocked_str}-${name}`, geta(node, name)); + } + + rema(node, attr); + seta(node, attr, replace_with); +} + +/* + * Used to disable `<script>'s and `<meta>'s that have not yet been added to + * live DOM (doesn't work for those already added). + */ +function sanitize_meta(meta) { + if (csp_header_regex.test(meta.httpEquiv) && meta.content) + block_attribute(meta, "content"); +} + +function sanitize_script(script) { + script.haketilo_blocked_type = script.getAttribute("type"); + script.type = "text/plain"; +} + +/* + * Executed after `<script>' has been connected to the DOM, when it is no longer + * eligible for being executed by the browser. + */ +function desanitize_script(script) { + script.setAttribute("type", script.haketilo_blocked_type); + + if ([null, undefined].includes(script.haketilo_blocked_type)) + script.removeAttribute("type"); + + delete script.haketilo_blocked_type; +} + +const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)|^javascript:/i; +function sanitize_element_urls(element) { + if (element.haketilo_sanitized_urls) + return; + + element.haketilo_sanitized_urls = true; + + for (const attr of [...element.attributes || []] + .filter(attr => /^(href|src|data)$/i.test(attr.localName)) + .filter(attr => bad_url_reg.test(attr.value))) { + const replacement_value = /^href$/i.test(attr.localName) ? + "javascript:void('blocked');" : "data:text/plain,blocked"; + block_attribute(element, attr.localName, attr.namespaceURI, + replacement_value); + } +} + +function sanitize_tree_urls(root) { + root.querySelectorAll("*[href], *[src], *[data]") + .forEach(sanitize_element_urls); +} + +function start_urls_sanitizing(doc) { + sanitize_tree_urls(doc); + if (!doc.content_loaded) { + const mutation_handler = + m => m.addedNodes.forEach(sanitize_element_urls); + const mo = new MutationObserver(ms => ms.forEach(mutation_handler)); + mo.observe(doc, {childList: true, subtree: true}); + wait_loaded(doc).then(() => mo.disconnect()); + } +} + +#IF MOZILLA +/* + * Normally, we block scripts with CSP. However, Mozilla does optimizations that + * cause part of the DOM to be loaded when our content scripts get to run. Thus, + * before the CSP rules we inject (for non-HTTP pages) become effective, we need + * to somehow block the execution of `<script>'s and intrinsics that were + * already there. Additionally, some browsers (IceCat 60) seem to have problems + * applying this CSP to non-inline `<scripts>' in certain scenarios. + */ +function prevent_script_execution(event) { + if (!event.target.haketilo_payload) + event.preventDefault(); +} +#ENDIF + +/* + * Here we block all scripts of a document which might be either and + * HTMLDocument or an XMLDocument. Modifying an XML document might disrupt + * Mozilla's XML preview. This is an unfortunate thing we have to accept for + * now. XML documents *have to* be sanitized as well because they might + * contain `<script>' tags (or on* attributes) with namespace declared as + * "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows + * javascript execution. + */ +async function sanitize_document(doc, policy) { +#IF MOZILLA + /* + * Blocking of scripts that are in the DOM from the beginning. Needed for + * Mozilla. + */ + const listener_args = ["beforescriptexecute", prevent_script_execution]; + doc.addEventListener(...listener_args); + wait_loaded(doc).then(() => doc.removeEventListener(...listener_args)); + + for (const elem of doc.querySelectorAll("*")) { + [...elem.attributes].map(attr => attr.localName) + .filter(attr => /^on/.test(attr) && elem.wrappedJSObject[attr]) + .forEach(attr => elem.wrappedJSObject[attr] = null); + } + + sanitize_tree_urls(doc.documentElement); +#ENDIF + + /* + * Ensure our CSP rules are employed from the beginning. This CSP injection + * method is, when possible, going to be applied together with CSP rules + * injected using webRequest. + * Using elements namespaced as HTML makes this CSP injection also work for + * non-HTML documents. + */ + const source = `\ +<!DOCTYPE html> +<html> + <head> + <meta http-equiv="Content-Security-Policy" content="${policy.csp}"/> + </head> + <body> + Loading... + </body> +</html>`; + const html = + new DOMParser().parseFromString(source, "text/html").documentElement; + + /* + * Root node gets hijacked now, to be re-attached after <head> is loaded + * and sanitized. + */ + const root = doc.documentElement; + root.replaceWith(html); + + /* + * When we don't inject payload, we neither block document's CSP `<meta>' + * tags nor wait for `<head>' to be parsed. + */ + if (policy.payload) { + await wait_for_head(doc, root); + + root.querySelectorAll("head meta") + .forEach(m => sanitize_meta(m, policy)); + } + + root.querySelectorAll("script").forEach(s => sanitize_script(s, policy)); + sanitize_tree_urls(root); + html.replaceWith(root); + root.querySelectorAll("script").forEach(s => desanitize_script(s, policy)); + + start_urls_sanitizing(doc); +} + +async function _disable_service_workers() { + if (!navigator.serviceWorker) + return; + + const registrations = await navigator.serviceWorker.getRegistrations(); + if (registrations.length === 0) + return; + + console.warn("Service Workers detected on this page! Unregistering and reloading."); + + try { + await Promise.all(registrations.map(r => r.unregister())); + } finally { + location.reload(); + } + + /* Never actually return! */ + return new Promise(() => 0); +} + +/* + * Trying to use servce workers APIs might result in exceptions, for example + * when in a non-HTML document. Because of this, we wrap the function that does + * the actual work in a try {} block. + */ +async function disable_service_workers() { + try { + await _disable_service_workers() + } catch (e) { + console.debug("Exception thrown during an attempt to detect and disable service workers.", e); + } +} + +function enforce_blocking(policy) { + if (policy.allow) + return; + + return Promise.all([ + sanitize_document(document, policy), + disable_service_workers(), + wait_loaded(document) + ]); +} +#EXPORT enforce_blocking |