1 files changed, 326 insertions, 0 deletions
diff --git a/content/policy_enforcing.js b/content/policy_enforcing.js
new file mode 100644
index 0000000..25c8b6b
--- /dev/null
+++ b/content/policy_enforcing.js
@@ -0,0 +1,326 @@
+/**
+ * This file is part of Haketilo.
+ *
+ * Function: Enforcing script blocking rules on a given page, working from a
+ *           content script.
+ *
+ * Copyright (C) 2021,2022 Wojtek Kosior
+ * Copyright (C) 2021 jahoti
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * As additional permission under GNU GPL version 3 section 7, you
+ * may distribute forms of that code without the copy of the GNU
+ * GPL normally required by section 4, provided you include this
+ * license notice and, in case of non-source distribution, a URL
+ * through which recipients can access the Corresponding Source.
+ * If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not
+ * obligated to do so. If you do not wish to do so, delete this
+ * exception statement from your version.
+ *
+ * As a special exception to the GPL, any HTML file which merely
+ * makes function calls to this code, and for that purpose
+ * includes it by reference shall be deemed a separate work for
+ * copyright law purposes. If you modify this code, you may extend
+ * this exception to your version of the code, but you are not
+ * obligated to do so. If you do not wish to do so, delete this
+ * exception statement from your version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
+ * license. Although I request that you do not make use of this code in a
+ * proprietary program, I am not going to enforce this in court.
+ */
+
+#FROM common/misc.js IMPORT gen_nonce
+
+document.content_loaded = document.readyState === "complete"; /* own flag */
+const wait_loaded = e => e.content_loaded ? Promise.resolve() :
+      new Promise(c => e.addEventListener("DOMContentLoaded", c, {once: true}));
+/* Set the flag as soon as wait_loaded() resolves for this document. */
+wait_loaded(document).then(() => document.content_loaded = true);
+
+/*
+ * In the case of HTML documents:
+ * 1. When injecting some payload we need to sanitize <meta> CSP tags before
+ *    they reach the document.
+ * 2. Only <meta> tags inside <head> are considered valid by the browser and
+ *    need to be considered.
+ * 3. We want to detach <html> from document, wait until its <head> completes
+ *    loading, sanitize it and re-attach <html>.
+ * 4. We shall wait for anything to appear in or after <body> and take that as
+ *    a sign <head> has finished loading.
+ * 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also
+ *    be a sign that <head> is fully loaded.
+ */
+
+function make_body_start_observer(DOM_element, waiting) {
+    const observer = new MutationObserver(() => try_body_started(waiting));
+    observer.observe(DOM_element, {childList: true}); /* direct children only */
+    return observer;
+}
+
+function try_body_started(waiting) {
+    const body = waiting.detached_html.querySelector("body");
+    /* Done if <body> has a child/successor or doc's root has a next sibling. */
+    if ((body && (body.firstChild || body.nextSibling)) ||
+	waiting.doc.documentElement.nextSibling) {
+	finish_waiting(waiting);
+	return true;
+    }
+    /* Empty <body> present: watch it as well (list is capped at 2 entries). */
+    if (body && waiting.observers.length < 2)
+	waiting.observers.push(make_body_start_observer(body, waiting));
+}
+
+function finish_waiting(waiting) {
+    if (waiting.finished)
+	return; /* already finished - later calls are no-ops */
+    waiting.finished = true;
+    waiting.observers.forEach(observer => observer.disconnect());
+    setTimeout(waiting.callback, 0); /* callback runs in a later task */
+}
+
+function _wait_for_head(doc, detached_html, callback) {
+    const waiting = {doc, detached_html, callback, observers: []};
+
+    if (try_body_started(waiting))
+	return;
+
+    waiting.observers = [make_body_start_observer(detached_html, waiting)];
+
+    wait_loaded(doc).then(() => finish_waiting(waiting));
+}
+
+function wait_for_head(doc, detached_html) { /* Promise-based wrapper */
+    return new Promise(cb => _wait_for_head(doc, detached_html, cb));
+}
+
+const blocked_str = "blocked";
+
+function block_attribute(node, attr, ns=null, replace_with="") {
+    const [hasa, geta, seta, rema] = ["has", "get", "set", "remove"]
+	  .map(m => (n, ...args) => typeof ns === "string" ?
+	       n[`${m}AttributeNS`](ns, ...args) : n[`${m}Attribute`](...args));
+    /*
+     * Disabling attributes by prepending `blocked-' allows them to still be
+     * relatively easily accessed in case they contain some useful data.
+     */
+    const construct_name = [attr];
+    while (hasa(node, construct_name.join("")))
+	construct_name.unshift(blocked_str);
+
+    while (construct_name.length > 1) {
+	construct_name.shift();
+	const name = construct_name.join("");
+	seta(node, `${blocked_str}-${name}`, geta(node, name));
+    }
+
+    rema(node, attr);
+    seta(node, attr, replace_with);
+}
+
+/*
+ * Used to disable `<script>'s and `<meta>'s that have not yet been added to
+ * live DOM (doesn't work for those already added).
+ */
+function sanitize_meta(meta) {
+    if (csp_header_regex.test(meta.httpEquiv) && meta.content)
+	block_attribute(meta, "content");
+}
+
+function sanitize_script(script) {
+    script.haketilo_blocked_type = script.getAttribute("type"); /* may be null */
+    script.type = "text/plain"; /* original type is put back on desanitizing */
+}
+
+/*
+ * Executed after `<script>' has been connected to the DOM, when it is no longer
+ * eligible for being executed by the browser.
+ */
+function desanitize_script(script) {
+    script.setAttribute("type", script.haketilo_blocked_type);
+
+    if ([null, undefined].includes(script.haketilo_blocked_type))
+	script.removeAttribute("type");
+
+    delete script.haketilo_blocked_type;
+}
+
+const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)|^javascript:/i;
+function sanitize_element_urls(element) {
+    if (element.haketilo_sanitized_urls)
+	return;
+
+    element.haketilo_sanitized_urls = true;
+
+    for (const attr of [...element.attributes || []]
+	       .filter(attr => /^(href|src|data)$/i.test(attr.localName))
+	       .filter(attr => bad_url_reg.test(attr.value))) {
+	const replacement_value = /^href$/i.test(attr.localName) ?
+	      "javascript:void('blocked');" : "data:text/plain,blocked";
+	block_attribute(element, attr.localName, attr.namespaceURI,
+		       replacement_value);
+    }
+}
+
+function sanitize_tree_urls(root) {
+    root.querySelectorAll("*[href], *[src], *[data]")
+	.forEach(sanitize_element_urls);
+}
+
+function start_urls_sanitizing(doc) {
+    sanitize_tree_urls(doc);
+    if (!doc.content_loaded) {
+	const mutation_handler =
+	      m => m.addedNodes.forEach(sanitize_element_urls);
+	const mo = new MutationObserver(ms => ms.forEach(mutation_handler));
+	mo.observe(doc, {childList: true, subtree: true});
+	wait_loaded(doc).then(() => mo.disconnect());
+    }
+}
+
+#IF MOZILLA
+/*
+ * Normally, we block scripts with CSP. However, Mozilla does optimizations that
+ * cause part of the DOM to be loaded when our content scripts get to run. Thus,
+ * before the CSP rules we inject (for non-HTTP pages) become effective, we need
+ * to somehow block the execution of `<script>'s and intrinsics that were
+ * already there. Additionally, some browsers (IceCat 60) seem to have problems
+ * applying this CSP to non-inline `<scripts>' in certain scenarios.
+ */
+function prevent_script_execution(event) {
+    if (!event.target.haketilo_payload)
+	event.preventDefault(); /* targets flagged `haketilo_payload' are spared */
+}
+#ENDIF
+
+/*
+ * Here we block all scripts of a document which might be either an
+ * HTMLDocument or an XMLDocument. Modifying an XML document might disrupt
+ * Mozilla's XML preview. This is an unfortunate thing we have to accept for
+ * now. XML documents *have to* be sanitized as well because they might
+ * contain `<script>' tags (or on* attributes) with namespace declared as
+ * "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows
+ * javascript execution.
+ */
+async function sanitize_document(doc, policy) {
+#IF MOZILLA
+    /*
+     * Blocking of scripts that are in the DOM from the beginning. Needed for
+     * Mozilla.
+     */
+    const listener_args = ["beforescriptexecute", prevent_script_execution];
+    doc.addEventListener(...listener_args);
+    wait_loaded(doc).then(() => doc.removeEventListener(...listener_args));
+    /* Null out `on*' handler properties of elements that are already there. */
+    for (const elem of doc.querySelectorAll("*")) {
+	[...elem.attributes].map(attr => attr.localName)
+	    .filter(attr => /^on/.test(attr) && elem.wrappedJSObject[attr])
+	    .forEach(attr => elem.wrappedJSObject[attr] = null);
+    }
+
+    sanitize_tree_urls(doc.documentElement);
+#ENDIF
+
+    /*
+     * Ensure our CSP rules are employed from the beginning. This CSP injection
+     * method is, when possible, going to be applied together with CSP rules
+     * injected using webRequest.
+     * Using elements namespaced as HTML makes this CSP injection also work for
+     * non-HTML documents.
+     */
+    const source = `\
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta http-equiv="Content-Security-Policy" content="${policy.csp}"/>
+  </head>
+  <body>
+    Loading...
+  </body>
+</html>`;
+    const html =
+	  new DOMParser().parseFromString(source, "text/html").documentElement;
+
+    /*
+     * Root node gets hijacked now, to be re-attached after <head> is loaded
+     * and sanitized.
+     */
+    const root = doc.documentElement;
+    root.replaceWith(html);
+
+    /*
+     * When we don't inject payload, we neither block document's CSP `<meta>'
+     * tags nor wait for `<head>' to be parsed.
+     */
+    if (policy.payload) {
+	await wait_for_head(doc, root);
+
+	root.querySelectorAll("head meta")
+	    .forEach(m => sanitize_meta(m, policy));
+    }
+    /* Scripts are disarmed only for the moment of re-attaching the root. */
+    root.querySelectorAll("script").forEach(s => sanitize_script(s, policy));
+    sanitize_tree_urls(root);
+    html.replaceWith(root);
+    root.querySelectorAll("script").forEach(s => desanitize_script(s, policy));
+
+    start_urls_sanitizing(doc);
+}
+
+async function _disable_service_workers() {
+    if (!navigator.serviceWorker)
+	return; /* no Service Worker API in this context */
+
+    const registrations = await navigator.serviceWorker.getRegistrations();
+    if (registrations.length === 0)
+	return; /* nothing registered - no reload needed */
+
+    console.warn("Service Workers detected on this page! Unregistering and reloading.");
+
+    try {
+	await Promise.all(registrations.map(r => r.unregister()));
+    } finally {
+	location.reload(); /* reload even if some unregister() call failed */
+    }
+
+    /* Never actually return! */
+    return new Promise(() => 0);
+}
+
+/*
+ * Trying to use service workers APIs might result in exceptions, for example
+ * when in a non-HTML document. Because of this, we wrap the function that does
+ * the actual work in a try {} block.
+ */
+async function disable_service_workers() {
+    try {
+	await _disable_service_workers()
+    } catch (e) {
+	console.debug("Exception thrown during an attempt to detect and disable service workers.", e);
+    }
+}
+
+function enforce_blocking(policy) {
+    if (policy.allow)
+	return; /* `policy.allow' is set: nothing gets enforced */
+    /* Otherwise resolve once all three of the tasks below have completed. */
+    return Promise.all([
+	sanitize_document(document, policy),
+	disable_service_workers(),
+	wait_loaded(document)
+    ]);
+}
+#EXPORT enforce_blocking