/** * This file is part of Haketilo. * * Function: Enforcing script blocking rules on a given page, working from a * content script. * * Copyright (C) 2021,2022 Wojtek Kosior * Copyright (C) 2021 jahoti * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * As additional permission under GNU GPL version 3 section 7, you * may distribute forms of that code without the copy of the GNU * GPL normally required by section 4, provided you include this * license notice and, in case of non-source distribution, a URL * through which recipients can access the Corresponding Source. * If you modify file(s) with this exception, you may extend this * exception to your version of the file(s), but you are not * obligated to do so. If you do not wish to do so, delete this * exception statement from your version. * * As a special exception to the GPL, any HTML file which merely * makes function calls to this code, and for that purpose * includes it by reference shall be deemed a separate work for * copyright law purposes. If you modify this code, you may extend * this exception to your version of the code, but you are not * obligated to do so. If you do not wish to do so, delete this * exception statement from your version. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <https://www.gnu.org/licenses/>. * * I, Wojtek Kosior, thereby promise not to sue for violation of this file's * license. Although I request that you do not make use of this code in a * proprietary program, I am not going to enforce this in court. */ #FROM common/misc.js IMPORT csp_header_regex const html_ns = "http://www.w3.org/1999/xhtml"; const svg_ns = "http://www.w3.org/2000/svg"; document.content_loaded = document.readyState === "complete"; const wait_loaded = e => e.content_loaded ? Promise.resolve() : new Promise(c => e.addEventListener("DOMContentLoaded", c, {once: true})); wait_loaded(document).then(() => document.content_loaded = true); /* * In the case of HTML documents: * 1. When injecting some payload we need to sanitize <meta> CSP tags before * they reach the document. * 2. Only <meta> tags inside <head> are considered valid by the browser and * need to be considered. * 3. We want to detach <html> from document, wait until its <head> completes * loading, sanitize it and re-attach <html>. * 4. We shall wait for anything to appear in or after <body> and take that as * a sign <head> has finished loading. * 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also * be a sign that <head> is fully loaded. */ function make_body_start_observer(DOM_element, waiting) { const observer = new MutationObserver(() => try_body_started(waiting)); observer.observe(DOM_element, {childList: true}); return observer; } function try_body_started(waiting) { const body = waiting.detached_html.querySelector("body"); if ((body && (body.firstChild || body.nextSibling)) || waiting.doc.documentElement.nextSibling) { finish_waiting(waiting); return true; } if (body && waiting.observers.length < 2) waiting.observers.push(make_body_start_observer(body, waiting)); } function finish_waiting(waiting) { if (waiting.finished) return; waiting.finished = true; waiting.observers.forEach(observer => observer.disconnect()); setTimeout(waiting.callback, 0); } function _wait_for_head(doc, detached_html, callback) { const waiting = {doc, detached_html, callback, observers: []}; if (try_body_started(waiting)) return; waiting.observers = [make_body_start_observer(detached_html, waiting)]; wait_loaded(doc).then(() => finish_waiting(waiting)); } function wait_for_head(doc, detached_html) { return new Promise(cb => _wait_for_head(doc, detached_html, cb)); } const blocked_str = "blocked"; function block_attribute(node, attr, ns=null, replace_with=null) { const [hasa, geta, seta, rema] = ["has", "get", "set", "remove"] .map(m => (n, ...args) => typeof ns === "string" ? n[`${m}AttributeNS`](ns, ...args) : n[`${m}Attribute`](...args)); /* * Disabling attributes by prepending `blocked-' allows them to still be * relatively easily accessed in case they contain some useful data. */ const construct_name = [attr]; while (hasa(node, construct_name.join("-"))) construct_name.unshift(blocked_str); while (construct_name.length > 1) { construct_name.shift(); const name = construct_name.join("-"); seta(node, `${blocked_str}-${name}`, geta(node, name)); } rema(node, attr); if (replace_with !== null) seta(node, attr, replace_with); } /* * Used to disable `<script>'s and `<meta>'s that have not yet been added to * live DOM (doesn't work for those already added). */ function sanitize_meta(meta) { if (csp_header_regex.test(meta.httpEquiv) && meta.content) block_attribute(meta, "content"); } function sanitize_script(script) { script.haketilo_blocked_type = script.getAttribute("type"); script.type = "text/plain"; } /* * Executed after `<script>' has been connected to the DOM, when it is no longer * eligible for being executed by the browser. */ function desanitize_script(script) { script.setAttribute("type", script.haketilo_blocked_type); if ([null, undefined].includes(script.haketilo_blocked_type)) script.removeAttribute("type"); delete script.haketilo_blocked_type; } /* The following will only be run on pages without payload. */ function force_noscript_tag(element) { if (element.tagName !== "NOSCRIPT") return; let under_head = false; let ancestor = element; while (true) { ancestor = ancestor.parentElement; if (ancestor === null) break; if (ancestor === document.head) { under_head = true; break; } } const replacement = document.createElement('haketilo-noscript'); replacement.innerHTML = element.innerHTML; for (const script of [...replacement.querySelectorAll('script')]) script.remove(); if (under_head) { for (const child of replacement.childNodes) element.before(child); element.remove(); } else { element.replaceWith(replacement); } } /* * Blocking certain attributes that might allow 'javascript:' URLs. Some of * these are: <iframe>'s 'src' attributes (would normally execute js in URL upon * frame's load), <object>'s 'data' attribute (would also execute upon load) and * <a>'s 'href' attribute (would execute upon link click). */ const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)|^javascript:/i; function sanitize_element_urls(element) { if (element.haketilo_sanitized_urls) return; element.haketilo_sanitized_urls = true; let some_attr_blocked = false; const bad_attrs = [...(element.attributes || [])] .filter(attr => /^(href|src|data)$/i.test(attr.localName)) .filter(attr => bad_url_reg.test(attr.value)); for (const attr of bad_attrs) { /* * Under some browsers (Mozilla) removing attributes doesn't stop their * javascript from executing, but replacing them does. For 'src' and * 'data' I chose to replace the attribute with a 'data:' URL and have * it replace bad <iframe>'s/<object>'s contents with a "blocked" * string. For 'href' (which appears on <a>'s) I chose to use a * 'javascript:' URL to avoid having the page reloaded upon a link * click. */ const replacement_value = /^href$/i.test(attr.localName) ? "javascript:void('blocked');" : "data:text/plain,blocked"; some_attr_blocked = true; block_attribute(element, attr.localName, attr.namespaceURI, replacement_value); } /* * Trial and error shows that under certain browsers additional element * removal and re-addition might be necessary to prevent execution of a * 'javascript:' URL (Parabola's Iceweasel 75 requires it for 'src' URL of * an <iframe>). */ if (some_attr_blocked) { const replacement_elem = document.createElement("a"); /* Prevent this node from being processed by our observer. */ replacement_elem.haketilo_trusted_node = true; element.replaceWith(replacement_elem); replacement_elem.replaceWith(element); } } function sanitize_tree_urls(root) { root.querySelectorAll("*[href], *[src], *[data]") .forEach(sanitize_element_urls); } #IF MOZILLA function sanitize_element_onevent(element) { if (element.haketilo_sanitized_onevent) return; element.haketilo_sanitized_onevent = true; for (const attribute_node of [...(element.attributes || [])]) { const attr = attribute_node.localName, attr_lo = attr.toLowerCase(); if (!/^on/.test(attr_lo) || !(attr_lo in element)) continue; /* * Guard against redefined getter on DOM object property. This is a * supplemental security measure since page's own scripts should be * blocked and unable to redefine properties, anyway. */ if (Object.getOwnPropertyDescriptor(element.wrappedJSObject, attr)) { console.error("Haketilo: Redefined property on a DOM object! The page might have bypassed our script blocking measures!"); continue; } element.wrappedJSObject[attr] = null; block_attribute(element, attr, attribute_node.namespaceURI, "javascript:void('blocked');"); } } function sanitize_tree_onevent(root) { root.querySelectorAll("*") .forEach(sanitize_element_onevent); } #ENDIF /* * Sanitize elements on-the-fly and force <noscript> tags visible as they appear * using MutationObserver. * * Under Abrowser 97 it was observed that MutationObserver does not always work * as is should. When trying to observe nodes of an XMLDocument the behavior was * as if the "subtree" option to MutationObserver.observe() was ignored. To work * around this we avoid using the "subtree" option altogether and have the same * code work in all scenarios. */ function MOSanitizer(root, payload_present) { this.root = root; this.payload_present = payload_present; this.recursively_sanitize(root); this.mo = new MutationObserver(ms => this.handle_mutations(ms)); } MOSanitizer.prototype.observe = function() { this.mo.disconnect(); let elem = this.root; while (elem && !elem.haketilo_trusted_node) { this.mo.observe(elem, {childList: true}); elem = elem.lastElementChild; } } MOSanitizer.prototype.handle_mutations = function(mutations) { for (const mut of mutations) { for (const new_node of mut.addedNodes) this.recursively_sanitize(new_node); } this.observe(); } MOSanitizer.prototype.recursively_sanitize = function(elem) { const to_process = [elem]; while (to_process.length > 0) { const current_elem = to_process.pop(); if (current_elem.haketilo_trusted_node || current_elem.nodeType !== this.root.ELEMENT_NODE) continue; to_process.push(...current_elem.children); sanitize_element_urls(current_elem); #IF MOZILLA sanitize_element_onevent(current_elem); #ENDIF if (!this.payload_present) force_noscript_tag(current_elem); } } MOSanitizer.prototype.start = function() { this.recursively_sanitize(this.root); this.observe(); } MOSanitizer.prototype.stop = function() { this.mo.disconnect(); } #IF MOZILLA /* * Normally, we block scripts with CSP. However, Mozilla does optimizations that * cause part of the DOM to be loaded when our content scripts get to run. Thus, * before the CSP rules we inject (for non-HTTP pages) become effective, we need * to somehow block the execution of `<script>'s and intrinsics that were * already there. Additionally, some browsers (IceCat 60) seem to have problems * applying this CSP to non-inline `<scripts>' in certain scenarios. */ function prevent_script_execution(event) { event.preventDefault(); } #ENDIF /* * Here we block all scripts of a document which might be either an * HTMLDocument or an XMLDocument. Modifying an XML document might disrupt * Mozilla's XML preview. This is an unfortunate thing we have to accept for * now. XML documents *have to* be sanitized as well because they might * contain `<script>' tags (or on* attributes) with namespace declared as * "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows * javascript execution. */ async function sanitize_document(doc, policy) { const root = doc.documentElement; const substitute_doc = new DOMParser().parseFromString("<!DOCTYPE html>", "text/html"); #IF MOZILLA /* * Blocking of scripts that are in the DOM from the beginning. Needed for * Mozilla. */ const listener_args = ["beforescriptexecute", prevent_script_execution]; doc.addEventListener(...listener_args); substitute_doc.addEventListener(...listener_args); wait_loaded(doc).then(() => doc.removeEventListener(...listener_args)); #ENDIF /* * Ensure our CSP rules are employed from the beginning. This CSP injection * method is, when possible, going to be applied together with CSP rules * injected using webRequest. * Using elements namespaced as HTML makes this CSP injection also work for * non-HTML documents. */ const source = `\ <!DOCTYPE html> <html> <head> <meta http-equiv="Content-Security-Policy" content="${policy.csp}"/> </head> <body> Loading... </body> </html>`; const temporary_html = new DOMParser().parseFromString(source, "text/html").documentElement; /* * Root node gets hijacked now, to be re-attached after <head> is loaded * and sanitized. */ root.replaceWith(temporary_html); #IF MOZILLA /* * To be able to handle the onbeforescriptexecute event for scripts that * appear under detached document. */ substitute_doc.documentElement.replaceWith(root); #ENDIF const sanitizer = new MOSanitizer(root, !!policy.payload); sanitizer.start(); wait_loaded(doc).then(() => sanitizer.stop()); /* * When we don't inject payload, we neither block document's CSP `<meta>' * tags nor wait for `<head>' to be parsed. */ if (policy.payload) { if (doc instanceof HTMLDocument) await wait_for_head(doc, root); root.querySelectorAll("head meta") .forEach(m => sanitize_meta(m, policy)); } const scripts = [...root.getElementsByTagNameNS(html_ns, "script"), ...root.getElementsByTagNameNS(svg_ns, "script")]; scripts.forEach(s => sanitize_script(s, policy)); temporary_html.replaceWith(root); scripts.forEach(s => desanitize_script(s, policy)); } async function _disable_service_workers() { if (!navigator.serviceWorker) return; const registrations = await navigator.serviceWorker.getRegistrations(); if (registrations.length === 0) return; console.warn("Haketilo: Service Workers detected on this page! Unregistering and reloading."); try { await Promise.all(registrations.map(r => r.unregister())); } finally { location.reload(); } /* Never actually return! */ return new Promise(() => 0); } /* * Trying to use service workers APIs might result in exceptions, for example * when in a non-HTML document. Because of this, we wrap the function that does * the actual work in a try {} block. */ async function disable_service_workers() { try { await _disable_service_workers() } catch (e) { console.warn("Haketilo: Exception thrown during an attempt to detect and disable service workers.", e); } } function enforce_blocking(policy) { if (policy.allow) return; return Promise.all([ sanitize_document(document, policy), disable_service_workers(), wait_loaded(document) ]); } #EXPORT enforce_blocking