/**
 * This file is part of Haketilo.
 *
 * Function: Main content script that runs in all frames.
 *
 * Copyright (C) 2021 Wojtek Kosior
 * Copyright (C) 2021 jahoti
 * Redistribution terms are gathered in the `copyright' file.
 */

/*
 * IMPORTS_START
 * IMPORT handle_page_actions
 * IMPORT extract_signed
 * IMPORT sign_data
 * IMPORT gen_nonce
 * IMPORT is_privileged_url
 * IMPORT is_chrome
 * IMPORT is_mozilla
 * IMPORT start_activity_info_server
 * IMPORT make_csp_rule
 * IMPORT csp_header_regex
 * IMPORTS_END
 */

/*
 * `content_loaded' is a custom flag we keep on the document. It is set either
 * right away (document already complete) or once `DOMContentLoaded' fires.
 */
document.content_loaded = document.readyState === "complete";

/*
 * Return a promise that resolves once `e' (a document) has its
 * `content_loaded' flag set or dispatches `DOMContentLoaded'.
 */
const wait_loaded = e => e.content_loaded ? Promise.resolve() :
    new Promise(c => e.addEventListener("DOMContentLoaded", c, {once: true}));

wait_loaded(document).then(() => document.content_loaded = true);

/*
 * Look through all `haketilo-<signature>=<data>' entries of the `cookie'
 * string and pick the most recent correctly signed policy that is not older
 * than `min_time' and that was issued for the current document's URL.
 *
 * Returns a 2-element array: the chosen policy (or null if there is none) and
 * the list of signatures of all correctly signed entries that were seen (the
 * caller uses it to remove those cookies).
 */
function extract_cookie_policy(cookie, min_time)
{
    let best_result = {time: -1};
    let policy = null;
    const extracted_signatures = [];

    for (const match of cookie.matchAll(/haketilo-(\w*)=([^;]*)/g)) {
        const new_result = extract_signed(...match.slice(1, 3));
        if (new_result.fail)
            continue;

        extracted_signatures.push(match[1]);

        /* Skip entries that are too old or older than the best one so far. */
        if (new_result.time < Math.max(min_time, best_result.time))
            continue;

        /* This should succeed - it's our self-produced valid JSON. */
        const new_policy = JSON.parse(decodeURIComponent(new_result.data));
        if (new_policy.url !== document.URL)
            continue;

        best_result = new_result;
        policy = new_policy;
    }

    return [policy, extracted_signatures];
}

/*
 * Try to extract a signed policy smuggled in the fragment part of `url' as
 * `#haketilo_<signature>_<data>', optionally followed by the page's real
 * `#anchor'.
 *
 * Returns a 2-element array: the policy (or null when there is none, when it
 * is older than `min_time' or when it was issued for another URL) and the URL
 * with our payload stripped. If no correctly signed payload is found, `url' is
 * returned unmodified as the second element.
 */
function extract_url_policy(url, min_time)
{
    const [base_url, payload, anchor] =
          /^([^#]*)#?([^#]*)(#?.*)$/.exec(url).slice(1, 4);

    const match = /^haketilo_([^_]+)_(.*)$/.exec(payload);
    if (!match)
        return [null, url];

    const result = extract_signed(...match.slice(1, 3));
    if (result.fail)
        return [null, url];

    const original_url = base_url + anchor;
    const policy = result.time < min_time ? null :
          JSON.parse(decodeURIComponent(result.data));

    /*
     * `policy' is null for an expired signature. It must be checked before
     * its `url' property gets accessed, otherwise we'd throw here instead of
     * letting the caller fall back to the default policy.
     */
    const valid = policy !== null && policy.url === original_url;

    return [valid ? policy : null, original_url];
}

/*
 * Callback used for non-HTTP(S) pages once the policy is known. If the policy
 * allows scripts, sign it, put it in the URL fragment (before the page's own
 * anchor, if any) and reload, so that the reloaded page picks the policy up
 * through extract_url_policy(). Does nothing when scripts are not allowed.
 */
function employ_nonhttp_policy(policy)
{
    if (!policy.allow)
        return;

    policy.nonce = gen_nonce();
    const [base_url, target] = /^([^#]*)(#?.*)$/.exec(policy.url).slice(1, 3);
    const encoded_policy = encodeURIComponent(JSON.stringify(policy));
    const payload = "haketilo_" +
          sign_data(encoded_policy, new Date().getTime()).join("_");
    const resulting_url = `${base_url}#${payload}${target}`;
    location.href = resulting_url;
    location.reload();
}

/*
 * In the case of HTML documents:
 * 1. When injecting some payload we need to sanitize <meta> CSP tags before
 *    they reach the document.
 * 2. Only <meta> tags inside <head> are considered valid by the browser and
 *    need to be considered.
 * 3. We want to detach <html> from document, wait until its <head> completes
 *    loading, sanitize it and re-attach <html>.
 * 4. We shall wait for anything to appear in or after <body> and take that as
 *    a sign <head> has finished loading.
 * 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also
 *    be a sign that <head> is fully loaded.
 */

/*
 * Create and start a MutationObserver that re-runs try_body_started() each time
 * a direct child of `DOM_element' is added or removed.
 */
function make_body_start_observer(DOM_element, waiting)
{
    const observer = new MutationObserver(() => try_body_started(waiting));
    observer.observe(DOM_element, {childList: true});
    return observer;
}

/*
 * Check whether something has already appeared in or after <body> of the
 * detached <html> (or after the document's current root element). If so,
 * finish waiting and return true. Otherwise return false, having started
 * observing <body> if it exists and is not being observed yet.
 */
function try_body_started(waiting)
{
    const body = waiting.detached_html.querySelector("body");

    if ((body && (body.firstChild || body.nextSibling)) ||
        waiting.doc.documentElement.nextSibling) {
        finish_waiting(waiting);
        return true;
    }

    /* At most 2 observers are used: one on <html> and one on <body>. */
    if (body && waiting.observers.length < 2)
        waiting.observers.push(make_body_start_observer(body, waiting));

    return false;
}

/*
 * Stop all observers and schedule the callback. Safe to call multiple times -
 * only the first call has any effect.
 */
function finish_waiting(waiting)
{
    if (waiting.finished)
        return;

    waiting.finished = true;
    waiting.observers.forEach(observer => observer.disconnect());
    setTimeout(waiting.callback, 0);
}

/*
 * Callback-based implementation of wait_for_head(). `callback' gets called
 * (asynchronously, with no meaningful arguments) once <head> of
 * `detached_html' is considered fully loaded.
 */
function _wait_for_head(doc, detached_html, callback)
{
    const waiting = {doc, detached_html, callback, observers: []};

    if (try_body_started(waiting))
        return;

    /*
     * Assigning (instead of pushing) discards a <body> observer possibly
     * registered by the call above without disconnecting it; this is the
     * original behavior and is harmless because try_body_started() will
     * register <body> observer anew when needed.
     */
    waiting.observers = [make_body_start_observer(detached_html, waiting)];

    wait_loaded(doc).then(() => finish_waiting(waiting));
}

/*
 * Return a promise that resolves once <head> of `detached_html' (the original
 * root element temporarily detached from `doc') is considered fully loaded.
 */
function wait_for_head(doc, detached_html)
{
    return new Promise(cb => _wait_for_head(doc, detached_html, cb));
}

const blocked_str = "blocked";

/*
 * Disable attribute `attr' (optionally namespaced with `ns') of `node' by
 * moving its value to attribute `blocked-<attr>'. If `blocked-<attr>' is
 * already present, its value gets moved to `blocked-blocked-<attr>' first and
 * so on, so that no data is lost.
 */
function block_attribute(node, attr, ns=null)
{
    const [hasa, geta, seta, rema] = ["has", "get", "set", "remove"]
          .map(m => (n, ...args) => typeof ns === "string" ?
               n[`${m}AttributeNS`](ns, ...args) : n[`${m}Attribute`](...args));

    /*
     * Disabling attributes by prepending `blocked-' allows them to still be
     * relatively easily accessed in case they contain some useful data.
     */

    /*
     * Find the first free name in the chain `attr', `blocked-attr',
     * `blocked-blocked-attr', ... Name parts have to be joined with "-" to
     * match the names actually written below.
     */
    const construct_name = [attr];
    while (hasa(node, construct_name.join("-")))
        construct_name.unshift(blocked_str);

    /* Shift values one step along the chain, starting from the far end. */
    while (construct_name.length > 1) {
        construct_name.shift();
        const name = construct_name.join("-");
        seta(node, `${blocked_str}-${name}`, geta(node, name));
    }

    rema(node, attr);
}

/*
 * Used to disable `<script>'s and `<meta>'s that have not yet been added to
 * live DOM (doesn't work for those already added).
 */

/* Block the `content' of a `<meta>' tag that carries a CSP header. */
function sanitize_meta(meta)
{
    if (csp_header_regex.test(meta.httpEquiv) && meta.content)
        block_attribute(meta, "content");
}

/*
 * Temporarily change the type of a `<script>' to a non-executable one. The
 * original value of `type' attribute (null if absent) is remembered on the
 * element for desanitize_script().
 */
function sanitize_script(script)
{
    script.haketilo_blocked_type = script.getAttribute("type");
    script.type = "text/plain";
}

/*
 * Executed after `<script>' has been connected to the DOM, when it is no longer
 * eligible for being executed by the browser. Restores the `type' attribute
 * to the state from before sanitize_script().
 */
function desanitize_script(script)
{
    const original_type = script.haketilo_blocked_type;

    if ([null, undefined].includes(original_type))
        script.removeAttribute("type");
    else
        script.setAttribute("type", original_type);

    delete script.haketilo_blocked_type;
}

/* `data:' URLs with a markup-like (`*ml') or unknown content type. */
const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)/i;

/*
 * Block those of `href', `src' and `data' attributes of `element' that hold
 * a URL matching `bad_url_reg'. Nodes without attributes (e.g. text nodes) are
 * accepted and ignored.
 */
function sanitize_urls(element)
{
    for (const attr of [...element.attributes || []]
               .filter(attr => /^(href|src|data)$/i.test(attr.localName))
               .filter(attr => bad_url_reg.test(attr.value)))
        block_attribute(element, attr.localName, attr.namespaceURI);
}

/*
 * Sanitize URLs in elements already present in `doc' and, if the document is
 * still loading, also in nodes that get added until it finishes loading.
 */
function start_data_urls_sanitizing(doc)
{
    doc.querySelectorAll("*[href], *[src], *[data]").forEach(sanitize_urls);

    if (!doc.content_loaded) {
        const mutation_handler = m => m.addedNodes.forEach(sanitize_urls);
        const mo = new MutationObserver(ms => ms.forEach(mutation_handler));
        mo.observe(doc, {childList: true, subtree: true});
        wait_loaded(doc).then(() => mo.disconnect());
    }
}

/*
 * Normally, we block scripts with CSP. However, Mozilla does optimizations that
 * cause part of the DOM to be loaded when our content scripts get to run. Thus,
 * before the CSP rules we inject (for non-HTTP pages) become effective, we need
 * to somehow block the execution of `<script>'s and intrinsics that were
 * already there. Additionally, some browsers (IceCat 60) seem to have problems
 * applying this CSP to non-inline `<script>'s in certain scenarios.
 */

/*
 * `beforescriptexecute' handler. Prevents execution of every script that is
 * not marked with a truthy `haketilo_payload' property.
 */
function prevent_script_execution(event)
{
    if (!event.target.haketilo_payload)
        event.preventDefault();
}

/*
 * Block scripts and clear intrinsic event handlers (on* attributes) of
 * elements that are already present in `doc'.
 */
function mozilla_initial_block(doc)
{
    doc.addEventListener("beforescriptexecute", prevent_script_execution);

    for (const elem of doc.querySelectorAll("*")) {
        [...elem.attributes].map(attr => attr.localName)
            .filter(attr => /^on/.test(attr) && elem.wrappedJSObject[attr])
            .forEach(attr => elem.wrappedJSObject[attr] = null);
    }
}

/*
 * Here we block all scripts of a document which might be either an
 * HTMLDocument or an XMLDocument. Modifying an XML document might disrupt
 * Mozilla's XML preview. This is an unfortunate thing we have to accept for
 * now. XML documents *have to* be sanitized as well because they might
 * contain `<script>' tags (or on* attributes) with namespace declared as
 * "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows
 * javascript execution.
 *
 * Returns a promise that resolves when the sanitizing is done.
 */
async function sanitize_document(doc, policy)
{
    /*
     * Blocking of scripts that are in the DOM from the beginning. Needed for
     * Mozilla.
     */
    if (is_mozilla)
        mozilla_initial_block(doc);

    /*
     * Ensure our CSP rules are employed from the beginning. This CSP injection
     * method is, when possible, going to be applied together with CSP rules
     * injected using webRequest.
     * Using elements namespaced as HTML makes this CSP injection also work for
     * non-HTML documents.
     */
    const html = new DOMParser().parseFromString(`<html><head><meta \
http-equiv="Content-Security-Policy" content="${make_csp_rule(policy)}"\
/></head><body>Loading...</body></html>`, "text/html").documentElement;

    /*
     * Root node gets hijacked now, to be re-attached after <head> is loaded
     * and sanitized.
     */
    const root = doc.documentElement;
    root.replaceWith(html);

    /*
     * When we don't inject payload, we neither block document's CSP `<meta>'
     * tags nor wait for `<head>' to be parsed.
     */
    if (policy.has_payload) {
        await wait_for_head(doc, root);

        root.querySelectorAll("head meta").forEach(m => sanitize_meta(m));
    }

    /*
     * Scripts are made non-executable for the moment of re-attaching the
     * original root and get their `type' restored right afterwards.
     */
    root.querySelectorAll("script").forEach(s => sanitize_script(s));
    html.replaceWith(root);
    root.querySelectorAll("script").forEach(s => desanitize_script(s));

    start_data_urls_sanitizing(doc);
}

/*
 * Unregister all Service Workers registered for this page and reload it if
 * there were any. When a reload is triggered, the returned promise never
 * resolves.
 */
async function disable_service_workers()
{
    if (!navigator.serviceWorker)
        return;

    const registrations = await navigator.serviceWorker.getRegistrations();
    if (registrations.length === 0)
        return;

    console.warn("Service Workers detected on this page! Unregistering and reloading");

    /* Reload even if unregistering of some worker failed. */
    try {
        await Promise.all(registrations.map(r => r.unregister()));
    } finally {
        location.reload();
    }

    /* Never actually return! */
    return new Promise(() => 0);
}

if (!is_privileged_url(document.URL)) {
    let policy_received_callback = () => undefined;
    let policy;

    /* Signature valid for half an hour. */
    const min_time = new Date().getTime() - 1800 * 1000;

    if (/^https?:/.test(document.URL)) {
        /* For HTTP(S) pages the policy is passed to us in a cookie. */
        let signatures;
        [policy, signatures] = extract_cookie_policy(document.cookie, min_time);

        /* Remove our cookies so that the page doesn't get to see them. */
        for (const signature of signatures)
            document.cookie = `haketilo-${signature}=; Max-Age=-1;`;
    } else {
        /* For other pages the policy is passed in the URL fragment. */
        const scheme = /^([^:]*)/.exec(document.URL)[1];
        const known_scheme = ["file", "ftp"].includes(scheme);

        if (!known_scheme)
            console.warn(`Unknown url scheme: \`${scheme}'!`);

        let original_url;
        [policy, original_url] = extract_url_policy(document.URL, min_time);

        /* Hide our payload from the address bar and from the page. */
        history.replaceState(null, "", original_url);

        if (known_scheme && !policy)
            policy_received_callback = employ_nonhttp_policy;
    }

    if (!policy) {
        console.debug("Using fallback policy!");
        policy = {allow: false, nonce: gen_nonce()};
    }

    if (!(document instanceof HTMLDocument))
        policy.has_payload = false;

    console.debug("current policy", policy);

    const doc_ready = Promise.all([
        policy.allow ? Promise.resolve() : sanitize_document(document, policy),
        policy.allow ? Promise.resolve() : disable_service_workers(),
        wait_loaded(document)
    ]);

    handle_page_actions(policy.nonce, policy_received_callback, doc_ready);

    start_activity_info_server();
}