aboutsummaryrefslogtreecommitdiff
path: root/content/main.js
diff options
context:
space:
mode:
Diffstat (limited to 'content/main.js')
-rw-r--r-- content/main.js 339
1 files changed, 264 insertions, 75 deletions
diff --git a/content/main.js b/content/main.js
index 9ed557c..ce1ff7a 100644
--- a/content/main.js
+++ b/content/main.js
@@ -1,5 +1,7 @@
/**
- * Hachette main content script run in all frames
+ * This file is part of Haketilo.
+ *
+ * Function: Main content script that runs in all frames.
*
* Copyright (C) 2021 Wojtek Kosior
* Copyright (C) 2021 jahoti
@@ -9,123 +11,310 @@
/*
* IMPORTS_START
* IMPORT handle_page_actions
- * IMPORT url_extract_target
- * IMPORT gen_unique
* IMPORT gen_nonce
- * IMPORT csp_rule
* IMPORT is_privileged_url
- * IMPORT sanitize_attributes
- * IMPORT mozilla_suppress_scripts
+ * IMPORT browser
* IMPORT is_chrome
* IMPORT is_mozilla
* IMPORT start_activity_info_server
+ * IMPORT make_csp_rule
+ * IMPORT csp_header_regex
+ * IMPORT report_settings
* IMPORTS_END
*/
+document.content_loaded = document.readyState === "complete";
+const wait_loaded = e => e.content_loaded ? Promise.resolve() :
+ new Promise(c => e.addEventListener("DOMContentLoaded", c, {once: true}));
+
+wait_loaded(document).then(() => document.content_loaded = true);
+
/*
- * Due to some technical limitations the chosen method of whitelisting sites
- * is to smuggle whitelist indicator in page's url as a "magical" string
- * after '#'. Right now this is only supplemental in HTTP(s) pages where
- * blocking of native scripts also happens through CSP header injection but is
- * necessary for protocols like ftp:// and file://.
- *
- * The code that actually injects the magical string into ftp:// and file://
- * urls has not yet been added to the extension.
+ * In the case of HTML documents:
+ * 1. When injecting some payload we need to sanitize <meta> CSP tags before
+ * they reach the document.
+ * 2. Only <meta> tags inside <head> are considered valid by the browser and
+ * need to be considered.
+ * 3. We want to detach <html> from document, wait until its <head> completes
+ * loading, sanitize it and re-attach <html>.
+ * 4. We shall wait for anything to appear in or after <body> and take that as
+ * a sign <head> has finished loading.
+ * 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also
+ * be a sign that <head> is fully loaded.
*/
-var nonce = undefined;
+function make_body_start_observer(DOM_element, waiting)
+{
+ const observer = new MutationObserver(() => try_body_started(waiting));
+ observer.observe(DOM_element, {childList: true});
+ return observer;
+}
-function handle_mutation(mutations, observer)
+function try_body_started(waiting)
{
- if (document.readyState === 'complete') {
- console.log("mutation handling complete");
- observer.disconnect();
- return;
- }
- for (const mutation of mutations) {
- for (const node of mutation.addedNodes)
- block_node(node);
+ const body = waiting.detached_html.querySelector("body");
+
+ if ((body && (body.firstChild || body.nextSibling)) ||
+ waiting.doc.documentElement.nextSibling) {
+ finish_waiting(waiting);
+ return true;
}
+
+ if (body && waiting.observers.length < 2)
+ waiting.observers.push(make_body_start_observer(body, waiting));
}
-function block_nodes_recursively(node)
+function finish_waiting(waiting)
{
- block_node(node);
- for (const child of node.children)
- block_nodes_recursively(child);
+ if (waiting.finished)
+ return;
+ waiting.finished = true;
+ waiting.observers.forEach(observer => observer.disconnect());
+ setTimeout(waiting.callback, 0);
}
-function block_node(node)
+function _wait_for_head(doc, detached_html, callback)
{
+ const waiting = {doc, detached_html, callback, observers: []};
+
+ if (try_body_started(waiting))
+ return;
+
+ waiting.observers = [make_body_start_observer(detached_html, waiting)];
+
+ wait_loaded(doc).then(() => finish_waiting(waiting));
+}
+
+function wait_for_head(doc, detached_html)
+{
+ return new Promise(cb => _wait_for_head(doc, detached_html, cb));
+}
+
+const blocked_str = "blocked";
+
+function block_attribute(node, attr, ns=null)
+{
+ const [hasa, geta, seta, rema] = ["has", "get", "set", "remove"]
+ .map(m => (n, ...args) => typeof ns === "string" ?
+ n[`${m}AttributeNS`](ns, ...args) : n[`${m}Attribute`](...args));
/*
- * Modifying <script> element doesn't always prevent its execution in some
- * Mozilla browsers. This is Chromium-specific code.
+ * Disabling attributes by prepending `blocked-' allows them to still be
+ * relatively easily accessed in case they contain some useful data.
*/
- if (node.tagName === "SCRIPT") {
- block_script(node);
- return;
+ const construct_name = [attr];
+ while (hasa(node, construct_name.join("")))
+ construct_name.unshift(blocked_str);
+
+ while (construct_name.length > 1) {
+ construct_name.shift();
+ const name = construct_name.join("");
+ seta(node, `${blocked_str}-${name}`, geta(node, name));
+ }
+
+ rema(node, attr);
+}
+
+/*
+ * Used to disable `<script>'s and `<meta>'s that have not yet been added to
+ * live DOM (doesn't work for those already added).
+ */
+function sanitize_meta(meta)
+{
+ if (csp_header_regex.test(meta.httpEquiv) && meta.content)
+ block_attribute(meta, "content");
+}
+
+function sanitize_script(script)
+{
+ script.haketilo_blocked_type = script.getAttribute("type");
+ script.type = "text/plain";
+}
+
+/*
+ * Executed after `<script>' has been connected to the DOM, when it is no longer
+ * eligible for being executed by the browser.
+ */
+function desanitize_script(script)
+{
+ script.setAttribute("type", script.haketilo_blocked_type);
+
+ if ([null, undefined].includes(script.haketilo_blocked_type))
+ script.removeAttribute("type");
+
+ delete script.haketilo_blocked_type;
+}
+
+const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)/i;
+function sanitize_urls(element)
+{
+ for (const attr of [...element.attributes || []]
+ .filter(attr => /^(href|src|data)$/i.test(attr.localName))
+ .filter(attr => bad_url_reg.test(attr.value)))
+ block_attribute(element, attr.localName, attr.namespaceURI);
+}
+
+function start_data_urls_sanitizing(doc)
+{
+ doc.querySelectorAll("*[href], *[src], *[data]").forEach(sanitize_urls);
+ if (!doc.content_loaded) {
+ const mutation_handler = m => m.addedNodes.forEach(sanitize_urls);
+ const mo = new MutationObserver(ms => ms.forEach(mutation_handler));
+ mo.observe(doc, {childList: true, subtree: true});
+ wait_loaded(doc).then(() => mo.disconnect());
}
+}
+
+/*
+ * Normally, we block scripts with CSP. However, Mozilla does optimizations that
+ * cause part of the DOM to be loaded when our content scripts get to run. Thus,
+ * before the CSP rules we inject (for non-HTTP pages) become effective, we need
+ * to somehow block the execution of `<script>'s and intrinsics that were
+ * already there. Additionally, some browsers (IceCat 60) seem to have problems
+ * applying this CSP to non-inline `<script>'s in certain scenarios.
+ */
+function prevent_script_execution(event)
+{
+ if (!event.target.haketilo_payload)
+ event.preventDefault();
+}
- sanitize_attributes(node);
+function mozilla_initial_block(doc)
+{
+ doc.addEventListener("beforescriptexecute", prevent_script_execution);
- if (node.tagName === "HEAD")
- inject_csp(node);
+ for (const elem of doc.querySelectorAll("*")) {
+ [...elem.attributes].map(attr => attr.localName)
+ .filter(attr => /^on/.test(attr) && elem.wrappedJSObject[attr])
+ .forEach(attr => elem.wrappedJSObject[attr] = null);
+ }
}
-function block_script(node)
+/*
+ * Here we block all scripts of a document which might be either an
+ * HTMLDocument or an XMLDocument. Modifying an XML document might disrupt
+ * Mozilla's XML preview. This is an unfortunate thing we have to accept for
+ * now. XML documents *have to* be sanitized as well because they might
+ * contain `<script>' tags (or on* attributes) with namespace declared as
+ * "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows
+ * javascript execution.
+ */
+async function sanitize_document(doc, policy)
{
/*
- * Disabling scripts this way allows them to still be relatively
- * easily accessed in case they contain some useful data.
+ * Blocking of scripts that are in the DOM from the beginning. Needed for
+ * Mozilla.
*/
- if (node.hasAttribute("type"))
- node.setAttribute("blocked-type", node.getAttribute("type"));
- node.setAttribute("type", "application/json");
+ if (is_mozilla)
+ mozilla_initial_block(doc);
+
+ /*
+ * Ensure our CSP rules are employed from the beginning. This CSP injection
+ * method is, when possible, going to be applied together with CSP rules
+ * injected using webRequest.
+ * Using elements namespaced as HTML makes this CSP injection also work for
+ * non-HTML documents.
+ */
+ const html = new DOMParser().parseFromString(`<html><head><meta \
+http-equiv="Content-Security-Policy" content="${make_csp_rule(policy)}"\
+/></head><body>Loading...</body></html>`, "text/html").documentElement;
+
+ /*
+ * Root node gets hijacked now, to be re-attached after <head> is loaded
+ * and sanitized.
+ */
+ const root = doc.documentElement;
+ root.replaceWith(html);
+
+ /*
+ * When we don't inject payload, we neither block document's CSP `<meta>'
+ * tags nor wait for `<head>' to be parsed.
+ */
+ if (policy.has_payload) {
+ await wait_for_head(doc, root);
+
+ root.querySelectorAll("head meta")
+ .forEach(m => sanitize_meta(m, policy));
+ }
+
+ root.querySelectorAll("script").forEach(s => sanitize_script(s, policy));
+ html.replaceWith(root);
+ root.querySelectorAll("script").forEach(s => desanitize_script(s, policy));
+
+ start_data_urls_sanitizing(doc);
}
-function inject_csp(head)
+async function _disable_service_workers()
{
- console.log('injecting CSP');
+ if (!navigator.serviceWorker)
+ return;
- let meta = document.createElement("meta");
- meta.setAttribute("http-equiv", "Content-Security-Policy");
- meta.setAttribute("content", csp_rule(nonce));
+ const registrations = await navigator.serviceWorker.getRegistrations();
+ if (registrations.length === 0)
+ return;
+
+ console.warn("Service Workers detected on this page! Unregistering and reloading.");
- if (head.firstElementChild === null)
- head.appendChild(meta);
- else
- head.insertBefore(meta, head.firstElementChild);
+ try {
+ await Promise.all(registrations.map(r => r.unregister()));
+ } finally {
+ location.reload();
+ }
+
+ /* Never actually return! */
+ return new Promise(() => 0);
}
-if (!is_privileged_url(document.URL)) {
- const targets = url_extract_target(document.URL);
- if (targets.policy) {
- if (targets.target2)
- window.location.href = targets.base_url + targets.target2;
- else
- history.replaceState(null, "", targets.base_url);
+/*
+ * Trying to use service worker APIs might result in exceptions, for example
+ * when in a non-HTML document. Because of this, we wrap the function that does
+ * the actual work in a try {} block.
+ */
+async function disable_service_workers()
+{
+ try {
+ await _disable_service_workers()
+ } catch (e) {
+ console.debug("Exception thrown during an attempt to detect and disable service workers.", e);
}
+}
- const policy = targets.current ? targets.policy : {};
+function synchronously_get_policy(url)
+{
+ const encoded_url = encodeURIComponent(url);
+ const request_url = `${browser.runtime.getURL("dummy")}?url=${encoded_url}`;
- nonce = policy.nonce || gen_nonce();
- handle_page_actions(nonce);
+ try {
+ var xhttp = new XMLHttpRequest();
+ xhttp.open("GET", request_url, false);
+ xhttp.send();
+ } catch(e) {
+ console.error("Failure to synchronously fetch policy for url.", e);
+ return {allow: false};
+ }
- if (!policy.allow) {
- block_nodes_recursively(document.documentElement);
+ const policy = /^[^?]*\?settings=(.*)$/.exec(xhttp.responseURL)[1];
+ return JSON.parse(decodeURIComponent(policy));
+}
- if (is_chrome) {
- var observer = new MutationObserver(handle_mutation);
- observer.observe(document.documentElement, {
- attributes: true,
- childList: true,
- subtree: true
- });
- }
+if (!is_privileged_url(document.URL)) {
+ const policy = synchronously_get_policy(document.URL);
- if (is_mozilla)
- addEventListener('beforescriptexecute', mozilla_suppress_scripts, true);
- }
+ if (!(document instanceof HTMLDocument))
+ delete policy.payload;
+
+ console.debug("current policy", policy);
+
+ report_settings(policy);
+
+ policy.nonce = gen_nonce();
+
+ const doc_ready = Promise.all([
+ policy.allow ? Promise.resolve() : sanitize_document(document, policy),
+ policy.allow ? Promise.resolve() : disable_service_workers(),
+ wait_loaded(document)
+ ]);
+
+ handle_page_actions(policy, doc_ready);
start_activity_info_server();
}