aboutsummaryrefslogtreecommitdiff
path: root/content
diff options
context:
space:
mode:
author: jahoti <jahoti@tilde.team> 2021-09-21 00:00:00 +0000
committer: jahoti <jahoti@tilde.team> 2021-09-21 00:00:00 +0000
commit: 59fb32a341d42c685b5167c3d8b4d7b87c49fd18 (patch)
tree: 17143cd40a59eb06b4e698d6fd9ca9d02abaf2b9 /content
parent: b1444d9c9ea065d7c97d5809c3ec5259cb01a1da (diff)
parent: 960363e7dd98a724246320e49c3fbaff9d68d1bd (diff)
download: browser-extension-59fb32a341d42c685b5167c3d8b4d7b87c49fd18.tar.gz
download: browser-extension-59fb32a341d42c685b5167c3d8b4d7b87c49fd18.zip
Merge branch 'master' into jahoti-update
Diffstat (limited to 'content')
-rw-r--r-- content/activity_info_server.js 13
-rw-r--r-- content/freezer.js 64
-rw-r--r-- content/main.js 228
-rw-r--r-- content/page_actions.js 12
-rw-r--r-- content/repo_query.js 5
5 files changed, 173 insertions, 149 deletions
diff --git a/content/activity_info_server.js b/content/activity_info_server.js
index beecb1a..d1dfe36 100644
--- a/content/activity_info_server.js
+++ b/content/activity_info_server.js
@@ -1,7 +1,8 @@
/**
- * part of Hachette
- * Informing about activities performed by content script (script injection,
- * script blocking).
+ * This file is part of Haketilo.
+ *
+ * Function: Informing the popup about what happens in the content script
+ * (script injection, script blocking, etc.).
*
* Copyright (C) 2021 Wojtek Kosior
* Redistribution terms are gathered in the `copyright' file.
@@ -44,9 +45,9 @@ function report_settings(settings)
report_activity("settings", settings);
}
-function report_content_type(content_type)
+function report_document_type(is_html)
{
- report_activity("content_type", content_type);
+ report_activity("is_html", is_html);
}
function report_repo_query_action(update, port)
@@ -96,6 +97,6 @@ function start_activity_info_server()
* EXPORT start_activity_info_server
* EXPORT report_script
* EXPORT report_settings
- * EXPORT report_content_type
+ * EXPORT report_document_type
* EXPORTS_END
*/
diff --git a/content/freezer.js b/content/freezer.js
deleted file mode 100644
index 0ea362e..0000000
--- a/content/freezer.js
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Helper functions for blocking scripts in pages, based off NoScript's lib/DocumentFreezer.js
- *
- * Copyright (C) 2005-2021 Giorgio Maone - https://maone.net
- * Copyright (C) 2021 jahoti
- * Redistribution terms are gathered in the `copyright' file.
- */
-
-const loaderAttributes = ["href", "src", "data"];
-const jsOrDataUrlRx = /^(?:data:(?:[^,;]*ml|unknown-content-type)|javascript:)/i;
-
-function sanitize_attributes(element) {
- if (element._frozen)
- return;
- let fa = [];
- let loaders = [];
- let attributes = element.attributes || [];
-
- for (let a of attributes) {
- let name = a.localName.toLowerCase();
- if (loaderAttributes.includes(name))
- if (jsOrDataUrlRx.test(a.value))
- loaders.push(a);
-
- else if (name.startsWith("on")) {
- console.debug("Removing", a, element.outerHTML);
- fa.push(a.cloneNode());
- a.value = "";
- element[name] = null;
- }
- }
- if (loaders.length) {
- for (let a of loaders) {
- fa.push(a.cloneNode());
- a.value = "javascript://frozen";
- }
- if ("contentWindow" in element)
- element.replaceWith(element = element.cloneNode(true));
-
- }
- if (fa.length)
- element._frozenAttributes = fa;
- element._frozen = true;
-}
-
-function mozilla_suppress_scripts(e) {
- if (document.readyState === 'complete') {
- removeEventListener('beforescriptexecute', blockExecute, true);
- console.log('Script suppressor has detached.');
- return;
- }
- console.log("script event", e);
- if (e.isTrusted && !e.target._hachette_payload) {
- e.preventDefault();
- console.log('Suppressed script', e.target);
- }
-};
-
-/*
- * EXPORTS_START
- * EXPORT mozilla_suppress_scripts
- * EXPORT sanitize_attributes
- * EXPORTS_END
- */
diff --git a/content/main.js b/content/main.js
index 3ebf093..cec9943 100644
--- a/content/main.js
+++ b/content/main.js
@@ -1,5 +1,7 @@
/**
- * Hachette main content script run in all frames
+ * This file is part of Haketilo.
+ *
+ * Function: Main content script that runs in all frames.
*
* Copyright (C) 2021 Wojtek Kosior
* Copyright (C) 2021 jahoti
@@ -13,23 +15,27 @@
* IMPORT sign_data
* IMPORT gen_nonce
* IMPORT is_privileged_url
- * IMPORT mozilla_suppress_scripts
* IMPORT is_chrome
* IMPORT is_mozilla
* IMPORT start_activity_info_server
* IMPORT make_csp_rule
- * IMPORT is_csp_header_name
- * IMPORT sanitize_csp_header
+ * IMPORT csp_header_regex
* IMPORTS_END
*/
+document.content_loaded = document.readyState === "complete";
+const wait_loaded = e => e.content_loaded ? Promise.resolve() :
+ new Promise(c => e.addEventListener("DOMContentLoaded", c, {once: true}));
+
+wait_loaded(document).then(() => document.content_loaded = true);
+
function extract_cookie_policy(cookie, min_time)
{
let best_result = {time: -1};
let policy = null;
const extracted_signatures = [];
- for (const match of cookie.matchAll(/hachette-(\w*)=([^;]*)/g)) {
+ for (const match of cookie.matchAll(/haketilo-(\w*)=([^;]*)/g)) {
const new_result = extract_signed(...match.slice(1, 3));
if (new_result.fail)
continue;
@@ -56,7 +62,7 @@ function extract_url_policy(url, min_time)
const [base_url, payload, anchor] =
/^([^#]*)#?([^#]*)(#?.*)$/.exec(url).splice(1, 4);
- const match = /^hachette_([^_]+)_(.*)$/.exec(payload);
+ const match = /^haketilo_([^_]+)_(.*)$/.exec(payload);
if (!match)
return [null, url];
@@ -79,7 +85,7 @@ function employ_nonhttp_policy(policy)
policy.nonce = gen_nonce();
const [base_url, target] = /^([^#]*)(#?.*)$/.exec(policy.url).slice(1, 3);
const encoded_policy = encodeURIComponent(JSON.stringify(policy));
- const payload = "hachette_" +
+ const payload = "haketilo_" +
sign_data(encoded_policy, new Date().getTime()).join("_");
const resulting_url = `${base_url}#${payload}${target}`;
location.href = resulting_url;
@@ -87,18 +93,17 @@ function employ_nonhttp_policy(policy)
}
/*
+ * In the case of HTML documents:
* 1. When injecting some payload we need to sanitize <meta> CSP tags before
* they reach the document.
* 2. Only <meta> tags inside <head> are considered valid by the browser and
* need to be considered.
* 3. We want to detach <html> from document, wait until its <head> completes
* loading, sanitize it and re-attach <html>.
- * 4. Browsers are eager to add <meta>'s that appear after `</head>' but before
- * `<body>'. Due to this behavior the `DOMContentLoaded' event is considered
- * unreliable (although it could still work properly, it is just problematic
- * to verify).
- * 5. We shall wait for anything to appear in or after <body> and take that as
- * a sign <head> has _really_ finished loading.
+ * 4. We shall wait for anything to appear in or after <body> and take that as
+ * a sign <head> has finished loading.
+ * 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also
+ * be a sign that <head> is fully loaded.
*/
function make_body_start_observer(DOM_element, waiting)
@@ -124,20 +129,23 @@ function try_body_started(waiting)
function finish_waiting(waiting)
{
+ if (waiting.finished)
+ return;
+ waiting.finished = true;
waiting.observers.forEach(observer => observer.disconnect());
- waiting.doc.removeEventListener("DOMContentLoaded", waiting.loaded_cb);
setTimeout(waiting.callback, 0);
}
function _wait_for_head(doc, detached_html, callback)
{
const waiting = {doc, detached_html, callback, observers: []};
+
if (try_body_started(waiting))
return;
waiting.observers = [make_body_start_observer(detached_html, waiting)];
- waiting.loaded_cb = () => finish_waiting(waiting);
- doc.addEventListener("DOMContentLoaded", waiting.loaded_cb);
+
+ wait_loaded(doc).then(() => finish_waiting(waiting));
}
function wait_for_head(doc, detached_html)
@@ -147,105 +155,176 @@ function wait_for_head(doc, detached_html)
const blocked_str = "blocked";
-function block_attribute(node, attr)
+function block_attribute(node, attr, ns=null)
{
+ const [hasa, geta, seta, rema] = ["has", "get", "set", "remove"]
+ .map(m => (n, ...args) => typeof ns === "string" ?
+ n[`${m}AttributeNS`](ns, ...args) : n[`${m}Attribute`](...args));
/*
- * Disabling attributes this way allows them to still be relatively
- * easily accessed in case they contain some useful data.
+ * Disabling attributes by prepending `blocked-' allows them to still be
+ * relatively easily accessed in case they contain some useful data.
*/
const construct_name = [attr];
- while (node.hasAttribute(construct_name.join("")))
+ while (hasa(node, construct_name.join("")))
construct_name.unshift(blocked_str);
while (construct_name.length > 1) {
construct_name.shift();
const name = construct_name.join("");
- node.setAttribute(`${blocked_str}-${name}`, node.getAttribute(name));
+ seta(node, `${blocked_str}-${name}`, geta(node, name));
}
- node.removeAttribute(attr);
+ rema(node, attr);
}
-function sanitize_meta(meta, policy)
+/*
+ * Used to disable `<script>'s and `<meta>'s that have not yet been added to
+ * live DOM (doesn't work for those already added).
+ */
+function sanitize_meta(meta)
{
- const http_equiv = meta.getAttribute("http-equiv");
- const value = meta.content;
-
- if (!value || !is_csp_header_name(http_equiv, true))
- return;
-
- block_attribute(meta, "content");
+ if (csp_header_regex.test(meta.httpEquiv) && meta.content)
+ block_attribute(meta, "content");
}
function sanitize_script(script)
{
- script.hachette_blocked_type = script.type;
+ script.haketilo_blocked_type = script.getAttribute("type");
script.type = "text/plain";
}
/*
- * Executed after script has been connected to the DOM, when it is no longer
- * eligible for being executed by the browser
+ * Executed after `<script>' has been connected to the DOM, when it is no longer
+ * eligible for being executed by the browser.
*/
-function desanitize_script(script, policy)
+function desanitize_script(script)
{
- script.setAttribute("type", script.hachette_blocked_type);
+ script.setAttribute("type", script.haketilo_blocked_type);
- if (script.hachette_blocked_type === undefined)
+ if ([null, undefined].includes(script.haketilo_blocked_type))
script.removeAttribute("type");
- delete script.hachette_blocked_type;
+ delete script.haketilo_blocked_type;
+}
+
+const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)/i;
+function sanitize_urls(element)
+{
+ for (const attr of [...element.attributes || []]
+ .filter(attr => /^(href|src|data)$/i.test(attr.localName))
+ .filter(attr => bad_url_reg.test(attr.value)))
+ block_attribute(element, attr.localName, attr.namespaceURI);
+}
+
+function start_data_urls_sanitizing(doc)
+{
+ doc.querySelectorAll("*[href], *[src], *[data]").forEach(sanitize_urls);
+ if (!doc.content_loaded) {
+ const mutation_handler = m => m.addedNodes.forEach(sanitize_urls);
+ const mo = new MutationObserver(ms => ms.forEach(mutation_handler));
+ mo.observe(doc, {childList: true, subtree: true});
+ wait_loaded(doc).then(() => mo.disconnect());
+ }
+}
+
+/*
+ * Normally, we block scripts with CSP. However, Mozilla does optimizations that
+ * cause part of the DOM to be loaded when our content scripts get to run. Thus,
+ * before the CSP rules we inject (for non-HTTP pages) become effective, we need
+ * to somehow block the execution of `<script>'s and intrinsics that were
+ * already there. Additionally, some browsers (IceCat 60) seem to have problems
+ * applying this CSP to non-inline `<script>'s in certain scenarios.
+ */
+function prevent_script_execution(event)
+{
+ if (!event.target.haketilo_payload)
+ event.preventDefault();
}
-function apply_hachette_csp_rules(doc, policy)
+function mozilla_initial_block(doc)
{
- const meta = doc.createElement("meta");
- meta.setAttribute("http-equiv", "Content-Security-Policy");
- meta.setAttribute("content", make_csp_rule(policy));
- doc.head.append(meta);
- /* CSP is already in effect, we can remove the <meta> now. */
- meta.remove();
+ doc.addEventListener("beforescriptexecute", prevent_script_execution);
+
+ for (const elem of doc.querySelectorAll("*")) {
+ [...elem.attributes].map(attr => attr.localName)
+ .filter(attr => /^on/.test(attr) && elem.wrappedJSObject[attr])
+ .forEach(attr => elem.wrappedJSObject[attr] = null);
+ }
}
+/*
+ * Here we block all scripts of a document which might be either an
+ * HTMLDocument or an XMLDocument. Modifying an XML document might disrupt
+ * Mozilla's XML preview. This is an unfortunate thing we have to accept for
+ * now. XML documents *have to* be sanitized as well because they might
+ * contain `<script>' tags (or on* attributes) with namespace declared as
+ * "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows
+ * javascript execution.
+ */
async function sanitize_document(doc, policy)
{
/*
+ * Blocking of scripts that are in the DOM from the beginning. Needed for
+ * Mozilla.
+ */
+ if (is_mozilla)
+ mozilla_initial_block(doc);
+
+ /*
* Ensure our CSP rules are employed from the beginning. This CSP injection
* method is, when possible, going to be applied together with CSP rules
* injected using webRequest.
+ * Using elements namespaced as HTML makes this CSP injection also work for
+ * non-HTML documents.
*/
- const has_own_head = doc.head;
- if (!has_own_head)
- doc.documentElement.prepend(doc.createElement("head"));
-
- apply_hachette_csp_rules(doc, policy);
-
- /* Probably not needed, but...: proceed with DOM in its initial state. */
- if (!has_own_head)
- doc.head.remove();
+ const html = new DOMParser().parseFromString(`<html><head><meta \
+http-equiv="Content-Security-Policy" content="${make_csp_rule(policy)}"\
+/></head><body>Loading...</body></html>`, "text/html").documentElement;
/*
- * <html> node gets hijacked now, to be re-attached after <head> is loaded
+ * Root node gets hijacked now, to be re-attached after <head> is loaded
* and sanitized.
*/
- const old_html = doc.documentElement;
- const new_html = doc.createElement("html");
- old_html.replaceWith(new_html);
+ const root = doc.documentElement;
+ root.replaceWith(html);
- await wait_for_head(doc, old_html);
+ /*
+ * When we don't inject payload, we neither block document's CSP `<meta>'
+ * tags nor wait for `<head>' to be parsed.
+ */
+ if (policy.has_payload) {
+ await wait_for_head(doc, root);
- for (const meta of old_html.querySelectorAll("head meta"))
- sanitize_meta(meta, policy);
+ root.querySelectorAll("head meta")
+ .forEach(m => sanitize_meta(m, policy));
+ }
- if (!policy.allow)
- for (const script of old_html.querySelectorAll("script"))
- sanitize_script(script, policy);
+ root.querySelectorAll("script").forEach(s => sanitize_script(s, policy));
+ html.replaceWith(root);
+ root.querySelectorAll("script").forEach(s => desanitize_script(s, policy));
- new_html.replaceWith(old_html);
+ start_data_urls_sanitizing(doc);
+}
- if (!policy.allow)
- for (const script of old_html.querySelectorAll("script"))
- desanitize_script(script, policy);
+async function disable_service_workers()
+{
+ if (!navigator.serviceWorker)
+ return;
+
+ const registrations = await navigator.serviceWorker.getRegistrations();
+ if (registrations.length === 0)
+ return;
+
+ console.warn("Service Workers detected on this page! Unregistering and reloading");
+
+ try {
+ await Promise.all(registrations.map(r => r.unregister()));
+ } finally {
+ location.reload();
+ }
+
+ /* Never actually return! */
+ return new Promise(() => 0);
}
if (!is_privileged_url(document.URL)) {
@@ -259,7 +338,7 @@ if (!is_privileged_url(document.URL)) {
let signatures;
[policy, signatures] = extract_cookie_policy(document.cookie, min_time);
for (const signature of signatures)
- document.cookie = `hachette-${signature}=; Max-Age=-1;`;
+ document.cookie = `haketilo-${signature}=; Max-Age=-1;`;
} else {
const scheme = /^([^:]*)/.exec(document.URL)[1];
const known_scheme = ["file", "ftp"].includes(scheme);
@@ -276,14 +355,19 @@ if (!is_privileged_url(document.URL)) {
}
if (!policy) {
- console.warn("Using fallback policy!");
+ console.debug("Using fallback policy!");
policy = {allow: false, nonce: gen_nonce()};
}
+ if (!(document instanceof HTMLDocument))
+ policy.has_payload = false;
+
+ console.debug("current policy", policy);
+
const doc_ready = Promise.all([
- (policy.allow && !policy.has_payload) ? Promise.resolve : sanitize_document(document, policy),
- new Promise(cb => document.addEventListener("DOMContentLoaded",
- cb, {once: true}))
+ policy.allow ? Promise.resolve() : sanitize_document(document, policy),
+ policy.allow ? Promise.resolve() : disable_service_workers(),
+ wait_loaded(document)
]);
handle_page_actions(policy.nonce, policy_received_callback, doc_ready);
diff --git a/content/page_actions.js b/content/page_actions.js
index 8057541..db7c352 100644
--- a/content/page_actions.js
+++ b/content/page_actions.js
@@ -1,5 +1,7 @@
/**
- * Hachette handling of page actions in content scripts
+ * This file is part of Haketilo.
+ *
+ * Function: Handle page actions in a content script.
*
* Copyright (C) 2021 Wojtek Kosior
* Redistribution terms are gathered in the `copyright' file.
@@ -11,7 +13,7 @@
* IMPORT browser
* IMPORT report_script
* IMPORT report_settings
- * IMPORT report_content_type
+ * IMPORT report_document_type
* IMPORTS_END
*/
@@ -60,7 +62,7 @@ function add_script(script_text)
let script = document.createElement("script");
script.textContent = script_text;
script.setAttribute("nonce", nonce);
- script._hachette_payload = true;
+ script.haketilo_payload = true;
document.body.appendChild(script);
report_script(script_text);
@@ -70,8 +72,8 @@ function handle_page_actions(script_nonce, policy_received_cb,
doc_ready_promise) {
policy_received_callback = policy_received_cb;
url = document.URL;
- is_html = /html/.test(document.contentType);
- report_content_type(document.contentType);
+ is_html = document instanceof HTMLDocument;
+ report_document_type(is_html);
doc_ready_promise.then(document_ready);
diff --git a/content/repo_query.js b/content/repo_query.js
index 3708108..637282c 100644
--- a/content/repo_query.js
+++ b/content/repo_query.js
@@ -1,6 +1,7 @@
/**
- * part of Hachette
- * Getting available content for site from remote repositories.
+ * This file is part of Haketilo.
+ *
+ * Function: Getting available content for site from remote repositories.
*
* Copyright (C) 2021 Wojtek Kosior
* Redistribution terms are gathered in the `copyright' file.