summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--background/policy_injector.js23
-rw-r--r--common/misc.js27
-rw-r--r--content/main.js99
-rw-r--r--content/sanitize_document.js209
4 files changed, 256 insertions, 102 deletions
diff --git a/background/policy_injector.js b/background/policy_injector.js
index 702f879..3398b53 100644
--- a/background/policy_injector.js
+++ b/background/policy_injector.js
@@ -18,19 +18,12 @@
* IMPORT query_best
* IMPORT sanitize_csp_header
* IMPORT csp_rule
+ * IMPORT is_csp_header_name
* IMPORTS_END
*/
var storage;
-const csp_header_names = new Set([
- "content-security-policy",
- "x-webkit-csp",
- "x-content-security-policy"
-]);
-
-const report_only = "content-security-policy-report-only";
-
function headers_inject(details)
{
const url = details.url;
@@ -40,7 +33,6 @@ function headers_inject(details)
const [pattern, settings] = query_best(storage, url);
const allow = !!(settings && settings.allow);
const nonce = gen_nonce();
- const rule = `'nonce-${nonce}'`;
let orig_csp_headers;
let old_signature;
@@ -70,20 +62,19 @@ function headers_inject(details)
}
orig_csp_headers = orig_csp_headers ||
- headers.filter(h => csp_header_names.has(h.name.toLowerCase()));
- headers = headers.filter(h => !csp_header_names.has(h.name.toLowerCase()));
+ headers.filter(h => is_csp_header_name(h.name));
- /* Remove headers that only snitch on us */
- if (!allow)
- headers = headers.filter(h => h.name.toLowerCase() !== report_only);
+ /* When blocking remove report-only CSP headers that snitch on us. */
+ headers = headers.filter(h => !is_csp_header_name(h.name, !allow));
if (old_signature)
headers = headers.filter(h => h.name.search(old_signature) === -1);
- const sanitizer = h => sanitize_csp_header(h, rule, allow);
+ const policy_object = {allow, nonce, url};
+ const sanitizer = h => sanitize_csp_header(h, policy_object);
headers.push(...orig_csp_headers.map(sanitizer));
- const policy = encodeURIComponent(JSON.stringify({allow, nonce, url}));
+ const policy = encodeURIComponent(JSON.stringify(policy_object));
const policy_signature = sign_data(policy, new Date());
const later_30sec = new Date(new Date().getTime() + 30000).toGMTString();
headers.push({
diff --git a/common/misc.js b/common/misc.js
index 6e825d6..8894d60 100644
--- a/common/misc.js
+++ b/common/misc.js
@@ -78,6 +78,23 @@ function csp_rule(nonce)
return `script-src ${rule}; script-src-elem ${rule}; script-src-attr 'none'; prefetch-src 'none';`;
}
+/* Check if some HTTP header might define CSP rules. */
+const csp_header_names = new Set([
+ "content-security-policy",
+ "x-webkit-csp",
+ "x-content-security-policy"
+]);
+
+const report_only_header_name = "content-security-policy-report-only";
+
+function is_csp_header_name(string, include_report_only)
+{
+ string = string && string.toLowerCase() || "";
+
+ return (include_report_only && string === report_only_header_name) ||
+ csp_header_names.has(string);
+}
+
/*
* Print item together with type, e.g.
* nice_name("s", "hello") → "hello (script)"
@@ -127,11 +144,12 @@ function parse_csp(csp) {
}
/* Make CSP headers do our bidding, not interfere */
-function sanitize_csp_header(header, rule, allow)
+function sanitize_csp_header(header, policy)
{
+ const rule = `'nonce-${policy.nonce}'`;
const csp = parse_csp(header.value);
- if (!allow) {
+ if (!policy.allow) {
/* No snitching */
delete csp['report-to'];
delete csp['report-uri'];
@@ -153,11 +171,11 @@ function sanitize_csp_header(header, rule, allow)
else
csp['script-src-elem'] = [rule];
- const new_policy = Object.entries(csp).map(
+ const new_csp = Object.entries(csp).map(
i => `${i[0]} ${i[1].join(' ')};`
);
- return {name: header.name, value: new_policy.join('')};
+ return {name: header.name, value: new_csp.join('')};
}
/* Regexes and objest to use as/in schemas for parse_json_with_schema(). */
@@ -178,6 +196,7 @@ const matchers = {
* EXPORT extract_signed
* EXPORT sign_data
* EXPORT csp_rule
+ * EXPORT is_csp_header_name
* EXPORT nice_name
* EXPORT open_in_settings
* EXPORT is_privileged_url
diff --git a/content/main.js b/content/main.js
index 164ebe3..441636c 100644
--- a/content/main.js
+++ b/content/main.js
@@ -11,87 +11,24 @@
* IMPORT handle_page_actions
* IMPORT extract_signed
* IMPORT gen_nonce
- * IMPORT csp_rule
* IMPORT is_privileged_url
- * IMPORT sanitize_attributes
* IMPORT mozilla_suppress_scripts
* IMPORT is_chrome
* IMPORT is_mozilla
* IMPORT start_activity_info_server
+ * IMPORT modify_on_the_fly
* IMPORTS_END
*/
-/*
- * Due to some technical limitations the chosen method of whitelisting sites
- * is to smuggle whitelist indicator in page's url as a "magical" string
- * after '#'. Right now this is only supplemental in HTTP(s) pages where
- * blocking of native scripts also happens through CSP header injection but is
- * necessary for protocols like ftp:// and file://.
- *
- * The code that actually injects the magical string into ftp:// and file://
- * urls has not yet been added to the extension.
- */
-
-var nonce = undefined;
-
-function handle_mutation(mutations, observer)
-{
- if (document.readyState === 'complete') {
- console.log("mutation handling complete");
- observer.disconnect();
- return;
- }
- for (const mutation of mutations) {
- for (const node of mutation.addedNodes)
- block_node(node);
- }
-}
-
-function block_nodes_recursively(node)
-{
- block_node(node);
- for (const child of node.children)
- block_nodes_recursively(child);
-}
-
-function block_node(node)
+function accept_node(node, parent)
{
+ const clone = document.importNode(node, false);
+ node.hachette_corresponding = clone;
/*
- * Modifying <script> element doesn't always prevent its execution in some
- * Mozilla browsers. This is Chromium-specific code.
+ * TODO: Stop page's own issues like "Error parsing a meta element's
+ * content:" from appearing as extension's errors.
*/
- if (node.tagName === "SCRIPT") {
- block_script(node);
- return;
- }
-
- sanitize_attributes(node);
-
- if (node.tagName === "HEAD")
- inject_csp(node);
-}
-
-function block_script(node)
-{
- /*
- * Disabling scripts this way allows them to still be relatively
- * easily accessed in case they contain some useful data.
- */
- if (node.hasAttribute("type"))
- node.setAttribute("blocked-type", node.getAttribute("type"));
- node.setAttribute("type", "application/json");
-}
-
-function inject_csp(head)
-{
- let meta = document.createElement("meta");
- meta.setAttribute("http-equiv", "Content-Security-Policy");
- meta.setAttribute("content", csp_rule(nonce));
-
- if (head.firstElementChild === null)
- head.appendChild(meta);
- else
- head.insertBefore(meta, head.firstElementChild);
+ parent.hachette_corresponding.appendChild(clone);
}
if (!is_privileged_url(document.URL)) {
@@ -110,20 +47,18 @@ if (!is_privileged_url(document.URL)) {
handle_page_actions(policy.nonce);
- if (!policy.allow) {
- block_nodes_recursively(document.documentElement);
+ if (!policy.allow && is_mozilla)
+ addEventListener('beforescriptexecute', mozilla_suppress_scripts, true);
- if (is_chrome) {
- var observer = new MutationObserver(handle_mutation);
- observer.observe(document.documentElement, {
- attributes: true,
- childList: true,
- subtree: true
- });
- }
+ if (!policy.allow && is_chrome) {
+ const old_html = document.documentElement;
+ const new_html = document.createElement("html");
+ old_html.replaceWith(new_html);
+ old_html.hachette_corresponding = new_html;
- if (is_mozilla)
- addEventListener('beforescriptexecute', mozilla_suppress_scripts, true);
+ const modify_end =
+ modify_on_the_fly(old_html, policy, {node_eater: accept_node});
+ document.addEventListener("DOMContentLoaded", modify_end);
}
start_activity_info_server();
diff --git a/content/sanitize_document.js b/content/sanitize_document.js
new file mode 100644
index 0000000..1533526
--- /dev/null
+++ b/content/sanitize_document.js
@@ -0,0 +1,209 @@
+/**
+ * Hachette modify HTML document as it loads and reconstruct HTML code from it
+ *
+ * Copyright (C) 2021 Wojtek Kosior
+ * Redistribution terms are gathered in the `copyright' file.
+ */
+
+/*
+ * IMPORTS_START
+ * IMPORT gen_nonce
+ * IMPORT csp_rule
+ * IMPORT is_csp_header_name
+ * IMPORT sanitize_csp_header
+ * IMPORT sanitize_attributes
+ * IMPORTS_END
+ */
+
+/*
+ * Functions that sanitize elements. The script blocking measures are, when
+ * possible, going to be applied together with CSP rules injected using
+ * webRequest.
+ */
+
+const blocked = "blocked";
+
+function block_attribute(node, attr)
+{
+ /*
+ * Disabling attributes this way allows them to still be relatively
+ * easily accessed in case they contain some useful data.
+ */
+
+ const construct_name = [attr];
+ while (node.hasAttribute(construct_name.join("")))
+ construct_name.unshift(blocked);
+
+ while (construct_name.length > 1) {
+ construct_name.shift();
+ const name = construct_name.join("");
+ node.setAttribute(`${blocked}-${name}`, node.getAttribute(name));
+ }
+
+ node.removeAttribute(attr);
+}
+
+function sanitize_script(script, policy)
+{
+ if (policy.allow)
+ return;
+
+ block_attribute(script, "type");
+ script.setAttribute("type", "application/json");
+}
+
+function inject_csp(head, policy)
+{
+ if (policy.allow)
+ return;
+
+ const meta = document.createElement("meta");
+ meta.setAttribute("http-equiv", "Content-Security-Policy");
+ meta.setAttribute("content", csp_rule(policy.nonce));
+ meta.hachette_ignore = true;
+ head.prepend(meta);
+}
+
+function sanitize_http_equiv_csp_rule(meta, policy)
+{
+ const http_equiv = meta.getAttribute("http-equiv");
+
+ if (!is_csp_header_name(http_equiv, !policy.allow))
+ return;
+
+ if (policy.allow || is_csp_header_name(http_equiv, false)) {
+ let value = meta.getAttribute("content");
+ block_attribute(meta, "content");
+ if (value) {
+ value = sanitize_csp_header({value}, policy).value;
+ meta.setAttribute("content", value);
+ }
+ return;
+ }
+
+ block_attribute(meta, "http-equiv");
+}
+
+function sanitize_node(node, policy)
+{
+ if (node.tagName === "SCRIPT")
+ sanitize_script(node, policy);
+
+ if (node.tagName === "HEAD")
+ inject_csp(node, policy);
+
+ if (node.tagName === "META")
+ sanitize_http_equiv_csp_rule(node, policy);
+
+ if (!policy.allow)
+ sanitize_attributes(node, policy);
+}
+
+const serializer = new XMLSerializer();
+
+function start_node(node, data)
+{
+ if (!data.writer)
+ return;
+
+ node.hachette_started = true;
+ const clone = node.cloneNode(false);
+ clone.textContent = data.uniq;
+ data.writer(data.uniq_reg.exec(clone.outerHTML)[1]);
+}
+
+function finish_node(node, data)
+{
+ const nodes_to_process = [node];
+
+ while (true) {
+ node = nodes_to_process.pop();
+ if (!node)
+ break;
+
+ nodes_to_process.push(node, node.hachette_last_added);
+ }
+
+ while (nodes_to_process.length > 0) {
+ const node = nodes_to_process.pop();
+ node.remove();
+
+ if (!data.writer)
+ continue;
+
+ if (node.hachette_started) {
+ node.textContent = data.uniq;
+ data.writer(data.uniq_reg.exec(node.outerHTML)[2]);
+ continue;
+ }
+
+ data.writer(node.outerHTML || serializer.serializeToString(node));
+ }
+}
+
+/*
+ * Important! Due to some weirdness node.parentElement is not always correct
+ * under Chromium. Track node relations manually.
+ */
+function handle_added_node(node, true_parent, data)
+{
+ if (node.hachette_ignore || true_parent.hachette_ignore)
+ return;
+
+ if (!true_parent.hachette_started)
+ start_node(true_parent, data)
+
+ sanitize_node(node, data.policy);
+
+ if (data.node_eater)
+ data.node_eater(node, true_parent);
+
+ finish_node(true_parent.hachette_last_added, data);
+
+ true_parent.hachette_last_added = node;
+}
+
+function handle_mutation(mutations, data)
+{
+ /*
+ * Chromium: for an unknown reason mutation.target is not always the same as
+ * node.parentElement. The former is the correct one.
+ */
+ for (const mutation of mutations) {
+ for (const node of mutation.addedNodes)
+ handle_added_node(node, mutation.target, data);
+ }
+}
+
+function finish_processing(data)
+{
+ handle_mutation(data.observer.takeRecords(), data);
+ finish_node(data.html_element, data);
+ data.observer.disconnect();
+}
+
+function modify_on_the_fly(html_element, policy, consumers)
+{
+ const uniq = gen_nonce();
+ const uniq_reg = new RegExp(`^(.*)${uniq}(.*)$`);
+ const data = {policy, html_element, uniq, uniq_reg, ...consumers};
+
+ start_node(data.html_element, data);
+
+ var observer = new MutationObserver(m => handle_mutation(m, data));
+ observer.observe(data.html_element, {
+ attributes: true,
+ childList: true,
+ subtree: true
+ });
+
+ data.observer = observer;
+
+ return () => finish_processing(data);
+}
+
+/*
+ * EXPORTS_START
+ * EXPORT modify_on_the_fly
+ * EXPORTS_END
+ */