summary refs log tree commit diff
path: root/content
diff options
context:
space:
mode:
Diffstat (limited to 'content')
-rw-r--r-- content/main.js 99
-rw-r--r-- content/sanitize_document.js 209
2 files changed, 226 insertions, 82 deletions
diff --git a/content/main.js b/content/main.js
index 164ebe3..441636c 100644
--- a/content/main.js
+++ b/content/main.js
@@ -11,87 +11,24 @@
* IMPORT handle_page_actions
* IMPORT extract_signed
* IMPORT gen_nonce
- * IMPORT csp_rule
* IMPORT is_privileged_url
- * IMPORT sanitize_attributes
* IMPORT mozilla_suppress_scripts
* IMPORT is_chrome
* IMPORT is_mozilla
* IMPORT start_activity_info_server
+ * IMPORT modify_on_the_fly
* IMPORTS_END
*/
-/*
- * Due to some technical limitations the chosen method of whitelisting sites
- * is to smuggle whitelist indicator in page's url as a "magical" string
- * after '#'. Right now this is only supplemental in HTTP(s) pages where
- * blocking of native scripts also happens through CSP header injection but is
- * necessary for protocols like ftp:// and file://.
- *
- * The code that actually injects the magical string into ftp:// and file://
- * urls has not yet been added to the extension.
- */
-
-var nonce = undefined;
-
-function handle_mutation(mutations, observer)
-{
- if (document.readyState === 'complete') {
- console.log("mutation handling complete");
- observer.disconnect();
- return;
- }
- for (const mutation of mutations) {
- for (const node of mutation.addedNodes)
- block_node(node);
- }
-}
-
-function block_nodes_recursively(node)
-{
- block_node(node);
- for (const child of node.children)
- block_nodes_recursively(child);
-}
-
-function block_node(node)
+function accept_node(node, parent)
{
+ const clone = document.importNode(node, false);
+ node.hachette_corresponding = clone;
/*
- * Modifying <script> element doesn't always prevent its execution in some
- * Mozilla browsers. This is Chromium-specific code.
+ * TODO: Stop page's own issues like "Error parsing a meta element's
+ * content:" from appearing as extension's errors.
*/
- if (node.tagName === "SCRIPT") {
- block_script(node);
- return;
- }
-
- sanitize_attributes(node);
-
- if (node.tagName === "HEAD")
- inject_csp(node);
-}
-
-function block_script(node)
-{
- /*
- * Disabling scripts this way allows them to still be relatively
- * easily accessed in case they contain some useful data.
- */
- if (node.hasAttribute("type"))
- node.setAttribute("blocked-type", node.getAttribute("type"));
- node.setAttribute("type", "application/json");
-}
-
-function inject_csp(head)
-{
- let meta = document.createElement("meta");
- meta.setAttribute("http-equiv", "Content-Security-Policy");
- meta.setAttribute("content", csp_rule(nonce));
-
- if (head.firstElementChild === null)
- head.appendChild(meta);
- else
- head.insertBefore(meta, head.firstElementChild);
+ parent.hachette_corresponding.appendChild(clone);
}
if (!is_privileged_url(document.URL)) {
@@ -110,20 +47,18 @@ if (!is_privileged_url(document.URL)) {
handle_page_actions(policy.nonce);
- if (!policy.allow) {
- block_nodes_recursively(document.documentElement);
+ if (!policy.allow && is_mozilla)
+ addEventListener('beforescriptexecute', mozilla_suppress_scripts, true);
- if (is_chrome) {
- var observer = new MutationObserver(handle_mutation);
- observer.observe(document.documentElement, {
- attributes: true,
- childList: true,
- subtree: true
- });
- }
+ if (!policy.allow && is_chrome) {
+ const old_html = document.documentElement;
+ const new_html = document.createElement("html");
+ old_html.replaceWith(new_html);
+ old_html.hachette_corresponding = new_html;
- if (is_mozilla)
- addEventListener('beforescriptexecute', mozilla_suppress_scripts, true);
+ const modify_end =
+ modify_on_the_fly(old_html, policy, {node_eater: accept_node});
+ document.addEventListener("DOMContentLoaded", modify_end);
}
start_activity_info_server();
diff --git a/content/sanitize_document.js b/content/sanitize_document.js
new file mode 100644
index 0000000..1533526
--- /dev/null
+++ b/content/sanitize_document.js
@@ -0,0 +1,209 @@
+/**
+ * Hachette modify HTML document as it loads and reconstruct HTML code from it
+ *
+ * Copyright (C) 2021 Wojtek Kosior
+ * Redistribution terms are gathered in the `copyright' file.
+ */
+
+/*
+ * IMPORTS_START
+ * IMPORT gen_nonce
+ * IMPORT csp_rule
+ * IMPORT is_csp_header_name
+ * IMPORT sanitize_csp_header
+ * IMPORT sanitize_attributes
+ * IMPORTS_END
+ */
+
+/*
+ * Functions that sanitize elements. The script blocking measures are, when
+ * possible, going to be applied together with CSP rules injected using
+ * webRequest.
+ */
+
+const blocked = "blocked";
+
+function block_attribute(node, attr)
+{
+ /*
+ * Disabling attributes this way allows them to still be relatively
+ * easily accessed in case they contain some useful data.
+ */
+
+ const construct_name = [attr];
+ while (node.hasAttribute(construct_name.join("")))
+ construct_name.unshift(blocked);
+
+ while (construct_name.length > 1) {
+ construct_name.shift();
+ const name = construct_name.join("");
+ node.setAttribute(`${blocked}-${name}`, node.getAttribute(name));
+ }
+
+ node.removeAttribute(attr);
+}
+
+function sanitize_script(script, policy)
+{
+ if (policy.allow)
+ return;
+
+ block_attribute(script, "type");
+ script.setAttribute("type", "application/json");
+}
+
+function inject_csp(head, policy)
+{
+ if (policy.allow)
+ return;
+
+ const meta = document.createElement("meta");
+ meta.setAttribute("http-equiv", "Content-Security-Policy");
+ meta.setAttribute("content", csp_rule(policy.nonce));
+ meta.hachette_ignore = true;
+ head.prepend(meta);
+}
+
+function sanitize_http_equiv_csp_rule(meta, policy)
+{
+ const http_equiv = meta.getAttribute("http-equiv");
+
+ if (!is_csp_header_name(http_equiv, !policy.allow))
+ return;
+
+ if (policy.allow || is_csp_header_name(http_equiv, false)) {
+ let value = meta.getAttribute("content");
+ block_attribute(meta, "content");
+ if (value) {
+ value = sanitize_csp_header({value}, policy).value;
+ meta.setAttribute("content", value);
+ }
+ return;
+ }
+
+ block_attribute(meta, "http-equiv");
+}
+
+function sanitize_node(node, policy)
+{
+ if (node.tagName === "SCRIPT")
+ sanitize_script(node, policy);
+
+ if (node.tagName === "HEAD")
+ inject_csp(node, policy);
+
+ if (node.tagName === "META")
+ sanitize_http_equiv_csp_rule(node, policy);
+
+ if (!policy.allow)
+ sanitize_attributes(node, policy);
+}
+
+const serializer = new XMLSerializer();
+
+function start_node(node, data)
+{
+ if (!data.writer)
+ return;
+
+ node.hachette_started = true;
+ const clone = node.cloneNode(false);
+ clone.textContent = data.uniq;
+ data.writer(data.uniq_reg.exec(clone.outerHTML)[1]);
+}
+
+function finish_node(node, data)
+{
+ const nodes_to_process = [node];
+
+ while (true) {
+ node = nodes_to_process.pop();
+ if (!node)
+ break;
+
+ nodes_to_process.push(node, node.hachette_last_added);
+ }
+
+ while (nodes_to_process.length > 0) {
+ const node = nodes_to_process.pop();
+ node.remove();
+
+ if (!data.writer)
+ continue;
+
+ if (node.hachette_started) {
+ node.textContent = data.uniq;
+ data.writer(data.uniq_reg.exec(node.outerHTML)[2]);
+ continue;
+ }
+
+ data.writer(node.outerHTML || serializer.serializeToString(node));
+ }
+}
+
+/*
+ * Important! Due to some weirdness node.parentElement is not always correct
+ * under Chromium. Track node relations manually.
+ */
+function handle_added_node(node, true_parent, data)
+{
+ if (node.hachette_ignore || true_parent.hachette_ignore)
+ return;
+
+ if (!true_parent.hachette_started)
+ start_node(true_parent, data)
+
+ sanitize_node(node, data.policy);
+
+ if (data.node_eater)
+ data.node_eater(node, true_parent);
+
+ finish_node(true_parent.hachette_last_added, data);
+
+ true_parent.hachette_last_added = node;
+}
+
+function handle_mutation(mutations, data)
+{
+ /*
+ * Chromium: for an unknown reason mutation.target is not always the same as
+ * node.parentElement. The former is the correct one.
+ */
+ for (const mutation of mutations) {
+ for (const node of mutation.addedNodes)
+ handle_added_node(node, mutation.target, data);
+ }
+}
+
+function finish_processing(data)
+{
+ handle_mutation(data.observer.takeRecords(), data);
+ finish_node(data.html_element, data);
+ data.observer.disconnect();
+}
+
+function modify_on_the_fly(html_element, policy, consumers)
+{
+ const uniq = gen_nonce();
+ const uniq_reg = new RegExp(`^(.*)${uniq}(.*)$`);
+ const data = {policy, html_element, uniq, uniq_reg, ...consumers};
+
+ start_node(data.html_element, data);
+
+ var observer = new MutationObserver(m => handle_mutation(m, data));
+ observer.observe(data.html_element, {
+ attributes: true,
+ childList: true,
+ subtree: true
+ });
+
+ data.observer = observer;
+
+ return () => finish_processing(data);
+}
+
+/*
+ * EXPORTS_START
+ * EXPORT modify_on_the_fly
+ * EXPORTS_END
+ */