aboutsummaryrefslogtreecommitdiff
path: root/content/sanitize_document.js
diff options
context:
space:
mode:
Diffstat (limited to 'content/sanitize_document.js')
-rw-r--r--content/sanitize_document.js209
1 files changed, 209 insertions, 0 deletions
diff --git a/content/sanitize_document.js b/content/sanitize_document.js
new file mode 100644
index 0000000..1533526
--- /dev/null
+++ b/content/sanitize_document.js
@@ -0,0 +1,209 @@
+/**
+ * Hachette: modify an HTML document as it loads and reconstruct HTML code from it
+ *
+ * Copyright (C) 2021 Wojtek Kosior
+ * Redistribution terms are gathered in the `copyright' file.
+ */
+
+/*
+ * IMPORTS_START
+ * IMPORT gen_nonce
+ * IMPORT csp_rule
+ * IMPORT is_csp_header_name
+ * IMPORT sanitize_csp_header
+ * IMPORT sanitize_attributes
+ * IMPORTS_END
+ */
+
+/*
+ * Functions that sanitize elements. The script blocking measures are, when
+ * possible, going to be applied together with CSP rules injected using
+ * webRequest.
+ */
+
+/* Prefix put in front of the names of attributes disabled by block_attribute(). */
+const blocked = "blocked";
+
+function block_attribute(node, attr)
+{
+ /*
+ * Disabling attributed this way allows them to still be relatively
+ * easily accessed in case they contain some useful data.
+ */
+
+ const construct_name = [attr];
+ while (node.hasAttribute(construct_name.join("")))
+ construct_name.unshift(blocked);
+
+ while (construct_name.length > 1) {
+ construct_name.shift();
+ const name = construct_name.join("");
+ node.setAttribute(`${blocked}-${name}`, node.getAttribute(name));
+ }
+
+ node.removeAttribute(attr);
+}
+
+function sanitize_script(script, policy)
+{
+ if (policy.allow)
+ return;
+
+ block_attribute(script, "type");
+ script.setAttribute("type", "application/json");
+}
+
+function inject_csp(head, policy)
+{
+ if (policy.allow)
+ return;
+
+ const meta = document.createElement("meta");
+ meta.setAttribute("http-equiv", "Content-Security-Policy");
+ meta.setAttribute("content", csp_rule(policy.nonce));
+ meta.hachette_ignore = true;
+ head.prepend(meta);
+}
+
+function sanitize_http_equiv_csp_rule(meta, policy)
+{
+ const http_equiv = meta.getAttribute("http-equiv");
+
+ if (!is_csp_header_name(http_equiv, !policy.allow))
+ return;
+
+ if (policy.allow || is_csp_header_name(http_equiv, false)) {
+ let value = meta.getAttribute("content");
+ block_attribute(meta, "content");
+ if (value) {
+ value = sanitize_csp_header({value}, policy).value;
+ meta.setAttribute("content", value);
+ }
+ return;
+ }
+
+ block_attribute(meta, "http-equiv");
+}
+
+function sanitize_node(node, policy)
+{
+ if (node.tagName === "SCRIPT")
+ sanitize_script(node, policy);
+
+ if (node.tagName === "HEAD")
+ inject_csp(node, policy);
+
+ if (node.tagName === "META")
+ sanitize_http_equiv_csp_rule(node, policy);
+
+ if (!policy.allow)
+ sanitize_attributes(node, policy);
+}
+
+/* Shared serializer; finish_node() falls back to it when node.outerHTML is empty or missing. */
+const serializer = new XMLSerializer();
+
+function start_node(node, data)
+{
+ if (!data.writer)
+ return;
+
+ node.hachette_started = true;
+ const clone = node.cloneNode(false);
+ clone.textContent = data.uniq;
+ data.writer(data.uniq_reg.exec(clone.outerHTML)[1]);
+}
+
+function finish_node(node, data)
+{
+ const nodes_to_process = [node];
+
+ while (true) {
+ node = nodes_to_process.pop();
+ if (!node)
+ break;
+
+ nodes_to_process.push(node, node.hachette_last_added);
+ }
+
+ while (nodes_to_process.length > 0) {
+ const node = nodes_to_process.pop();
+ node.remove();
+
+ if (!data.writer)
+ continue;
+
+ if (node.hachette_started) {
+ node.textContent = data.uniq;
+ data.writer(data.uniq_reg.exec(node.outerHTML)[2]);
+ continue;
+ }
+
+ data.writer(node.outerHTML || serializer.serializeToString(node));
+ }
+}
+
+/*
+ * Important! Due to some weirdness node.parentElement is not always correct
+ * under Chromium. Track node relations manually.
+ */
+function handle_added_node(node, true_parent, data)
+{
+ if (node.hachette_ignore || true_parent.hachette_ignore)
+ return;
+
+ if (!true_parent.hachette_started)
+ start_node(true_parent, data)
+
+ sanitize_node(node, data.policy);
+
+ if (data.node_eater)
+ data.node_eater(node, true_parent);
+
+ finish_node(true_parent.hachette_last_added, data);
+
+ true_parent.hachette_last_added = node;
+}
+
+function handle_mutation(mutations, data)
+{
+ /*
+ * Chromium: for an unknown reason mutation.target is not always the same as
+ * node.parentElement. The former is the correct one.
+ */
+ for (const mutation of mutations) {
+ for (const node of mutation.addedNodes)
+ handle_added_node(node, mutation.target, data);
+ }
+}
+
+function finish_processing(data)
+{
+ handle_mutation(data.observer.takeRecords(), data);
+ finish_node(data.html_element, data);
+ data.observer.disconnect();
+}
+
+function modify_on_the_fly(html_element, policy, consumers)
+{
+ const uniq = gen_nonce();
+ const uniq_reg = new RegExp(`^(.*)${uniq}(.*)$`);
+ const data = {policy, html_element, uniq, uniq_reg, ...consumers};
+
+ start_node(data.html_element, data);
+
+ var observer = new MutationObserver(m => handle_mutation(m, data));
+ observer.observe(data.html_element, {
+ attributes: true,
+ childList: true,
+ subtree: true
+ });
+
+ data.observer = observer;
+
+ return () => finish_processing(data);
+}
+
+/*
+ * EXPORTS_START
+ * EXPORT modify_on_the_fly
+ * EXPORTS_END
+ */