/*
 * Provenance: reconstructed from a flattened patch view of commit
 * d09b7ee10541b5a81430d2e11abb3a9a09643ade by Wojtek Kosior
 * (Fri, 20 Aug 2021 12:57:48 +0200), which created
 * content/sanitize_document.js.
 *
 * Commit subject: "sanitize `' tags containing CSP rules under Chromium"
 * (a tag name inside the quotes appears to have been lost in extraction;
 * presumably `<meta>' - TODO confirm against the repository).
 *
 * Commit message: This commit adds a mechanism of hijacking document when it
 * loads and injecting sanitized nodes to the DOM from the level of content
 * script.
 */

/**
 * Hachette modify HTML document as it loads and reconstruct HTML code from it
 *
 * Copyright (C) 2021 Wojtek Kosior
 * Redistribution terms are gathered in the `copyright' file.
 */

/*
 * IMPORTS_START
 * IMPORT gen_nonce
 * IMPORT csp_rule
 * IMPORT is_csp_header_name
 * IMPORT sanitize_csp_header
 * IMPORT sanitize_attributes
 * IMPORTS_END
 */

/*
 * Functions that sanitize elements. The script blocking measures are, when
 * possible, going to be applied together with CSP rules injected using
 * webRequest.
 */

const blocked = "blocked";

/*
 * Disable attribute `attr' of `node' by renaming it to `blocked-<attr>'.
 *
 * Disabling attributes this way allows them to still be relatively easily
 * accessed in case they contain some useful data.
 *
 * If `blocked-<attr>' (or `blocked-blocked-<attr>', etc.) is already present,
 * the existing values are shifted one level up the chain instead of being
 * overwritten. Does nothing besides a no-op removal when `attr' is absent.
 */
function block_attribute(node, attr)
{
    /* Grow the chain until its dash-joined name is not yet taken. */
    const construct_name = [attr];
    while (node.hasAttribute(construct_name.join("-")))
        construct_name.unshift(blocked);

    /* Move values up the chain, starting from the longest taken name. */
    while (construct_name.length > 1) {
        construct_name.shift();
        const name = construct_name.join("-");
        node.setAttribute(`${blocked}-${name}`, node.getAttribute(name));
    }

    node.removeAttribute(attr);
}

/*
 * Unless `policy.allow' is set, move the script's `type' attribute away and
 * replace it with "application/json".
 */
function sanitize_script(script, policy)
{
    if (policy.allow)
        return;

    block_attribute(script, "type");
    script.setAttribute("type", "application/json");
}

/*
 * Unless `policy.allow' is set, prepend to `head' a `meta' element carrying
 * the rule returned by csp_rule(policy.nonce). The element is flagged with
 * `hachette_ignore' so that handle_added_node() skips it.
 */
function inject_csp(head, policy)
{
    if (policy.allow)
        return;

    const meta = document.createElement("meta");
    meta.setAttribute("http-equiv", "Content-Security-Policy");
    meta.setAttribute("content", csp_rule(policy.nonce));
    meta.hachette_ignore = true;
    head.prepend(meta);
}

/*
 * Sanitize a `meta' element whose `http-equiv' names a CSP header.
 *
 * Elements for which is_csp_header_name(http_equiv, !policy.allow) is false
 * are left untouched. Otherwise, when scripts are allowed or
 * is_csp_header_name(http_equiv, false) holds, the `content' attribute is
 * blocked and, if it had a truthy value, re-set to the value produced by
 * sanitize_csp_header(). In the remaining case the `http-equiv' attribute
 * itself is blocked.
 *
 * NOTE(review): the second argument of is_csp_header_name() presumably decides
 * whether report-only header names count - confirm against its definition.
 */
function sanitize_http_equiv_csp_rule(meta, policy)
{
    const http_equiv = meta.getAttribute("http-equiv");

    if (!is_csp_header_name(http_equiv, !policy.allow))
        return;

    if (policy.allow || is_csp_header_name(http_equiv, false)) {
        let value = meta.getAttribute("content");
        block_attribute(meta, "content");
        if (value) {
            value = sanitize_csp_header({value}, policy).value;
            meta.setAttribute("content", value);
        }
        return;
    }

    block_attribute(meta, "http-equiv");
}

/*
 * Apply the tag-specific sanitizers to `node' and, when scripts are not
 * allowed, also pass it to sanitize_attributes().
 */
function sanitize_node(node, policy)
{
    if (node.tagName === "SCRIPT")
        sanitize_script(node, policy);

    if (node.tagName === "HEAD")
        inject_csp(node, policy);

    if (node.tagName === "META")
        sanitize_http_equiv_csp_rule(node, policy);

    if (!policy.allow)
        sanitize_attributes(node, policy);
}

/*
 * Serializer used for nodes that have no `outerHTML'. It is created on first
 * use, because only such nodes need it.
 */
let serializer = null;

function get_serializer()
{
    if (serializer === null)
        serializer = new XMLSerializer();

    return serializer;
}

/*
 * Pass the opening markup of `node' to `data.writer' and mark the node as
 * started. The markup is obtained from a shallow clone whose text is set to
 * the unique marker `data.uniq'; `data.uniq_reg' captures what precedes it.
 * Does nothing (and does not mark the node) when there is no writer.
 */
function start_node(node, data)
{
    if (!data.writer)
        return;

    node.hachette_started = true;
    const clone = node.cloneNode(false);
    clone.textContent = data.uniq;
    data.writer(data.uniq_reg.exec(clone.outerHTML)[1]);
}

/*
 * Remove `node' from the document together with the chain of nodes reachable
 * through `hachette_last_added', deepest first, writing each one out if a
 * writer is present: started nodes get only their closing markup written,
 * other nodes are written whole. A falsy `node' is a no-op.
 */
function finish_node(node, data)
{
    /* Collect node, its last added child, that child's last added child... */
    const nodes_to_process = [];
    for (let current = node; current; current = current.hachette_last_added)
        nodes_to_process.push(current);

    while (nodes_to_process.length > 0) {
        const finished = nodes_to_process.pop();
        finished.remove();

        if (!data.writer)
            continue;

        if (finished.hachette_started) {
            /* Opening markup was already written; emit what follows uniq. */
            finished.textContent = data.uniq;
            data.writer(data.uniq_reg.exec(finished.outerHTML)[2]);
            continue;
        }

        data.writer(finished.outerHTML ||
                    get_serializer().serializeToString(finished));
    }
}

/*
 * Important! Due to some weirdness node.parentElement is not always correct
 * under Chromium. Track node relations manually.
 *
 * Sanitize a freshly added `node', hand it to `data.node_eater' if one was
 * supplied, finish the previously added sibling and remember `node' as the
 * parent's last added child. Nodes flagged with `hachette_ignore' (or whose
 * parent is flagged) are skipped.
 */
function handle_added_node(node, true_parent, data)
{
    if (node.hachette_ignore || true_parent.hachette_ignore)
        return;

    if (!true_parent.hachette_started)
        start_node(true_parent, data);

    sanitize_node(node, data.policy);

    if (data.node_eater)
        data.node_eater(node, true_parent);

    finish_node(true_parent.hachette_last_added, data);

    true_parent.hachette_last_added = node;
}

/* Process every node added by the given mutation records. */
function handle_mutation(mutations, data)
{
    /*
     * Chromium: for an unknown reason mutation.target is not always the same as
     * node.parentElement. The former is the correct one.
     */
    for (const mutation of mutations) {
        for (const node of mutation.addedNodes)
            handle_added_node(node, mutation.target, data);
    }
}

/*
 * Handle the records still queued in the observer, finish the root element
 * and stop observing.
 */
function finish_processing(data)
{
    handle_mutation(data.observer.takeRecords(), data);
    finish_node(data.html_element, data);
    data.observer.disconnect();
}

/*
 * Start sanitizing the document rooted at `html_element' as nodes get added
 * to it.
 *
 * `policy' is consulted for `allow' and `nonce'. `consumers' may provide
 * `writer' (called with consecutive chunks of reconstructed markup) and
 * `node_eater' (called with each sanitized node and its parent).
 *
 * Returns a function that processes the remaining nodes and disconnects the
 * observer.
 */
function modify_on_the_fly(html_element, policy, consumers)
{
    const uniq = gen_nonce();
    /*
     * The `s' flag lets `.' match line terminators; without it markup that
     * contains a newline (e.g. inside an attribute value) would not match.
     */
    const uniq_reg = new RegExp(`^(.*)${uniq}(.*)$`, "s");
    const data = {policy, html_element, uniq, uniq_reg, ...consumers};

    start_node(data.html_element, data);

    const observer = new MutationObserver(m => handle_mutation(m, data));
    observer.observe(data.html_element, {
        attributes: true,
        childList: true,
        subtree: true
    });

    data.observer = observer;

    return () => finish_processing(data);
}

/*
 * EXPORTS_START
 * EXPORT modify_on_the_fly
 * EXPORTS_END
 */