diff options
author | Wojtek Kosior <koszko@koszko.org> | 2021-08-23 11:05:51 +0200 |
---|---|---|
committer | Wojtek Kosior <koszko@koszko.org> | 2021-08-23 11:05:51 +0200 |
commit | 6b53d6c840140fc5df6d7638808b978d96502a35 (patch) | |
tree | 87a76576824c49b65b1aa9f4213b63926ba7d243 /content/sanitize_document.js | |
parent | d09b7ee10541b5a81430d2e11abb3a9a09643ade (diff) | |
download | browser-extension-6b53d6c840140fc5df6d7638808b978d96502a35.tar.gz browser-extension-6b53d6c840140fc5df6d7638808b978d96502a35.zip |
use StreamFilter under Mozilla to prevent csp <meta> tags from blocking our injected scripts
Diffstat (limited to 'content/sanitize_document.js')
-rw-r--r-- | content/sanitize_document.js | 229 |
1 file changed, 182 insertions, 47 deletions
diff --git a/content/sanitize_document.js b/content/sanitize_document.js index 1533526..727bb6c 100644 --- a/content/sanitize_document.js +++ b/content/sanitize_document.js @@ -43,76 +43,100 @@ function block_attribute(node, attr) node.removeAttribute(attr); } -function sanitize_script(script, policy) +function sanitize_script(script, data) { - if (policy.allow) + if (script.getAttribute("data-hachette-deleteme") === data.policy.nonce) { + script.remove(); + script.hachette_deleted = true; + script.hachette_ignore = true; + } + + if (data.policy.allow) return; block_attribute(script, "type"); script.setAttribute("type", "application/json"); } -function inject_csp(head, policy) +function inject_csp(head, data) { - if (policy.allow) + if (data.policy.allow) return; const meta = document.createElement("meta"); meta.setAttribute("http-equiv", "Content-Security-Policy"); - meta.setAttribute("content", csp_rule(policy.nonce)); + meta.setAttribute("content", csp_rule(data.policy.nonce)); meta.hachette_ignore = true; head.prepend(meta); + + data.new_added.unshift([meta, head]); } -function sanitize_http_equiv_csp_rule(meta, policy) +function sanitize_http_equiv_csp_rule(meta, data) { const http_equiv = meta.getAttribute("http-equiv"); + const value = meta.content; - if (!is_csp_header_name(http_equiv, !policy.allow)) + if (!value || !is_csp_header_name(http_equiv, !data.policy.allow)) return; - if (policy.allow || is_csp_header_name(http_equiv, false)) { - let value = meta.getAttribute("content"); - block_attribute(meta, "content"); - if (value) { - value = sanitize_csp_header({value}, policy).value; - meta.setAttribute("content", value); - } - return; - } + block_attribute(meta, "content"); - block_attribute(meta, "http-equiv"); + if (data.policy.allow || is_csp_header_name(http_equiv, false)) + meta.content = sanitize_csp_header({value}, data.policy).value; } -function sanitize_node(node, policy) +function sanitize_node(node, data) { if (node.tagName === "SCRIPT") - 
sanitize_script(node, policy); + sanitize_script(node, data); if (node.tagName === "HEAD") - inject_csp(node, policy); + inject_csp(node, data); if (node.tagName === "META") - sanitize_http_equiv_csp_rule(node, policy); + sanitize_http_equiv_csp_rule(node, data); + + if (!data.policy.allow) + sanitize_attributes(node, data); +} - if (!policy.allow) - sanitize_attributes(node, policy); +/* + * Instead of calling writer directly with multiple small chunks of reconstruced + * HTML code, we utilize `setTimeout()' to only have it called once, + * asynchronously. + */ +function do_write_callback(data) +{ + data.writer(data.chunks.join("")); + data.chunks = []; + + if (data.finished && data.finisher) + data.finisher(); +} + +function do_write(chunk, data) +{ + data.chunks.push(chunk); + clearTimeout(data.write_timeout); + data.write_timeout = setTimeout(() => do_write_callback(data), 0); } const serializer = new XMLSerializer(); -function start_node(node, data) +function start_serializing_node(node, data) { + node.hachette_started = true; + if (!data.writer) return; - node.hachette_started = true; const clone = node.cloneNode(false); clone.textContent = data.uniq; - data.writer(data.uniq_reg.exec(clone.outerHTML)[1]); + do_write(data.uniq_reg.exec(clone.outerHTML)[1], data); } -function finish_node(node, data) +function finish_serializing_node(node, data) { const nodes_to_process = [node]; @@ -127,40 +151,103 @@ function finish_node(node, data) while (nodes_to_process.length > 0) { const node = nodes_to_process.pop(); node.remove(); + node.hachette_ignore = true; if (!data.writer) continue; if (node.hachette_started) { node.textContent = data.uniq; - data.writer(data.uniq_reg.exec(node.outerHTML)[2]); + do_write(data.uniq_reg.exec(node.outerHTML)[2], data); + continue; + } + + do_write(node.outerHTML || serializer.serializeToString(node), data); + } +} + +function process_initial_nodes(node, data) +{ + if (data.processed_initial_nodes) + return; + + 
data.processed_initial_nodes = true; + + start_serializing_node(data.html_root, data); + + const new_added = []; + const nodes_to_process = [data.html_root]; + + let i = 0; + while (nodes_to_process.length > 0) { + let current = nodes_to_process.shift(); + + if (current.firstChild) { + if (current.firstChild === node) + break; + nodes_to_process.unshift(current.firstChild, current); + new_added.push([current.firstChild, current]); continue; } - data.writer(node.outerHTML || serializer.serializeToString(node)); + while (current && !current.nextSibling) + current = nodes_to_process.shift(); + + if (!current || current.nextSibling === node) + break; + + nodes_to_process.unshift(current.nextSibling); + new_added.push([current.nextSibling, nodes_to_process[1]]); } + + data.new_added.unshift(...new_added); } /* * Important! Due to some weirdness node.parentElement is not alway correct - * under Chromium. Track node relations manually. + * in MutationRecords under Chromium. Track node relations manually. */ function handle_added_node(node, true_parent, data) { - if (node.hachette_ignore || true_parent.hachette_ignore) - return; + /* + * Functions we call here might cause new nodes to be injected or found + * that require processing before the one we got in function argument. + * We rely on those functions putting the node(s) they create/find at the + * very beginning of the `new_added' queue and (for created nodes) setting + * their `hachette_ignore' property, based on which their MutationRecord + * will not be processed. A function can also mark a node already in the + * `new_added' queue as not eligible for processing by setting its + * `hachette_deleted' property. 
+ */ - if (!true_parent.hachette_started) - start_node(true_parent, data) + process_initial_nodes(node, data); - sanitize_node(node, data.policy); + data.new_added.push([node, true_parent]); - if (data.node_eater) - data.node_eater(node, true_parent); + while (data.new_added.length > 0) { + [node, true_parent] = data.new_added.shift(); - finish_node(true_parent.hachette_last_added, data); + if (true_parent.hachette_deleted) + node.hachette_deleted = true; + if (node.hachette_deleted) + continue; + + if (!true_parent.hachette_started) + start_serializing_node(true_parent, data) + + if (!node.hachette_ignore) + sanitize_node(node, data); + + if (node.hachette_deleted) + continue; + + if (data.node_eater) + data.node_eater(node, true_parent); - true_parent.hachette_last_added = node; + finish_serializing_node(true_parent.hachette_last_added, data); + + true_parent.hachette_last_added = node; + } } function handle_mutation(mutations, data) @@ -170,28 +257,76 @@ function handle_mutation(mutations, data) * node.parentElement. The former is the correct one. */ for (const mutation of mutations) { - for (const node of mutation.addedNodes) + for (const node of mutation.addedNodes) { + /* Check for nodes added by ourselves. */ + if (mutation.target.hachette_ignore) + node.hachette_ignore = true; + if (node.hachette_ignore) + continue; + handle_added_node(node, mutation.target, data); + } } } function finish_processing(data) { + process_initial_nodes(undefined, data); + + /* + * The `finisher' callback should be called, if provided. Normally our + * function that performs the last write does it after seeing `finished' + * set to `true'. If, however, there's no `writer' callback and hence no + * writes to perform, we need to take care of calling `finisher' here. 
+ */ + data.finished = true; handle_mutation(data.observer.takeRecords(), data); - finish_node(data.html_element, data); data.observer.disconnect(); + + /* + * Additional whitespace that was after `</body>' gets appended to body. + * Although it's a minor issue, it is not what we want. There's no way to + * tell exactly what part of that whitespace was after `</body>' and what + * was before, so we just replace it with a single newline which looks good + * when printed. + */ + const body = data.html_root.lastChild; + const text = body && body.tagName === "BODY" && body.lastChild; + if (text && text.nodeName === "#text") { + const new_content = /^([\S\s]*\S)?\s*$/.exec(text.textContent)[1] || ""; + text.textContent = new_content + "\n"; + } + + finish_serializing_node(data.html_root, data); + if (!data.writer && data.finisher) + setTimeout(data.finisher, 0); } -function modify_on_the_fly(html_element, policy, consumers) +/* + * This function sanitizes `html_root' according to `policy'. It is capable of + * working on an HTML document that is being written to, sanitizing new nodes + * as they appear. + * + * `consumers' object may contain 3 optional callback functions: `writer', + * `node_eater' and `finisher'. The first one, if present, is called with chunks + * of reconstructed HTML code. The second one, if present, gets called for every + * added node with 2 arguments: that node and its parent. The third one is + * called at the end, after all processing has been done. + * + * `modify_on_the_fly()' returns a callback that should be called (with no + * arguments) once the document of html_root has finished being written to. 
+ * Unfortunately, due to specifics behavior of document that has had its + * documentElement replaced + */ +function modify_on_the_fly(html_root, policy, consumers) { const uniq = gen_nonce(); - const uniq_reg = new RegExp(`^(.*)${uniq}(.*)$`); - const data = {policy, html_element, uniq, uniq_reg, ...consumers}; - - start_node(data.html_element, data); + const uniq_reg = new RegExp(`^([\\s\\S]*)${uniq}([\\s\\S]*)$`); + const data = {policy, html_root, uniq, uniq_reg, chunks: [], new_added: []}; + Object.assign(data, consumers); var observer = new MutationObserver(m => handle_mutation(m, data)); - observer.observe(data.html_element, { + observer.observe(data.html_root, { attributes: true, childList: true, subtree: true |