diff options
author | Wojtek Kosior <koszko@koszko.org> | 2021-09-08 19:55:33 +0200 |
---|---|---|
committer | Wojtek Kosior <koszko@koszko.org> | 2021-09-08 19:55:33 +0200 |
commit | e2d26bad35bbe3876862b482f7963d713238313b (patch) | |
tree | 47e6647c239e8f85fa764cfa5750e0f73e1efd74 | |
parent | 704f2da0673dc714f72b9bb82f6bf648795d4335 (diff) | |
download | browser-extension-e2d26bad35bbe3876862b482f7963d713238313b.tar.gz browser-extension-e2d26bad35bbe3876862b482f7963d713238313b.zip |
Fix sanitizing of non-HTML XMLDocument's
-rwxr-xr-x | build.sh | 4 | ||||
-rw-r--r-- | common/misc.js | 4 | ||||
-rw-r--r-- | content/activity_info_server.js | 6 | ||||
-rw-r--r-- | content/main.js | 195 | ||||
-rw-r--r-- | content/page_actions.js | 6 | ||||
-rw-r--r-- | html/display-panel.js | 4 |
6 files changed, 109 insertions, 110 deletions
@@ -201,9 +201,7 @@ main() { if [ "$BROWSER" = "chromium" ]; then CHROMIUM_KEY="$(dd if=/dev/urandom bs=32 count=1 2>/dev/null | base64)" - echo "chromium key is" $CHROMIUM_KEY - CHROMIUM_KEY="chromium-key-dummy-file-$CHROMIUM_KEY" - CHROMIUM_KEY=$(echo $CHROMIUM_KEY | tr / -); + CHROMIUM_KEY=$(echo chromium-key-dummy-file-$CHROMIUM_KEY | tr / -) touch $BUILDDIR/$CHROMIUM_KEY CHROMIUM_KEY="\n\ diff --git a/common/misc.js b/common/misc.js index 91d60d2..6adaf1e 100644 --- a/common/misc.js +++ b/common/misc.js @@ -36,9 +36,9 @@ function Uint8toHex(data) return returnValue; } -function gen_nonce(length) // Default 16 +function gen_nonce(length=16) { - let randomData = new Uint8Array(length || 16); + let randomData = new Uint8Array(length); crypto.getRandomValues(randomData); return Uint8toHex(randomData); } diff --git a/content/activity_info_server.js b/content/activity_info_server.js index beecb1a..1b69703 100644 --- a/content/activity_info_server.js +++ b/content/activity_info_server.js @@ -44,9 +44,9 @@ function report_settings(settings) report_activity("settings", settings); } -function report_content_type(content_type) +function report_document_type(is_html) { - report_activity("content_type", content_type); + report_activity("is_html", is_html); } function report_repo_query_action(update, port) @@ -96,6 +96,6 @@ function start_activity_info_server() * EXPORT start_activity_info_server * EXPORT report_script * EXPORT report_settings - * EXPORT report_content_type + * EXPORT report_document_type * EXPORTS_END */ diff --git a/content/main.js b/content/main.js index a183913..fb334dd 100644 --- a/content/main.js +++ b/content/main.js @@ -22,6 +22,12 @@ * IMPORTS_END */ +document.content_loaded = document.readyState === "complete"; +const wait_loaded = e => e.content_loaded ? 
Promise.resolve() : + new Promise(c => e.addEventListener("DOMContentLoaded", c, {once: true})); + +wait_loaded(document).then(() => document.content_loaded = true); + function extract_cookie_policy(cookie, min_time) { let best_result = {time: -1}; @@ -86,18 +92,17 @@ function employ_nonhttp_policy(policy) } /* + * In the case of HTML documents: * 1. When injecting some payload we need to sanitize <meta> CSP tags before * they reach the document. * 2. Only <meta> tags inside <head> are considered valid by the browser and * need to be considered. * 3. We want to detach <html> from document, wait until its <head> completes * loading, sanitize it and re-attach <html>. - * 4. Browsers are eager to add <meta>'s that appear after `</head>' but before - * `<body>'. Due to this behavior the `DOMContentLoaded' event is considered - * unreliable (although it could still work properly, it is just problematic - * to verify). - * 5. We shall wait for anything to appear in or after <body> and take that as - * a sign <head> has _really_ finished loading. + * 4. We shall wait for anything to appear in or after <body> and take that as + * a sign <head> has finished loading. + * 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also + * be a sign that <head> is fully loaded. */ function make_body_start_observer(DOM_element, waiting) @@ -123,8 +128,10 @@ function try_body_started(waiting) function finish_waiting(waiting) { + if (waiting.finished) + return; + waiting.finished = true; waiting.observers.forEach(observer => observer.disconnect()); - waiting.doc.removeEventListener("DOMContentLoaded", waiting.loaded_cb); setTimeout(waiting.callback, 0); } @@ -132,19 +139,12 @@ function _wait_for_head(doc, detached_html, callback) { const waiting = {doc, detached_html, callback, observers: []}; - /* - * For XML and SVG documents, instead of waiting for `<head>', we wait - * for the entire document to finish loading. 
- */ - if (doc instanceof HTMLDocument) { - if (try_body_started(waiting)) - return; + if (try_body_started(waiting)) + return; - waiting.observers = [make_body_start_observer(detached_html, waiting)]; - } + waiting.observers = [make_body_start_observer(detached_html, waiting)]; - waiting.loaded_cb = () => finish_waiting(waiting); - doc.addEventListener("DOMContentLoaded", waiting.loaded_cb); + wait_loaded(doc).then(() => finish_waiting(waiting)); } function wait_for_head(doc, detached_html) @@ -154,42 +154,43 @@ function wait_for_head(doc, detached_html) const blocked_str = "blocked"; -function block_attribute(node, attr) +function block_attribute(node, attr, ns=null) { + const [hasa, geta, seta, rema] = ["has", "get", "set", "remove"] + .map(m => (n, ...args) => typeof ns === "string" ? + n[`${m}AttributeNS`](ns, ...args) : n[`${m}Attribute`](...args)); /* - * Disabling attributes this way allows them to still be relatively - * easily accessed in case they contain some useful data. + * Disabling attributes by prepending `-blocked' allows them to still be + * relatively easily accessed in case they contain some useful data. 
*/ const construct_name = [attr]; - while (node.hasAttribute(construct_name.join(""))) + while (hasa(node, construct_name.join(""))) construct_name.unshift(blocked_str); while (construct_name.length > 1) { construct_name.shift(); const name = construct_name.join(""); - node.setAttribute(`${blocked_str}-${name}`, node.getAttribute(name)); + seta(node, `${blocked_str}-${name}`, geta(node, name)); } - - node.removeAttribute(attr); } function sanitize_meta(meta, policy) { - const http_equiv = meta.getAttribute("http-equiv"); - const value = meta.content; + const value = meta.content || ""; - if (!value || !is_csp_header_name(http_equiv, true)) + if (!value || !is_csp_header_name(meta.httpEquiv || "", true)) return; block_attribute(meta, "content"); - - if (is_csp_header_name(http_equiv, false)) - meta.content = sanitize_csp_header({value}, policy).value; } +/* + * Used to disable <script> that has not yet been added to live DOM (doesn't + * work for those already added). + */ function sanitize_script(script) { - script.hachette_blocked_type = script.type; + script.hachette_blocked_type = script.getAttribute("type"); script.type = "text/plain"; } @@ -201,102 +202,101 @@ function desanitize_script(script, policy) { script.setAttribute("type", script.hachette_blocked_type); - if (script.hachette_blocked_type === undefined) + if (script.hachette_blocked_type === null) script.removeAttribute("type"); delete script.hachette_blocked_type; } -function apply_hachette_csp_rules(doc, head, policy) -{ - const meta = doc.createElement("meta"); - meta.setAttribute("http-equiv", "Content-Security-Policy"); - meta.setAttribute("content", csp_rule(policy.nonce)); - head.append(meta); - /* CSP is already in effect, we can remove the <meta> now. 
*/ - meta.remove(); -} - +const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)/i; function sanitize_urls(element) { - for (const attribute of [...element.attributes]) { - if (/^(href|src|data)$/i.test(attribute.localName) && - /^data:([^,;]*ml|unknown-content-type)/i.test(attribute.value)) - block_attribute(element, attribute.localName); - } + for (const attr of [...element.attributes || []] + .filter(attr => /^(href|src|data)$/i.test(attr.localName)) + .filter(attr => bad_url_reg.test(attr.value))) + block_attribute(element, attr.localName, attr.namespaceURI); } function start_data_urls_sanitizing(doc) { doc.querySelectorAll("*[href], *[src], *[data]").forEach(sanitize_urls); - const mutation_handler = m => m.addedNodes.forEach(sanitize_urls); - const mo = new MutationObserver(ms => ms.forEach(mutation_handler)); - mo.observe(doc, {childList: true, subtree: true}); + if (!doc.content_loaded) { + const mutation_handler = m => m.addedNodes.forEach(sanitize_urls); + const mo = new MutationObserver(ms => ms.forEach(mutation_handler)); + mo.observe(doc, {childList: true, subtree: true}); + wait_loaded(doc).then(() => mo.disconnect()); + } } -function apply_intrinsics_sanitizing(root_element) +/* + * Normally, we block scripts with CSP. However, Mozilla does optimizations that + * cause part of the DOM to be loaded when our content scripts get to run. Thus, + * before the CSP rules we inject (for non-HTTP pages) become effective, we need + * to somehow block the execution of `<script>'s and intrinsics that were + * already there. 
+ */ +function mozilla_initial_block(doc) { - for (const subelem of root_element.querySelectorAll("*")) { - [...subelem.attributes] - .filter(a => /^on/i.test(a.localName)) - .filter(a => /^javascript:/i.test(a.value)) - .forEach(a => block_attribute(subelem, a.localName)); - } + const blocker = e => e.preventDefault(); + doc.addEventListener("beforescriptexecute", blocker); + setTimeout(() => doc.removeEventListener("beforescriptexecute", blocker)); + + [...doc.all].flatMap(ele => [...ele.attributes].map(attr => [ele, attr])) + .map(([ele, attr]) => [ele, attr.localName]) + .filter(([ele, attr]) => /^on/.test(attr) && ele.wrappedJSObject[attr]) + .forEach(([ele, attr]) => ele.wrappedJSObject[attr] = null); } +/* + * Here we block all scripts of a document which might be either an + * HTMLDocument or an XMLDocument. Modifying an XML document might disrupt + * Mozilla's XML preview. This is an unfortunate thing we have to accept for + * now. XML documents *have to* be sanitized as well because they might + * contain `<script>' tags (or on* attributes) with namespace declared as + * "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows + * javascript execution. + */ async function sanitize_document(doc, policy) { /* * Blocking of scripts that are in the DOM from the beginning. Needed for - * Mozilla, harmless on Chromium. - * Note that at least in SVG documents the `src' attr on `<script>'s seems - * to be ignored by Firefox, so we don't need to sanitize it. + * Mozilla. */ - for (const script of document.getElementsByTagName("script")) { - const old_children = [...script.childNodes]; - script.innerHTML = ""; - setTimeout(() => old_children.forEach(c => script.append(c)), 0); - } + if (is_mozilla) + mozilla_initial_block(doc); /* * Ensure our CSP rules are employed from the beginning. This CSP injection * method is, when possible, going to be applied together with CSP rules * injected using webRequest. 
- * For non-HTML documents this is just a dummy operation of adding and - * removing `head'. + * Using elements namespaced as HTML makes this CSP injection also work for + * non-HTML documents. */ - let added_head = doc.createElement("head"); - if (!doc.head) - doc.documentElement.prepend(added_head); - - apply_hachette_csp_rules(doc, added_head, policy); - - /* Proceed with DOM in its initial state. */ - added_head.remove(); + const html = new DOMParser().parseFromString(`<html><head><meta \ +http-equiv="Content-Security-Policy" content="${csp_rule(policy.nonce)}"\ +/></head><body>Loading...</body></html>`, "text/html").documentElement; /* - * <html> node gets hijacked now, to be re-attached after <head> is loaded + * Root node gets hijacked now, to be re-attached after <head> is loaded * and sanitized. */ - const old_html = doc.documentElement; - const new_html = doc.createElement("html"); - old_html.replaceWith(new_html); + const root = doc.documentElement; + root.replaceWith(html); - await wait_for_head(doc, old_html); - - for (const meta of old_html.querySelectorAll("head meta")) - sanitize_meta(meta, policy); - - for (const script of old_html.querySelectorAll("script")) - sanitize_script(script, policy); - - if (!(doc instanceof HTMLDocument)) - apply_intrinsics_sanitizing(old_html); + /* + * For XML documents, we don't intend to inject payload, so we neither block + * document's CSP `<meta>' tags nor wait for `<head>' to be parsed. 
+ */ + if (document instanceof HTMLDocument) { + await wait_for_head(doc, root); - new_html.replaceWith(old_html); + root.querySelectorAll("head meta") + .forEach(m => sanitize_meta(m, policy)); + } - for (const script of old_html.querySelectorAll("script")) - desanitize_script(script, policy); + root.querySelectorAll("script").forEach(s => sanitize_script(s, policy)); + html.replaceWith(root); + root.querySelectorAll("script").forEach(s => desanitize_script(s, policy)); start_data_urls_sanitizing(doc); } @@ -329,14 +329,15 @@ if (!is_privileged_url(document.URL)) { } if (!policy) { - console.warn("Using fallback policy!"); + console.debug("Using fallback policy!"); policy = {allow: false, nonce: gen_nonce()}; } + console.debug("current policy", policy); + const doc_ready = Promise.all([ - policy.allow ? Promise.resolve : sanitize_document(document, policy), - new Promise(cb => document.addEventListener("DOMContentLoaded", - cb, {once: true})) + policy.allow ? Promise.resolve() : sanitize_document(document, policy), + wait_loaded(document) ]); handle_page_actions(policy.nonce, policy_received_callback, doc_ready); diff --git a/content/page_actions.js b/content/page_actions.js index 8057541..040b4ab 100644 --- a/content/page_actions.js +++ b/content/page_actions.js @@ -11,7 +11,7 @@ * IMPORT browser * IMPORT report_script * IMPORT report_settings - * IMPORT report_content_type + * IMPORT report_document_type * IMPORTS_END */ @@ -70,8 +70,8 @@ function handle_page_actions(script_nonce, policy_received_cb, doc_ready_promise) { policy_received_callback = policy_received_cb; url = document.URL; - is_html = /html/.test(document.contentType); - report_content_type(document.contentType); + is_html = document instanceof HTMLDocument; + report_document_type(is_html); doc_ready_promise.then(document_ready); diff --git a/html/display-panel.js b/html/display-panel.js index 7d801c9..623ff36 100644 --- a/html/display-panel.js +++ b/html/display-panel.js @@ -276,8 +276,8 @@ 
function handle_activity_report(message) template.script_contents.textContent = data; container_for_injected.appendChild(template.div); } - if (type === "content_type") { - if (!/html/.test(data)) + if (type === "is_html") { + if (!data) content_type_cell.classList.remove("hide"); } if (type === "repo_query_action") { |