From e2d26bad35bbe3876862b482f7963d713238313b Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Wed, 8 Sep 2021 19:55:33 +0200 Subject: Fix sanitizing of non-HTML XMLDocument's --- content/activity_info_server.js | 6 +- content/main.js | 195 ++++++++++++++++++++-------------------- content/page_actions.js | 6 +- 3 files changed, 104 insertions(+), 103 deletions(-) (limited to 'content') diff --git a/content/activity_info_server.js b/content/activity_info_server.js index beecb1a..1b69703 100644 --- a/content/activity_info_server.js +++ b/content/activity_info_server.js @@ -44,9 +44,9 @@ function report_settings(settings) report_activity("settings", settings); } -function report_content_type(content_type) +function report_document_type(is_html) { - report_activity("content_type", content_type); + report_activity("is_html", is_html); } function report_repo_query_action(update, port) @@ -96,6 +96,6 @@ function start_activity_info_server() * EXPORT start_activity_info_server * EXPORT report_script * EXPORT report_settings - * EXPORT report_content_type + * EXPORT report_document_type * EXPORTS_END */ diff --git a/content/main.js b/content/main.js index a183913..fb334dd 100644 --- a/content/main.js +++ b/content/main.js @@ -22,6 +22,12 @@ * IMPORTS_END */ +document.content_loaded = document.readyState === "complete"; +const wait_loaded = e => e.content_loaded ? Promise.resolve() : + new Promise(c => e.addEventListener("DOMContentLoaded", c, {once: true})); + +wait_loaded(document).then(() => document.content_loaded = true); + function extract_cookie_policy(cookie, min_time) { let best_result = {time: -1}; @@ -86,18 +92,17 @@ function employ_nonhttp_policy(policy) } /* + * In the case of HTML documents: * 1. When injecting some payload we need to sanitize <meta> CSP tags before * they reach the document. * 2. Only <meta> tags inside <head> are considered valid by the browser and * need to be considered. * 3. 
We want to detach <html> from document, wait until its <head> completes * loading, sanitize it and re-attach <html>. - * 4. Browsers are eager to add <meta>'s that appear after `</head>' but before - * `<body>'. Due to this behavior the `DOMContentLoaded' event is considered - * unreliable (although it could still work properly, it is just problematic - * to verify). - * 5. We shall wait for anything to appear in or after <body> and take that as - * a sign <head> has _really_ finished loading. + * 4. We shall wait for anything to appear in or after <body> and take that as + * a sign <head> has finished loading. + * 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also + * be a sign that <head> is fully loaded. */ function make_body_start_observer(DOM_element, waiting) @@ -123,8 +128,10 @@ function try_body_started(waiting) function finish_waiting(waiting) { + if (waiting.finished) + return; + waiting.finished = true; waiting.observers.forEach(observer => observer.disconnect()); - waiting.doc.removeEventListener("DOMContentLoaded", waiting.loaded_cb); setTimeout(waiting.callback, 0); } @@ -132,19 +139,12 @@ function _wait_for_head(doc, detached_html, callback) { const waiting = {doc, detached_html, callback, observers: []}; - /* - * For XML and SVG documents, instead of waiting for `<head>', we wait - * for the entire document to finish loading. 
- */ - if (doc instanceof HTMLDocument) { - if (try_body_started(waiting)) - return; + if (try_body_started(waiting)) + return; - waiting.observers = [make_body_start_observer(detached_html, waiting)]; - } + waiting.observers = [make_body_start_observer(detached_html, waiting)]; - waiting.loaded_cb = () => finish_waiting(waiting); - doc.addEventListener("DOMContentLoaded", waiting.loaded_cb); + wait_loaded(doc).then(() => finish_waiting(waiting)); } function wait_for_head(doc, detached_html) @@ -154,42 +154,43 @@ function wait_for_head(doc, detached_html) const blocked_str = "blocked"; -function block_attribute(node, attr) +function block_attribute(node, attr, ns=null) { + const [hasa, geta, seta, rema] = ["has", "get", "set", "remove"] + .map(m => (n, ...args) => typeof ns === "string" ? + n[`${m}AttributeNS`](ns, ...args) : n[`${m}Attribute`](...args)); /* - * Disabling attributes this way allows them to still be relatively - * easily accessed in case they contain some useful data. + * Disabling attributes by prepending `-blocked' allows them to still be + * relatively easily accessed in case they contain some useful data. 
*/ const construct_name = [attr]; - while (node.hasAttribute(construct_name.join(""))) + while (hasa(node, construct_name.join(""))) construct_name.unshift(blocked_str); while (construct_name.length > 1) { construct_name.shift(); const name = construct_name.join(""); - node.setAttribute(`${blocked_str}-${name}`, node.getAttribute(name)); + seta(node, `${blocked_str}-${name}`, geta(node, name)); } - - node.removeAttribute(attr); } function sanitize_meta(meta, policy) { - const http_equiv = meta.getAttribute("http-equiv"); - const value = meta.content; + const value = meta.content || ""; - if (!value || !is_csp_header_name(http_equiv, true)) + if (!value || !is_csp_header_name(meta.httpEquiv || "", true)) return; block_attribute(meta, "content"); - - if (is_csp_header_name(http_equiv, false)) - meta.content = sanitize_csp_header({value}, policy).value; } +/* + * Used to disable