From 96efcc335bbd9f2ad098e694d6cff6c1c22b4ce8 Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Sat, 5 Mar 2022 15:54:53 +0100 Subject: improve script blocking in non-HTML documents (XML) --- content/policy_enforcing.js | 114 +++++++++++++++++---- .../data/pages/scripts_to_block_1.html | 33 +++--- .../data/pages/scripts_to_block_2.xml | 71 +++++++++++++ test/haketilo_test/unit/test_policy_enforcing.py | 66 +++++++++++- test/haketilo_test/unit/utils.py | 5 +- test/haketilo_test/world_wide_library.py | 2 + 6 files changed, 250 insertions(+), 41 deletions(-) create mode 100644 test/haketilo_test/data/pages/scripts_to_block_2.xml diff --git a/content/policy_enforcing.js b/content/policy_enforcing.js index 29990b8..53f418f 100644 --- a/content/policy_enforcing.js +++ b/content/policy_enforcing.js @@ -45,6 +45,9 @@ #FROM common/misc.js IMPORT gen_nonce, csp_header_regex +const html_ns = "http://www.w3.org/1999/xhtml"; +const svg_ns = "http://www.w3.org/2000/svg"; + document.content_loaded = document.readyState === "complete"; const wait_loaded = e => e.content_loaded ? Promise.resolve() : new Promise(c => e.addEventListener("DOMContentLoaded", c, {once: true})); @@ -203,6 +206,10 @@ function sanitize_element_urls(element) { */ if (some_attr_blocked) { const replacement_elem = document.createElement("a"); + + /* Prevent this node from being processed by our observer. */ + replacement_elem.haketilo_trusted_node = true; + element.replaceWith(replacement_elem); replacement_elem.replaceWith(element); } @@ -221,8 +228,8 @@ function sanitize_element_onevent(element) { element.haketilo_sanitized_onevent = true; for (const attribute_node of [...(element.attributes || [])]) { - const attr = attribute_node.localName, attr_lo = attr.toLowerCase();; - if (!/^on/.test(attr_lo) || !(attr_lo in element.wrappedJSObject)) + const attr = attribute_node.localName, attr_lo = attr.toLowerCase(); + if (!/^on/.test(attr_lo) || !(attr_lo in element)) continue; /* @@ -246,20 +253,69 @@ function sanitize_tree_onevent(root) { } #ENDIF -function start_mo_sanitizing(doc) { - if (!doc.content_loaded) { - function mutation_handler(mutation) { - mutation.addedNodes.forEach(sanitize_element_urls); +/* + * Sanitize elements on-the-fly as they appear using MutationObserver. + * + * Under Abrowser 97 it was observed that MutationObserver does not always work + * as is should. When trying to observe nodes of an XMLDocument the behavior was + * as if the "subtree" option to MutationObserver.observe() was ignored. To work + * around this we avoid using the "subtree" option altogether and have the same + * code work in all scenarios. + */ +function MOSanitizer(root) { + this.root = root; + + this.recursively_sanitize(root); + + this.mo = new MutationObserver(ms => this.handle_mutations(ms)); +} + +MOSanitizer.prototype.observe = function() { + let elem = this.root; + while (elem && !elem.haketilo_trusted_node) { + this.mo.observe(elem, {childList: true}); + elem = elem.lastElementChild; + } +} + +MOSanitizer.prototype.handle_mutations = function(mutations) { + for (const mut of mutations) { + for (const new_node of mut.addedNodes) + this.recursively_sanitize(new_node); + } + + this.mo.disconnect(); + this.observe(); +} + +MOSanitizer.prototype.recursively_sanitize = function(elem) { + const to_process = [elem]; + + while (to_process.length > 0) { + const current_elem = to_process.pop(); + + if (current_elem.haketilo_trusted_node || + current_elem.nodeType !== this.root.ELEMENT_NODE) + continue; + + to_process.push(...current_elem.children); + + sanitize_element_urls(current_elem); #IF MOZILLA - mutation.addedNodes.forEach(sanitize_element_onevent); + sanitize_element_onevent(current_elem); #ENDIF - } - const mo = new MutationObserver(ms => ms.forEach(mutation_handler)); - mo.observe(doc, {childList: true, subtree: true}); - wait_loaded(doc).then(() => mo.disconnect()); } } +MOSanitizer.prototype.start = function() { + this.recursively_sanitize(this.root); + this.observe(); +} + +MOSanitizer.prototype.stop = function() { + this.mo.disconnect(); +} + #IF MOZILLA /* * Normally, we block scripts with CSP. However, Mozilla does optimizations that @@ -270,8 +326,7 @@ function start_mo_sanitizing(doc) { * applying this CSP to non-inline `' in certain scenarios. */ function prevent_script_execution(event) { - if (!event.target.haketilo_payload) - event.preventDefault(); + event.preventDefault(); } #ENDIF @@ -285,19 +340,32 @@ function prevent_script_execution(event) { * javascript execution. */ async function sanitize_document(doc, policy) { + const root = doc.documentElement; + const substitute_doc = + new DOMParser().parseFromString("", "text/html"); + #IF MOZILLA /* * Blocking of scripts that are in the DOM from the beginning. Needed for * Mozilla. */ const listener_args = ["beforescriptexecute", prevent_script_execution]; + doc.addEventListener(...listener_args); + substitute_doc.addEventListener(...listener_args); + wait_loaded(doc).then(() => doc.removeEventListener(...listener_args)); sanitize_tree_urls(doc.documentElement); sanitize_tree_onevent(doc.documentElement); #ENDIF + if (!doc.content_loaded) { + const sanitizer = new MOSanitizer(doc.documentElement); + sanitizer.start(); + wait_loaded(doc).then(() => sanitizer.stop()); + } + /* * Ensure our CSP rules are employed from the beginning. This CSP injection * method is, when possible, going to be applied together with CSP rules @@ -322,8 +390,14 @@ async function sanitize_document(doc, policy) { * Root node gets hijacked now, to be re-attached after is loaded * and sanitized. */ - const root = doc.documentElement; root.replaceWith(temporary_html); +#IF MOZILLA + /* + * To be able to handle the onbeforescriptexecute event for scripts that + * appear under detached document. + */ + substitute_doc.documentElement.replaceWith(root); +#ENDIF /* * When we don't inject payload, we neither block document's CSP `' @@ -336,15 +410,11 @@ async function sanitize_document(doc, policy) { .forEach(m => sanitize_meta(m, policy)); } - sanitize_tree_urls(root); - root.querySelectorAll("script").forEach(s => sanitize_script(s, policy)); + const scripts = [...root.getElementsByTagNameNS(html_ns, "script"), + ...root.getElementsByTagNameNS(svg_ns, "svg")]; + scripts.forEach(s => sanitize_script(s, policy)); temporary_html.replaceWith(root); - root.querySelectorAll("script").forEach(s => desanitize_script(s, policy)); -#IF MOZILLA - sanitize_tree_onevent(root); -#ENDIF - - start_mo_sanitizing(doc); + scripts.forEach(s => desanitize_script(s, policy)); } async function _disable_service_workers() { diff --git a/test/haketilo_test/data/pages/scripts_to_block_1.html b/test/haketilo_test/data/pages/scripts_to_block_1.html index e7793ee..67bff5e 100644 --- a/test/haketilo_test/data/pages/scripts_to_block_1.html +++ b/test/haketilo_test/data/pages/scripts_to_block_1.html @@ -29,18 +29,25 @@ - - - Click Meee! - - - - + +
+ + + Click Meee! + + + + +
diff --git a/test/haketilo_test/data/pages/scripts_to_block_2.xml b/test/haketilo_test/data/pages/scripts_to_block_2.xml new file mode 100644 index 0000000..6433a1d --- /dev/null +++ b/test/haketilo_test/data/pages/scripts_to_block_2.xml @@ -0,0 +1,71 @@ + + + + + + + + + + + + + + window.__run = [...(window.__run || []), 'grape']; + + + + + window.__run = [...(window.__run || []), 'raspberry']; + + + + + + + + + + + + + + + + + + + + diff --git a/test/haketilo_test/unit/test_policy_enforcing.py b/test/haketilo_test/unit/test_policy_enforcing.py index c5dd20e..98b5044 100644 --- a/test/haketilo_test/unit/test_policy_enforcing.py +++ b/test/haketilo_test/unit/test_policy_enforcing.py @@ -73,12 +73,15 @@ def get(driver, page, what_to_do): @pytest.mark.parametrize('csp_off_setting', [{}, {'csp_off': True}]) def test_policy_enforcing_html(driver, execute_in_page, csp_off_setting): """ - A test case of sanitizing