aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWojtek Kosior <koszko@koszko.org>2022-03-05 15:54:53 +0100
committerWojtek Kosior <koszko@koszko.org>2022-03-05 15:54:53 +0100
commit96efcc335bbd9f2ad098e694d6cff6c1c22b4ce8 (patch)
treecf8120ca6658c04c62e63dc66a8a5b39dbec4c2d
parent709238294ea83525e62476ce59d734c57c11fd3f (diff)
downloadbrowser-extension-96efcc335bbd9f2ad098e694d6cff6c1c22b4ce8.tar.gz
browser-extension-96efcc335bbd9f2ad098e694d6cff6c1c22b4ce8.zip
improve script blocking in non-HTML documents (XML)
-rw-r--r--content/policy_enforcing.js114
-rw-r--r--test/haketilo_test/data/pages/scripts_to_block_1.html33
-rw-r--r--test/haketilo_test/data/pages/scripts_to_block_2.xml71
-rw-r--r--test/haketilo_test/unit/test_policy_enforcing.py66
-rw-r--r--test/haketilo_test/unit/utils.py5
-rw-r--r--test/haketilo_test/world_wide_library.py2
6 files changed, 250 insertions, 41 deletions
diff --git a/content/policy_enforcing.js b/content/policy_enforcing.js
index 29990b8..53f418f 100644
--- a/content/policy_enforcing.js
+++ b/content/policy_enforcing.js
@@ -45,6 +45,9 @@
#FROM common/misc.js IMPORT gen_nonce, csp_header_regex
+const html_ns = "http://www.w3.org/1999/xhtml";
+const svg_ns = "http://www.w3.org/2000/svg";
+
document.content_loaded = document.readyState === "complete";
const wait_loaded = e => e.content_loaded ? Promise.resolve() :
new Promise(c => e.addEventListener("DOMContentLoaded", c, {once: true}));
@@ -203,6 +206,10 @@ function sanitize_element_urls(element) {
*/
if (some_attr_blocked) {
const replacement_elem = document.createElement("a");
+
+ /* Prevent this node from being processed by our observer. */
+ replacement_elem.haketilo_trusted_node = true;
+
element.replaceWith(replacement_elem);
replacement_elem.replaceWith(element);
}
@@ -221,8 +228,8 @@ function sanitize_element_onevent(element) {
element.haketilo_sanitized_onevent = true;
for (const attribute_node of [...(element.attributes || [])]) {
- const attr = attribute_node.localName, attr_lo = attr.toLowerCase();;
- if (!/^on/.test(attr_lo) || !(attr_lo in element.wrappedJSObject))
+ const attr = attribute_node.localName, attr_lo = attr.toLowerCase();
+ if (!/^on/.test(attr_lo) || !(attr_lo in element))
continue;
/*
@@ -246,20 +253,69 @@ function sanitize_tree_onevent(root) {
}
#ENDIF
-function start_mo_sanitizing(doc) {
- if (!doc.content_loaded) {
- function mutation_handler(mutation) {
- mutation.addedNodes.forEach(sanitize_element_urls);
+/*
+ * Sanitize elements on-the-fly as they appear using MutationObserver.
+ *
+ * Under Abrowser 97 it was observed that MutationObserver does not always work
+ * as is should. When trying to observe nodes of an XMLDocument the behavior was
+ * as if the "subtree" option to MutationObserver.observe() was ignored. To work
+ * around this we avoid using the "subtree" option altogether and have the same
+ * code work in all scenarios.
+ */
+function MOSanitizer(root) {
+ this.root = root;
+
+ this.recursively_sanitize(root);
+
+ this.mo = new MutationObserver(ms => this.handle_mutations(ms));
+}
+
+MOSanitizer.prototype.observe = function() {
+ let elem = this.root;
+ while (elem && !elem.haketilo_trusted_node) {
+ this.mo.observe(elem, {childList: true});
+ elem = elem.lastElementChild;
+ }
+}
+
+MOSanitizer.prototype.handle_mutations = function(mutations) {
+ for (const mut of mutations) {
+ for (const new_node of mut.addedNodes)
+ this.recursively_sanitize(new_node);
+ }
+
+ this.mo.disconnect();
+ this.observe();
+}
+
+MOSanitizer.prototype.recursively_sanitize = function(elem) {
+ const to_process = [elem];
+
+ while (to_process.length > 0) {
+ const current_elem = to_process.pop();
+
+ if (current_elem.haketilo_trusted_node ||
+ current_elem.nodeType !== this.root.ELEMENT_NODE)
+ continue;
+
+ to_process.push(...current_elem.children);
+
+ sanitize_element_urls(current_elem);
#IF MOZILLA
- mutation.addedNodes.forEach(sanitize_element_onevent);
+ sanitize_element_onevent(current_elem);
#ENDIF
- }
- const mo = new MutationObserver(ms => ms.forEach(mutation_handler));
- mo.observe(doc, {childList: true, subtree: true});
- wait_loaded(doc).then(() => mo.disconnect());
}
}
+MOSanitizer.prototype.start = function() {
+ this.recursively_sanitize(this.root);
+ this.observe();
+}
+
+MOSanitizer.prototype.stop = function() {
+ this.mo.disconnect();
+}
+
#IF MOZILLA
/*
* Normally, we block scripts with CSP. However, Mozilla does optimizations that
@@ -270,8 +326,7 @@ function start_mo_sanitizing(doc) {
* applying this CSP to non-inline `<scripts>' in certain scenarios.
*/
function prevent_script_execution(event) {
- if (!event.target.haketilo_payload)
- event.preventDefault();
+ event.preventDefault();
}
#ENDIF
@@ -285,19 +340,32 @@ function prevent_script_execution(event) {
* javascript execution.
*/
async function sanitize_document(doc, policy) {
+ const root = doc.documentElement;
+ const substitute_doc =
+ new DOMParser().parseFromString("<!DOCTYPE html>", "text/html");
+
#IF MOZILLA
/*
* Blocking of scripts that are in the DOM from the beginning. Needed for
* Mozilla.
*/
const listener_args = ["beforescriptexecute", prevent_script_execution];
+
doc.addEventListener(...listener_args);
+ substitute_doc.addEventListener(...listener_args);
+
wait_loaded(doc).then(() => doc.removeEventListener(...listener_args));
sanitize_tree_urls(doc.documentElement);
sanitize_tree_onevent(doc.documentElement);
#ENDIF
+ if (!doc.content_loaded) {
+ const sanitizer = new MOSanitizer(doc.documentElement);
+ sanitizer.start();
+ wait_loaded(doc).then(() => sanitizer.stop());
+ }
+
/*
* Ensure our CSP rules are employed from the beginning. This CSP injection
* method is, when possible, going to be applied together with CSP rules
@@ -322,8 +390,14 @@ async function sanitize_document(doc, policy) {
* Root node gets hijacked now, to be re-attached after <head> is loaded
* and sanitized.
*/
- const root = doc.documentElement;
root.replaceWith(temporary_html);
+#IF MOZILLA
+ /*
+ * To be able to handle the onbeforescriptexecute event for scripts that
+ * appear under detached document.
+ */
+ substitute_doc.documentElement.replaceWith(root);
+#ENDIF
/*
* When we don't inject payload, we neither block document's CSP `<meta>'
@@ -336,15 +410,11 @@ async function sanitize_document(doc, policy) {
.forEach(m => sanitize_meta(m, policy));
}
- sanitize_tree_urls(root);
- root.querySelectorAll("script").forEach(s => sanitize_script(s, policy));
+ const scripts = [...root.getElementsByTagNameNS(html_ns, "script"),
+ ...root.getElementsByTagNameNS(svg_ns, "svg")];
+ scripts.forEach(s => sanitize_script(s, policy));
temporary_html.replaceWith(root);
- root.querySelectorAll("script").forEach(s => desanitize_script(s, policy));
-#IF MOZILLA
- sanitize_tree_onevent(root);
-#ENDIF
-
- start_mo_sanitizing(doc);
+ scripts.forEach(s => desanitize_script(s, policy));
}
async function _disable_service_workers() {
diff --git a/test/haketilo_test/data/pages/scripts_to_block_1.html b/test/haketilo_test/data/pages/scripts_to_block_1.html
index e7793ee..67bff5e 100644
--- a/test/haketilo_test/data/pages/scripts_to_block_1.html
+++ b/test/haketilo_test/data/pages/scripts_to_block_1.html
@@ -29,18 +29,25 @@
</script>
</head>
<body>
- <button id="clickme1"
- onclick="window.__run = [...(window.__run || []), 'on'];"
- blocked-onclick="some useful data">
- Click Meee!
- </button>
- <a id="clickme2"
- href="javascript:window.__run = [...(window.__run || []), 'href'];void(0);">
- Click Meee!
- </a>
- <iframe src="javascript:void(window.parent.__run = [...(window.parent.__run || []), 'src']);">
- </iframe>
- <object data="javascript:window.__run = [...(window.__run || []), 'data'];">
- </object>
+ <!--
+ Put all objects under a <div> to make sure the Mutation Observer does
+ indeed correctly report changes in subtrees (there are problems with
+ this in XML documents).
+ -->
+ <div>
+ <button id="clickme1"
+ onclick="window.__run = [...(window.__run || []), 'on'];"
+ blocked-onclick="some useful data">
+ Click Meee!
+ </button>
+ <a id="clickme2"
+ href="javascript:window.__run = [...(window.__run || []), 'href'];void(0);">
+ Click Meee!
+ </a>
+ <iframe src="javascript:void(window.parent.__run = [...(window.parent.__run || []), 'src']);">
+ </iframe>
+ <object data="javascript:window.__run = [...(window.__run || []), 'data'];">
+ </object>
+ </div>
</body>
</html>
diff --git a/test/haketilo_test/data/pages/scripts_to_block_2.xml b/test/haketilo_test/data/pages/scripts_to_block_2.xml
new file mode 100644
index 0000000..6433a1d
--- /dev/null
+++ b/test/haketilo_test/data/pages/scripts_to_block_2.xml
@@ -0,0 +1,71 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ SPDX-License-Identifier: CC0-1.0
+
+ A testing XML document with various scripts that need to get blocked.
+
+ This file is part of Haketilo.
+
+ Copyright (C) 2021, 2022 Wojtek Kosior <koszko@koszko.org>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the CC0 1.0 Universal License as published by
+ the Creative Commons Corporation.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ CC0 1.0 Universal License for more details.
+ -->
+
+<fruits>
+
+ <!--
+ The following will not execute since it is not recognized as either HTML
+ or SVG script.
+ -->
+ <script>
+ window.__run = [...(window.__run || []), 'banana'];
+ </script>
+
+ <html:img xmlns:html="http://www.w3.org/1999/xhtml"
+ src="data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciPjwvc3ZnPg=="
+ onload="window.__run = [...(window.__run || []), 'melon'];console.log('delme melon')">
+ </html:img>
+
+ <!-- Will execute -->
+ <html:script xmlns:html="http://www.w3.org/1999/xhtml">
+ window.__run = [...(window.__run || []), 'grape'];
+ </html:script>
+
+ <!-- Will also execute -->
+ <vector-graphics:script xmlns:vector-graphics="http://www.w3.org/2000/svg">
+ window.__run = [...(window.__run || []), 'raspberry'];
+ </vector-graphics:script>
+
+ <apple>
+ <svg viewBox="0 0 10 14" xmlns="http://www.w3.org/2000/svg">
+ <!-- Will run when clicked -->
+ <circle id="idaret_circle" cx="5" cy="5" r="4"
+ onclick="window.__run = [...(window.__run || []), 'idaret'];" />
+ <!-- Will *NOT* run when clicked -->
+ <circle id="nowamak_circle" cx="5" cy="13" r="4"
+ some-unknown:onclick="window.__run = [...(window.__run || []), 'nowamak'];"
+ xmlns:some-unknown="https://example.org/blah/blah" />
+ </svg>
+ </apple>
+ <!--
+ In case of wrong namespace URI (or lack thereof), svg subtree will not
+ be recognized as SVG at all
+ -->
+ <svg>
+ <!-- Will neither run nor be drawn by the browser -->
+ <circle id="mango_circle" cx="5" cy="5" r="4"
+ onclick="window.__run = [...(window.__run || []), 'mango'];" />
+ </svg>
+ <svg viewBox="0 0 10" xmlns="http://www.w3.org/2000/sv">
+ <!-- Will neither run nor be drawn by the browser -->
+ <circle id="annoying_circle" cx="5" cy="5" r="4"
+ onclick="window.__run = [...(window.__run || []), 'orange'];" />
+ </svg>
+</fruits>
diff --git a/test/haketilo_test/unit/test_policy_enforcing.py b/test/haketilo_test/unit/test_policy_enforcing.py
index c5dd20e..98b5044 100644
--- a/test/haketilo_test/unit/test_policy_enforcing.py
+++ b/test/haketilo_test/unit/test_policy_enforcing.py
@@ -73,12 +73,15 @@ def get(driver, page, what_to_do):
@pytest.mark.parametrize('csp_off_setting', [{}, {'csp_off': True}])
def test_policy_enforcing_html(driver, execute_in_page, csp_off_setting):
"""
- A test case of sanitizing <script>s and intrinsic javascript in pages.
+ A test case of sanitizing <script>s and intrinsic JavaScript in HTML pages.
"""
- def assert_properly_blocked():
+ def click_all():
for i in range(1, 3):
driver.find_element_by_id(f'clickme{i}').click()
+ def assert_properly_blocked():
+ click_all()
+
assert set(driver.execute_script('return window.__run || [];')) == set()
assert bool(csp_off_setting) == are_scripts_allowed(driver)
@@ -98,8 +101,7 @@ def test_policy_enforcing_html(driver, execute_in_page, csp_off_setting):
**csp_off_setting
})
- for i in range(1, 3):
- driver.find_element_by_id(f'clickme{i}').click()
+ click_all()
assert set(driver.execute_script('return window.__run || [];')) == \
{'inline', 'on', 'href', 'src', 'data'}
@@ -121,3 +123,59 @@ def test_policy_enforcing_html(driver, execute_in_page, csp_off_setting):
assert_properly_blocked()
assert are_scripts_allowed(driver, nonce)
+
+# Test function analogous to that for HTML page.
+@pytest.mark.ext_data({'content_script': content_script})
+@pytest.mark.usefixtures('webextension')
+@pytest.mark.parametrize('csp_off_setting', [{}, {'csp_off': True}])
+def test_policy_enforcing_xml(driver, execute_in_page, csp_off_setting):
+ """
+ A test case of sanitizing <script>s and intrinsic JavaScript in XML
+ documents.
+ """
+ def click_all():
+ for name in ('idaret', 'nowamak', 'mango', 'annoying'):
+ elem = driver.find_element_by_id(f'{name}_circle')
+ try:
+ elem.click()
+ except:
+ pass
+
+ def assert_properly_blocked():
+ click_all()
+
+ try:
+ assert set(driver.execute_script('return window.__run || [];')) == set()
+ except:
+ from time import sleep
+ sleep(100000)
+ assert bool(csp_off_setting) == are_scripts_allowed(driver)
+
+ # First, see if scripts run when not blocked.
+ get(driver, 'https://gotmyowndoma.in/scripts_to_block_2.xml', {
+ 'policy': allow_policy,
+ **csp_off_setting
+ })
+
+ click_all()
+
+ assert set(driver.execute_script('return window.__run || [];')) == \
+ {'grape', 'raspberry', 'idaret', 'melon'}
+ assert are_scripts_allowed(driver)
+
+ # Now, verify scripts don't run when blocked.
+ get(driver, 'https://gotmyowndoma.in/scripts_to_block_2.xml', {
+ 'policy': block_policy,
+ **csp_off_setting
+ })
+
+ assert_properly_blocked()
+
+ # Now, verify only scripts with nonce can run when payload is injected.
+ get(driver, 'https://gotmyowndoma.in/scripts_to_block_2.xml', {
+ 'policy': payload_policy,
+ **csp_off_setting
+ })
+
+ assert_properly_blocked()
+ assert are_scripts_allowed(driver, nonce)
diff --git a/test/haketilo_test/unit/utils.py b/test/haketilo_test/unit/utils.py
index b27a209..7ddf92a 100644
--- a/test/haketilo_test/unit/utils.py
+++ b/test/haketilo_test/unit/utils.py
@@ -228,11 +228,12 @@ def are_scripts_allowed(driver, nonce=None):
return driver.execute_script(
'''
document.haketilo_scripts_allowed = false;
- const script = document.createElement("script");
+ const html_ns = "http://www.w3.org/1999/xhtml";
+ const script = document.createElementNS(html_ns, "script");
script.innerHTML = "document.haketilo_scripts_allowed = true;";
if (arguments[0])
script.setAttribute("nonce", arguments[0]);
- document.head.append(script);
+ (document.head || document.documentElement).append(script);
return document.haketilo_scripts_allowed;
''',
nonce)
diff --git a/test/haketilo_test/world_wide_library.py b/test/haketilo_test/world_wide_library.py
index fedfeb6..1a90c42 100644
--- a/test/haketilo_test/world_wide_library.py
+++ b/test/haketilo_test/world_wide_library.py
@@ -234,6 +234,8 @@ catalog = {
'https://gotmyowndoma.in/scripts_to_block_1.html':
(200, {}, here / 'data' / 'pages' / 'scripts_to_block_1.html'),
+ 'https://gotmyowndoma.in/scripts_to_block_2.xml':
+ (200, {}, here / 'data' / 'pages' / 'scripts_to_block_2.xml'),
'https://anotherdoma.in/resource/blocked/by/CORS.json':
lambda command, get_params, post_params: (200, {}, some_data),