diff options
author | Wojtek Kosior <koszko@koszko.org> | 2022-01-17 11:20:52 +0100 |
---|---|---|
committer | Wojtek Kosior <koszko@koszko.org> | 2022-01-17 11:24:56 +0100 |
commit | 7bedbcbd80eba9359d2e905b7693923c76ce563d (patch) | |
tree | 5059ac406e29b1b1e81639fc11316dde280fe218 | |
parent | ede3a55ba22d2560ec7c0deebffd73623488acc1 (diff) | |
download | browser-extension-7bedbcbd80eba9359d2e905b7693923c76ce563d.tar.gz browser-extension-7bedbcbd80eba9359d2e905b7693923c76ce563d.zip |
move policy enforcing code to a new file, include basic test
-rw-r--r-- | content/policy_enforcing.js | 326 | ||||
-rw-r--r-- | copyright | 8 | ||||
-rw-r--r-- | test/data/pages/gotmyowndomain.html | 2 | ||||
-rw-r--r-- | test/data/pages/gotmyowndomain_https.html | 4 | ||||
-rw-r--r-- | test/data/pages/scripts_to_block_1.html | 44 | ||||
-rw-r--r-- | test/unit/test_policy_enforcing.py | 110 | ||||
-rw-r--r-- | test/unit/test_webrequest.py | 14 | ||||
-rw-r--r-- | test/unit/utils.py | 13 | ||||
-rw-r--r-- | test/world_wide_library.py | 3 |
9 files changed, 504 insertions, 20 deletions
diff --git a/content/policy_enforcing.js b/content/policy_enforcing.js new file mode 100644 index 0000000..25c8b6b --- /dev/null +++ b/content/policy_enforcing.js @@ -0,0 +1,326 @@ +/** + * This file is part of Haketilo. + * + * Function: Enforcing script blocking rules on a given page, working from a + * content script. + * + * Copyright (C) 2021,2022 Wojtek Kosior + * Copyright (C) 2021 jahoti + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * As additional permission under GNU GPL version 3 section 7, you + * may distribute forms of that code without the copy of the GNU + * GPL normally required by section 4, provided you include this + * license notice and, in case of non-source distribution, a URL + * through which recipients can access the Corresponding Source. + * If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not + * obligated to do so. If you do not wish to do so, delete this + * exception statement from your version. + * + * As a special exception to the GPL, any HTML file which merely + * makes function calls to this code, and for that purpose + * includes it by reference shall be deemed a separate work for + * copyright law purposes. If you modify this code, you may extend + * this exception to your version of the code, but you are not + * obligated to do so. If you do not wish to do so, delete this + * exception statement from your version. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + * + * I, Wojtek Kosior, thereby promise not to sue for violation of this file's + * license. Although I request that you do not make use of this code in a + * proprietary program, I am not going to enforce this in court. + */ + +#FROM common/misc.js IMPORT gen_nonce + +document.content_loaded = document.readyState === "complete"; +const wait_loaded = e => e.content_loaded ? Promise.resolve() : + new Promise(c => e.addEventListener("DOMContentLoaded", c, {once: true})); + +wait_loaded(document).then(() => document.content_loaded = true); + +/* + * In the case of HTML documents: + * 1. When injecting some payload we need to sanitize <meta> CSP tags before + * they reach the document. + * 2. Only <meta> tags inside <head> are considered valid by the browser and + * need to be considered. + * 3. We want to detach <html> from document, wait until its <head> completes + * loading, sanitize it and re-attach <html>. + * 4. We shall wait for anything to appear in or after <body> and take that as + * a sign <head> has finished loading. + * 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also + * be a sign that <head> is fully loaded. 
+ */ + +function make_body_start_observer(DOM_element, waiting) { + const observer = new MutationObserver(() => try_body_started(waiting)); + observer.observe(DOM_element, {childList: true}); + return observer; +} + +function try_body_started(waiting) { + const body = waiting.detached_html.querySelector("body"); + + if ((body && (body.firstChild || body.nextSibling)) || + waiting.doc.documentElement.nextSibling) { + finish_waiting(waiting); + return true; + } + + if (body && waiting.observers.length < 2) + waiting.observers.push(make_body_start_observer(body, waiting)); +} + +function finish_waiting(waiting) { + if (waiting.finished) + return; + waiting.finished = true; + waiting.observers.forEach(observer => observer.disconnect()); + setTimeout(waiting.callback, 0); +} + +function _wait_for_head(doc, detached_html, callback) { + const waiting = {doc, detached_html, callback, observers: []}; + + if (try_body_started(waiting)) + return; + + waiting.observers = [make_body_start_observer(detached_html, waiting)]; + + wait_loaded(doc).then(() => finish_waiting(waiting)); +} + +function wait_for_head(doc, detached_html) { + return new Promise(cb => _wait_for_head(doc, detached_html, cb)); +} + +const blocked_str = "blocked"; + +function block_attribute(node, attr, ns=null, replace_with="") { + const [hasa, geta, seta, rema] = ["has", "get", "set", "remove"] + .map(m => (n, ...args) => typeof ns === "string" ? + n[`${m}AttributeNS`](ns, ...args) : n[`${m}Attribute`](...args)); + /* + * Disabling attributes by prepending `blocked-' allows them to still be + * relatively easily accessed in case they contain some useful data. 
+ */ + const construct_name = [attr]; + while (hasa(node, construct_name.join(""))) + construct_name.unshift(blocked_str); + + while (construct_name.length > 1) { + construct_name.shift(); + const name = construct_name.join(""); + seta(node, `${blocked_str}-${name}`, geta(node, name)); + } + + rema(node, attr); + seta(node, attr, replace_with); +} + +/* + * Used to disable `<script>'s and `<meta>'s that have not yet been added to + * live DOM (doesn't work for those already added). + */ +function sanitize_meta(meta) { + if (csp_header_regex.test(meta.httpEquiv) && meta.content) + block_attribute(meta, "content"); +} + +function sanitize_script(script) { + script.haketilo_blocked_type = script.getAttribute("type"); + script.type = "text/plain"; +} + +/* + * Executed after `<script>' has been connected to the DOM, when it is no longer + * eligible for being executed by the browser. + */ +function desanitize_script(script) { + script.setAttribute("type", script.haketilo_blocked_type); + + if ([null, undefined].includes(script.haketilo_blocked_type)) + script.removeAttribute("type"); + + delete script.haketilo_blocked_type; +} + +const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)|^javascript:/i; +function sanitize_element_urls(element) { + if (element.haketilo_sanitized_urls) + return; + + element.haketilo_sanitized_urls = true; + + for (const attr of [...element.attributes || []] + .filter(attr => /^(href|src|data)$/i.test(attr.localName)) + .filter(attr => bad_url_reg.test(attr.value))) { + const replacement_value = /^href$/i.test(attr.localName) ? 
"javascript:void('blocked');" : "data:text/plain,blocked"; + block_attribute(element, attr.localName, attr.namespaceURI, + replacement_value); + } +} + +function sanitize_tree_urls(root) { + root.querySelectorAll("*[href], *[src], *[data]") + .forEach(sanitize_element_urls); +} + +function start_urls_sanitizing(doc) { + sanitize_tree_urls(doc); + if (!doc.content_loaded) { + const mutation_handler = + m => m.addedNodes.forEach(sanitize_element_urls); + const mo = new MutationObserver(ms => ms.forEach(mutation_handler)); + mo.observe(doc, {childList: true, subtree: true}); + wait_loaded(doc).then(() => mo.disconnect()); + } +} + +#IF MOZILLA +/* + * Normally, we block scripts with CSP. However, Mozilla does optimizations that + * cause part of the DOM to be loaded when our content scripts get to run. Thus, + * before the CSP rules we inject (for non-HTTP pages) become effective, we need + * to somehow block the execution of `<script>'s and intrinsics that were + * already there. Additionally, some browsers (IceCat 60) seem to have problems + * applying this CSP to non-inline `<script>'s in certain scenarios. + */ +function prevent_script_execution(event) { + if (!event.target.haketilo_payload) + event.preventDefault(); +} +#ENDIF + +/* + * Here we block all scripts of a document which might be either an + * HTMLDocument or an XMLDocument. Modifying an XML document might disrupt + * Mozilla's XML preview. This is an unfortunate thing we have to accept for + * now. XML documents *have to* be sanitized as well because they might + * contain `<script>' tags (or on* attributes) with namespace declared as + * "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows + * javascript execution. + */ +async function sanitize_document(doc, policy) { +#IF MOZILLA + /* + * Blocking of scripts that are in the DOM from the beginning. Needed for + * Mozilla. 
+ */ + const listener_args = ["beforescriptexecute", prevent_script_execution]; + doc.addEventListener(...listener_args); + wait_loaded(doc).then(() => doc.removeEventListener(...listener_args)); + + for (const elem of doc.querySelectorAll("*")) { + [...elem.attributes].map(attr => attr.localName) + .filter(attr => /^on/.test(attr) && elem.wrappedJSObject[attr]) + .forEach(attr => elem.wrappedJSObject[attr] = null); + } + + sanitize_tree_urls(doc.documentElement); +#ENDIF + + /* + * Ensure our CSP rules are employed from the beginning. This CSP injection + * method is, when possible, going to be applied together with CSP rules + * injected using webRequest. + * Using elements namespaced as HTML makes this CSP injection also work for + * non-HTML documents. + */ + const source = `\ +<!DOCTYPE html> +<html> + <head> + <meta http-equiv="Content-Security-Policy" content="${policy.csp}"/> + </head> + <body> + Loading... + </body> +</html>`; + const html = + new DOMParser().parseFromString(source, "text/html").documentElement; + + /* + * Root node gets hijacked now, to be re-attached after <head> is loaded + * and sanitized. + */ + const root = doc.documentElement; + root.replaceWith(html); + + /* + * When we don't inject payload, we neither block document's CSP `<meta>' + * tags nor wait for `<head>' to be parsed. + */ + if (policy.payload) { + await wait_for_head(doc, root); + + root.querySelectorAll("head meta") + .forEach(m => sanitize_meta(m, policy)); + } + + root.querySelectorAll("script").forEach(s => sanitize_script(s, policy)); + sanitize_tree_urls(root); + html.replaceWith(root); + root.querySelectorAll("script").forEach(s => desanitize_script(s, policy)); + + start_urls_sanitizing(doc); +} + +async function _disable_service_workers() { + if (!navigator.serviceWorker) + return; + + const registrations = await navigator.serviceWorker.getRegistrations(); + if (registrations.length === 0) + return; + + console.warn("Service Workers detected on this page! 
Unregistering and reloading."); + + try { + await Promise.all(registrations.map(r => r.unregister())); + } finally { + location.reload(); + } + + /* Never actually return! */ + return new Promise(() => 0); +} + +/* + * Trying to use service worker APIs might result in exceptions, for example + * when in a non-HTML document. Because of this, we wrap the function that does + * the actual work in a try {} block. + */ +async function disable_service_workers() { + try { + await _disable_service_workers() + } catch (e) { + console.debug("Exception thrown during an attempt to detect and disable service workers.", e); + } +} + +function enforce_blocking(policy) { + if (policy.allow) + return; + + return Promise.all([ + sanitize_document(document, policy), + disable_service_workers(), + wait_loaded(document) + ]); +} +#EXPORT enforce_blocking @@ -3,7 +3,7 @@ Upstream-Name: Haketilo Source: https://git.koszko.org/browser-extension/ Files: * -Copyright: 2021 Wojtek Kosior <koszko@koszko.org> +Copyright: 2021,2022 Wojtek Kosior <koszko@koszko.org> License: GPL-3+-javascript Comment: Wojtek Kosior promises not to sue even in case of violations of the license. @@ -78,14 +78,14 @@ License: AGPL-3+ Comment: Wojtek Kosior promises not to sue even in case of violations of the license. -Files: test/__init__.py test/unit/* +Files: test/__init__.py test/unit/* test/data/pages/scripts_to_block_1.html test/default_profiles/icecat_empty/extensions.json -Copyright: 2021 Wojtek Kosior <koszko@koszko.org> +Copyright: 2021,2022 Wojtek Kosior <koszko@koszko.org> License: CC0 Files: test/profiles.py test/script_loader.py test/unit/conftest.py test/extension_crafting.py test/unit/utils.py -Copyright: 2021 Wojtek Kosior <koszko@koszko.org> +Copyright: 2021,2022 Wojtek Kosior <koszko@koszko.org> License: GPL-3+ Comment: Wojtek Kosior promises not to sue even in case of violations of the license. 
diff --git a/test/data/pages/gotmyowndomain.html b/test/data/pages/gotmyowndomain.html index 42c26cc..390cbcc 100644 --- a/test/data/pages/gotmyowndomain.html +++ b/test/data/pages/gotmyowndomain.html @@ -2,7 +2,7 @@ <!-- SPDX-License-Identifier: AGPL-3.0-or-later - Sample testig page + Sample testing page This file is part of Haketilo. diff --git a/test/data/pages/gotmyowndomain_https.html b/test/data/pages/gotmyowndomain_https.html index 95c0be4..f602950 100644 --- a/test/data/pages/gotmyowndomain_https.html +++ b/test/data/pages/gotmyowndomain_https.html @@ -2,7 +2,7 @@ <!-- SPDX-License-Identifier: AGPL-3.0-or-later - Sample testig page to serve over HTTPS + Sample testing page to serve over HTTPS This file is part of Haketilo. @@ -23,7 +23,7 @@ --> <html> <head> - <meta name=charset value="latin1"> + <meta name="charset" value="latin1"> <title>Schrodinger's Document</title> </head> <body> diff --git a/test/data/pages/scripts_to_block_1.html b/test/data/pages/scripts_to_block_1.html new file mode 100644 index 0000000..6d868dd --- /dev/null +++ b/test/data/pages/scripts_to_block_1.html @@ -0,0 +1,44 @@ +<!DOCTYPE html> +<!-- + SPDX-License-Identifier: CC0-1.0 + + A testing page with various scripts that need to get blocked. + + This file is part of Haketilo. + + Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org> + + This program is free software: you can redistribute it and/or modify + it under the terms of the CC0 1.0 Universal License as published by + the Creative Commons Corporation. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + CC0 1.0 Universal License for more details. + --> +<html> + <head> + <script> + window.__run = [...(window.__run || []), 'inline']; + </script> + <!-- the one below shall not execute even when blocking is off... 
--> + <script type="application/json"> + window.__run = [...(window.__run || []), 'json']; + </script> + </head> + <body> + <button id="clickme1" + onclick="window.__run = [...(window.__run || []), 'on'];"> + Click Meee! + </button> + <a id="clickme2" + href="javascript:window.__run = [...(window.__run || []), 'href'];void(0);"> + Click Meee! + </a> + <iframe src="javascript:window.parent.__run = [...(window.parent.__run || []), 'src'];"> + </iframe> + <object data="javascript:window.__run = [...(window.__run || []), 'data'];"> + </object> + </body> +</html> diff --git a/test/unit/test_policy_enforcing.py b/test/unit/test_policy_enforcing.py new file mode 100644 index 0000000..2f7bc80 --- /dev/null +++ b/test/unit/test_policy_enforcing.py @@ -0,0 +1,110 @@ +# SPDX-License-Identifier: CC0-1.0 + +""" +Haketilo unit tests - enforcing script blocking policy from content script +""" + +# This file is part of Haketilo +# +# Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the CC0 1.0 Universal License as published by +# the Creative Commons Corporation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# CC0 1.0 Universal License for more details. + +import pytest +import json +import urllib.parse +from selenium.webdriver.support.ui import WebDriverWait + +from ..script_loader import load_script +from .utils import are_scripts_allowed + +# For simplicity, we'll use one nonce in all test cases. 
+nonce = 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' + +allow_policy = {'allow': True} +block_policy = { + 'allow': False, + 'csp': f"prefetch-src 'none'; script-src-attr 'none'; script-src 'none'; script-src-elem 'none'; frame-src http://* https://*;" +} +payload_policy = { + 'mapping': 'somemapping', + 'payload': {'identifier': 'someresource'}, + 'csp': f"prefetch-src 'none'; script-src-attr 'none'; script-src 'nonce-{nonce}'; script-src-elem 'nonce-{nonce}';" +} + +content_script = load_script('content/policy_enforcing.js') + ''';{ +const smuggled_what_to_do = /^[^#]*#?(.*)$/.exec(document.URL)[1]; +const what_to_do = smuggled_what_to_do === "" ? {allow: true} : + JSON.parse(decodeURIComponent(smuggled_what_to_do)); + +if (what_to_do.csp_off) { + const orig_DOMParser = window.DOMParser; + window.DOMParser = function() { + parser = new orig_DOMParser(); + this.parseFromString = () => parser.parseFromString('', 'text/html'); + } +} + +if (what_to_do.onbeforescriptexecute_off) + prevent_script_execution = () => {}; + +if (what_to_do.sanitize_script_off) { + sanitize_script = () => {}; + desanitize_script = () => {}; +} + +enforce_blocking(what_to_do.policy); +}''' + +def get(driver, page, what_to_do): + driver.get(page + '#' + urllib.parse.quote(json.dumps(what_to_do))) + driver.execute_script('window.before_reload = true; location.reload();') + done = lambda _: not driver.execute_script('return window.before_reload;') + WebDriverWait(driver, 10).until(done) + +@pytest.mark.ext_data({'content_script': content_script}) +@pytest.mark.usefixtures('webextension') +def test_policy_enforcing(driver, execute_in_page): + """ + A test case of sanitizing <script>s and <meta>s in pages. + """ + # First, see if scripts run when not blocked. 
+ get(driver, 'https://gotmyowndoma.in/scripts_to_block_1.html', { + 'policy': allow_policy + }) + + for i in range(1, 3): + driver.find_element_by_id(f'clickme{i}').click() + + assert set(driver.execute_script('return window.__run || [];')) == \ + {'inline', 'on', 'href', 'src', 'data'} + + # Now, verify scripts don't run when blocked. + get(driver, 'https://gotmyowndoma.in/scripts_to_block_1.html', { + 'policy': block_policy + }) + + for i in range(1, 3): + driver.find_element_by_id(f'clickme{i}').click() + + assert set(driver.execute_script('return window.__run || [];')) == set() + assert not are_scripts_allowed(driver) + + # Now, verify only scripts with nonce can run when payload is injected. + get(driver, 'https://gotmyowndoma.in/scripts_to_block_1.html', { + 'policy': payload_policy + }) + + for i in range(1, 3): + driver.find_element_by_id(f'clickme{i}').click() + + assert set(driver.execute_script('return window.__run || [];')) == set() + assert not are_scripts_allowed(driver) + assert are_scripts_allowed(driver, nonce) diff --git a/test/unit/test_webrequest.py b/test/unit/test_webrequest.py index ae617aa..598f43b 100644 --- a/test/unit/test_webrequest.py +++ b/test/unit/test_webrequest.py @@ -22,6 +22,7 @@ from hashlib import sha256 import pytest from ..script_loader import load_script +from .utils import are_scripts_allowed def webrequest_js(): return (load_script('background/webrequest.js', @@ -50,19 +51,6 @@ def webrequest_js(): start("somesecret"); ''') -def are_scripts_allowed(driver, nonce=None): - return driver.execute_script( - ''' - document.scripts_allowed = false; - const script = document.createElement("script"); - script.innerHTML = "document.scripts_allowed = true;"; - if (arguments[0]) - script.setAttribute("nonce", arguments[0]); - document.head.append(script); - return document.scripts_allowed; - ''', - nonce) - @pytest.mark.ext_data({'background_script': webrequest_js}) @pytest.mark.usefixtures('webextension') def 
test_on_headers_received(driver, execute_in_page): diff --git a/test/unit/utils.py b/test/unit/utils.py index 96ebf60..8e04d91 100644 --- a/test/unit/utils.py +++ b/test/unit/utils.py @@ -187,3 +187,16 @@ def is_prime(n): return n > 1 and all([n % i != 0 for i in range(2, n)]) broker_js = lambda: load_script('background/broadcast_broker.js') + ';start();' + +def are_scripts_allowed(driver, nonce=None): + return driver.execute_script( + ''' + document.scripts_allowed = false; + const script = document.createElement("script"); + script.innerHTML = "document.scripts_allowed = true;"; + if (arguments[0]) + script.setAttribute("nonce", arguments[0]); + document.head.append(script); + return document.scripts_allowed; + ''', + nonce) diff --git a/test/world_wide_library.py b/test/world_wide_library.py index 4865b0a..f66a6d5 100644 --- a/test/world_wide_library.py +++ b/test/world_wide_library.py @@ -100,6 +100,9 @@ catalog = { 'https://gotmyowndoma.in/index.html': (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html'), + 'https://gotmyowndoma.in/scripts_to_block_1.html': + (200, {}, here / 'data' / 'pages' / 'scripts_to_block_1.html'), + 'https://serve.scrip.ts/': serve_script, 'https://site.with.scripts.block.ed': |