aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWojtek Kosior <koszko@koszko.org>2022-01-17 11:20:52 +0100
committerWojtek Kosior <koszko@koszko.org>2022-01-17 11:24:56 +0100
commit7bedbcbd80eba9359d2e905b7693923c76ce563d (patch)
tree5059ac406e29b1b1e81639fc11316dde280fe218
parentede3a55ba22d2560ec7c0deebffd73623488acc1 (diff)
downloadbrowser-extension-7bedbcbd80eba9359d2e905b7693923c76ce563d.tar.gz
browser-extension-7bedbcbd80eba9359d2e905b7693923c76ce563d.zip
move policy enforcing code to a new file, include basic test
-rw-r--r--content/policy_enforcing.js326
-rw-r--r--copyright8
-rw-r--r--test/data/pages/gotmyowndomain.html2
-rw-r--r--test/data/pages/gotmyowndomain_https.html4
-rw-r--r--test/data/pages/scripts_to_block_1.html44
-rw-r--r--test/unit/test_policy_enforcing.py110
-rw-r--r--test/unit/test_webrequest.py14
-rw-r--r--test/unit/utils.py13
-rw-r--r--test/world_wide_library.py3
9 files changed, 504 insertions, 20 deletions
diff --git a/content/policy_enforcing.js b/content/policy_enforcing.js
new file mode 100644
index 0000000..25c8b6b
--- /dev/null
+++ b/content/policy_enforcing.js
@@ -0,0 +1,326 @@
+/**
+ * This file is part of Haketilo.
+ *
+ * Function: Enforcing script blocking rules on a given page, working from a
+ * content script.
+ *
+ * Copyright (C) 2021,2022 Wojtek Kosior
+ * Copyright (C) 2021 jahoti
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * As additional permission under GNU GPL version 3 section 7, you
+ * may distribute forms of that code without the copy of the GNU
+ * GPL normally required by section 4, provided you include this
+ * license notice and, in case of non-source distribution, a URL
+ * through which recipients can access the Corresponding Source.
+ * If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not
+ * obligated to do so. If you do not wish to do so, delete this
+ * exception statement from your version.
+ *
+ * As a special exception to the GPL, any HTML file which merely
+ * makes function calls to this code, and for that purpose
+ * includes it by reference shall be deemed a separate work for
+ * copyright law purposes. If you modify this code, you may extend
+ * this exception to your version of the code, but you are not
+ * obligated to do so. If you do not wish to do so, delete this
+ * exception statement from your version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
+ * license. Although I request that you do not make use of this code in a
+ * proprietary program, I am not going to enforce this in court.
+ */
+
+#FROM common/misc.js IMPORT gen_nonce, csp_header_regex
+
+document.content_loaded = document.readyState === "complete";
+const wait_loaded = e => e.content_loaded ? Promise.resolve() :
+ new Promise(c => e.addEventListener("DOMContentLoaded", c, {once: true}));
+
+wait_loaded(document).then(() => document.content_loaded = true);
+
+/*
+ * In the case of HTML documents:
+ * 1. When injecting some payload we need to sanitize <meta> CSP tags before
+ * they reach the document.
+ * 2. Only <meta> tags inside <head> are considered valid by the browser and
+ * need to be considered.
+ * 3. We want to detach <html> from document, wait until its <head> completes
+ * loading, sanitize it and re-attach <html>.
+ * 4. We shall wait for anything to appear in or after <body> and take that as
+ * a sign <head> has finished loading.
+ * 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also
+ * be a sign that <head> is fully loaded.
+ */
+
+function make_body_start_observer(DOM_element, waiting) {
+ const observer = new MutationObserver(() => try_body_started(waiting));
+ observer.observe(DOM_element, {childList: true});
+ return observer;
+}
+
+function try_body_started(waiting) {
+ const body = waiting.detached_html.querySelector("body");
+
+ if ((body && (body.firstChild || body.nextSibling)) ||
+ waiting.doc.documentElement.nextSibling) {
+ finish_waiting(waiting);
+ return true;
+ }
+
+ if (body && waiting.observers.length < 2)
+ waiting.observers.push(make_body_start_observer(body, waiting));
+}
+
+function finish_waiting(waiting) {
+ if (waiting.finished)
+ return;
+ waiting.finished = true;
+ waiting.observers.forEach(observer => observer.disconnect());
+ setTimeout(waiting.callback, 0);
+}
+
+function _wait_for_head(doc, detached_html, callback) {
+ const waiting = {doc, detached_html, callback, observers: []};
+
+ if (try_body_started(waiting))
+ return;
+
+ waiting.observers = [make_body_start_observer(detached_html, waiting)];
+
+ wait_loaded(doc).then(() => finish_waiting(waiting));
+}
+
+function wait_for_head(doc, detached_html) {
+ return new Promise(cb => _wait_for_head(doc, detached_html, cb));
+}
+
+const blocked_str = "blocked";
+
+function block_attribute(node, attr, ns=null, replace_with="") {
+ const [hasa, geta, seta, rema] = ["has", "get", "set", "remove"]
+ .map(m => (n, ...args) => typeof ns === "string" ?
+ n[`${m}AttributeNS`](ns, ...args) : n[`${m}Attribute`](...args));
+ /*
+ * Disabling attributes by prepending `blocked-' allows them to still be
+ * relatively easily accessed in case they contain some useful data.
+ */
+ const construct_name = [attr];
+ while (hasa(node, construct_name.join("")))
+ construct_name.unshift(blocked_str);
+
+ while (construct_name.length > 1) {
+ construct_name.shift();
+ const name = construct_name.join("");
+ seta(node, `${blocked_str}-${name}`, geta(node, name));
+ }
+
+ rema(node, attr);
+ seta(node, attr, replace_with);
+}
+
+/*
+ * Used to disable `<script>'s and `<meta>'s that have not yet been added to
+ * live DOM (doesn't work for those already added).
+ */
+function sanitize_meta(meta) {
+ if (csp_header_regex.test(meta.httpEquiv) && meta.content)
+ block_attribute(meta, "content");
+}
+
+function sanitize_script(script) {
+ script.haketilo_blocked_type = script.getAttribute("type");
+ script.type = "text/plain";
+}
+
+/*
+ * Executed after `<script>' has been connected to the DOM, when it is no longer
+ * eligible for being executed by the browser.
+ */
+function desanitize_script(script) {
+ script.setAttribute("type", script.haketilo_blocked_type);
+
+ if ([null, undefined].includes(script.haketilo_blocked_type))
+ script.removeAttribute("type");
+
+ delete script.haketilo_blocked_type;
+}
+
+const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)|^javascript:/i;
+function sanitize_element_urls(element) {
+ if (element.haketilo_sanitized_urls)
+ return;
+
+ element.haketilo_sanitized_urls = true;
+
+ for (const attr of [...element.attributes || []]
+ .filter(attr => /^(href|src|data)$/i.test(attr.localName))
+ .filter(attr => bad_url_reg.test(attr.value))) {
+ const replacement_value = /^href$/i.test(attr.localName) ?
+ "javascript:void('blocked');" : "data:text/plain,blocked";
+ block_attribute(element, attr.localName, attr.namespaceURI,
+ replacement_value);
+ }
+}
+
+function sanitize_tree_urls(root) {
+ root.querySelectorAll("*[href], *[src], *[data]")
+ .forEach(sanitize_element_urls);
+}
+
+function start_urls_sanitizing(doc) {
+ sanitize_tree_urls(doc);
+ if (!doc.content_loaded) {
+ const mutation_handler =
+ m => m.addedNodes.forEach(sanitize_element_urls);
+ const mo = new MutationObserver(ms => ms.forEach(mutation_handler));
+ mo.observe(doc, {childList: true, subtree: true});
+ wait_loaded(doc).then(() => mo.disconnect());
+ }
+}
+
+#IF MOZILLA
+/*
+ * Normally, we block scripts with CSP. However, Mozilla does optimizations that
+ * cause part of the DOM to be loaded when our content scripts get to run. Thus,
+ * before the CSP rules we inject (for non-HTTP pages) become effective, we need
+ * to somehow block the execution of `<script>'s and intrinsics that were
+ * already there. Additionally, some browsers (IceCat 60) seem to have problems
+ * applying this CSP to non-inline `<scripts>' in certain scenarios.
+ */
+function prevent_script_execution(event) {
+ if (!event.target.haketilo_payload)
+ event.preventDefault();
+}
+#ENDIF
+
+/*
+ * Here we block all scripts of a document which might be either an
+ * HTMLDocument or an XMLDocument. Modifying an XML document might disrupt
+ * Mozilla's XML preview. This is an unfortunate thing we have to accept for
+ * now. XML documents *have to* be sanitized as well because they might
+ * contain `<script>' tags (or on* attributes) with namespace declared as
+ * "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows
+ * javascript execution.
+ *
+ * doc    - the document to sanitize
+ * policy - object; its `csp' property is put into an injected CSP `<meta>'
+ *          and a truthy `payload' property additionally makes this function
+ *          wait for `<head>' and block the page's own CSP `<meta>' tags
+ *
+ * Being async, the function returns a promise. It resolves after the original
+ * root element has been re-attached and URL sanitizing has been started.
+ */
+async function sanitize_document(doc, policy) {
+#IF MOZILLA
+ /*
+ * Blocking of scripts that are in the DOM from the beginning. Needed for
+ * Mozilla.
+ */
+ const listener_args = ["beforescriptexecute", prevent_script_execution];
+ doc.addEventListener(...listener_args);
+ wait_loaded(doc).then(() => doc.removeEventListener(...listener_args));
+
+ /*
+ * For every attribute whose name starts with "on", null the same-named
+ * property on the element's `wrappedJSObject' if it is currently set.
+ */
+ for (const elem of doc.querySelectorAll("*")) {
+ [...elem.attributes].map(attr => attr.localName)
+ .filter(attr => /^on/.test(attr) && elem.wrappedJSObject[attr])
+ .forEach(attr => elem.wrappedJSObject[attr] = null);
+ }
+
+ sanitize_tree_urls(doc.documentElement);
+#ENDIF
+
+ /*
+ * Ensure our CSP rules are employed from the beginning. This CSP injection
+ * method is, when possible, going to be applied together with CSP rules
+ * injected using webRequest.
+ * Using elements namespaced as HTML makes this CSP injection also work for
+ * non-HTML documents.
+ * Note that `policy.csp' is interpolated into the markup verbatim.
+ */
+ const source = `\
+<!DOCTYPE html>
+<html>
+ <head>
+ <meta http-equiv="Content-Security-Policy" content="${policy.csp}"/>
+ </head>
+ <body>
+ Loading...
+ </body>
+</html>`;
+ const html =
+ new DOMParser().parseFromString(source, "text/html").documentElement;
+
+ /*
+ * Root node gets hijacked now, to be re-attached after <head> is loaded
+ * and sanitized.
+ */
+ const root = doc.documentElement;
+ root.replaceWith(html);
+
+ /*
+ * When we don't inject payload, we neither block document's CSP `<meta>'
+ * tags nor wait for `<head>' to be parsed.
+ */
+ if (policy.payload) {
+ await wait_for_head(doc, root);
+
+ root.querySelectorAll("head meta")
+ .forEach(m => sanitize_meta(m, policy));
+ }
+
+ /*
+ * Scripts are given a non-executable type for the moment of re-attaching
+ * the root and get their original type restored right afterwards. The
+ * `policy' argument passed to the sanitize_*() helpers here is not declared
+ * by their definitions in this file.
+ */
+ root.querySelectorAll("script").forEach(s => sanitize_script(s, policy));
+ sanitize_tree_urls(root);
+ html.replaceWith(root);
+ root.querySelectorAll("script").forEach(s => desanitize_script(s, policy));
+
+ start_urls_sanitizing(doc);
+}
+
+async function _disable_service_workers() {
+ if (!navigator.serviceWorker)
+ return;
+
+ const registrations = await navigator.serviceWorker.getRegistrations();
+ if (registrations.length === 0)
+ return;
+
+ console.warn("Service Workers detected on this page! Unregistering and reloading.");
+
+ try {
+ await Promise.all(registrations.map(r => r.unregister()));
+ } finally {
+ location.reload();
+ }
+
+ /* Never actually return! */
+ return new Promise(() => 0);
+}
+
+/*
+ * Trying to use servce workers APIs might result in exceptions, for example
+ * when in a non-HTML document. Because of this, we wrap the function that does
+ * the actual work in a try {} block.
+ */
+async function disable_service_workers() {
+ try {
+ await _disable_service_workers()
+ } catch (e) {
+ console.debug("Exception thrown during an attempt to detect and disable service workers.", e);
+ }
+}
+
+function enforce_blocking(policy) {
+ if (policy.allow)
+ return;
+
+ return Promise.all([
+ sanitize_document(document, policy),
+ disable_service_workers(),
+ wait_loaded(document)
+ ]);
+}
+#EXPORT enforce_blocking
diff --git a/copyright b/copyright
index 2541bde..964a9a0 100644
--- a/copyright
+++ b/copyright
@@ -3,7 +3,7 @@ Upstream-Name: Haketilo
Source: https://git.koszko.org/browser-extension/
Files: *
-Copyright: 2021 Wojtek Kosior <koszko@koszko.org>
+Copyright: 2021,2022 Wojtek Kosior <koszko@koszko.org>
License: GPL-3+-javascript
Comment: Wojtek Kosior promises not to sue even in case of violations
of the license.
@@ -78,14 +78,14 @@ License: AGPL-3+
Comment: Wojtek Kosior promises not to sue even in case of violations
of the license.
-Files: test/__init__.py test/unit/*
+Files: test/__init__.py test/unit/* test/data/pages/scripts_to_block_1.html
test/default_profiles/icecat_empty/extensions.json
-Copyright: 2021 Wojtek Kosior <koszko@koszko.org>
+Copyright: 2021,2022 Wojtek Kosior <koszko@koszko.org>
License: CC0
Files: test/profiles.py test/script_loader.py test/unit/conftest.py
test/extension_crafting.py test/unit/utils.py
-Copyright: 2021 Wojtek Kosior <koszko@koszko.org>
+Copyright: 2021,2022 Wojtek Kosior <koszko@koszko.org>
License: GPL-3+
Comment: Wojtek Kosior promises not to sue even in case of violations
of the license.
diff --git a/test/data/pages/gotmyowndomain.html b/test/data/pages/gotmyowndomain.html
index 42c26cc..390cbcc 100644
--- a/test/data/pages/gotmyowndomain.html
+++ b/test/data/pages/gotmyowndomain.html
@@ -2,7 +2,7 @@
<!--
SPDX-License-Identifier: AGPL-3.0-or-later
- Sample testig page
+ Sample testing page
This file is part of Haketilo.
diff --git a/test/data/pages/gotmyowndomain_https.html b/test/data/pages/gotmyowndomain_https.html
index 95c0be4..f602950 100644
--- a/test/data/pages/gotmyowndomain_https.html
+++ b/test/data/pages/gotmyowndomain_https.html
@@ -2,7 +2,7 @@
<!--
SPDX-License-Identifier: AGPL-3.0-or-later
- Sample testig page to serve over HTTPS
+ Sample testing page to serve over HTTPS
This file is part of Haketilo.
@@ -23,7 +23,7 @@
-->
<html>
<head>
- <meta name=charset value="latin1">
+ <meta name="charset" value="latin1">
<title>Schrodinger's Document</title>
</head>
<body>
diff --git a/test/data/pages/scripts_to_block_1.html b/test/data/pages/scripts_to_block_1.html
new file mode 100644
index 0000000..6d868dd
--- /dev/null
+++ b/test/data/pages/scripts_to_block_1.html
@@ -0,0 +1,44 @@
+<!DOCTYPE html>
+<!--
+ SPDX-License-Identifier: CC0-1.0
+
+ A testing page with various scripts that need to get blocked.
+
+ This file is part of Haketilo.
+
+ Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the CC0 1.0 Universal License as published by
+ the Creative Commons Corporation.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ CC0 1.0 Universal License for more details.
+ -->
+<html>
+ <head>
+ <script>
+ window.__run = [...(window.__run || []), 'inline'];
+ </script>
+ <!-- the one below shall not execute even when blocking is off... -->
+ <script type="application/json">
+ window.__run = [...(window.__run || []), 'json'];
+ </script>
+ </head>
+ <body>
+ <button id="clickme1"
+ onclick="window.__run = [...(window.__run || []), 'on'];">
+ Click Meee!
+ </button>
+ <a id="clickme2"
+ href="javascript:window.__run = [...(window.__run || []), 'href'];void(0);">
+ Click Meee!
+ </a>
+ <iframe src="javascript:window.parent.__run = [...(window.parent.__run || []), 'src'];">
+ </iframe>
+ <object data="javascript:window.__run = [...(window.__run || []), 'data'];">
+ </object>
+ </body>
+</html>
diff --git a/test/unit/test_policy_enforcing.py b/test/unit/test_policy_enforcing.py
new file mode 100644
index 0000000..2f7bc80
--- /dev/null
+++ b/test/unit/test_policy_enforcing.py
@@ -0,0 +1,110 @@
+# SPDX-License-Identifier: CC0-1.0
+
+"""
+Haketilo unit tests - enforcing script blocking policy from content script
+"""
+
+# This file is part of Haketilo
+#
+# Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the CC0 1.0 Universal License as published by
+# the Creative Commons Corporation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# CC0 1.0 Universal License for more details.
+
+import pytest
+import json
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+
+from ..script_loader import load_script
+from .utils import are_scripts_allowed
+
+# For simplicity, we'll use one nonce in all test cases.
+nonce = 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'
+
+allow_policy = {'allow': True}
+block_policy = {
+ 'allow': False,
+ 'csp': f"prefetch-src 'none'; script-src-attr 'none'; script-src 'none'; script-src-elem 'none'; frame-src http://* https://*;"
+}
+payload_policy = {
+ 'mapping': 'somemapping',
+ 'payload': {'identifier': 'someresource'},
+ 'csp': f"prefetch-src 'none'; script-src-attr 'none'; script-src 'nonce-{nonce}'; script-src-elem 'nonce-{nonce}';"
+}
+
+content_script = load_script('content/policy_enforcing.js') + ''';{
+const smuggled_what_to_do = /^[^#]*#?(.*)$/.exec(document.URL)[1];
+const what_to_do = smuggled_what_to_do === "" ? {allow: true} :
+ JSON.parse(decodeURIComponent(smuggled_what_to_do));
+
+if (what_to_do.csp_off) {
+ const orig_DOMParser = window.DOMParser;
+ window.DOMParser = function() {
+ parser = new orig_DOMParser();
+ this.parseFromString = () => parser.parseFromString('', 'text/html');
+ }
+}
+
+if (what_to_do.onbeforescriptexecute_off)
+ prevent_script_execution = () => {};
+
+if (what_to_do.sanitize_script_off) {
+ sanitize_script = () => {};
+ desanitize_script = () => {};
+}
+
+enforce_blocking(what_to_do.policy);
+}'''
+
+def get(driver, page, what_to_do):
+ driver.get(page + '#' + urllib.parse.quote(json.dumps(what_to_do)))
+ driver.execute_script('window.before_reload = true; location.reload();')
+ done = lambda _: not driver.execute_script('return window.before_reload;')
+ WebDriverWait(driver, 10).until(done)
+
+@pytest.mark.ext_data({'content_script': content_script})
+@pytest.mark.usefixtures('webextension')
+def test_policy_enforcing(driver, execute_in_page):
+ """
+ A test case of sanitizing <script>s and <meta>s in pages.
+ """
+ # First, see if scripts run when not blocked.
+ get(driver, 'https://gotmyowndoma.in/scripts_to_block_1.html', {
+ 'policy': allow_policy
+ })
+
+ for i in range(1, 3):
+ driver.find_element_by_id(f'clickme{i}').click()
+
+ assert set(driver.execute_script('return window.__run || [];')) == \
+ {'inline', 'on', 'href', 'src', 'data'}
+
+ # Now, verify scripts don't run when blocked.
+ get(driver, 'https://gotmyowndoma.in/scripts_to_block_1.html', {
+ 'policy': block_policy
+ })
+
+ for i in range(1, 3):
+ driver.find_element_by_id(f'clickme{i}').click()
+
+ assert set(driver.execute_script('return window.__run || [];')) == set()
+ assert not are_scripts_allowed(driver)
+
+ # Now, verify only scripts with nonce can run when payload is injected.
+ get(driver, 'https://gotmyowndoma.in/scripts_to_block_1.html', {
+ 'policy': payload_policy
+ })
+
+ for i in range(1, 3):
+ driver.find_element_by_id(f'clickme{i}').click()
+
+ assert set(driver.execute_script('return window.__run || [];')) == set()
+ assert not are_scripts_allowed(driver)
+ assert are_scripts_allowed(driver, nonce)
diff --git a/test/unit/test_webrequest.py b/test/unit/test_webrequest.py
index ae617aa..598f43b 100644
--- a/test/unit/test_webrequest.py
+++ b/test/unit/test_webrequest.py
@@ -22,6 +22,7 @@ from hashlib import sha256
import pytest
from ..script_loader import load_script
+from .utils import are_scripts_allowed
def webrequest_js():
return (load_script('background/webrequest.js',
@@ -50,19 +51,6 @@ def webrequest_js():
start("somesecret");
''')
-def are_scripts_allowed(driver, nonce=None):
- return driver.execute_script(
- '''
- document.scripts_allowed = false;
- const script = document.createElement("script");
- script.innerHTML = "document.scripts_allowed = true;";
- if (arguments[0])
- script.setAttribute("nonce", arguments[0]);
- document.head.append(script);
- return document.scripts_allowed;
- ''',
- nonce)
-
@pytest.mark.ext_data({'background_script': webrequest_js})
@pytest.mark.usefixtures('webextension')
def test_on_headers_received(driver, execute_in_page):
diff --git a/test/unit/utils.py b/test/unit/utils.py
index 96ebf60..8e04d91 100644
--- a/test/unit/utils.py
+++ b/test/unit/utils.py
@@ -187,3 +187,16 @@ def is_prime(n):
return n > 1 and all([n % i != 0 for i in range(2, n)])
broker_js = lambda: load_script('background/broadcast_broker.js') + ';start();'
+
+def are_scripts_allowed(driver, nonce=None):
+ return driver.execute_script(
+ '''
+ document.scripts_allowed = false;
+ const script = document.createElement("script");
+ script.innerHTML = "document.scripts_allowed = true;";
+ if (arguments[0])
+ script.setAttribute("nonce", arguments[0]);
+ document.head.append(script);
+ return document.scripts_allowed;
+ ''',
+ nonce)
diff --git a/test/world_wide_library.py b/test/world_wide_library.py
index 4865b0a..f66a6d5 100644
--- a/test/world_wide_library.py
+++ b/test/world_wide_library.py
@@ -100,6 +100,9 @@ catalog = {
'https://gotmyowndoma.in/index.html':
(200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html'),
+ 'https://gotmyowndoma.in/scripts_to_block_1.html':
+ (200, {}, here / 'data' / 'pages' / 'scripts_to_block_1.html'),
+
'https://serve.scrip.ts/': serve_script,
'https://site.with.scripts.block.ed':