From 702eefd252a112375c2da6a9ae4b39915fc2dbf4 Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Fri, 31 Dec 2021 14:23:28 +0100 Subject: utilize Pattern Tree to decide the policy to use and modify HTTP response headers according to that policy This commit also enhances the build script so that preprocessor conditionals can now use operators '&&' and '||'. The features being developed are not yet included in the actual Haketilo build. Some of the new source files contain similar functionality to other ones already existing in the source tree. At some point the latter will be removed. --- Makefile.in | 1 + background/patterns_query_manager.js | 27 +++-- background/policy_injector.js | 12 +- background/webrequest.js | 189 ++++++++++++++++++++++++++++++ common/indexeddb.js | 66 +++++++---- common/misc.js | 11 -- common/patterns_query_tree.js | 2 + common/policy.js | 106 +++++++++++++++++ compute_scripts.awk | 42 ++++++- content/main.js | 12 +- manifest.json | 4 +- test/extension_crafting.py | 1 + test/profiles.py | 30 ++--- test/script_loader.py | 2 +- test/unit/conftest.py | 73 ++++++------ test/unit/test_indexeddb.py | 193 +++++++++++++++++++------------ test/unit/test_patterns_query_manager.py | 39 ++++--- test/unit/test_policy_deciding.py | 121 +++++++++++++++++++ test/unit/test_webrequest.py | 77 ++++++++++++ test/world_wide_library.py | 98 +++++++++++++++- 20 files changed, 903 insertions(+), 203 deletions(-) create mode 100644 background/webrequest.js create mode 100644 common/policy.js create mode 100644 test/unit/test_policy_deciding.py create mode 100644 test/unit/test_webrequest.py diff --git a/Makefile.in b/Makefile.in index 5291299..bf0fdec 100644 --- a/Makefile.in +++ b/Makefile.in @@ -83,6 +83,7 @@ clean mostlyclean: rm -rf test/certs rm -rf $$(find . -name geckodriver.log) rm -rf $$(find . -type d -name __pycache__) + rm -rf $$(find . 
-type d -name injected_scripts) distclean: clean rm -f Makefile config.status record.conf diff --git a/background/patterns_query_manager.js b/background/patterns_query_manager.js index cb14cb1..e364668 100644 --- a/background/patterns_query_manager.js +++ b/background/patterns_query_manager.js @@ -45,13 +45,18 @@ #IMPORT common/patterns_query_tree.js AS pqt #IMPORT common/indexeddb.js AS haketilodb +#IF MOZILLA || MV3 #FROM common/browser.js IMPORT browser +#ENDIF + +let secret; const tree = pqt.make(); #EXPORT tree const current_mappings = new Map(); +#IF MOZILLA || MV3 let registered_script = null; let script_update_occuring = false; let script_update_needed; @@ -67,6 +72,7 @@ async function update_content_script() script_update_needed = false; const code = `\ +this.haketilo_secret = ${secret}; this.haketilo_pattern_tree = ${JSON.stringify(tree)}; if (this.haketilo_content_script_main) haketilo_content_script_main();`; @@ -89,36 +95,43 @@ if (this.haketilo_content_script_main) function register_mapping(mapping) { - for (const pattern in mapping.payloads) - pqt.register(tree, pattern, mapping.identifier, mapping); + for (const [pattern, resource] of Object.entries(mapping.payloads)) + pqt.register(tree, pattern, mapping.identifier, resource); current_mappings.set(mapping.identifier, mapping); } +#ENDIF function mapping_changed(change) { console.log('mapping changes!', arguments); - const old_version = current_mappings.get(change.identifier); + const old_version = current_mappings.get(change.key); if (old_version !== undefined) { for (const pattern in old_version.payloads) - pqt.deregister(tree, pattern, change.identifier); + pqt.deregister(tree, pattern, change.key); - current_mappings.delete(change.identifier); + current_mappings.delete(change.key); } if (change.new_val !== undefined) register_mapping(change.new_val); +#IF MOZILLA || MV3 script_update_needed = true; setTimeout(update_content_script, 0); +#ENDIF } -async function start() +async function 
start(secret_) { + secret = secret_; + const [tracking, initial_mappings] = - await haketilodb.track_mappings(mapping_changed); + await haketilodb.track.mappings(mapping_changed); initial_mappings.forEach(register_mapping); +#IF MOZILLA || MV3 script_update_needed = true; await update_content_script(); +#ENDIF } #EXPORT start diff --git a/background/policy_injector.js b/background/policy_injector.js index 2544e8e..b1fc733 100644 --- a/background/policy_injector.js +++ b/background/policy_injector.js @@ -43,13 +43,23 @@ * proprietary program, I am not going to enforce this in court. */ -#FROM common/misc.js IMPORT make_csp_rule, csp_header_regex +#FROM common/misc.js IMPORT csp_header_regex /* Re-enable the import below once nonce stuff here is ready */ #IF NEVER #FROM common/misc.js IMPORT gen_nonce #ENDIF +/* CSP rule that blocks scripts according to policy's needs. */ +function make_csp_rule(policy) +{ + let rule = "prefetch-src 'none'; script-src-attr 'none';"; + const script_src = policy.nonce !== undefined ? + `'nonce-${policy.nonce}'` : "'none'"; + rule += ` script-src ${script_src}; script-src-elem ${script_src};`; + return rule; +} + function inject_csp_headers(headers, policy) { let csp_headers; diff --git a/background/webrequest.js b/background/webrequest.js new file mode 100644 index 0000000..e32947a --- /dev/null +++ b/background/webrequest.js @@ -0,0 +1,189 @@ +/** + * This file is part of Haketilo. + * + * Function: Modify HTTP traffic using webRequest API. + * + * Copyright (C) 2021 Wojtek Kosior + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * As additional permission under GNU GPL version 3 section 7, you + * may distribute forms of that code without the copy of the GNU + * GPL normally required by section 4, provided you include this + * license notice and, in case of non-source distribution, a URL + * through which recipients can access the Corresponding Source. + * If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not + * obligated to do so. If you do not wish to do so, delete this + * exception statement from your version. + * + * As a special exception to the GPL, any HTML file which merely + * makes function calls to this code, and for that purpose + * includes it by reference shall be deemed a separate work for + * copyright law purposes. If you modify this code, you may extend + * this exception to your version of the code, but you are not + * obligated to do so. If you do not wish to do so, delete this + * exception statement from your version. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * I, Wojtek Kosior, thereby promise not to sue for violation of this file's + * license. Although I request that you do not make use this code in a + * proprietary program, I am not going to enforce this in court. 
+ */ + +#IMPORT common/indexeddb.js AS haketilodb +#IF MOZILLA +#IMPORT background/stream_filter.js +#ENDIF + +#FROM common/browser.js IMPORT browser +#FROM common/misc.js IMPORT is_privileged_url, csp_header_regex +#FROM common/policy.js IMPORT decide_policy + +#FROM background/patterns_query_manager.js IMPORT tree + +let secret; + +let default_allow = false; + +async function track_default_allow() +{ + const set_val = ch => { if (ch.key === "default_allow") default_allow = (ch.new_val || {}).value; }; + const [tracking, settings] = await haketilodb.track.settings(set_val); + for (const setting of settings) { + if (setting.name === "default_allow") + default_allow = setting.value; + } +} + +function on_headers_received(details) +{ + const url = details.url; + if (is_privileged_url(details.url)) + return; + + let headers = details.responseHeaders; + + const policy = decide_policy(tree, details.url, default_allow, secret); + if (policy.allow) + return; + + if (policy.payload) + headers = headers.filter(h => !csp_header_regex.test(h.name)); + + headers.push({name: "Content-Security-Policy", value: policy.csp}); + +#IF MOZILLA + let skip = false; + for (const header of headers) { + if (header.name.toLowerCase().trim() !== "content-disposition") + continue; + + if (/^\s*attachment\s*(;.*)?$/i.test(header.value)) { /* parameters after "attachment" are optional */ + skip = true; + } else { + skip = false; + break; + } + } + skip = skip || (details.statusCode >= 300 && details.statusCode < 400); + + if (!skip) + headers = stream_filter.apply(details, headers, policy); +#ENDIF + + return {responseHeaders: headers}; +} + +#IF CHROMIUM && MV2 +const request_url_regex = /^[^?]*\?url=(.*)$/; +const redirect_url_template = browser.runtime.getURL("dummy") + "?settings="; + +function on_before_request(details) +{ + /* + * Content script will make a synchronous XmlHttpRequest to extension's + * `dummy` file to query settings for given URL. We smuggle that + * information in query parameter of the URL we redirect to. 
+ * A risk of fingerprinting arises if a page with script execution allowed + * guesses the dummy file URL and makes an AJAX call to it. It is currently + * a problem in ManifestV2 Chromium-family port of Haketilo because Chromium + * uses predictable URLs for web-accessible resources. We plan to fix it in + * the future ManifestV3 port. + */ + if (details.type !== "xmlhttprequest") + return {cancel: true}; + +#IF DEBUG + console.debug(`Settings queried using XHR for '${details.url}'.`); +#ENDIF + + /* + * request_url should be of the following format: + * <url_for_extension's_dummy_file>?url=<valid_urlencoded_url> + */ + const match = request_url_regex.exec(details.url); + if (match) { + const queried_url = decodeURIComponent(match[1]); + + if (details.initiator && !queried_url.startsWith(details.initiator)) { + console.warn(`Blocked suspicious query of '${queried_url}' by '${details.initiator}'. This might be the result of page fingerprinting the browser.`); + return {cancel: true}; + } + + const policy = decide_policy(tree, queried_url, default_allow, secret); + if (!policy.error) { + const encoded_policy = encodeURIComponent(JSON.stringify(policy)); + return {redirectUrl: redirect_url_template + encoded_policy}; + } + } + + console.warn(`Bad request! Expected ${browser.runtime.getURL("dummy")}?url=. Got ${details.url}. 
This might be the result of page fingerprinting the browser.`); + + return {cancel: true}; +} + +const all_types = [ + "main_frame", "sub_frame", "stylesheet", "script", "image", "font", + "object", "xmlhttprequest", "ping", "csp_report", "media", "websocket", + "other", "main_frame", "sub_frame" +]; +#ENDIF + +async function start(secret_) +{ + secret = secret_; + +#IF CHROMIUM + const extra_opts = ["blocking", "extraHeaders"]; +#ELSE + const extra_opts = ["blocking"]; +#ENDIF + + browser.webRequest.onHeadersReceived.addListener( + on_headers_received, + {urls: [""], types: ["main_frame", "sub_frame"]}, + extra_opts.concat("responseHeaders") + ); + +#IF CHROMIUM && MV2 + browser.webRequest.onBeforeRequest.addListener( + on_before_request, + {urls: [browser.runtime.getURL("dummy") + "*"], types: all_types}, + extra_opts + ); +#ENDIF + + await track_default_allow(); +} +#EXPORT start diff --git a/common/indexeddb.js b/common/indexeddb.js index 096391a..e54d1ca 100644 --- a/common/indexeddb.js +++ b/common/indexeddb.js @@ -62,7 +62,8 @@ const stores = [ ["files", {keyPath: "hash_key"}], ["file_uses", {keyPath: "hash_key"}], ["resources", {keyPath: "identifier"}], - ["mappings", {keyPath: "identifier"}] + ["mappings", {keyPath: "identifier"}], + ["settings", {keyPath: "name"}] ]; let db = null; @@ -207,7 +208,7 @@ async function incr_file_uses(context, file_ref, by=1) const decr_file_uses = (ctx, file_ref) => incr_file_uses(ctx, file_ref, -1); -async function finalize_items_transaction(context) +async function finalize_transaction(context) { for (const uses of Object.values(context.file_uses)) { if (uses.uses < 0) @@ -248,7 +249,7 @@ async function finalize_items_transaction(context) return context.result; } -#EXPORT finalize_items_transaction +#EXPORT finalize_transaction /* * How a sample data argument to the function below might look like: @@ -304,7 +305,7 @@ async function _save_items(resources, mappings, context) for (const item of resources.concat(mappings)) 
await save_item(item, context); - await finalize_items_transaction(context); + await finalize_transaction(context); } /* @@ -314,9 +315,9 @@ async function _save_items(resources, mappings, context) * object with keys being of the form `sha256-`. * * context should be one returned from start_items_transaction() and should be - * later passed to finalize_items_transaction() so that files depended on are - * added to IndexedDB and files that are no longer depended on after this - * operation are removed from IndexedDB. + * later passed to finalize_transaction() so that files depended on are added to + * IndexedDB and files that are no longer depended on after this operation are + * removed from IndexedDB. */ async function save_item(item, context) { @@ -346,9 +347,9 @@ async function _remove_item(store_name, identifier, context) * Remove definition of a resource/mapping from IndexedDB. * * context should be one returned from start_items_transaction() and should be - * later passed to finalize_items_transaction() so that files depended on are - * added to IndexedDB and files that are no longer depended on after this - * operation are removed from IndexedDB. + * later passed to finalize_transaction() so that files depended on are added to + * IndexedDB and files that are no longer depended on after this operation are + * removed from IndexedDB. */ async function remove_item(store_name, identifier, context) { @@ -363,26 +364,49 @@ const remove_resource = (id, ctx) => remove_item("resources", id, ctx); const remove_mapping = (id, ctx) => remove_item("mappings", id, ctx); #EXPORT remove_mapping +/* A simplified kind of transaction for modifying just the "settings" store. 
*/ +async function start_settings_transaction() +{ + const db = await get_db(); + return make_context(db.transaction("settings", "readwrite"), {}); +} + +async function set_setting(name, value) +{ + const context = await start_settings_transaction(); + broadcast.prepare(context.sender, `idb_changes_settings`, name); + await idb_put(context.transaction, "settings", {name, value}); + return finalize_transaction(context); +} +#EXPORT set_setting + +async function get_setting(name) +{ + const transaction = (await get_db()).transaction("settings"); + return ((await idb_get(transaction, "settings", name)) || {}).value; +} +#EXPORT get_setting + /* Callback used when listening to broadcasts while tracking db changes. */ -async function track_change(tracking, identifier) +async function track_change(tracking, key) { const transaction = (await get_db()).transaction([tracking.store_name]); - const new_val = await idb_get(transaction, tracking.store_name, identifier); + const new_val = await idb_get(transaction, tracking.store_name, key); - tracking.onchange({identifier, new_val}); + tracking.onchange({key, new_val}); } /* * Monitor changes to `store_name` IndexedDB object store. * - * `store_name` should be either "resources" or "mappings". + * `store_name` should be either "resources", "mappings" or "settings". * * `onchange` should be a callback that will be called when an item is added, * modified or removed from the store. The callback will be passed an object * representing the change as its first argument. This object will have the * form: * { - * identifier: "the identifier of modified resource/mapping", + * key: "the identifier of modified resource/mapping or settings key", * new_val: undefined // `undefined` if item removed, item object otherwise * } * @@ -395,7 +419,7 @@ async function track_change(tracking, identifier) * actually modified or that it only gets called once after multiple quick * changes to an item. 
*/ -async function track(store_name, onchange) +async function start_tracking(store_name, onchange) { const tracking = {store_name, onchange}; tracking.listener = @@ -408,12 +432,10 @@ async function track(store_name, onchange) return [tracking, (await wait_request(all_req)).target.result]; } -const track_resources = onchange => track("resources", onchange); -#EXPORT track_resources - -const track_mappings = onchange => track("mappings", onchange); -#EXPORT track_mappings +const track = {}; +for (const store_name of ["resources", "mappings", "settings"]) + track[store_name] = onchange => start_tracking(store_name, onchange); +#EXPORT track const untrack = tracking => broadcast.close(tracking.listener); #EXPORT untrack - diff --git a/common/misc.js b/common/misc.js index dc4a598..82f6cbf 100644 --- a/common/misc.js +++ b/common/misc.js @@ -67,17 +67,6 @@ function gen_nonce(length=16) } #EXPORT gen_nonce -/* CSP rule that blocks scripts according to policy's needs. */ -function make_csp_rule(policy) -{ - let rule = "prefetch-src 'none'; script-src-attr 'none';"; - const script_src = policy.nonce !== undefined ? - `'nonce-${policy.nonce}'` : "'none'"; - rule += ` script-src ${script_src}; script-src-elem ${script_src};`; - return rule; -} -#EXPORT make_csp_rule - /* Check if some HTTP header might define CSP rules. */ const csp_header_regex = /^\s*(content-security-policy|x-webkit-csp|x-content-security-policy)/i; diff --git a/common/patterns_query_tree.js b/common/patterns_query_tree.js index 1bbdb39..f8ec405 100644 --- a/common/patterns_query_tree.js +++ b/common/patterns_query_tree.js @@ -41,6 +41,8 @@ * proprietary program, I am not going to enforce this in court. */ +// TODO! Modify the code to use `Object.create(null)` instead of `{}`. 
+ #FROM common/patterns.js IMPORT deconstruct_url /* "Pattern Tree" is how we refer to the data structure used for querying diff --git a/common/policy.js b/common/policy.js new file mode 100644 index 0000000..ebd663f --- /dev/null +++ b/common/policy.js @@ -0,0 +1,106 @@ +/** + * This file is part of Haketilo. + * + * Function: Determining what to do on a given web page. + * + * Copyright (C) 2021 Wojtek Kosior + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * As additional permission under GNU GPL version 3 section 7, you + * may distribute forms of that code without the copy of the GNU + * GPL normally required by section 4, provided you include this + * license notice and, in case of non-source distribution, a URL + * through which recipients can access the Corresponding Source. + * If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not + * obligated to do so. If you do not wish to do so, delete this + * exception statement from your version. + * + * As a special exception to the GPL, any HTML file which merely + * makes function calls to this code, and for that purpose + * includes it by reference shall be deemed a separate work for + * copyright law purposes. If you modify this code, you may extend + * this exception to your version of the code, but you are not + * obligated to do so. If you do not wish to do so, delete this + * exception statement from your version. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * I, Wojtek Kosior, thereby promise not to sue for violation of this file's + * license. Although I request that you do not make use this code in a + * proprietary program, I am not going to enforce this in court. + */ + +#IMPORT common/patterns_query_tree.js AS pqt + +#FROM common/sha256.js IMPORT sha256 + +/* + * CSP rule that either blocks all scripts or only allows scripts with specified + * nonce attached. + */ +function make_csp(nonce) +{ + const rule = nonce ? `nonce-${nonce}` : "none"; + const csp_dict = {"prefetch-src": "none", "script-src-attr": "none"}; + Object.assign(csp_dict, {"script-src": rule, "script-src-elem": rule}); + return Object.entries(csp_dict).map(([a, b]) => `${a} '${b}';`).join(" "); +} + +function decide_policy(patterns_tree, url, default_allow, secret) +{ + const policy = {allow: default_allow}; + + try { + var payloads = pqt.search(patterns_tree, url).next().value; + } catch (e) { + console.error(e); + policy.allow = false; + policy.error = true; + } + + if (payloads !== undefined) { + policy.mapping = Object.keys(payloads).sort()[0]; + const payload = payloads[policy.mapping]; + if (payload.allow !== undefined) { + policy.allow = payload.allow; + } else /* if (payload.identifier) */ { + policy.allow = false; + policy.payload = payload; + /* + * Hash a secret and other values into a string that's unpredictable + * to someone who does not know these values. What we produce here + * is not a true "nonce" because it might get produced multiple + * times given the same url and mapping choice. Nevertheless, this + * is reasonably good given the limitations WebExtension APIs and + * environments give us. If we were using a true nonce, we'd have no + * reliable way of passing it to our content scripts. 
+ */ + const nonce_source = [ + policy.mapping, + policy.payload.identifier, + url, + secret + ]; + policy.nonce = sha256(nonce_source.join(":")); + } + } + + if (!policy.allow) + policy.csp = make_csp(policy.nonce); + + return policy; +} +#EXPORT decide_policy + +#EXPORT () => ({allow: false, csp: make_csp()}) AS fallback_policy diff --git a/compute_scripts.awk b/compute_scripts.awk index b778934..e17d12c 100755 --- a/compute_scripts.awk +++ b/compute_scripts.awk @@ -28,7 +28,12 @@ BEGIN { path_ext_re = "(\\.[-_.a-zA-Z0-9]*)?" path_re = "^" path_dir_re identifier_re path_ext_re "$" - directive_args_patterns["IF"] = "^(NOT[[:space:]]+)?" identifier_re "$" + if_clause_re = "!?" identifier_re + if_AND_re = "([[:space:]]+&&[[:space:]]+" if_clause_re ")*" + if_OR_re = "([[:space:]]+[|][|][[:space:]]+" if_clause_re ")*" + + directive_args_patterns["IF"] = ("^" if_clause_re \ + "(" if_AND_re "|" if_OR_re ")$") directive_args_patterns["ENDIF"] = "^$" directive_args_patterns["ELSE"] = "^$" directive_args_patterns["ELIF"] = "^(NOT[[:space:]]+)?" 
identifier_re "$" @@ -215,8 +220,7 @@ function process_file(path, read_path, mode, if (directive == "IF") { if (if_nesting_true == if_nesting) { - if ((last_token(directive_args) in defines) == \ - (directive_args ~ /^[^[:space:]]+$/)) + if (if_condition_true(directive_args)) if_nesting_true++ else if_branch_processed = false @@ -255,8 +259,7 @@ function process_file(path, read_path, mode, } if (if_nesting == if_nesting_true + 1 && !if_branch_processed && - (last_token(directive_args) in defines) == \ - (directive_args ~ /^[^[:space:]]+$/)) { + if_condition_true(directive_args)) { if_nesting_true++ } else if (if_nesting == if_nesting_true) { if_branch_processed = true @@ -323,6 +326,35 @@ function process_file(path, read_path, mode, delete reading[read_path] } +function if_condition_true(directive_args, + result, bool, first_iter, word, negated, alt) { + first_iter = true + + while (directive_args) { + word = first_token(directive_args) + sub(/^[^[:space:]]+[[:space:]]*/, "", directive_args) + alt = alt || directive_args ~ /^[|][|]/ + sub(/^[^[:space:]]+[[:space:]]*/, "", directive_args) + + negated = word ~ /^!/ + sub(/^!/, "", word) + bool = (word in defines) != negated + + if (first_iter) { + result = bool + first_iter = false + continue + } + + if (alt) + result = result || bool + else # if (directive_args ~ /^AND/) + result = result && bool + } + + return result +} + function include_file(root_path, read_path, included_path, line, verbatim, read_line, result) { if (validate_path(read_path, included_path, line)) diff --git a/content/main.js b/content/main.js index 9e98635..d97747f 100644 --- a/content/main.js +++ b/content/main.js @@ -46,9 +46,19 @@ #FROM content/page_actions.js IMPORT handle_page_actions #FROM common/misc.js IMPORT gen_nonce, is_privileged_url, \ - make_csp_rule, csp_header_regex + csp_header_regex #FROM common/browser.js IMPORT browser +/* CSP rule that blocks scripts according to policy's needs. 
*/ +function make_csp_rule(policy) +{ + let rule = "prefetch-src 'none'; script-src-attr 'none';"; + const script_src = policy.nonce !== undefined ? + `'nonce-${policy.nonce}'` : "'none'"; + rule += ` script-src ${script_src}; script-src-elem ${script_src};`; + return rule; +} + document.content_loaded = document.readyState === "complete"; const wait_loaded = e => e.content_loaded ? Promise.resolve() : new Promise(c => e.addEventListener("DOMContentLoaded", c, {once: true})); diff --git a/manifest.json b/manifest.json index 7a9edd5..ec94c6e 100644 --- a/manifest.json +++ b/manifest.json @@ -11,11 +11,9 @@ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // CC0 1.0 Universal License for more details. -#IF NOT MOZILLA -#IF NOT CHROMIUM +#IF !MOZILLA && !CHROMIUM #ERROR Target browser not selected! Please define 'MOZILLA' or 'CHROMIUM'. #ENDIF -#ENDIF { #IF MV2 diff --git a/test/extension_crafting.py b/test/extension_crafting.py index 9b985b3..df45d26 100644 --- a/test/extension_crafting.py +++ b/test/extension_crafting.py @@ -58,6 +58,7 @@ def manifest_template(): '', 'unlimitedStorage' ], + 'content_security_policy': "default-src 'self'; script-src 'self' https://serve.scrip.ts;", 'web_accessible_resources': ['testpage.html'], 'background': { 'persistent': True, diff --git a/test/profiles.py b/test/profiles.py index 795a0db..acdecb6 100755 --- a/test/profiles.py +++ b/test/profiles.py @@ -34,22 +34,9 @@ from .misc_constants import * class HaketiloFirefox(webdriver.Firefox): """ - This wrapper class around selenium.webdriver.Firefox adds a `loaded_scripts` - instance property that gets resetted to an empty array every time the - `get()` method is called and also facilitates removing the temporary - profile directory after Firefox quits. + This wrapper class around selenium.webdriver.Firefox facilitates removing + the temporary profile directory after Firefox quits. 
""" - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.reset_loaded_scripts() - - def reset_loaded_scripts(self): - self.loaded_scripts = [] - - def get(self, *args, **kwargs): - self.reset_loaded_scripts() - super().get(*args, **kwargs) - def quit(self, *args, **kwargs): profile_path = self.firefox_profile.path super().quit(*args, **kwargs) @@ -71,8 +58,13 @@ def set_profile_proxy(profile, proxy_host, proxy_port): profile.set_preference(f'network.proxy.backup.{proto}', '') profile.set_preference(f'network.proxy.backup.{proto}_port', 0) -def set_profile_console_logging(profile): - profile.set_preference('devtools.console.stdout.content', True) +def set_profile_csp_enabled(profile): + """ + By default, Firefox Driver disables CSP. The extension we're testing uses + CSP extensively, so we use this function to prepare a Firefox profile that + has it enabled. + """ + profile.set_preference('security.csp.enable', True) # The function below seems not to work for extensions that are # temporarily-installed in Firefox safe mode. 
Testing is needed to see if it @@ -97,7 +89,7 @@ def firefox_safe_mode(firefox_binary=default_firefox_binary, """ profile = webdriver.FirefoxProfile() set_profile_proxy(profile, proxy_host, proxy_port) - set_profile_console_logging(profile) + set_profile_csp_enabled(profile) options = Options() options.add_argument('--safe-mode') @@ -117,7 +109,7 @@ def firefox_with_profile(firefox_binary=default_firefox_binary, """ profile = webdriver.FirefoxProfile(profile_dir) set_profile_proxy(profile, proxy_host, proxy_port) - set_profile_console_logging(profile) + set_profile_csp_enabled(profile) set_webextension_uuid(profile, default_haketilo_id) return HaketiloFirefox(firefox_profile=profile, diff --git a/test/script_loader.py b/test/script_loader.py index f66f9ae..53de779 100644 --- a/test/script_loader.py +++ b/test/script_loader.py @@ -65,7 +65,7 @@ def load_script(path, code_to_add=None): awk = subprocess.run(['awk', '-f', str(awk_script), '--', '-D', 'MOZILLA', '-D', 'MV2', '-D', 'TEST', '-D', 'UNIT_TEST', - '--output=amalgamate-js:' + key], + '-D', 'DEBUG', '--output=amalgamate-js:' + key], stdout=subprocess.PIPE, cwd=script_root, check=True) script = awk.stdout.decode() script_cache[key] = script diff --git a/test/unit/conftest.py b/test/unit/conftest.py index f9a17f8..beffaf5 100644 --- a/test/unit/conftest.py +++ b/test/unit/conftest.py @@ -34,6 +34,7 @@ from selenium.webdriver.support import expected_conditions as EC from ..profiles import firefox_safe_mode from ..server import do_an_internet from ..extension_crafting import make_extension +from ..world_wide_library import start_serving_script, dump_scripts @pytest.fixture(scope="package") def proxy(): @@ -77,55 +78,55 @@ def webextension(driver, request): driver.uninstall_addon(addon_id) ext_path.unlink() -script_injecting_script = '''\ +script_injector_script = '''\ /* * Selenium by default executes scripts in some weird one-time context. 
We want * separately-loaded scripts to be able to access global variables defined * before, including those declared with `const` or `let`. To achieve that, we - * run our scripts by injecting them into the page inside a + +

resources

    mappings

      +

      settings

      +
        ''' @pytest.mark.ext_data({ @@ -328,15 +367,21 @@ def test_haketilodb_track(driver, execute_in_page, wait_elem_text): } for window in reversed(windows): driver.switch_to.window(window) - execute_in_page('initial_data = arguments[0];', initial_data) - - # See if track_*() functions properly return the already-existing items. + try : + driver.execute_script('console.log("uuuuuuu");') + execute_in_page('initial_data = arguments[0];', initial_data) + except: + from time import sleep + sleep(100000) + execute_in_page('returnval(set_setting("option15", "123"));') + + # See if track.*() functions properly return the already-existing items. execute_in_page( ''' function update_item(store_name, change) { console.log('update', ...arguments); - const elem_id = `${store_name}_${change.identifier}`; + const elem_id = `${store_name}_${change.key}`; let elem = document.getElementById(elem_id); elem = elem || document.createElement("li"); elem.id = elem_id; @@ -348,35 +393,32 @@ def test_haketilodb_track(driver, execute_in_page, wait_elem_text): let resource_tracking, resource_items, mapping_tracking, mapping_items; - async function start_tracking() + async function start_reporting() { - const update_resource = change => update_item("resources", change); - const update_mapping = change => update_item("mappings", change); - - [resource_tracking, resource_items] = - await track_resources(update_resource); - [mapping_tracking, mapping_items] = - await track_mappings(update_mapping); - - for (const item of resource_items) - update_resource({identifier: item.identifier, new_val: item}); - for (const item of mapping_items) - update_mapping({identifier: item.identifier, new_val: item}); + for (const store_name of ["resources", "mappings", "settings"]) { + [tracking, items] = + await track[store_name](ch => update_item(store_name, ch)); + const prop = store_name === "settings" ? 
"name" : "identifier"; + for (const item of items) + update_item(store_name, {key: item[prop], new_val: item}); + } } - returnval(start_tracking()); + returnval(start_reporting()); ''') item_counts = driver.execute_script( ''' const childcount = id => document.getElementById(id).childElementCount; - return ["resources", "mappings"].map(childcount); + return ["resources", "mappings", "settings"].map(childcount); ''') - assert item_counts == [1, 1] + assert item_counts == [1, 1, 1] resource_json = driver.find_element_by_id('resources_helloapple').text mapping_json = driver.find_element_by_id('mappings_helloapple').text + setting_json = driver.find_element_by_id('settings_option15').text assert json.loads(resource_json) == sample_resource assert json.loads(mapping_json) == sample_mapping + assert json.loads(setting_json) == {'name': 'option15', 'value': '123'} # See if item additions get tracked properly. driver.switch_to.window(windows[1]) @@ -398,14 +440,17 @@ def test_haketilodb_track(driver, execute_in_page, wait_elem_text): 'files': sample_files_by_hash } execute_in_page('returnval(save_items(arguments[0]));', sample_data) + execute_in_page('returnval(set_setting("option22", "abc"));') driver.switch_to.window(windows[0]) driver.implicitly_wait(10) resource_json = driver.find_element_by_id('resources_helloapple-copy').text mapping_json = driver.find_element_by_id('mappings_helloapple-copy').text + setting_json = driver.find_element_by_id('settings_option22').text driver.implicitly_wait(0) assert json.loads(resource_json) == sample_resource2 assert json.loads(mapping_json) == sample_mapping2 + assert json.loads(setting_json) == {'name': 'option22', 'value': 'abc'} # See if item deletions get tracked properly. 
driver.switch_to.window(windows[1]) @@ -417,7 +462,8 @@ def test_haketilodb_track(driver, execute_in_page, wait_elem_text): const ctx = await start_items_transaction(store_names, {}); await remove_resource("helloapple", ctx); await remove_mapping("helloapple-copy", ctx); - await finalize_items_transaction(ctx); + await finalize_transaction(ctx); + await set_setting("option22", null); } returnval(remove_items()); }''') @@ -430,7 +476,8 @@ def test_haketilodb_track(driver, execute_in_page, wait_elem_text): return False except WebDriverException: pass - return True + option_text = driver.find_element_by_id('settings_option22').text + return json.loads(option_text)['value'] == None driver.switch_to.window(windows[0]) WebDriverWait(driver, 10).until(condition_items_absent) diff --git a/test/unit/test_patterns_query_manager.py b/test/unit/test_patterns_query_manager.py index 8ae7c28..ae1f490 100644 --- a/test/unit/test_patterns_query_manager.py +++ b/test/unit/test_patterns_query_manager.py @@ -25,10 +25,9 @@ from selenium.webdriver.support.ui import WebDriverWait from ..script_loader import load_script def simple_sample_mapping(patterns, fruit): - if type(patterns) is list: - payloads = dict([(p, {'identifier': fruit}) for p in patterns]) - else: - payloads = {patterns: {'identifier': fruit}} + if type(patterns) is not list: + patterns = [patterns] + payloads = dict([(p, {'identifier': f'{fruit}-{p}'}) for p in patterns]) return { 'source_copyright': [], 'type': 'mapping', @@ -36,9 +35,13 @@ def simple_sample_mapping(patterns, fruit): 'payloads': payloads } -content_script_re = re.compile(r'this.haketilo_pattern_tree = (.*);') +content_script_tree_re = re.compile(r'this.haketilo_pattern_tree = (.*);') def extract_tree_data(content_script_text): - return json.loads(content_script_re.search(content_script_text)[1]) + return json.loads(content_script_tree_re.search(content_script_text)[1]) + +content_script_mapping_re = re.compile(r'this.haketilo_mappings = (.*);') +def 
extract_mappings_data(content_script_text): + return json.loads(content_script_mapping_re.search(content_script_text)[1]) # Fields that are not relevant for testing are omitted from these mapping # definitions. @@ -82,7 +85,7 @@ def test_pqm_tree_building(driver, execute_in_page): return [{}, initial_mappings]; } - haketilodb.track_mappings = track_mock; + haketilodb.track.mappings = track_mock; let last_script; let unregister_called = 0; @@ -104,7 +107,10 @@ def test_pqm_tree_building(driver, execute_in_page): tree, last_script, unregister_called]); ''', 'https://gotmyowndoma.in/index.html') - assert found == dict([(m['identifier'], m) for m in sample_mappings[0:2]]) + best_pattern = 'https://gotmyowndoma.in/index.html' + assert found == \ + dict([(f'inject-{fruit}', {'identifier': f'{fruit}-{best_pattern}'}) + for fruit in ('banana', 'orange')]) assert tree == extract_tree_data(content_script) assert deregistrations == 0 @@ -114,12 +120,8 @@ def test_pqm_tree_building(driver, execute_in_page): execute_in_page( ''' - for (const mapping of arguments[0]) { - mappingchange({ - identifier: mapping.identifier, - new_val: mapping - }); - } + for (const mapping of arguments[0]) + mappingchange({key: mapping.identifier, new_val: mapping}); ''', sample_mappings[2:]) WebDriverWait(driver, 10).until(condition_mappings_added) @@ -129,7 +131,8 @@ def test_pqm_tree_building(driver, execute_in_page): def condition_odd_removed(driver): last_script = execute_in_page('returnval(last_script);') - return all([id not in last_script for id in odd]) + return (all([id not in last_script for id in odd]) and + all([id in last_script for id in even])) def condition_all_removed(driver): content_script = execute_in_page('returnval(last_script);') @@ -137,7 +140,7 @@ def test_pqm_tree_building(driver, execute_in_page): execute_in_page( ''' - arguments[0].forEach(identifier => mappingchange({identifier})); + arguments[0].forEach(identifier => mappingchange({key: identifier})); ''', odd) @@ 
-145,7 +148,7 @@ def test_pqm_tree_building(driver, execute_in_page): execute_in_page( ''' - arguments[0].forEach(identifier => mappingchange({identifier})); + arguments[0].forEach(identifier => mappingchange({key: identifier})); ''', even) @@ -224,7 +227,7 @@ def test_pqm_script_injection(driver, execute_in_page): const ctx = await start_items_transaction(["mappings"], {}); for (const id of identifiers) await remove_mapping(id, ctx); - await finalize_items_transaction(ctx); + await finalize_transaction(ctx); } returnval(remove_items()); }''', diff --git a/test/unit/test_policy_deciding.py b/test/unit/test_policy_deciding.py new file mode 100644 index 0000000..a360537 --- /dev/null +++ b/test/unit/test_policy_deciding.py @@ -0,0 +1,121 @@ +# SPDX-License-Identifier: CC0-1.0 + +""" +Haketilo unit tests - determining what to do on a given web page +""" + +# This file is part of Haketilo +# +# Copyright (C) 2021, Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the CC0 1.0 Universal License as published by +# the Creative Commons Corporation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# CC0 1.0 Universal License for more details. + +import re +from hashlib import sha256 +import pytest + +from ..script_loader import load_script + +csp_re = re.compile(r'^\S+\s+\S+;(?:\s+\S+\s+\S+;)*$') +rule_re = re.compile(r'^\s*(?P<src_kind>\S+)\s+(?P<allowed_origins>\S+)$') +def parse_csp(csp): + ''' + Parsing of CSP string into a dict. A simplified format of CSP is assumed. 
+ ''' + assert csp_re.match(csp) + + result = {} + + for rule in csp.split(';')[:-1]: + match = rule_re.match(rule) + result[match.group('src_kind')] = match.group('allowed_origins') + + return result + +@pytest.mark.get_page('https://gotmyowndoma.in') +def test_decide_policy(execute_in_page): + """ + policy.js contains code that, using a Pattern Query Tree instance and a URL, + decides what Haketilo should do on a page opened at that URL, i.e. whether + it should block or allow script execution and whether it should inject its + own scripts and which ones. Test that the policy object gets constructed + properly. + """ + execute_in_page(load_script('common/policy.js')) + + policy = execute_in_page( + ''' + returnval(decide_policy(pqt.make(), "http://unkno.wn/", true, "abcd")); + ''') + assert policy['allow'] == True + for prop in ('mapping', 'payload', 'nonce', 'csp'): + assert prop not in policy + + policy = execute_in_page( + '''{ + const tree = pqt.make(); + pqt.register(tree, "http://kno.wn", "allowed", {allow: true}); + returnval(decide_policy(tree, "http://kno.wn/", false, "abcd")); + }''') + assert policy['allow'] == True + assert policy['mapping'] == 'allowed' + for prop in ('payload', 'nonce', 'csp'): + assert prop not in policy + + policy = execute_in_page( + ''' + returnval(decide_policy(pqt.make(), "http://unkno.wn/", false, "abcd")); + ''' + ) + assert policy['allow'] == False + for prop in ('mapping', 'payload', 'nonce'): + assert prop not in policy + assert parse_csp(policy['csp']) == { + 'prefetch-src': "'none'", + 'script-src-attr': "'none'", + 'script-src': "'none'", + 'script-src-elem': "'none'" + } + + policy = execute_in_page( + '''{ + const tree = pqt.make(); + pqt.register(tree, "http://kno.wn", "disallowed", {allow: false}); + returnval(decide_policy(tree, "http://kno.wn/", true, "abcd")); + }''') + assert policy['allow'] == False + assert policy['mapping'] == 'disallowed' + for prop in ('payload', 'nonce'): + assert prop not in policy + 
assert parse_csp(policy['csp']) == { + 'prefetch-src': "'none'", + 'script-src-attr': "'none'", + 'script-src': "'none'", + 'script-src-elem': "'none'" + } + + policy = execute_in_page( + '''{ + const tree = pqt.make(); + pqt.register(tree, "http://kno.wn", "m1", {identifier: "res1"}); + returnval(decide_policy(tree, "http://kno.wn/", true, "abcd")); + }''') + assert policy['allow'] == False + assert policy['mapping'] == 'm1' + assert policy['payload'] == {'identifier': 'res1'} + + assert policy['nonce'] == \ + sha256('m1:res1:http://kno.wn/:abcd'.encode()).digest().hex() + assert parse_csp(policy['csp']) == { + 'prefetch-src': f"'none'", + 'script-src-attr': f"'none'", + 'script-src': f"'nonce-{policy['nonce']}'", + 'script-src-elem': f"'nonce-{policy['nonce']}'" + } diff --git a/test/unit/test_webrequest.py b/test/unit/test_webrequest.py new file mode 100644 index 0000000..6af2758 --- /dev/null +++ b/test/unit/test_webrequest.py @@ -0,0 +1,77 @@ +# SPDX-License-Identifier: CC0-1.0 + +""" +Haketilo unit tests - modifying requests using webRequest API +""" + +# This file is part of Haketilo +# +# Copyright (C) 2021, Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the CC0 1.0 Universal License as published by +# the Creative Commons Corporation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# CC0 1.0 Universal License for more details. + +import re +from hashlib import sha256 +import pytest + +from ..script_loader import load_script + +def webrequest_js(): + return (load_script('background/webrequest.js', + '#IMPORT common/patterns_query_tree.js AS pqt') + + '''; + // Mock pattern tree. 
+ tree = pqt.make(); + pqt.register(tree, "https://site.with.scripts.block.ed/***", + "disallowed", {allow: false}); + pqt.register(tree, "https://site.with.paylo.ad/***", + "somemapping", {identifier: "someresource"}); + + // Mock IndexedDB. + haketilodb.track.settings = + () => [{}, [{name: "default_allow", value: true}]]; + + // Mock stream_filter. + stream_filter.apply = (details, headers, policy) => headers; + + // Mock secret and start webrequest operations. + start("somesecret"); + ''') + +def are_scripts_allowed(driver, nonce=None): + return driver.execute_script( + ''' + document.scripts_allowed = false; + const script = document.createElement("script"); + script.innerHTML = "document.scripts_allowed = true;"; + if (arguments[0]) + script.setAttribute("nonce", arguments[0]); + document.head.append(script); + return document.scripts_allowed; + ''', + nonce) + +@pytest.mark.ext_data({'background_script': webrequest_js}) +@pytest.mark.usefixtures('webextension') +def test_on_headers_received(driver, execute_in_page): + for attempt in range(10): + driver.get('https://site.with.scripts.block.ed/') + + if not are_scripts_allowed(driver): + break + assert attempt != 9 + + driver.get('https://site.with.scripts.allow.ed/') + assert are_scripts_allowed(driver) + + driver.get('https://site.with.paylo.ad/') + assert not are_scripts_allowed(driver) + source = 'somemapping:someresource:https://site.with.paylo.ad/index.html:somesecret' + assert are_scripts_allowed(driver, sha256(source.encode()).digest().hex()) diff --git a/test/world_wide_library.py b/test/world_wide_library.py index 860c987..43d3512 100644 --- a/test/world_wide_library.py +++ b/test/world_wide_library.py @@ -27,13 +27,99 @@ Our helpful little stand-in for the Internet # file's license. Although I request that you do not make use this code # in a proprietary program, I am not going to enforce this in court. 
+from hashlib import sha256 +from pathlib import Path +from shutil import rmtree +from threading import Lock + from .misc_constants import here +served_scripts = {} +served_scripts_lock = Lock() + +def start_serving_script(script_text): + """ + Register given script so that it is served at + https://serve.scrip.ts/?sha256=<hex_sha256_digest_of_script_text> + + Returns the URL at which script will be served. + + This function lacks thread safety. One might consider fixing this if it + turns out to be necessary. + """ + sha256sum = sha256(script_text.encode()).digest().hex() + served_scripts_lock.acquire() + served_scripts[sha256sum] = script_text + served_scripts_lock.release() + + return f'https://serve.scrip.ts/?sha256={sha256sum}' + +def serve_script(command, get_params, post_params): + """ + info() callback to pass to request-handling code in server.py. Facilitates + serving scripts that have been registered with start_serving_script(). + """ + served_scripts_lock.acquire() + try: + script = served_scripts.get(get_params['sha256'][0]) + finally: + served_scripts_lock.release() + if script is None: + return 404, {}, b'' + + return 200, {'Content-Type': 'application/javascript'}, script + +def dump_scripts(directory='./injected_scripts'): + """ + Write all scripts that have been registered with start_serving_script() + under the provided directory. If the directory already exists, it is wiped + beforehand. If it doesn't exist, it is created. 
+ """ + directory = Path(directory) + rmtree(directory, ignore_errors=True) + directory.mkdir(parents=True) + + served_scripts_lock.acquire() + for sha256, script in served_scripts.items(): + with open(directory / sha256, 'wt') as file: + file.write(script) + served_scripts_lock.release() + catalog = { - 'http://gotmyowndoma.in': (302, {'location': 'http://gotmyowndoma.in/index.html'}, None), - 'http://gotmyowndoma.in/': (302, {'location': 'http://gotmyowndoma.in/index.html'}, None), - 'http://gotmyowndoma.in/index.html': (200, {}, here / 'data' / 'pages' / 'gotmyowndomain.html'), - 'https://gotmyowndoma.in': (302, {'location': 'https://gotmyowndoma.in/index.html'}, None), - 'https://gotmyowndoma.in/': (302, {'location': 'https://gotmyowndoma.in/index.html'}, None), - 'https://gotmyowndoma.in/index.html': (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html') + 'http://gotmyowndoma.in': + (302, {'location': 'http://gotmyowndoma.in/index.html'}, None), + 'http://gotmyowndoma.in/': + (302, {'location': 'http://gotmyowndoma.in/index.html'}, None), + 'http://gotmyowndoma.in/index.html': + (200, {}, here / 'data' / 'pages' / 'gotmyowndomain.html'), + + 'https://gotmyowndoma.in': + (302, {'location': 'https://gotmyowndoma.in/index.html'}, None), + 'https://gotmyowndoma.in/': + (302, {'location': 'https://gotmyowndoma.in/index.html'}, None), + 'https://gotmyowndoma.in/index.html': + (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html'), + + 'https://serve.scrip.ts/': serve_script, + + 'https://site.with.scripts.block.ed': + (302, {'location': 'https://site.with.scripts.block.ed/index.html'}, None), + 'https://site.with.scripts.block.ed/': + (302, {'location': 'https://site.with.scripts.block.ed/index.html'}, None), + 'https://site.with.scripts.block.ed/index.html': + (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html'), + + 'https://site.with.scripts.allow.ed': + (302, {'location': 'https://site.with.scripts.allow.ed/index.html'}, None), + 
'https://site.with.scripts.allow.ed/': + (302, {'location': 'https://site.with.scripts.allow.ed/index.html'}, None), + 'https://site.with.scripts.allow.ed/index.html': + (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html'), + + 'https://site.with.paylo.ad': + (302, {'location': 'https://site.with.paylo.ad/index.html'}, None), + 'https://site.with.paylo.ad/': + (302, {'location': 'https://site.with.paylo.ad/index.html'}, None), + 'https://site.with.paylo.ad/index.html': + (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html') } -- cgit v1.2.3