diff options
-rw-r--r-- | Makefile.in | 1 | ||||
-rw-r--r-- | background/patterns_query_manager.js | 27 | ||||
-rw-r--r-- | background/policy_injector.js | 12 | ||||
-rw-r--r-- | background/webrequest.js | 189 | ||||
-rw-r--r-- | common/indexeddb.js | 66 | ||||
-rw-r--r-- | common/misc.js | 11 | ||||
-rw-r--r-- | common/patterns_query_tree.js | 2 | ||||
-rw-r--r-- | common/policy.js | 106 | ||||
-rwxr-xr-x | compute_scripts.awk | 42 | ||||
-rw-r--r-- | content/main.js | 12 | ||||
-rw-r--r-- | manifest.json | 4 | ||||
-rw-r--r-- | test/extension_crafting.py | 1 | ||||
-rwxr-xr-x | test/profiles.py | 30 | ||||
-rw-r--r-- | test/script_loader.py | 2 | ||||
-rw-r--r-- | test/unit/conftest.py | 73 | ||||
-rw-r--r-- | test/unit/test_indexeddb.py | 193 | ||||
-rw-r--r-- | test/unit/test_patterns_query_manager.py | 39 | ||||
-rw-r--r-- | test/unit/test_policy_deciding.py | 121 | ||||
-rw-r--r-- | test/unit/test_webrequest.py | 77 | ||||
-rw-r--r-- | test/world_wide_library.py | 98 |
20 files changed, 903 insertions, 203 deletions
diff --git a/Makefile.in b/Makefile.in index 5291299..bf0fdec 100644 --- a/Makefile.in +++ b/Makefile.in @@ -83,6 +83,7 @@ clean mostlyclean: rm -rf test/certs rm -rf $$(find . -name geckodriver.log) rm -rf $$(find . -type d -name __pycache__) + rm -rf $$(find . -type d -name injected_scripts) distclean: clean rm -f Makefile config.status record.conf diff --git a/background/patterns_query_manager.js b/background/patterns_query_manager.js index cb14cb1..e364668 100644 --- a/background/patterns_query_manager.js +++ b/background/patterns_query_manager.js @@ -45,13 +45,18 @@ #IMPORT common/patterns_query_tree.js AS pqt #IMPORT common/indexeddb.js AS haketilodb +#IF MOZILLA || MV3 #FROM common/browser.js IMPORT browser +#ENDIF + +let secret; const tree = pqt.make(); #EXPORT tree const current_mappings = new Map(); +#IF MOZILLA || MV3 let registered_script = null; let script_update_occuring = false; let script_update_needed; @@ -67,6 +72,7 @@ async function update_content_script() script_update_needed = false; const code = `\ +this.haketilo_secret = ${secret}; this.haketilo_pattern_tree = ${JSON.stringify(tree)}; if (this.haketilo_content_script_main) haketilo_content_script_main();`; @@ -89,36 +95,43 @@ if (this.haketilo_content_script_main) function register_mapping(mapping) { - for (const pattern in mapping.payloads) - pqt.register(tree, pattern, mapping.identifier, mapping); + for (const [pattern, resource] of Object.entries(mapping.payloads)) + pqt.register(tree, pattern, mapping.identifier, resource); current_mappings.set(mapping.identifier, mapping); } +#ENDIF function mapping_changed(change) { console.log('mapping changes!', arguments); - const old_version = current_mappings.get(change.identifier); + const old_version = current_mappings.get(change.key); if (old_version !== undefined) { for (const pattern in old_version.payloads) - pqt.deregister(tree, pattern, change.identifier); + pqt.deregister(tree, pattern, change.key); - current_mappings.delete(change.identifier); + current_mappings.delete(change.key); } if (change.new_val !== undefined) register_mapping(change.new_val); +#IF MOZILLA || MV3 script_update_needed = true; setTimeout(update_content_script, 0); +#ENDIF } -async function start() +async function start(secret_) { + secret = secret_; + const [tracking, initial_mappings] = - await haketilodb.track_mappings(mapping_changed); + await haketilodb.track.mappings(mapping_changed); initial_mappings.forEach(register_mapping); +#IF MOZILLA || MV3 script_update_needed = true; await update_content_script(); +#ENDIF } #EXPORT start diff --git a/background/policy_injector.js b/background/policy_injector.js index 2544e8e..b1fc733 100644 --- a/background/policy_injector.js +++ b/background/policy_injector.js @@ -43,13 +43,23 @@ * proprietary program, I am not going to enforce this in court. */ -#FROM common/misc.js IMPORT make_csp_rule, csp_header_regex +#FROM common/misc.js IMPORT csp_header_regex /* Re-enable the import below once nonce stuff here is ready */ #IF NEVER #FROM common/misc.js IMPORT gen_nonce #ENDIF +/* CSP rule that blocks scripts according to policy's needs. */ +function make_csp_rule(policy) +{ + let rule = "prefetch-src 'none'; script-src-attr 'none';"; + const script_src = policy.nonce !== undefined ? + `'nonce-${policy.nonce}'` : "'none'"; + rule += ` script-src ${script_src}; script-src-elem ${script_src};`; + return rule; +} + function inject_csp_headers(headers, policy) { let csp_headers; diff --git a/background/webrequest.js b/background/webrequest.js new file mode 100644 index 0000000..e32947a --- /dev/null +++ b/background/webrequest.js @@ -0,0 +1,189 @@ +/** + * This file is part of Haketilo. + * + * Function: Modify HTTP traffic usng webRequest API. + * + * Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * As additional permission under GNU GPL version 3 section 7, you + * may distribute forms of that code without the copy of the GNU + * GPL normally required by section 4, provided you include this + * license notice and, in case of non-source distribution, a URL + * through which recipients can access the Corresponding Source. + * If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not + * obligated to do so. If you do not wish to do so, delete this + * exception statement from your version. + * + * As a special exception to the GPL, any HTML file which merely + * makes function calls to this code, and for that purpose + * includes it by reference shall be deemed a separate work for + * copyright law purposes. If you modify this code, you may extend + * this exception to your version of the code, but you are not + * obligated to do so. If you do not wish to do so, delete this + * exception statement from your version. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + * + * I, Wojtek Kosior, thereby promise not to sue for violation of this file's + * license. Although I request that you do not make use this code in a + * proprietary program, I am not going to enforce this in court. + */ + +#IMPORT common/indexeddb.js AS haketilodb +#IF MOZILLA +#IMPORT background/stream_filter.js +#ENDIF + +#FROM common/browser.js IMPORT browser +#FROM common/misc.js IMPORT is_privileged_url, csp_header_regex +#FROM common/policy.js IMPORT decide_policy + +#FROM background/patterns_query_manager.js IMPORT tree + +let secret; + +let default_allow = false; + +async function track_default_allow() +{ + const set_val = ch => default_allow = (ch.new_val || {}).value; + const [tracking, settings] = await haketilodb.track.settings(set_val); + for (const setting of settings) { + if (setting.name === "default_allow") + default_allow = setting.value; + } +} + +function on_headers_received(details) +{ + const url = details.url; + if (is_privileged_url(details.url)) + return; + + let headers = details.responseHeaders; + + const policy = decide_policy(tree, details.url, default_allow, secret); + if (policy.allow) + return; + + if (policy.payload) + headers = headers.filter(h => !csp_header_regex.test(h.name)); + + headers.push({name: "Content-Security-Policy", value: policy.csp}); + +#IF MOZILLA + let skip = false; + for (const header of headers) { + if (header.name.toLowerCase().trim() !== "content-disposition") + continue; + + if (/^\s*attachment\s*(;.*)$/i.test(header.value)) { + skip = true; + } else { + skip = false; + break; + } + } + skip = skip || (details.statusCode >= 300 && details.statusCode < 400); + + if (!skip) + headers = stream_filter.apply(details, headers, policy); +#ENDIF + + return {responseHeaders: headers}; +} + +#IF CHROMIUM && MV2 +const request_url_regex = /^[^?]*\?url=(.*)$/; +const redirect_url_template = browser.runtime.getURL("dummy") + "?settings="; + +function on_before_request(details) +{ + /* + * Content script will make a synchronous XmlHttpRequest to extension's + * `dummy` file to query settings for given URL. We smuggle that + * information in query parameter of the URL we redirect to. + * A risk of fingerprinting arises if a page with script execution allowed + * guesses the dummy file URL and makes an AJAX call to it. It is currently + * a problem in ManifestV2 Chromium-family port of Haketilo because Chromium + * uses predictable URLs for web-accessible resources. We plan to fix it in + * the future ManifestV3 port. + */ + if (details.type !== "xmlhttprequest") + return {cancel: true}; + +#IF DEBUG + console.debug(`Settings queried using XHR for '${details.url}'.`); +#ENDIF + + /* + * request_url should be of the following format: + * <url_for_extension's_dummy_file>?url=<valid_urlencoded_url> + */ + const match = request_url_regex.exec(details.url); + if (match) { + const queried_url = decodeURIComponent(match[1]); + + if (details.initiator && !queried_url.startsWith(details.initiator)) { + console.warn(`Blocked suspicious query of '${url}' by '${details.initiator}'. This might be the result of page fingerprinting the browser.`); + return {cancel: true}; + } + + const policy = decide_policy(tree, details.url, default_allow, secret); + if (!policy.error) { + const encoded_policy = encodeURIComponent(JSON.stringify(policy)); + return {redirectUrl: redirect_url_template + encoded_policy}; + } + } + + console.warn(`Bad request! Expected ${browser.runtime.getURL("dummy")}?url=<valid_urlencoded_url>. Got ${request_url}. This might be the result of page fingerprinting the browser.`); + + return {cancel: true}; +} + +const all_types = [ + "main_frame", "sub_frame", "stylesheet", "script", "image", "font", + "object", "xmlhttprequest", "ping", "csp_report", "media", "websocket", + "other", "main_frame", "sub_frame" +]; +#ENDIF + +async function start(secret_) +{ + secret = secret_; + +#IF CHROMIUM + const extra_opts = ["blocking", "extraHeaders"]; +#ELSE + const extra_opts = ["blocking"]; +#ENDIF + + browser.webRequest.onHeadersReceived.addListener( + on_headers_received, + {urls: ["<all_urls>"], types: ["main_frame", "sub_frame"]}, + extra_opts.concat("responseHeaders") + ); + +#IF CHROMIUM && MV2 + browser.webRequest.onBeforeRequest.addListener( + on_before_request, + {urls: [browser.runtime.getURL("dummy") + "*"], types: all_types}, + extra_opts + ); +#ENDIF + + await track_default_allow(); +} +#EXPORT start diff --git a/common/indexeddb.js b/common/indexeddb.js index 096391a..e54d1ca 100644 --- a/common/indexeddb.js +++ b/common/indexeddb.js @@ -62,7 +62,8 @@ const stores = [ ["files", {keyPath: "hash_key"}], ["file_uses", {keyPath: "hash_key"}], ["resources", {keyPath: "identifier"}], - ["mappings", {keyPath: "identifier"}] + ["mappings", {keyPath: "identifier"}], + ["settings", {keyPath: "name"}] ]; let db = null; @@ -207,7 +208,7 @@ async function incr_file_uses(context, file_ref, by=1) const decr_file_uses = (ctx, file_ref) => incr_file_uses(ctx, file_ref, -1); -async function finalize_items_transaction(context) +async function finalize_transaction(context) { for (const uses of Object.values(context.file_uses)) { if (uses.uses < 0) @@ -248,7 +249,7 @@ async function finalize_items_transaction(context) return context.result; } -#EXPORT finalize_items_transaction +#EXPORT finalize_transaction /* * How a sample data argument to the function below might look like: @@ -304,7 +305,7 @@ async function _save_items(resources, mappings, context) for (const item of resources.concat(mappings)) await save_item(item, context); - await finalize_items_transaction(context); + await finalize_transaction(context); } /* @@ -314,9 +315,9 @@ async function _save_items(resources, mappings, context) * object with keys being of the form `sha256-<file's-sha256-sum>`. * * context should be one returned from start_items_transaction() and should be - * later passed to finalize_items_transaction() so that files depended on are - * added to IndexedDB and files that are no longer depended on after this - * operation are removed from IndexedDB. + * later passed to finalize_transaction() so that files depended on are added to + * IndexedDB and files that are no longer depended on after this operation are + * removed from IndexedDB. */ async function save_item(item, context) { @@ -346,9 +347,9 @@ async function _remove_item(store_name, identifier, context) * Remove definition of a resource/mapping from IndexedDB. * * context should be one returned from start_items_transaction() and should be - * later passed to finalize_items_transaction() so that files depended on are - * added to IndexedDB and files that are no longer depended on after this - * operation are removed from IndexedDB. + * later passed to finalize_transaction() so that files depended on are added to + * IndexedDB and files that are no longer depended on after this operation are + * removed from IndexedDB. */ async function remove_item(store_name, identifier, context) { @@ -363,26 +364,49 @@ const remove_resource = (id, ctx) => remove_item("resources", id, ctx); const remove_mapping = (id, ctx) => remove_item("mappings", id, ctx); #EXPORT remove_mapping +/* A simplified kind of transaction for modifying just the "settings" store. */ +async function start_settings_transaction() +{ + const db = await get_db(); + return make_context(db.transaction("settings", "readwrite"), {}); +} + +async function set_setting(name, value) +{ + const context = await start_settings_transaction(); + broadcast.prepare(context.sender, `idb_changes_settings`, name); + await idb_put(context.transaction, "settings", {name, value}); + return finalize_transaction(context); +} +#EXPORT set_setting + +async function get_setting(name) +{ + const transaction = (await get_db()).transaction("settings"); + return ((await idb_get(transaction, "settings", name)) || {}).value; +} +#EXPORT get_setting + /* Callback used when listening to broadcasts while tracking db changes. */ -async function track_change(tracking, identifier) +async function track_change(tracking, key) { const transaction = (await get_db()).transaction([tracking.store_name]); - const new_val = await idb_get(transaction, tracking.store_name, identifier); + const new_val = await idb_get(transaction, tracking.store_name, key); - tracking.onchange({identifier, new_val}); + tracking.onchange({key, new_val}); } /* * Monitor changes to `store_name` IndexedDB object store. * - * `store_name` should be either "resources" or "mappings". + * `store_name` should be either "resources", "mappings" or "settings". * * `onchange` should be a callback that will be called when an item is added, * modified or removed from the store. The callback will be passed an object * representing the change as its first argument. This object will have the * form: * { - * identifier: "the identifier of modified resource/mapping", + * key: "the identifier of modified resource/mapping or settings key", * new_val: undefined // `undefined` if item removed, item object otherwise * } * @@ -395,7 +419,7 @@ async function track_change(tracking, identifier) * actually modified or that it only gets called once after multiple quick * changes to an item. */ -async function track(store_name, onchange) +async function start_tracking(store_name, onchange) { const tracking = {store_name, onchange}; tracking.listener = @@ -408,12 +432,10 @@ async function track(store_name, onchange) return [tracking, (await wait_request(all_req)).target.result]; } -const track_resources = onchange => track("resources", onchange); -#EXPORT track_resources - -const track_mappings = onchange => track("mappings", onchange); -#EXPORT track_mappings +const track = {}; +for (const store_name of ["resources", "mappings", "settings"]) + track[store_name] = onchange => start_tracking(store_name, onchange); +#EXPORT track const untrack = tracking => broadcast.close(tracking.listener); #EXPORT untrack - diff --git a/common/misc.js b/common/misc.js index dc4a598..82f6cbf 100644 --- a/common/misc.js +++ b/common/misc.js @@ -67,17 +67,6 @@ function gen_nonce(length=16) } #EXPORT gen_nonce -/* CSP rule that blocks scripts according to policy's needs. */ -function make_csp_rule(policy) -{ - let rule = "prefetch-src 'none'; script-src-attr 'none';"; - const script_src = policy.nonce !== undefined ? - `'nonce-${policy.nonce}'` : "'none'"; - rule += ` script-src ${script_src}; script-src-elem ${script_src};`; - return rule; -} -#EXPORT make_csp_rule - /* Check if some HTTP header might define CSP rules. */ const csp_header_regex = /^\s*(content-security-policy|x-webkit-csp|x-content-security-policy)/i; diff --git a/common/patterns_query_tree.js b/common/patterns_query_tree.js index 1bbdb39..f8ec405 100644 --- a/common/patterns_query_tree.js +++ b/common/patterns_query_tree.js @@ -41,6 +41,8 @@ * proprietary program, I am not going to enforce this in court. */ +// TODO! Modify the code to use `Object.create(null)` instead of `{}`. + #FROM common/patterns.js IMPORT deconstruct_url /* "Pattern Tree" is how we refer to the data structure used for querying diff --git a/common/policy.js b/common/policy.js new file mode 100644 index 0000000..ebd663f --- /dev/null +++ b/common/policy.js @@ -0,0 +1,106 @@ +/** + * This file is part of Haketilo. + * + * Function: Determining what to do on a given web page. + * + * Copyright (C) 2021 Wojtek Kosior + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * As additional permission under GNU GPL version 3 section 7, you + * may distribute forms of that code without the copy of the GNU + * GPL normally required by section 4, provided you include this + * license notice and, in case of non-source distribution, a URL + * through which recipients can access the Corresponding Source. + * If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not + * obligated to do so. If you do not wish to do so, delete this + * exception statement from your version. + * + * As a special exception to the GPL, any HTML file which merely + * makes function calls to this code, and for that purpose + * includes it by reference shall be deemed a separate work for + * copyright law purposes. If you modify this code, you may extend + * this exception to your version of the code, but you are not + * obligated to do so. If you do not wish to do so, delete this + * exception statement from your version. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + * + * I, Wojtek Kosior, thereby promise not to sue for violation of this file's + * license. Although I request that you do not make use this code in a + * proprietary program, I am not going to enforce this in court. + */ + +#IMPORT common/patterns_query_tree.js AS pqt + +#FROM common/sha256.js IMPORT sha256 + +/* + * CSP rule that either blocks all scripts or only allows scripts with specified + * nonce attached. + */ +function make_csp(nonce) +{ + const rule = nonce ? `nonce-${nonce}` : "none"; + const csp_dict = {"prefetch-src": "none", "script-src-attr": "none"}; + Object.assign(csp_dict, {"script-src": rule, "script-src-elem": rule}); + return Object.entries(csp_dict).map(([a, b]) => `${a} '${b}';`).join(" "); +} + +function decide_policy(patterns_tree, url, default_allow, secret) +{ + const policy = {allow: default_allow}; + + try { + var payloads = pqt.search(patterns_tree, url).next().value; + } catch (e) { + console.error(e); + policy.allow = false; + policy.error = true; + } + + if (payloads !== undefined) { + policy.mapping = Object.keys(payloads).sort()[0]; + const payload = payloads[policy.mapping]; + if (payload.allow !== undefined) { + policy.allow = payload.allow; + } else /* if (payload.identifier) */ { + policy.allow = false; + policy.payload = payload; + /* + * Hash a secret and other values into a string that's unpredictable + * to someone who does not know these values. What we produce here + * is not a true "nonce" because it might get produced multiple + * times given the same url and mapping choice. Nevertheless, this + * is reasonably good given the limitations WebExtension APIs and + * environments give us. If we were using a true nonce, we'd have no + * reliable way of passing it to our content scripts. + */ + const nonce_source = [ + policy.mapping, + policy.payload.identifier, + url, + secret + ]; + policy.nonce = sha256(nonce_source.join(":")); + } + } + + if (!policy.allow) + policy.csp = make_csp(policy.nonce); + + return policy; +} +#EXPORT decide_policy + +#EXPORT () => ({allow: false, csp: make_csp()}) AS fallback_policy diff --git a/compute_scripts.awk b/compute_scripts.awk index b778934..e17d12c 100755 --- a/compute_scripts.awk +++ b/compute_scripts.awk @@ -28,7 +28,12 @@ BEGIN { path_ext_re = "(\\.[-_.a-zA-Z0-9]*)?" path_re = "^" path_dir_re identifier_re path_ext_re "$" - directive_args_patterns["IF"] = "^(NOT[[:space:]]+)?" identifier_re "$" + if_clause_re = "!?" identifier_re + if_AND_re = "([[:space:]]+&&[[:space:]]+" if_clause_re ")*" + if_OR_re = "([[:space:]]+[|][|][[:space:]]+" if_clause_re ")*" + + directive_args_patterns["IF"] = ("^" if_clause_re \ + "(" if_AND_re "|" if_OR_re ")$") directive_args_patterns["ENDIF"] = "^$" directive_args_patterns["ELSE"] = "^$" directive_args_patterns["ELIF"] = "^(NOT[[:space:]]+)?" identifier_re "$" @@ -215,8 +220,7 @@ function process_file(path, read_path, mode, if (directive == "IF") { if (if_nesting_true == if_nesting) { - if ((last_token(directive_args) in defines) == \ - (directive_args ~ /^[^[:space:]]+$/)) + if (if_condition_true(directive_args)) if_nesting_true++ else if_branch_processed = false @@ -255,8 +259,7 @@ function process_file(path, read_path, mode, } if (if_nesting == if_nesting_true + 1 && !if_branch_processed && - (last_token(directive_args) in defines) == \ - (directive_args ~ /^[^[:space:]]+$/)) { + if_condition_true(directive_args)) { if_nesting_true++ } else if (if_nesting == if_nesting_true) { if_branch_processed = true @@ -323,6 +326,35 @@ function process_file(path, read_path, mode, delete reading[read_path] } +function if_condition_true(directive_args, + result, bool, first_iter, word, negated, alt) { + first_iter = true + + while (directive_args) { + word = first_token(directive_args) + sub(/^[^[:space:]]+[[:space:]]*/, "", directive_args) + alt = alt || directive_args ~ /^[|][|]/ + sub(/^[^[:space:]]+[[:space:]]*/, "", directive_args) + + negated = word ~ /^!/ + sub(/^!/, "", word) + bool = (word in defines) != negated + + if (first_iter) { + result = bool + first_iter = false + continue + } + + if (alt) + result = result || bool + else # if (directive_args ~ /^AND/) + result = result && bool + } + + return result +} + function include_file(root_path, read_path, included_path, line, verbatim, read_line, result) { if (validate_path(read_path, included_path, line)) diff --git a/content/main.js b/content/main.js index 9e98635..d97747f 100644 --- a/content/main.js +++ b/content/main.js @@ -46,9 +46,19 @@ #FROM content/page_actions.js IMPORT handle_page_actions #FROM common/misc.js IMPORT gen_nonce, is_privileged_url, \ - make_csp_rule, csp_header_regex + csp_header_regex #FROM common/browser.js IMPORT browser +/* CSP rule that blocks scripts according to policy's needs. */ +function make_csp_rule(policy) +{ + let rule = "prefetch-src 'none'; script-src-attr 'none';"; + const script_src = policy.nonce !== undefined ? + `'nonce-${policy.nonce}'` : "'none'"; + rule += ` script-src ${script_src}; script-src-elem ${script_src};`; + return rule; +} + document.content_loaded = document.readyState === "complete"; const wait_loaded = e => e.content_loaded ? Promise.resolve() : new Promise(c => e.addEventListener("DOMContentLoaded", c, {once: true})); diff --git a/manifest.json b/manifest.json index 7a9edd5..ec94c6e 100644 --- a/manifest.json +++ b/manifest.json @@ -11,11 +11,9 @@ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // CC0 1.0 Universal License for more details. -#IF NOT MOZILLA -#IF NOT CHROMIUM +#IF !MOZILLA && !CHROMIUM #ERROR Target browser not selected! Please define 'MOZILLA' or 'CHROMIUM'. #ENDIF -#ENDIF { #IF MV2 diff --git a/test/extension_crafting.py b/test/extension_crafting.py index 9b985b3..df45d26 100644 --- a/test/extension_crafting.py +++ b/test/extension_crafting.py @@ -58,6 +58,7 @@ def manifest_template(): '<all_urls>', 'unlimitedStorage' ], + 'content_security_policy': "default-src 'self'; script-src 'self' https://serve.scrip.ts;", 'web_accessible_resources': ['testpage.html'], 'background': { 'persistent': True, diff --git a/test/profiles.py b/test/profiles.py index 795a0db..acdecb6 100755 --- a/test/profiles.py +++ b/test/profiles.py @@ -34,22 +34,9 @@ from .misc_constants import * class HaketiloFirefox(webdriver.Firefox): """ - This wrapper class around selenium.webdriver.Firefox adds a `loaded_scripts` - instance property that gets resetted to an empty array every time the - `get()` method is called and also facilitates removing the temporary - profile directory after Firefox quits. + This wrapper class around selenium.webdriver.Firefox facilitates removing + the temporary profile directory after Firefox quits. """ - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.reset_loaded_scripts() - - def reset_loaded_scripts(self): - self.loaded_scripts = [] - - def get(self, *args, **kwargs): - self.reset_loaded_scripts() - super().get(*args, **kwargs) - def quit(self, *args, **kwargs): profile_path = self.firefox_profile.path super().quit(*args, **kwargs) @@ -71,8 +58,13 @@ def set_profile_proxy(profile, proxy_host, proxy_port): profile.set_preference(f'network.proxy.backup.{proto}', '') profile.set_preference(f'network.proxy.backup.{proto}_port', 0) -def set_profile_console_logging(profile): - profile.set_preference('devtools.console.stdout.content', True) +def set_profile_csp_enabled(profile): + """ + By default, Firefox Driver disables CSP. The extension we're testing uses + CSP extensively, so we use this function to prepare a Firefox profile that + has it enabled. + """ + profile.set_preference('security.csp.enable', True) # The function below seems not to work for extensions that are # temporarily-installed in Firefox safe mode. Testing is needed to see if it @@ -97,7 +89,7 @@ def firefox_safe_mode(firefox_binary=default_firefox_binary, """ profile = webdriver.FirefoxProfile() set_profile_proxy(profile, proxy_host, proxy_port) - set_profile_console_logging(profile) + set_profile_csp_enabled(profile) options = Options() options.add_argument('--safe-mode') @@ -117,7 +109,7 @@ def firefox_with_profile(firefox_binary=default_firefox_binary, """ profile = webdriver.FirefoxProfile(profile_dir) set_profile_proxy(profile, proxy_host, proxy_port) - set_profile_console_logging(profile) + set_profile_csp_enabled(profile) set_webextension_uuid(profile, default_haketilo_id) return HaketiloFirefox(firefox_profile=profile, diff --git a/test/script_loader.py b/test/script_loader.py index f66f9ae..53de779 100644 --- a/test/script_loader.py +++ b/test/script_loader.py @@ -65,7 +65,7 @@ def load_script(path, code_to_add=None): awk = subprocess.run(['awk', '-f', str(awk_script), '--', '-D', 'MOZILLA', '-D', 'MV2', '-D', 'TEST', '-D', 'UNIT_TEST', - '--output=amalgamate-js:' + key], + '-D', 'DEBUG', '--output=amalgamate-js:' + key], stdout=subprocess.PIPE, cwd=script_root, check=True) script = awk.stdout.decode() script_cache[key] = script diff --git a/test/unit/conftest.py b/test/unit/conftest.py index f9a17f8..beffaf5 100644 --- a/test/unit/conftest.py +++ b/test/unit/conftest.py @@ -34,6 +34,7 @@ from selenium.webdriver.support import expected_conditions as EC from ..profiles import firefox_safe_mode from ..server import do_an_internet from ..extension_crafting import make_extension +from ..world_wide_library import start_serving_script, dump_scripts @pytest.fixture(scope="package") def proxy(): @@ -77,55 +78,55 @@ def webextension(driver, request): driver.uninstall_addon(addon_id) ext_path.unlink() -script_injecting_script = '''\ +script_injector_script = '''\ /* * Selenium by default executes scripts in some weird one-time context. We want * separately-loaded scripts to be able to access global variables defined * before, including those declared with `const` or `let`. To achieve that, we - * run our scripts by injecting them into the page inside a <script> tag. We use - * custom properties of the `window` object to communicate with injected code. + * run our scripts by injecting them into the page with a <script> tag that runs + * javascript served by our proxy. We use custom properties of the `window` + * object to communicate with injected code. */ - -const script_elem = document.createElement('script'); -script_elem.textContent = arguments[0]; - -delete window.haketilo_selenium_return_value; -delete window.haketilo_selenium_exception; -window.returnval = (val => window.haketilo_selenium_return_value = val); -window.arguments = arguments[1]; - -document.body.append(script_elem); - -/* - * To ease debugging, we want this script to signal all exceptions from the - * injectee. - */ -try { +const inject = async () => { + delete window.haketilo_selenium_return_value; + delete window.haketilo_selenium_exception; + window.returnval = val => window.haketilo_selenium_return_value = val; + + const injectee = document.createElement('script'); + injectee.src = arguments[0]; + injectee.type = "application/javascript"; + injectee.async = true; + const prom = new Promise(cb => injectee.onload = cb); + + window.arguments = arguments[1]; + document.body.append(injectee); + + await prom; + + /* + * To ease debugging, we want this script to signal all exceptions from the + * injectee. + */ if (window.haketilo_selenium_exception !== false) - throw 'Error in injected script! Check your geckodriver.log!'; -} finally { - script_elem.remove(); -} + throw ['haketilo_selenium_error', + 'Error in injected script! Check your geckodriver.log and ./injected_scripts/!']; -return window.haketilo_selenium_return_value; + return window.haketilo_selenium_return_value; +} +return inject(); ''' def _execute_in_page_context(driver, script, args): script = script + '\n;\nwindow.haketilo_selenium_exception = false;' - driver.loaded_scripts.append(script) + script_url = start_serving_script(script) + try: - return driver.execute_script(script_injecting_script, script, args) + result = driver.execute_script(script_injector_script, script_url, args) + if type(result) == list and result[0] == 'haketilo_selenium_error': + raise Exception(result[1]) + return result except Exception as e: - import sys - - print("Scripts loaded since driver's last get() method call:", - file=sys.stderr) - - for script in driver.loaded_scripts: - lines = enumerate(script.split('\n'), 1) - for err_info in [('===',), *lines]: - print(*err_info, file=sys.stderr) - + dump_scripts() raise e from None # Some fixtures here just define functions that operate on driver. We should diff --git a/test/unit/test_indexeddb.py b/test/unit/test_indexeddb.py index 476690c..df3df81 100644 --- a/test/unit/test_indexeddb.py +++ b/test/unit/test_indexeddb.py @@ -75,26 +75,9 @@ def make_sample_mapping(): def file_ref(file_name): return {'file': file_name, 'hash_key': sample_files[file_name]['hash_key']} -@pytest.mark.get_page('https://gotmyowndoma.in') -def test_haketilodb_save_remove(execute_in_page): - """ - indexeddb.js facilitates operating on Haketilo's internal database. - Verify database operations work properly. - """ - execute_in_page(indexeddb_js()) - # Mock some unwanted imports. +def clear_indexeddb(execute_in_page): execute_in_page( '''{ - const broadcast_mock = {}; - const nop = () => {}; - for (const key in broadcast) - broadcast_mock[key] = nop; - broadcast = broadcast_mock; - }''') - - # Start with no database. - execute_in_page( - ''' async function delete_db() { if (db) { db.close(); @@ -108,12 +91,13 @@ def test_haketilodb_save_remove(execute_in_page): } returnval(delete_db()); - ''' + }''' ) +def get_db_contents(execute_in_page): # Facilitate retrieving all IndexedDB contents. - execute_in_page( - ''' + return execute_in_page( + '''{ async function get_database_contents() { const db = await get_db(); @@ -130,20 +114,45 @@ def test_haketilodb_save_remove(execute_in_page): store_names_reqs.forEach(([sn, req]) => result[sn] = req.result); return result; } - ''') + returnval(get_database_contents()); + }''') + +def mock_broadcast(execute_in_page): + execute_in_page( + '''{ + const broadcast_mock = {}; + const nop = () => {}; + for (const key in broadcast) + broadcast_mock[key] = nop; + broadcast = broadcast_mock; + }''') + +@pytest.mark.get_page('https://gotmyowndoma.in') +def test_haketilodb_item_modifications(driver, execute_in_page): + """ + indexeddb.js facilitates operating on Haketilo's internal database. + Verify database operations on mappings/resources work properly. + """ + execute_in_page(indexeddb_js()) + mock_broadcast(execute_in_page) + + # Start with no database. + clear_indexeddb(execute_in_page) sample_item = make_sample_resource() sample_item['source_copyright'][0]['extra_prop'] = True - database_contents = execute_in_page( + execute_in_page( '''{ const promise = start_items_transaction(["resources"], arguments[1]) .then(ctx => save_item(arguments[0], ctx).then(() => ctx)) - .then(finalize_items_transaction) - .then(get_database_contents); + .then(finalize_transaction); returnval(promise); }''', sample_item, sample_files_by_hash) + + database_contents = get_db_contents(execute_in_page) + assert len(database_contents['files']) == 4 assert all([sample_files_by_hash[file['hash_key']] == file['contents'] for file in database_contents['files']]) @@ -162,31 +171,33 @@ def test_haketilodb_save_remove(execute_in_page): sample_item['scripts'].append(file_ref('combined.js')) incomplete_files = {**sample_files_by_hash} incomplete_files.pop(sample_files['combined.js']['hash_key']) - result = execute_in_page( + exception = execute_in_page( '''{ - const promise = (async () => { + const args = arguments; + async function try_add_item() + { const context = - await start_items_transaction(["resources"], arguments[1]); + await start_items_transaction(["resources"], args[1]); try { - await save_item(arguments[0], context); - await finalize_items_transaction(context); - return {}; + await save_item(args[0], context); + await finalize_transaction(context); + return; } catch(e) { - var exception = e; + return e; } - - return {exception, db_contents: await get_database_contents()}; - })(); - returnval(promise); + } + returnval(try_add_item()); }''', sample_item, incomplete_files) - assert result - assert 'file not present' in result['exception'] + previous_database_contents = database_contents + database_contents = get_db_contents(execute_in_page) + + assert 'file not present' in exception for key, val in database_contents.items(): keyfun = lambda item: item.get('hash_key') or item['identifier'] - assert sorted(result['db_contents'][key], key=keyfun) \ - == sorted(val, key=keyfun) + assert sorted(previous_database_contents[key], key=keyfun) \ + == sorted(val, key=keyfun) # See if adding another item that partially uses first's files works OK. sample_item = make_sample_mapping() @@ -194,12 +205,13 @@ def test_haketilodb_save_remove(execute_in_page): '''{ const promise = start_items_transaction(["mappings"], arguments[1]) .then(ctx => save_item(arguments[0], ctx).then(() => ctx)) - .then(finalize_items_transaction) - .then(get_database_contents); + .then(finalize_transaction); returnval(promise); }''', sample_item, sample_files_by_hash) + database_contents = get_db_contents(execute_in_page) + names = ['README.md', 'report.spdx', 'LICENSES/somelicense.txt', 'hello.js', 'bye.js'] sample_files_list = [sample_files[name] for name in names] @@ -222,17 +234,18 @@ def test_haketilodb_save_remove(execute_in_page): # Try removing the items to get an empty database again. results = [None, None] for i, item_type in enumerate(['resource', 'mapping']): - results[i] = execute_in_page( + execute_in_page( f'''{{ const remover = remove_{item_type}; const promise = start_items_transaction(["{item_type}s"], {{}}) .then(ctx => remover('helloapple', ctx).then(() => ctx)) - .then(finalize_items_transaction) - .then(get_database_contents); + .then(finalize_transaction); returnval(promise); }}''') + results[i] = get_db_contents(execute_in_page) + names = ['README.md', 'report.spdx'] sample_files_list = [sample_files[name] for name in names] uses_list = [1, 1] @@ -271,22 +284,48 @@ def test_haketilodb_save_remove(execute_in_page): }, 'files': sample_files_by_hash } - database_contents = execute_in_page( - ''' - initial_data = arguments[0]; - returnval(delete_db().then(() => get_database_contents())); - ''', - initial_data) + + clear_indexeddb(execute_in_page) + execute_in_page('initial_data = arguments[0];', initial_data) + database_contents = get_db_contents(execute_in_page) + assert database_contents['resources'] == [sample_resource] assert database_contents['mappings'] == [sample_mapping] +@pytest.mark.get_page('https://gotmyowndoma.in') +def test_haketilodb_settings(driver, execute_in_page): + """ + indexeddb.js facilitates operating on Haketilo's internal database. + Verify database assigning/retrieving values of simple "settings" works + properly. + """ + execute_in_page(indexeddb_js()) + mock_broadcast(execute_in_page) + + # Start with no database. + clear_indexeddb(execute_in_page) + + assert get_db_contents(execute_in_page)['settings'] == [] + + assert execute_in_page('returnval(get_setting("option15"));') == None + + execute_in_page('returnval(set_setting("option15", "disable"));') + assert execute_in_page('returnval(get_setting("option15"));') == 'disable' + + execute_in_page('returnval(set_setting("option15", "enable"));') + assert execute_in_page('returnval(get_setting("option15"));') == 'enable' + test_page_html = ''' <!DOCTYPE html> <script src="/testpage.js"></script> +<script>console.log("inline!")</script> +<script nonce="123456789">console.log("inline nonce!")</script> <h2>resources</h2> <ul id="resources"></ul> <h2>mappings</h2> <ul id="mappings"></ul> +<h2>settings</h2> +<ul id="settings"></ul> ''' @pytest.mark.ext_data({ @@ -328,15 +367,21 @@ def test_haketilodb_track(driver, execute_in_page, wait_elem_text): } for window in reversed(windows): driver.switch_to.window(window) - execute_in_page('initial_data = arguments[0];', initial_data) - - # See if track_*() functions properly return the already-existing items. + try : + driver.execute_script('console.log("uuuuuuu");') + execute_in_page('initial_data = arguments[0];', initial_data) + except: + from time import sleep + sleep(100000) + execute_in_page('returnval(set_setting("option15", "123"));') + + # See if track.*() functions properly return the already-existing items. execute_in_page( ''' function update_item(store_name, change) { console.log('update', ...arguments); - const elem_id = `${store_name}_${change.identifier}`; + const elem_id = `${store_name}_${change.key}`; let elem = document.getElementById(elem_id); elem = elem || document.createElement("li"); elem.id = elem_id; @@ -348,35 +393,32 @@ def test_haketilodb_track(driver, execute_in_page, wait_elem_text): let resource_tracking, resource_items, mapping_tracking, mapping_items; - async function start_tracking() + async function start_reporting() { - const update_resource = change => update_item("resources", change); - const update_mapping = change => update_item("mappings", change); - - [resource_tracking, resource_items] = - await track_resources(update_resource); - [mapping_tracking, mapping_items] = - await track_mappings(update_mapping); - - for (const item of resource_items) - update_resource({identifier: item.identifier, new_val: item}); - for (const item of mapping_items) - update_mapping({identifier: item.identifier, new_val: item}); + for (const store_name of ["resources", "mappings", "settings"]) { + [tracking, items] = + await track[store_name](ch => update_item(store_name, ch)); + const prop = store_name === "settings" ? "name" : "identifier"; + for (const item of items) + update_item(store_name, {key: item[prop], new_val: item}); + } } - returnval(start_tracking()); + returnval(start_reporting()); ''') item_counts = driver.execute_script( ''' const childcount = id => document.getElementById(id).childElementCount; - return ["resources", "mappings"].map(childcount); + return ["resources", "mappings", "settings"].map(childcount); ''') - assert item_counts == [1, 1] + assert item_counts == [1, 1, 1] resource_json = driver.find_element_by_id('resources_helloapple').text mapping_json = driver.find_element_by_id('mappings_helloapple').text + setting_json = driver.find_element_by_id('settings_option15').text assert json.loads(resource_json) == sample_resource assert json.loads(mapping_json) == sample_mapping + assert json.loads(setting_json) == {'name': 'option15', 'value': '123'} # See if item additions get tracked properly. driver.switch_to.window(windows[1]) @@ -398,14 +440,17 @@ def test_haketilodb_track(driver, execute_in_page, wait_elem_text): 'files': sample_files_by_hash } execute_in_page('returnval(save_items(arguments[0]));', sample_data) + execute_in_page('returnval(set_setting("option22", "abc"));') driver.switch_to.window(windows[0]) driver.implicitly_wait(10) resource_json = driver.find_element_by_id('resources_helloapple-copy').text mapping_json = driver.find_element_by_id('mappings_helloapple-copy').text + setting_json = driver.find_element_by_id('settings_option22').text driver.implicitly_wait(0) assert json.loads(resource_json) == sample_resource2 assert json.loads(mapping_json) == sample_mapping2 + assert json.loads(setting_json) == {'name': 'option22', 'value': 'abc'} # See if item deletions get tracked properly. driver.switch_to.window(windows[1]) @@ -417,7 +462,8 @@ def test_haketilodb_track(driver, execute_in_page, wait_elem_text): const ctx = await start_items_transaction(store_names, {}); await remove_resource("helloapple", ctx); await remove_mapping("helloapple-copy", ctx); - await finalize_items_transaction(ctx); + await finalize_transaction(ctx); + await set_setting("option22", null); } returnval(remove_items()); }''') @@ -430,7 +476,8 @@ def test_haketilodb_track(driver, execute_in_page, wait_elem_text): return False except WebDriverException: pass - return True + option_text = driver.find_element_by_id('settings_option22').text + return json.loads(option_text)['value'] == None driver.switch_to.window(windows[0]) WebDriverWait(driver, 10).until(condition_items_absent) diff --git a/test/unit/test_patterns_query_manager.py b/test/unit/test_patterns_query_manager.py index 8ae7c28..ae1f490 100644 --- a/test/unit/test_patterns_query_manager.py +++ b/test/unit/test_patterns_query_manager.py @@ -25,10 +25,9 @@ from selenium.webdriver.support.ui import WebDriverWait from ..script_loader import load_script def simple_sample_mapping(patterns, fruit): - if type(patterns) is list: - payloads = dict([(p, {'identifier': fruit}) for p in patterns]) - else: - payloads = {patterns: {'identifier': fruit}} + if type(patterns) is not list: + patterns = [patterns] + payloads = dict([(p, {'identifier': f'{fruit}-{p}'}) for p in patterns]) return { 'source_copyright': [], 'type': 'mapping', @@ -36,9 +35,13 @@ def simple_sample_mapping(patterns, fruit): 'payloads': payloads } -content_script_re = re.compile(r'this.haketilo_pattern_tree = (.*);') +content_script_tree_re = re.compile(r'this.haketilo_pattern_tree = (.*);') def extract_tree_data(content_script_text): - return json.loads(content_script_re.search(content_script_text)[1]) + return json.loads(content_script_tree_re.search(content_script_text)[1]) + +content_script_mapping_re = re.compile(r'this.haketilo_mappings = (.*);') +def extract_mappings_data(content_script_text): + return json.loads(content_script_mapping_re.search(content_script_text)[1]) # Fields that are not relevant for testing are omitted from these mapping # definitions. @@ -82,7 +85,7 @@ def test_pqm_tree_building(driver, execute_in_page): return [{}, initial_mappings]; } - haketilodb.track_mappings = track_mock; + haketilodb.track.mappings = track_mock; let last_script; let unregister_called = 0; @@ -104,7 +107,10 @@ def test_pqm_tree_building(driver, execute_in_page): tree, last_script, unregister_called]); ''', 'https://gotmyowndoma.in/index.html') - assert found == dict([(m['identifier'], m) for m in sample_mappings[0:2]]) + best_pattern = 'https://gotmyowndoma.in/index.html' + assert found == \ + dict([(f'inject-{fruit}', {'identifier': f'{fruit}-{best_pattern}'}) + for fruit in ('banana', 'orange')]) assert tree == extract_tree_data(content_script) assert deregistrations == 0 @@ -114,12 +120,8 @@ def test_pqm_tree_building(driver, execute_in_page): execute_in_page( ''' - for (const mapping of arguments[0]) { - mappingchange({ - identifier: mapping.identifier, - new_val: mapping - }); - } + for (const mapping of arguments[0]) + mappingchange({key: mapping.identifier, new_val: mapping}); ''', sample_mappings[2:]) WebDriverWait(driver, 10).until(condition_mappings_added) @@ -129,7 +131,8 @@ def test_pqm_tree_building(driver, execute_in_page): def condition_odd_removed(driver): last_script = execute_in_page('returnval(last_script);') - return all([id not in last_script for id in odd]) + return (all([id not in last_script for id in odd]) and + all([id in last_script for id in even])) def condition_all_removed(driver): content_script = execute_in_page('returnval(last_script);') @@ -137,7 +140,7 @@ def test_pqm_tree_building(driver, execute_in_page): execute_in_page( ''' - arguments[0].forEach(identifier => mappingchange({identifier})); + arguments[0].forEach(identifier => mappingchange({key: identifier})); ''', odd) @@ -145,7 +148,7 @@ def test_pqm_tree_building(driver, execute_in_page): execute_in_page( ''' - arguments[0].forEach(identifier => mappingchange({identifier})); + arguments[0].forEach(identifier => mappingchange({key: identifier})); ''', even) @@ -224,7 +227,7 @@ def test_pqm_script_injection(driver, execute_in_page): const ctx = await start_items_transaction(["mappings"], {}); for (const id of identifiers) await remove_mapping(id, ctx); - await finalize_items_transaction(ctx); + await finalize_transaction(ctx); } returnval(remove_items()); }''', diff --git a/test/unit/test_policy_deciding.py b/test/unit/test_policy_deciding.py new file mode 100644 index 0000000..a360537 --- /dev/null +++ b/test/unit/test_policy_deciding.py @@ -0,0 +1,121 @@ +# SPDX-License-Identifier: CC0-1.0 + +""" +Haketilo unit tests - determining what to do on a given web page +""" + +# This file is part of Haketilo +# +# Copyright (C) 2021, Wojtek Kosior <koszko@koszko.org> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the CC0 1.0 Universal License as published by +# the Creative Commons Corporation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# CC0 1.0 Universal License for more details. + +import re +from hashlib import sha256 +import pytest + +from ..script_loader import load_script + +csp_re = re.compile(r'^\S+\s+\S+;(?:\s+\S+\s+\S+;)*$') +rule_re = re.compile(r'^\s*(?P<src_kind>\S+)\s+(?P<allowed_origins>\S+)$') +def parse_csp(csp): + ''' + Parsing of CSP string into a dict. A simplified format of CSP is assumed. + ''' + assert csp_re.match(csp) + + result = {} + + for rule in csp.split(';')[:-1]: + match = rule_re.match(rule) + result[match.group('src_kind')] = match.group('allowed_origins') + + return result + +@pytest.mark.get_page('https://gotmyowndoma.in') +def test_decide_policy(execute_in_page): + """ + policy.js contains code that, using a Pattern Query Tree instance and a URL, + decides what Haketilo should do on a page opened at that URL, i.e. whether + it should block or allow script execution and whether it should inject its + own scripts and which ones. Test that the policy object gets constructed + properly. + """ + execute_in_page(load_script('common/policy.js')) + + policy = execute_in_page( + ''' + returnval(decide_policy(pqt.make(), "http://unkno.wn/", true, "abcd")); + ''') + assert policy['allow'] == True + for prop in ('mapping', 'payload', 'nonce', 'csp'): + assert prop not in policy + + policy = execute_in_page( + '''{ + const tree = pqt.make(); + pqt.register(tree, "http://kno.wn", "allowed", {allow: true}); + returnval(decide_policy(tree, "http://kno.wn/", false, "abcd")); + }''') + assert policy['allow'] == True + assert policy['mapping'] == 'allowed' + for prop in ('payload', 'nonce', 'csp'): + assert prop not in policy + + policy = execute_in_page( + ''' + returnval(decide_policy(pqt.make(), "http://unkno.wn/", false, "abcd")); + ''' + ) + assert policy['allow'] == False + for prop in ('mapping', 'payload', 'nonce'): + assert prop not in policy + assert parse_csp(policy['csp']) == { + 'prefetch-src': "'none'", + 'script-src-attr': "'none'", + 'script-src': "'none'", + 'script-src-elem': "'none'" + } + + policy = execute_in_page( + '''{ + const tree = pqt.make(); + pqt.register(tree, "http://kno.wn", "disallowed", {allow: false}); + returnval(decide_policy(tree, "http://kno.wn/", true, "abcd")); + }''') + assert policy['allow'] == False + assert policy['mapping'] == 'disallowed' + for prop in ('payload', 'nonce'): + assert prop not in policy + assert parse_csp(policy['csp']) == { + 'prefetch-src': "'none'", + 'script-src-attr': "'none'", + 'script-src': "'none'", + 'script-src-elem': "'none'" + } + + policy = execute_in_page( + '''{ + const tree = pqt.make(); + pqt.register(tree, "http://kno.wn", "m1", {identifier: "res1"}); + returnval(decide_policy(tree, "http://kno.wn/", true, "abcd")); + }''') + assert policy['allow'] == False + assert policy['mapping'] == 'm1' + assert policy['payload'] == {'identifier': 'res1'} + + assert policy['nonce'] == \ + sha256('m1:res1:http://kno.wn/:abcd'.encode()).digest().hex() + assert parse_csp(policy['csp']) == { + 'prefetch-src': f"'none'", + 'script-src-attr': f"'none'", + 'script-src': f"'nonce-{policy['nonce']}'", + 'script-src-elem': f"'nonce-{policy['nonce']}'" + } diff --git a/test/unit/test_webrequest.py b/test/unit/test_webrequest.py new file mode 100644 index 0000000..6af2758 --- /dev/null +++ b/test/unit/test_webrequest.py @@ -0,0 +1,77 @@ +# SPDX-License-Identifier: CC0-1.0 + +""" +Haketilo unit tests - modifying requests using webRequest API +""" + +# This file is part of Haketilo +# +# Copyright (C) 2021, Wojtek Kosior <koszko@koszko.org> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the CC0 1.0 Universal License as published by +# the Creative Commons Corporation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# CC0 1.0 Universal License for more details. + +import re +from hashlib import sha256 +import pytest + +from ..script_loader import load_script + +def webrequest_js(): + return (load_script('background/webrequest.js', + '#IMPORT common/patterns_query_tree.js AS pqt') + + '''; + // Mock pattern tree. + tree = pqt.make(); + pqt.register(tree, "https://site.with.scripts.block.ed/***", + "disallowed", {allow: false}); + pqt.register(tree, "https://site.with.paylo.ad/***", + "somemapping", {identifier: "someresource"}); + + // Mock IndexedDB. + haketilodb.track.settings = + () => [{}, [{name: "default_allow", value: true}]]; + + // Mock stream_filter. + stream_filter.apply = (details, headers, policy) => headers; + + // Mock secret and start webrequest operations. + start("somesecret"); + ''') + +def are_scripts_allowed(driver, nonce=None): + return driver.execute_script( + ''' + document.scripts_allowed = false; + const script = document.createElement("script"); + script.innerHTML = "document.scripts_allowed = true;"; + if (arguments[0]) + script.setAttribute("nonce", arguments[0]); + document.head.append(script); + return document.scripts_allowed; + ''', + nonce) + +@pytest.mark.ext_data({'background_script': webrequest_js}) +@pytest.mark.usefixtures('webextension') +def test_on_headers_received(driver, execute_in_page): + for attempt in range(10): + driver.get('https://site.with.scripts.block.ed/') + + if not are_scripts_allowed(driver): + break + assert attempt != 9 + + driver.get('https://site.with.scripts.allow.ed/') + assert are_scripts_allowed(driver) + + driver.get('https://site.with.paylo.ad/') + assert not are_scripts_allowed(driver) + source = 'somemapping:someresource:https://site.with.paylo.ad/index.html:somesecret' + assert are_scripts_allowed(driver, sha256(source.encode()).digest().hex()) diff --git a/test/world_wide_library.py b/test/world_wide_library.py index 860c987..43d3512 100644 --- a/test/world_wide_library.py +++ b/test/world_wide_library.py @@ -27,13 +27,99 @@ Our helpful little stand-in for the Internet # file's license. Although I request that you do not make use this code # in a proprietary program, I am not going to enforce this in court. +from hashlib import sha256 +from pathlib import Path +from shutil import rmtree +from threading import Lock + from .misc_constants import here +served_scripts = {} +served_scripts_lock = Lock() + +def start_serving_script(script_text): + """ + Register given script so that it is served at + https://serve.scrip.ts/?sha256=<script's_sha256_sum> + + Returns the URL at which script will be served. + + This function lacks thread safety. Might moght consider fixing this if it + turns + """ + sha256sum = sha256(script_text.encode()).digest().hex() + served_scripts_lock.acquire() + served_scripts[sha256sum] = script_text + served_scripts_lock.release() + + return f'https://serve.scrip.ts/?sha256={sha256sum}' + +def serve_script(command, get_params, post_params): + """ + info() callback to pass to request-handling code in server.py. Facilitates + serving scripts that have been registered with start_serving_script(). + """ + served_scripts_lock.acquire() + try: + script = served_scripts.get(get_params['sha256'][0]) + finally: + served_scripts_lock.release() + if script is None: + return 404, {}, b'' + + return 200, {'Content-Type': 'application/javascript'}, script + +def dump_scripts(directory='./injected_scripts'): + """ + Write all scripts that have been registered with start_serving_script() + under the provided directory. If the directory already exists, it is wiped + beforehand. If it doesn't exist, it is created. + """ + directory = Path(directory) + rmtree(directory, ignore_errors=True) + directory.mkdir(parents=True) + + served_scripts_lock.acquire() + for sha256, script in served_scripts.items(): + with open(directory / sha256, 'wt') as file: + file.write(script) + served_scripts_lock.release() + catalog = { - 'http://gotmyowndoma.in': (302, {'location': 'http://gotmyowndoma.in/index.html'}, None), - 'http://gotmyowndoma.in/': (302, {'location': 'http://gotmyowndoma.in/index.html'}, None), - 'http://gotmyowndoma.in/index.html': (200, {}, here / 'data' / 'pages' / 'gotmyowndomain.html'), - 'https://gotmyowndoma.in': (302, {'location': 'https://gotmyowndoma.in/index.html'}, None), - 'https://gotmyowndoma.in/': (302, {'location': 'https://gotmyowndoma.in/index.html'}, None), - 'https://gotmyowndoma.in/index.html': (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html') + 'http://gotmyowndoma.in': + (302, {'location': 'http://gotmyowndoma.in/index.html'}, None), + 'http://gotmyowndoma.in/': + (302, {'location': 'http://gotmyowndoma.in/index.html'}, None), + 'http://gotmyowndoma.in/index.html': + (200, {}, here / 'data' / 'pages' / 'gotmyowndomain.html'), + + 'https://gotmyowndoma.in': + (302, {'location': 'https://gotmyowndoma.in/index.html'}, None), + 'https://gotmyowndoma.in/': + (302, {'location': 'https://gotmyowndoma.in/index.html'}, None), + 'https://gotmyowndoma.in/index.html': + (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html'), + + 'https://serve.scrip.ts/': serve_script, + + 'https://site.with.scripts.block.ed': + (302, {'location': 'https://site.with.scripts.block.ed/index.html'}, None), + 'https://site.with.scripts.block.ed/': + (302, {'location': 'https://site.with.scripts.block.ed/index.html'}, None), + 'https://site.with.scripts.block.ed/index.html': + (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html'), + + 'https://site.with.scripts.allow.ed': + (302, {'location': 'https://site.with.scripts.allow.ed/index.html'}, None), + 'https://site.with.scripts.allow.ed/': + (302, {'location': 'https://site.with.scripts.allow.ed/index.html'}, None), + 'https://site.with.scripts.allow.ed/index.html': + (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html'), + + 'https://site.with.paylo.ad': + (302, {'location': 'https://site.with.paylo.ad/index.html'}, None), + 'https://site.with.paylo.ad/': + (302, {'location': 'https://site.with.paylo.ad/index.html'}, None), + 'https://site.with.paylo.ad/index.html': + (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html') } |