summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile.in1
-rw-r--r--background/patterns_query_manager.js27
-rw-r--r--background/policy_injector.js12
-rw-r--r--background/webrequest.js189
-rw-r--r--common/indexeddb.js66
-rw-r--r--common/misc.js11
-rw-r--r--common/patterns_query_tree.js2
-rw-r--r--common/policy.js106
-rwxr-xr-xcompute_scripts.awk42
-rw-r--r--content/main.js12
-rw-r--r--manifest.json4
-rw-r--r--test/extension_crafting.py1
-rwxr-xr-xtest/profiles.py30
-rw-r--r--test/script_loader.py2
-rw-r--r--test/unit/conftest.py73
-rw-r--r--test/unit/test_indexeddb.py193
-rw-r--r--test/unit/test_patterns_query_manager.py39
-rw-r--r--test/unit/test_policy_deciding.py121
-rw-r--r--test/unit/test_webrequest.py77
-rw-r--r--test/world_wide_library.py98
20 files changed, 903 insertions, 203 deletions
diff --git a/Makefile.in b/Makefile.in
index 5291299..bf0fdec 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -83,6 +83,7 @@ clean mostlyclean:
rm -rf test/certs
rm -rf $$(find . -name geckodriver.log)
rm -rf $$(find . -type d -name __pycache__)
+ rm -rf $$(find . -type d -name injected_scripts)
distclean: clean
rm -f Makefile config.status record.conf
diff --git a/background/patterns_query_manager.js b/background/patterns_query_manager.js
index cb14cb1..e364668 100644
--- a/background/patterns_query_manager.js
+++ b/background/patterns_query_manager.js
@@ -45,13 +45,18 @@
#IMPORT common/patterns_query_tree.js AS pqt
#IMPORT common/indexeddb.js AS haketilodb
+#IF MOZILLA || MV3
#FROM common/browser.js IMPORT browser
+#ENDIF
+
+let secret;
const tree = pqt.make();
#EXPORT tree
const current_mappings = new Map();
+#IF MOZILLA || MV3
let registered_script = null;
let script_update_occuring = false;
let script_update_needed;
@@ -67,6 +72,7 @@ async function update_content_script()
script_update_needed = false;
const code = `\
+this.haketilo_secret = ${secret};
this.haketilo_pattern_tree = ${JSON.stringify(tree)};
if (this.haketilo_content_script_main)
haketilo_content_script_main();`;
@@ -89,36 +95,43 @@ if (this.haketilo_content_script_main)
function register_mapping(mapping)
{
- for (const pattern in mapping.payloads)
- pqt.register(tree, pattern, mapping.identifier, mapping);
+ for (const [pattern, resource] of Object.entries(mapping.payloads))
+ pqt.register(tree, pattern, mapping.identifier, resource);
current_mappings.set(mapping.identifier, mapping);
}
+#ENDIF
function mapping_changed(change)
{
console.log('mapping changes!', arguments);
- const old_version = current_mappings.get(change.identifier);
+ const old_version = current_mappings.get(change.key);
if (old_version !== undefined) {
for (const pattern in old_version.payloads)
- pqt.deregister(tree, pattern, change.identifier);
+ pqt.deregister(tree, pattern, change.key);
- current_mappings.delete(change.identifier);
+ current_mappings.delete(change.key);
}
if (change.new_val !== undefined)
register_mapping(change.new_val);
+#IF MOZILLA || MV3
script_update_needed = true;
setTimeout(update_content_script, 0);
+#ENDIF
}
-async function start()
+async function start(secret_)
{
+ secret = secret_;
+
const [tracking, initial_mappings] =
- await haketilodb.track_mappings(mapping_changed);
+ await haketilodb.track.mappings(mapping_changed);
initial_mappings.forEach(register_mapping);
+#IF MOZILLA || MV3
script_update_needed = true;
await update_content_script();
+#ENDIF
}
#EXPORT start
diff --git a/background/policy_injector.js b/background/policy_injector.js
index 2544e8e..b1fc733 100644
--- a/background/policy_injector.js
+++ b/background/policy_injector.js
@@ -43,13 +43,23 @@
* proprietary program, I am not going to enforce this in court.
*/
-#FROM common/misc.js IMPORT make_csp_rule, csp_header_regex
+#FROM common/misc.js IMPORT csp_header_regex
/* Re-enable the import below once nonce stuff here is ready */
#IF NEVER
#FROM common/misc.js IMPORT gen_nonce
#ENDIF
+/*
+ * CSP rule that blocks scripts according to policy's needs.
+ *
+ * `policy` - object whose optional `nonce` property selects the script source:
+ *            when `policy.nonce` is not `undefined`, `script-src` and
+ *            `script-src-elem` are set to 'nonce-<policy.nonce>'; otherwise
+ *            both are set to 'none'.
+ *
+ * `prefetch-src` and `script-src-attr` are always set to 'none'.
+ *
+ * Returns the rule as a single string of ';'-terminated directives.
+ */
+function make_csp_rule(policy)
+{
+ let rule = "prefetch-src 'none'; script-src-attr 'none';";
+ const script_src = policy.nonce !== undefined ?
+ `'nonce-${policy.nonce}'` : "'none'";
+ rule += ` script-src ${script_src}; script-src-elem ${script_src};`;
+ return rule;
+}
+
function inject_csp_headers(headers, policy)
{
let csp_headers;
diff --git a/background/webrequest.js b/background/webrequest.js
new file mode 100644
index 0000000..e32947a
--- /dev/null
+++ b/background/webrequest.js
@@ -0,0 +1,189 @@
+/**
+ * This file is part of Haketilo.
+ *
+ * Function: Modify HTTP traffic using webRequest API.
+ *
+ * Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * As additional permission under GNU GPL version 3 section 7, you
+ * may distribute forms of that code without the copy of the GNU
+ * GPL normally required by section 4, provided you include this
+ * license notice and, in case of non-source distribution, a URL
+ * through which recipients can access the Corresponding Source.
+ * If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not
+ * obligated to do so. If you do not wish to do so, delete this
+ * exception statement from your version.
+ *
+ * As a special exception to the GPL, any HTML file which merely
+ * makes function calls to this code, and for that purpose
+ * includes it by reference shall be deemed a separate work for
+ * copyright law purposes. If you modify this code, you may extend
+ * this exception to your version of the code, but you are not
+ * obligated to do so. If you do not wish to do so, delete this
+ * exception statement from your version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
+ * license. Although I request that you do not make use this code in a
+ * proprietary program, I am not going to enforce this in court.
+ */
+
+#IMPORT common/indexeddb.js AS haketilodb
+#IF MOZILLA
+#IMPORT background/stream_filter.js
+#ENDIF
+
+#FROM common/browser.js IMPORT browser
+#FROM common/misc.js IMPORT is_privileged_url, csp_header_regex
+#FROM common/policy.js IMPORT decide_policy
+
+#FROM background/patterns_query_manager.js IMPORT tree
+
+let secret;
+
+let default_allow = false;
+
+async function track_default_allow()
+{
+ const set_val = ch => default_allow = (ch.new_val || {}).value;
+ const [tracking, settings] = await haketilodb.track.settings(set_val);
+ for (const setting of settings) {
+ if (setting.name === "default_allow")
+ default_allow = setting.value;
+ }
+}
+
+function on_headers_received(details)
+{
+ const url = details.url;
+ if (is_privileged_url(details.url))
+ return;
+
+ let headers = details.responseHeaders;
+
+ const policy = decide_policy(tree, details.url, default_allow, secret);
+ if (policy.allow)
+ return;
+
+ if (policy.payload)
+ headers = headers.filter(h => !csp_header_regex.test(h.name));
+
+ headers.push({name: "Content-Security-Policy", value: policy.csp});
+
+#IF MOZILLA
+ let skip = false;
+ for (const header of headers) {
+ if (header.name.toLowerCase().trim() !== "content-disposition")
+ continue;
+
+ if (/^\s*attachment\s*(;.*)$/i.test(header.value)) {
+ skip = true;
+ } else {
+ skip = false;
+ break;
+ }
+ }
+ skip = skip || (details.statusCode >= 300 && details.statusCode < 400);
+
+ if (!skip)
+ headers = stream_filter.apply(details, headers, policy);
+#ENDIF
+
+ return {responseHeaders: headers};
+}
+
+#IF CHROMIUM && MV2
+const request_url_regex = /^[^?]*\?url=(.*)$/;
+const redirect_url_template = browser.runtime.getURL("dummy") + "?settings=";
+
+function on_before_request(details)
+{
+ /*
+ * Content script will make a synchronous XmlHttpRequest to extension's
+ * `dummy` file to query settings for given URL. We smuggle that
+ * information in query parameter of the URL we redirect to.
+ * A risk of fingerprinting arises if a page with script execution allowed
+ * guesses the dummy file URL and makes an AJAX call to it. It is currently
+ * a problem in ManifestV2 Chromium-family port of Haketilo because Chromium
+ * uses predictable URLs for web-accessible resources. We plan to fix it in
+ * the future ManifestV3 port.
+ */
+ if (details.type !== "xmlhttprequest")
+ return {cancel: true};
+
+#IF DEBUG
+ console.debug(`Settings queried using XHR for '${details.url}'.`);
+#ENDIF
+
+ /*
+ * request_url should be of the following format:
+ * <url_for_extension's_dummy_file>?url=<valid_urlencoded_url>
+ */
+ const match = request_url_regex.exec(details.url);
+ if (match) {
+ const queried_url = decodeURIComponent(match[1]);
+
+ if (details.initiator && !queried_url.startsWith(details.initiator)) {
+ console.warn(`Blocked suspicious query of '${url}' by '${details.initiator}'. This might be the result of page fingerprinting the browser.`);
+ return {cancel: true};
+ }
+
+ const policy = decide_policy(tree, details.url, default_allow, secret);
+ if (!policy.error) {
+ const encoded_policy = encodeURIComponent(JSON.stringify(policy));
+ return {redirectUrl: redirect_url_template + encoded_policy};
+ }
+ }
+
+ console.warn(`Bad request! Expected ${browser.runtime.getURL("dummy")}?url=<valid_urlencoded_url>. Got ${request_url}. This might be the result of page fingerprinting the browser.`);
+
+ return {cancel: true};
+}
+
+const all_types = [
+ "main_frame", "sub_frame", "stylesheet", "script", "image", "font",
+ "object", "xmlhttprequest", "ping", "csp_report", "media", "websocket",
+ "other", "main_frame", "sub_frame"
+];
+#ENDIF
+
+async function start(secret_)
+{
+ secret = secret_;
+
+#IF CHROMIUM
+ const extra_opts = ["blocking", "extraHeaders"];
+#ELSE
+ const extra_opts = ["blocking"];
+#ENDIF
+
+ browser.webRequest.onHeadersReceived.addListener(
+ on_headers_received,
+ {urls: ["<all_urls>"], types: ["main_frame", "sub_frame"]},
+ extra_opts.concat("responseHeaders")
+ );
+
+#IF CHROMIUM && MV2
+ browser.webRequest.onBeforeRequest.addListener(
+ on_before_request,
+ {urls: [browser.runtime.getURL("dummy") + "*"], types: all_types},
+ extra_opts
+ );
+#ENDIF
+
+ await track_default_allow();
+}
+#EXPORT start
diff --git a/common/indexeddb.js b/common/indexeddb.js
index 096391a..e54d1ca 100644
--- a/common/indexeddb.js
+++ b/common/indexeddb.js
@@ -62,7 +62,8 @@ const stores = [
["files", {keyPath: "hash_key"}],
["file_uses", {keyPath: "hash_key"}],
["resources", {keyPath: "identifier"}],
- ["mappings", {keyPath: "identifier"}]
+ ["mappings", {keyPath: "identifier"}],
+ ["settings", {keyPath: "name"}]
];
let db = null;
@@ -207,7 +208,7 @@ async function incr_file_uses(context, file_ref, by=1)
const decr_file_uses = (ctx, file_ref) => incr_file_uses(ctx, file_ref, -1);
-async function finalize_items_transaction(context)
+async function finalize_transaction(context)
{
for (const uses of Object.values(context.file_uses)) {
if (uses.uses < 0)
@@ -248,7 +249,7 @@ async function finalize_items_transaction(context)
return context.result;
}
-#EXPORT finalize_items_transaction
+#EXPORT finalize_transaction
/*
* How a sample data argument to the function below might look like:
@@ -304,7 +305,7 @@ async function _save_items(resources, mappings, context)
for (const item of resources.concat(mappings))
await save_item(item, context);
- await finalize_items_transaction(context);
+ await finalize_transaction(context);
}
/*
@@ -314,9 +315,9 @@ async function _save_items(resources, mappings, context)
* object with keys being of the form `sha256-<file's-sha256-sum>`.
*
* context should be one returned from start_items_transaction() and should be
- * later passed to finalize_items_transaction() so that files depended on are
- * added to IndexedDB and files that are no longer depended on after this
- * operation are removed from IndexedDB.
+ * later passed to finalize_transaction() so that files depended on are added to
+ * IndexedDB and files that are no longer depended on after this operation are
+ * removed from IndexedDB.
*/
async function save_item(item, context)
{
@@ -346,9 +347,9 @@ async function _remove_item(store_name, identifier, context)
* Remove definition of a resource/mapping from IndexedDB.
*
* context should be one returned from start_items_transaction() and should be
- * later passed to finalize_items_transaction() so that files depended on are
- * added to IndexedDB and files that are no longer depended on after this
- * operation are removed from IndexedDB.
+ * later passed to finalize_transaction() so that files depended on are added to
+ * IndexedDB and files that are no longer depended on after this operation are
+ * removed from IndexedDB.
*/
async function remove_item(store_name, identifier, context)
{
@@ -363,26 +364,49 @@ const remove_resource = (id, ctx) => remove_item("resources", id, ctx);
const remove_mapping = (id, ctx) => remove_item("mappings", id, ctx);
#EXPORT remove_mapping
+/*
+ * A simplified kind of transaction for modifying just the "settings" store.
+ *
+ * Opens a "readwrite" IndexedDB transaction over the "settings" store and
+ * resolves to the context that make_context() creates from that transaction
+ * and an empty object. set_setting() passes this context to
+ * finalize_transaction() once the setting has been written.
+ */
+async function start_settings_transaction()
+{
+ const db = await get_db();
+ return make_context(db.transaction("settings", "readwrite"), {});
+}
+
+async function set_setting(name, value)
+{
+ const context = await start_settings_transaction();
+ broadcast.prepare(context.sender, `idb_changes_settings`, name);
+ await idb_put(context.transaction, "settings", {name, value});
+ return finalize_transaction(context);
+}
+#EXPORT set_setting
+
+async function get_setting(name)
+{
+ const transaction = (await get_db()).transaction("settings");
+ return ((await idb_get(transaction, "settings", name)) || {}).value;
+}
+#EXPORT get_setting
+
/* Callback used when listening to broadcasts while tracking db changes. */
-async function track_change(tracking, identifier)
+async function track_change(tracking, key)
{
const transaction = (await get_db()).transaction([tracking.store_name]);
- const new_val = await idb_get(transaction, tracking.store_name, identifier);
+ const new_val = await idb_get(transaction, tracking.store_name, key);
- tracking.onchange({identifier, new_val});
+ tracking.onchange({key, new_val});
}
/*
* Monitor changes to `store_name` IndexedDB object store.
*
- * `store_name` should be either "resources" or "mappings".
+ * `store_name` should be either "resources", "mappings" or "settings".
*
* `onchange` should be a callback that will be called when an item is added,
* modified or removed from the store. The callback will be passed an object
* representing the change as its first argument. This object will have the
* form:
* {
- * identifier: "the identifier of modified resource/mapping",
+ * key: "the identifier of modified resource/mapping or settings key",
* new_val: undefined // `undefined` if item removed, item object otherwise
* }
*
@@ -395,7 +419,7 @@ async function track_change(tracking, identifier)
* actually modified or that it only gets called once after multiple quick
* changes to an item.
*/
-async function track(store_name, onchange)
+async function start_tracking(store_name, onchange)
{
const tracking = {store_name, onchange};
tracking.listener =
@@ -408,12 +432,10 @@ async function track(store_name, onchange)
return [tracking, (await wait_request(all_req)).target.result];
}
-const track_resources = onchange => track("resources", onchange);
-#EXPORT track_resources
-
-const track_mappings = onchange => track("mappings", onchange);
-#EXPORT track_mappings
+const track = {};
+for (const store_name of ["resources", "mappings", "settings"])
+ track[store_name] = onchange => start_tracking(store_name, onchange);
+#EXPORT track
const untrack = tracking => broadcast.close(tracking.listener);
#EXPORT untrack
-
diff --git a/common/misc.js b/common/misc.js
index dc4a598..82f6cbf 100644
--- a/common/misc.js
+++ b/common/misc.js
@@ -67,17 +67,6 @@ function gen_nonce(length=16)
}
#EXPORT gen_nonce
-/* CSP rule that blocks scripts according to policy's needs. */
-function make_csp_rule(policy)
-{
- let rule = "prefetch-src 'none'; script-src-attr 'none';";
- const script_src = policy.nonce !== undefined ?
- `'nonce-${policy.nonce}'` : "'none'";
- rule += ` script-src ${script_src}; script-src-elem ${script_src};`;
- return rule;
-}
-#EXPORT make_csp_rule
-
/* Check if some HTTP header might define CSP rules. */
const csp_header_regex =
/^\s*(content-security-policy|x-webkit-csp|x-content-security-policy)/i;
diff --git a/common/patterns_query_tree.js b/common/patterns_query_tree.js
index 1bbdb39..f8ec405 100644
--- a/common/patterns_query_tree.js
+++ b/common/patterns_query_tree.js
@@ -41,6 +41,8 @@
* proprietary program, I am not going to enforce this in court.
*/
+// TODO! Modify the code to use `Object.create(null)` instead of `{}`.
+
#FROM common/patterns.js IMPORT deconstruct_url
/* "Pattern Tree" is how we refer to the data structure used for querying
diff --git a/common/policy.js b/common/policy.js
new file mode 100644
index 0000000..ebd663f
--- /dev/null
+++ b/common/policy.js
@@ -0,0 +1,106 @@
+/**
+ * This file is part of Haketilo.
+ *
+ * Function: Determining what to do on a given web page.
+ *
+ * Copyright (C) 2021 Wojtek Kosior
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * As additional permission under GNU GPL version 3 section 7, you
+ * may distribute forms of that code without the copy of the GNU
+ * GPL normally required by section 4, provided you include this
+ * license notice and, in case of non-source distribution, a URL
+ * through which recipients can access the Corresponding Source.
+ * If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not
+ * obligated to do so. If you do not wish to do so, delete this
+ * exception statement from your version.
+ *
+ * As a special exception to the GPL, any HTML file which merely
+ * makes function calls to this code, and for that purpose
+ * includes it by reference shall be deemed a separate work for
+ * copyright law purposes. If you modify this code, you may extend
+ * this exception to your version of the code, but you are not
+ * obligated to do so. If you do not wish to do so, delete this
+ * exception statement from your version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
+ * license. Although I request that you do not make use this code in a
+ * proprietary program, I am not going to enforce this in court.
+ */
+
+#IMPORT common/patterns_query_tree.js AS pqt
+
+#FROM common/sha256.js IMPORT sha256
+
+/*
+ * CSP rule that either blocks all scripts or only allows scripts with specified
+ * nonce attached.
+ *
+ * `nonce` - optional nonce string. When truthy, `script-src` and
+ *           `script-src-elem` are set to 'nonce-<nonce>'; otherwise both are
+ *           set to 'none'.
+ *
+ * `prefetch-src` and `script-src-attr` are always set to 'none'.
+ *
+ * Returns the directives joined with single spaces, each of the form
+ * "<directive> '<value>';".
+ */
+function make_csp(nonce)
+{
+ const rule = nonce ? `nonce-${nonce}` : "none";
+ const csp_dict = {"prefetch-src": "none", "script-src-attr": "none"};
+ Object.assign(csp_dict, {"script-src": rule, "script-src-elem": rule});
+ return Object.entries(csp_dict).map(([a, b]) => `${a} '${b}';`).join(" ");
+}
+
+function decide_policy(patterns_tree, url, default_allow, secret)
+{
+ const policy = {allow: default_allow};
+
+ try {
+ var payloads = pqt.search(patterns_tree, url).next().value;
+ } catch (e) {
+ console.error(e);
+ policy.allow = false;
+ policy.error = true;
+ }
+
+ if (payloads !== undefined) {
+ policy.mapping = Object.keys(payloads).sort()[0];
+ const payload = payloads[policy.mapping];
+ if (payload.allow !== undefined) {
+ policy.allow = payload.allow;
+ } else /* if (payload.identifier) */ {
+ policy.allow = false;
+ policy.payload = payload;
+ /*
+ * Hash a secret and other values into a string that's unpredictable
+ * to someone who does not know these values. What we produce here
+ * is not a true "nonce" because it might get produced multiple
+ * times given the same url and mapping choice. Nevertheless, this
+ * is reasonably good given the limitations WebExtension APIs and
+ * environments give us. If we were using a true nonce, we'd have no
+ * reliable way of passing it to our content scripts.
+ */
+ const nonce_source = [
+ policy.mapping,
+ policy.payload.identifier,
+ url,
+ secret
+ ];
+ policy.nonce = sha256(nonce_source.join(":"));
+ }
+ }
+
+ if (!policy.allow)
+ policy.csp = make_csp(policy.nonce);
+
+ return policy;
+}
+#EXPORT decide_policy
+
+#EXPORT () => ({allow: false, csp: make_csp()}) AS fallback_policy
diff --git a/compute_scripts.awk b/compute_scripts.awk
index b778934..e17d12c 100755
--- a/compute_scripts.awk
+++ b/compute_scripts.awk
@@ -28,7 +28,12 @@ BEGIN {
path_ext_re = "(\\.[-_.a-zA-Z0-9]*)?"
path_re = "^" path_dir_re identifier_re path_ext_re "$"
- directive_args_patterns["IF"] = "^(NOT[[:space:]]+)?" identifier_re "$"
+ if_clause_re = "!?" identifier_re
+ if_AND_re = "([[:space:]]+&&[[:space:]]+" if_clause_re ")*"
+ if_OR_re = "([[:space:]]+[|][|][[:space:]]+" if_clause_re ")*"
+
+ directive_args_patterns["IF"] = ("^" if_clause_re \
+ "(" if_AND_re "|" if_OR_re ")$")
directive_args_patterns["ENDIF"] = "^$"
directive_args_patterns["ELSE"] = "^$"
directive_args_patterns["ELIF"] = "^(NOT[[:space:]]+)?" identifier_re "$"
@@ -215,8 +220,7 @@ function process_file(path, read_path, mode,
if (directive == "IF") {
if (if_nesting_true == if_nesting) {
- if ((last_token(directive_args) in defines) == \
- (directive_args ~ /^[^[:space:]]+$/))
+ if (if_condition_true(directive_args))
if_nesting_true++
else
if_branch_processed = false
@@ -255,8 +259,7 @@ function process_file(path, read_path, mode,
}
if (if_nesting == if_nesting_true + 1 && !if_branch_processed &&
- (last_token(directive_args) in defines) == \
- (directive_args ~ /^[^[:space:]]+$/)) {
+ if_condition_true(directive_args)) {
if_nesting_true++
} else if (if_nesting == if_nesting_true) {
if_branch_processed = true
@@ -323,6 +326,35 @@ function process_file(path, read_path, mode,
delete reading[read_path]
}
+# Evaluate the arguments of a conditional directive.
+#
+# directive_args is read as clauses separated by operator tokens. A clause is
+# an identifier, optionally prefixed with `!`; it is true when the identifier
+# is (or, with `!`, is not) a key of `defines`. Once an `||` operator has been
+# seen, clauses are combined with OR; until then they are combined with AND.
+# All parameters after directive_args are local variables.
+function if_condition_true(directive_args,
+                           outcome, clause_value, at_first_clause, clause,
+                           is_negated, use_or) {
+    at_first_clause = true
+
+    while (directive_args) {
+        # Take the next clause off the front of the remaining text.
+        clause = first_token(directive_args)
+        sub(/^[^[:space:]]+[[:space:]]*/, "", directive_args)
+
+        # Note whether the operator that follows is `||`, then drop it too.
+        use_or = use_or || directive_args ~ /^[|][|]/
+        sub(/^[^[:space:]]+[[:space:]]*/, "", directive_args)
+
+        # sub() returns the number of substitutions made: 1 if `!` was there.
+        is_negated = sub(/^!/, "", clause)
+        clause_value = (clause in defines) != is_negated
+
+        if (at_first_clause) {
+            outcome = clause_value
+            at_first_clause = false
+        } else if (use_or) {
+            outcome = outcome || clause_value
+        } else {
+            # No `||` seen so far, so clauses are joined with `&&`.
+            outcome = outcome && clause_value
+        }
+    }
+
+    return outcome
+}
+
function include_file(root_path, read_path, included_path, line, verbatim,
read_line, result) {
if (validate_path(read_path, included_path, line))
diff --git a/content/main.js b/content/main.js
index 9e98635..d97747f 100644
--- a/content/main.js
+++ b/content/main.js
@@ -46,9 +46,19 @@
#FROM content/page_actions.js IMPORT handle_page_actions
#FROM common/misc.js IMPORT gen_nonce, is_privileged_url, \
- make_csp_rule, csp_header_regex
+ csp_header_regex
#FROM common/browser.js IMPORT browser
+/*
+ * CSP rule that blocks scripts according to policy's needs.
+ *
+ * `policy` - object whose optional `nonce` property selects the script source:
+ *            when `policy.nonce` is not `undefined`, `script-src` and
+ *            `script-src-elem` are set to 'nonce-<policy.nonce>'; otherwise
+ *            both are set to 'none'.
+ *
+ * `prefetch-src` and `script-src-attr` are always set to 'none'.
+ *
+ * Returns the rule as a single string of ';'-terminated directives.
+ */
+function make_csp_rule(policy)
+{
+ let rule = "prefetch-src 'none'; script-src-attr 'none';";
+ const script_src = policy.nonce !== undefined ?
+ `'nonce-${policy.nonce}'` : "'none'";
+ rule += ` script-src ${script_src}; script-src-elem ${script_src};`;
+ return rule;
+}
+
document.content_loaded = document.readyState === "complete";
const wait_loaded = e => e.content_loaded ? Promise.resolve() :
new Promise(c => e.addEventListener("DOMContentLoaded", c, {once: true}));
diff --git a/manifest.json b/manifest.json
index 7a9edd5..ec94c6e 100644
--- a/manifest.json
+++ b/manifest.json
@@ -11,11 +11,9 @@
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// CC0 1.0 Universal License for more details.
-#IF NOT MOZILLA
-#IF NOT CHROMIUM
+#IF !MOZILLA && !CHROMIUM
#ERROR Target browser not selected! Please define 'MOZILLA' or 'CHROMIUM'.
#ENDIF
-#ENDIF
{
#IF MV2
diff --git a/test/extension_crafting.py b/test/extension_crafting.py
index 9b985b3..df45d26 100644
--- a/test/extension_crafting.py
+++ b/test/extension_crafting.py
@@ -58,6 +58,7 @@ def manifest_template():
'<all_urls>',
'unlimitedStorage'
],
+ 'content_security_policy': "default-src 'self'; script-src 'self' https://serve.scrip.ts;",
'web_accessible_resources': ['testpage.html'],
'background': {
'persistent': True,
diff --git a/test/profiles.py b/test/profiles.py
index 795a0db..acdecb6 100755
--- a/test/profiles.py
+++ b/test/profiles.py
@@ -34,22 +34,9 @@ from .misc_constants import *
class HaketiloFirefox(webdriver.Firefox):
"""
- This wrapper class around selenium.webdriver.Firefox adds a `loaded_scripts`
- instance property that gets resetted to an empty array every time the
- `get()` method is called and also facilitates removing the temporary
- profile directory after Firefox quits.
+ This wrapper class around selenium.webdriver.Firefox facilitates removing
+ the temporary profile directory after Firefox quits.
"""
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
- self.reset_loaded_scripts()
-
- def reset_loaded_scripts(self):
- self.loaded_scripts = []
-
- def get(self, *args, **kwargs):
- self.reset_loaded_scripts()
- super().get(*args, **kwargs)
-
def quit(self, *args, **kwargs):
profile_path = self.firefox_profile.path
super().quit(*args, **kwargs)
@@ -71,8 +58,13 @@ def set_profile_proxy(profile, proxy_host, proxy_port):
profile.set_preference(f'network.proxy.backup.{proto}', '')
profile.set_preference(f'network.proxy.backup.{proto}_port', 0)
-def set_profile_console_logging(profile):
- profile.set_preference('devtools.console.stdout.content', True)
+def set_profile_csp_enabled(profile):
+ """
+ By default, Firefox Driver disables CSP. The extension we're testing uses
+ CSP extensively, so we use this function to prepare a Firefox profile that
+ has it enabled.
+
+ Sets the 'security.csp.enable' preference of `profile` to True. The profile
+ is modified in place; nothing is returned.
+ """
+ profile.set_preference('security.csp.enable', True)
# The function below seems not to work for extensions that are
# temporarily-installed in Firefox safe mode. Testing is needed to see if it
@@ -97,7 +89,7 @@ def firefox_safe_mode(firefox_binary=default_firefox_binary,
"""
profile = webdriver.FirefoxProfile()
set_profile_proxy(profile, proxy_host, proxy_port)
- set_profile_console_logging(profile)
+ set_profile_csp_enabled(profile)
options = Options()
options.add_argument('--safe-mode')
@@ -117,7 +109,7 @@ def firefox_with_profile(firefox_binary=default_firefox_binary,
"""
profile = webdriver.FirefoxProfile(profile_dir)
set_profile_proxy(profile, proxy_host, proxy_port)
- set_profile_console_logging(profile)
+ set_profile_csp_enabled(profile)
set_webextension_uuid(profile, default_haketilo_id)
return HaketiloFirefox(firefox_profile=profile,
diff --git a/test/script_loader.py b/test/script_loader.py
index f66f9ae..53de779 100644
--- a/test/script_loader.py
+++ b/test/script_loader.py
@@ -65,7 +65,7 @@ def load_script(path, code_to_add=None):
awk = subprocess.run(['awk', '-f', str(awk_script), '--', '-D', 'MOZILLA',
'-D', 'MV2', '-D', 'TEST', '-D', 'UNIT_TEST',
- '--output=amalgamate-js:' + key],
+ '-D', 'DEBUG', '--output=amalgamate-js:' + key],
stdout=subprocess.PIPE, cwd=script_root, check=True)
script = awk.stdout.decode()
script_cache[key] = script
diff --git a/test/unit/conftest.py b/test/unit/conftest.py
index f9a17f8..beffaf5 100644
--- a/test/unit/conftest.py
+++ b/test/unit/conftest.py
@@ -34,6 +34,7 @@ from selenium.webdriver.support import expected_conditions as EC
from ..profiles import firefox_safe_mode
from ..server import do_an_internet
from ..extension_crafting import make_extension
+from ..world_wide_library import start_serving_script, dump_scripts
@pytest.fixture(scope="package")
def proxy():
@@ -77,55 +78,55 @@ def webextension(driver, request):
driver.uninstall_addon(addon_id)
ext_path.unlink()
-script_injecting_script = '''\
+script_injector_script = '''\
/*
* Selenium by default executes scripts in some weird one-time context. We want
* separately-loaded scripts to be able to access global variables defined
* before, including those declared with `const` or `let`. To achieve that, we
- * run our scripts by injecting them into the page inside a <script> tag. We use
- * custom properties of the `window` object to communicate with injected code.
+ * run our scripts by injecting them into the page with a <script> tag that runs
+ * javascript served by our proxy. We use custom properties of the `window`
+ * object to communicate with injected code.
*/
-
-const script_elem = document.createElement('script');
-script_elem.textContent = arguments[0];
-
-delete window.haketilo_selenium_return_value;
-delete window.haketilo_selenium_exception;
-window.returnval = (val => window.haketilo_selenium_return_value = val);
-window.arguments = arguments[1];
-
-document.body.append(script_elem);
-
-/*
- * To ease debugging, we want this script to signal all exceptions from the
- * injectee.
- */
-try {
+const inject = async () => {
+ delete window.haketilo_selenium_return_value;
+ delete window.haketilo_selenium_exception;
+ window.returnval = val => window.haketilo_selenium_return_value = val;
+
+ const injectee = document.createElement('script');
+ injectee.src = arguments[0];
+ injectee.type = "application/javascript";
+ injectee.async = true;
+ const prom = new Promise(cb => injectee.onload = cb);
+
+ window.arguments = arguments[1];
+ document.body.append(injectee);
+
+ await prom;
+
+ /*
+ * To ease debugging, we want this script to signal all exceptions from the
+ * injectee.
+ */
if (window.haketilo_selenium_exception !== false)
- throw 'Error in injected script! Check your geckodriver.log!';
-} finally {
- script_elem.remove();
-}
+ throw ['haketilo_selenium_error',
+ 'Error in injected script! Check your geckodriver.log and ./injected_scripts/!'];
-return window.haketilo_selenium_return_value;
+ return window.haketilo_selenium_return_value;
+}
+return inject();
'''
def _execute_in_page_context(driver, script, args):
script = script + '\n;\nwindow.haketilo_selenium_exception = false;'
- driver.loaded_scripts.append(script)
+ script_url = start_serving_script(script)
+
try:
- return driver.execute_script(script_injecting_script, script, args)
+ result = driver.execute_script(script_injector_script, script_url, args)
+ if type(result) == list and result[0] == 'haketilo_selenium_error':
+ raise Exception(result[1])
+ return result
except Exception as e:
- import sys
-
- print("Scripts loaded since driver's last get() method call:",
- file=sys.stderr)
-
- for script in driver.loaded_scripts:
- lines = enumerate(script.split('\n'), 1)
- for err_info in [('===',), *lines]:
- print(*err_info, file=sys.stderr)
-
+ dump_scripts()
raise e from None
# Some fixtures here just define functions that operate on driver. We should
diff --git a/test/unit/test_indexeddb.py b/test/unit/test_indexeddb.py
index 476690c..df3df81 100644
--- a/test/unit/test_indexeddb.py
+++ b/test/unit/test_indexeddb.py
@@ -75,26 +75,9 @@ def make_sample_mapping():
def file_ref(file_name):
return {'file': file_name, 'hash_key': sample_files[file_name]['hash_key']}
-@pytest.mark.get_page('https://gotmyowndoma.in')
-def test_haketilodb_save_remove(execute_in_page):
- """
- indexeddb.js facilitates operating on Haketilo's internal database.
- Verify database operations work properly.
- """
- execute_in_page(indexeddb_js())
- # Mock some unwanted imports.
+def clear_indexeddb(execute_in_page):
execute_in_page(
'''{
- const broadcast_mock = {};
- const nop = () => {};
- for (const key in broadcast)
- broadcast_mock[key] = nop;
- broadcast = broadcast_mock;
- }''')
-
- # Start with no database.
- execute_in_page(
- '''
async function delete_db() {
if (db) {
db.close();
@@ -108,12 +91,13 @@ def test_haketilodb_save_remove(execute_in_page):
}
returnval(delete_db());
- '''
+ }'''
)
+def get_db_contents(execute_in_page):
# Facilitate retrieving all IndexedDB contents.
- execute_in_page(
- '''
+ return execute_in_page(
+ '''{
async function get_database_contents()
{
const db = await get_db();
@@ -130,20 +114,45 @@ def test_haketilodb_save_remove(execute_in_page):
store_names_reqs.forEach(([sn, req]) => result[sn] = req.result);
return result;
}
- ''')
+ returnval(get_database_contents());
+ }''')
+
+def mock_broadcast(execute_in_page):
+ """
+ Replace the page's `broadcast` object with a stub in which every enumerable
+ property of the original is a function that does nothing.
+
+ execute_in_page -- fixture-provided callable that runs JavaScript in the
+ currently loaded page.
+ """
+ execute_in_page(
+ '''{
+ const broadcast_mock = {};
+ const nop = () => {};
+ for (const key in broadcast)
+ broadcast_mock[key] = nop;
+ broadcast = broadcast_mock;
+ }''')
+
+@pytest.mark.get_page('https://gotmyowndoma.in')
+def test_haketilodb_item_modifications(driver, execute_in_page):
+ """
+ indexeddb.js facilitates operating on Haketilo's internal database.
+ Verify database operations on mappings/resources work properly.
+ """
+ execute_in_page(indexeddb_js())
+ mock_broadcast(execute_in_page)
+
+ # Start with no database.
+ clear_indexeddb(execute_in_page)
sample_item = make_sample_resource()
sample_item['source_copyright'][0]['extra_prop'] = True
- database_contents = execute_in_page(
+ execute_in_page(
'''{
const promise = start_items_transaction(["resources"], arguments[1])
.then(ctx => save_item(arguments[0], ctx).then(() => ctx))
- .then(finalize_items_transaction)
- .then(get_database_contents);
+ .then(finalize_transaction);
returnval(promise);
}''',
sample_item, sample_files_by_hash)
+
+ database_contents = get_db_contents(execute_in_page)
+
assert len(database_contents['files']) == 4
assert all([sample_files_by_hash[file['hash_key']] == file['contents']
for file in database_contents['files']])
@@ -162,31 +171,33 @@ def test_haketilodb_save_remove(execute_in_page):
sample_item['scripts'].append(file_ref('combined.js'))
incomplete_files = {**sample_files_by_hash}
incomplete_files.pop(sample_files['combined.js']['hash_key'])
- result = execute_in_page(
+ exception = execute_in_page(
'''{
- const promise = (async () => {
+ const args = arguments;
+ async function try_add_item()
+ {
const context =
- await start_items_transaction(["resources"], arguments[1]);
+ await start_items_transaction(["resources"], args[1]);
try {
- await save_item(arguments[0], context);
- await finalize_items_transaction(context);
- return {};
+ await save_item(args[0], context);
+ await finalize_transaction(context);
+ return;
} catch(e) {
- var exception = e;
+ return e;
}
-
- return {exception, db_contents: await get_database_contents()};
- })();
- returnval(promise);
+ }
+ returnval(try_add_item());
}''',
sample_item, incomplete_files)
- assert result
- assert 'file not present' in result['exception']
+ previous_database_contents = database_contents
+ database_contents = get_db_contents(execute_in_page)
+
+ assert 'file not present' in exception
for key, val in database_contents.items():
keyfun = lambda item: item.get('hash_key') or item['identifier']
- assert sorted(result['db_contents'][key], key=keyfun) \
- == sorted(val, key=keyfun)
+ assert sorted(previous_database_contents[key], key=keyfun) \
+ == sorted(val, key=keyfun)
# See if adding another item that partially uses first's files works OK.
sample_item = make_sample_mapping()
@@ -194,12 +205,13 @@ def test_haketilodb_save_remove(execute_in_page):
'''{
const promise = start_items_transaction(["mappings"], arguments[1])
.then(ctx => save_item(arguments[0], ctx).then(() => ctx))
- .then(finalize_items_transaction)
- .then(get_database_contents);
+ .then(finalize_transaction);
returnval(promise);
}''',
sample_item, sample_files_by_hash)
+ database_contents = get_db_contents(execute_in_page)
+
names = ['README.md', 'report.spdx', 'LICENSES/somelicense.txt', 'hello.js',
'bye.js']
sample_files_list = [sample_files[name] for name in names]
@@ -222,17 +234,18 @@ def test_haketilodb_save_remove(execute_in_page):
# Try removing the items to get an empty database again.
results = [None, None]
for i, item_type in enumerate(['resource', 'mapping']):
- results[i] = execute_in_page(
+ execute_in_page(
f'''{{
const remover = remove_{item_type};
const promise =
start_items_transaction(["{item_type}s"], {{}})
.then(ctx => remover('helloapple', ctx).then(() => ctx))
- .then(finalize_items_transaction)
- .then(get_database_contents);
+ .then(finalize_transaction);
returnval(promise);
}}''')
+ results[i] = get_db_contents(execute_in_page)
+
names = ['README.md', 'report.spdx']
sample_files_list = [sample_files[name] for name in names]
uses_list = [1, 1]
@@ -271,22 +284,48 @@ def test_haketilodb_save_remove(execute_in_page):
},
'files': sample_files_by_hash
}
- database_contents = execute_in_page(
- '''
- initial_data = arguments[0];
- returnval(delete_db().then(() => get_database_contents()));
- ''',
- initial_data)
+
+ clear_indexeddb(execute_in_page)
+ execute_in_page('initial_data = arguments[0];', initial_data)
+ database_contents = get_db_contents(execute_in_page)
+
assert database_contents['resources'] == [sample_resource]
assert database_contents['mappings'] == [sample_mapping]
+@pytest.mark.get_page('https://gotmyowndoma.in')
+def test_haketilodb_settings(driver, execute_in_page):
+    """
+    indexeddb.js facilitates operating on Haketilo's internal database.
+    Verify database assigning/retrieving values of simple "settings" works
+    properly.
+    """
+    execute_in_page(indexeddb_js())
+    mock_broadcast(execute_in_page)
+
+    # Start with no database.
+    clear_indexeddb(execute_in_page)
+
+    assert get_db_contents(execute_in_page)['settings'] == []
+
+    # A setting that has never been assigned must read back as no value at all.
+    # Identity comparison is used because None is a singleton.
+    assert execute_in_page('returnval(get_setting("option15"));') is None
+
+    # First assignment creates the setting.
+    execute_in_page('returnval(set_setting("option15", "disable"));')
+    assert execute_in_page('returnval(get_setting("option15"));') == 'disable'
+
+    # Second assignment under the same name must overwrite the stored value.
+    execute_in_page('returnval(set_setting("option15", "enable"));')
+    assert execute_in_page('returnval(get_setting("option15"));') == 'enable'
+
test_page_html = '''
<!DOCTYPE html>
<script src="/testpage.js"></script>
+<script>console.log("inline!")</script>
+<script nonce="123456789">console.log("inline nonce!")</script>
<h2>resources</h2>
<ul id="resources"></ul>
<h2>mappings</h2>
<ul id="mappings"></ul>
+<h2>settings</h2>
+<ul id="settings"></ul>
'''
@pytest.mark.ext_data({
@@ -328,15 +367,21 @@ def test_haketilodb_track(driver, execute_in_page, wait_elem_text):
}
for window in reversed(windows):
driver.switch_to.window(window)
- execute_in_page('initial_data = arguments[0];', initial_data)
-
- # See if track_*() functions properly return the already-existing items.
+ try :
+ driver.execute_script('console.log("uuuuuuu");')
+ execute_in_page('initial_data = arguments[0];', initial_data)
+ except:
+ from time import sleep
+ sleep(100000)
+ execute_in_page('returnval(set_setting("option15", "123"));')
+
+ # See if track.*() functions properly return the already-existing items.
execute_in_page(
'''
function update_item(store_name, change)
{
console.log('update', ...arguments);
- const elem_id = `${store_name}_${change.identifier}`;
+ const elem_id = `${store_name}_${change.key}`;
let elem = document.getElementById(elem_id);
elem = elem || document.createElement("li");
elem.id = elem_id;
@@ -348,35 +393,32 @@ def test_haketilodb_track(driver, execute_in_page, wait_elem_text):
let resource_tracking, resource_items, mapping_tracking, mapping_items;
- async function start_tracking()
+ async function start_reporting()
{
- const update_resource = change => update_item("resources", change);
- const update_mapping = change => update_item("mappings", change);
-
- [resource_tracking, resource_items] =
- await track_resources(update_resource);
- [mapping_tracking, mapping_items] =
- await track_mappings(update_mapping);
-
- for (const item of resource_items)
- update_resource({identifier: item.identifier, new_val: item});
- for (const item of mapping_items)
- update_mapping({identifier: item.identifier, new_val: item});
+ for (const store_name of ["resources", "mappings", "settings"]) {
+ [tracking, items] =
+ await track[store_name](ch => update_item(store_name, ch));
+ const prop = store_name === "settings" ? "name" : "identifier";
+ for (const item of items)
+ update_item(store_name, {key: item[prop], new_val: item});
+ }
}
- returnval(start_tracking());
+ returnval(start_reporting());
''')
item_counts = driver.execute_script(
'''
const childcount = id => document.getElementById(id).childElementCount;
- return ["resources", "mappings"].map(childcount);
+ return ["resources", "mappings", "settings"].map(childcount);
''')
- assert item_counts == [1, 1]
+ assert item_counts == [1, 1, 1]
resource_json = driver.find_element_by_id('resources_helloapple').text
mapping_json = driver.find_element_by_id('mappings_helloapple').text
+ setting_json = driver.find_element_by_id('settings_option15').text
assert json.loads(resource_json) == sample_resource
assert json.loads(mapping_json) == sample_mapping
+ assert json.loads(setting_json) == {'name': 'option15', 'value': '123'}
# See if item additions get tracked properly.
driver.switch_to.window(windows[1])
@@ -398,14 +440,17 @@ def test_haketilodb_track(driver, execute_in_page, wait_elem_text):
'files': sample_files_by_hash
}
execute_in_page('returnval(save_items(arguments[0]));', sample_data)
+ execute_in_page('returnval(set_setting("option22", "abc"));')
driver.switch_to.window(windows[0])
driver.implicitly_wait(10)
resource_json = driver.find_element_by_id('resources_helloapple-copy').text
mapping_json = driver.find_element_by_id('mappings_helloapple-copy').text
+ setting_json = driver.find_element_by_id('settings_option22').text
driver.implicitly_wait(0)
assert json.loads(resource_json) == sample_resource2
assert json.loads(mapping_json) == sample_mapping2
+ assert json.loads(setting_json) == {'name': 'option22', 'value': 'abc'}
# See if item deletions get tracked properly.
driver.switch_to.window(windows[1])
@@ -417,7 +462,8 @@ def test_haketilodb_track(driver, execute_in_page, wait_elem_text):
const ctx = await start_items_transaction(store_names, {});
await remove_resource("helloapple", ctx);
await remove_mapping("helloapple-copy", ctx);
- await finalize_items_transaction(ctx);
+ await finalize_transaction(ctx);
+ await set_setting("option22", null);
}
returnval(remove_items());
}''')
@@ -430,7 +476,8 @@ def test_haketilodb_track(driver, execute_in_page, wait_elem_text):
return False
except WebDriverException:
pass
- return True
+ option_text = driver.find_element_by_id('settings_option22').text
+ return json.loads(option_text)['value'] == None
driver.switch_to.window(windows[0])
WebDriverWait(driver, 10).until(condition_items_absent)
diff --git a/test/unit/test_patterns_query_manager.py b/test/unit/test_patterns_query_manager.py
index 8ae7c28..ae1f490 100644
--- a/test/unit/test_patterns_query_manager.py
+++ b/test/unit/test_patterns_query_manager.py
@@ -25,10 +25,9 @@ from selenium.webdriver.support.ui import WebDriverWait
from ..script_loader import load_script
def simple_sample_mapping(patterns, fruit):
- if type(patterns) is list:
- payloads = dict([(p, {'identifier': fruit}) for p in patterns])
- else:
- payloads = {patterns: {'identifier': fruit}}
+ if type(patterns) is not list:
+ patterns = [patterns]
+ payloads = dict([(p, {'identifier': f'{fruit}-{p}'}) for p in patterns])
return {
'source_copyright': [],
'type': 'mapping',
@@ -36,9 +35,13 @@ def simple_sample_mapping(patterns, fruit):
'payloads': payloads
}
-content_script_re = re.compile(r'this.haketilo_pattern_tree = (.*);')
+content_script_tree_re = re.compile(r'this.haketilo_pattern_tree = (.*);')
def extract_tree_data(content_script_text):
- return json.loads(content_script_re.search(content_script_text)[1])
+ return json.loads(content_script_tree_re.search(content_script_text)[1])
+
+content_script_mapping_re = re.compile(r'this.haketilo_mappings = (.*);')
+def extract_mappings_data(content_script_text):
+    """
+    Find the `this.haketilo_mappings = ...;` assignment in the given content
+    script text and return its right-hand side parsed as JSON.
+    """
+    assignment = content_script_mapping_re.search(content_script_text)
+    serialized_mappings = assignment[1]
+    return json.loads(serialized_mappings)
# Fields that are not relevant for testing are omitted from these mapping
# definitions.
@@ -82,7 +85,7 @@ def test_pqm_tree_building(driver, execute_in_page):
return [{}, initial_mappings];
}
- haketilodb.track_mappings = track_mock;
+ haketilodb.track.mappings = track_mock;
let last_script;
let unregister_called = 0;
@@ -104,7 +107,10 @@ def test_pqm_tree_building(driver, execute_in_page):
tree, last_script, unregister_called]);
''',
'https://gotmyowndoma.in/index.html')
- assert found == dict([(m['identifier'], m) for m in sample_mappings[0:2]])
+ best_pattern = 'https://gotmyowndoma.in/index.html'
+ assert found == \
+ dict([(f'inject-{fruit}', {'identifier': f'{fruit}-{best_pattern}'})
+ for fruit in ('banana', 'orange')])
assert tree == extract_tree_data(content_script)
assert deregistrations == 0
@@ -114,12 +120,8 @@ def test_pqm_tree_building(driver, execute_in_page):
execute_in_page(
'''
- for (const mapping of arguments[0]) {
- mappingchange({
- identifier: mapping.identifier,
- new_val: mapping
- });
- }
+ for (const mapping of arguments[0])
+ mappingchange({key: mapping.identifier, new_val: mapping});
''',
sample_mappings[2:])
WebDriverWait(driver, 10).until(condition_mappings_added)
@@ -129,7 +131,8 @@ def test_pqm_tree_building(driver, execute_in_page):
def condition_odd_removed(driver):
last_script = execute_in_page('returnval(last_script);')
- return all([id not in last_script for id in odd])
+ return (all([id not in last_script for id in odd]) and
+ all([id in last_script for id in even]))
def condition_all_removed(driver):
content_script = execute_in_page('returnval(last_script);')
@@ -137,7 +140,7 @@ def test_pqm_tree_building(driver, execute_in_page):
execute_in_page(
'''
- arguments[0].forEach(identifier => mappingchange({identifier}));
+ arguments[0].forEach(identifier => mappingchange({key: identifier}));
''',
odd)
@@ -145,7 +148,7 @@ def test_pqm_tree_building(driver, execute_in_page):
execute_in_page(
'''
- arguments[0].forEach(identifier => mappingchange({identifier}));
+ arguments[0].forEach(identifier => mappingchange({key: identifier}));
''',
even)
@@ -224,7 +227,7 @@ def test_pqm_script_injection(driver, execute_in_page):
const ctx = await start_items_transaction(["mappings"], {});
for (const id of identifiers)
await remove_mapping(id, ctx);
- await finalize_items_transaction(ctx);
+ await finalize_transaction(ctx);
}
returnval(remove_items());
}''',
diff --git a/test/unit/test_policy_deciding.py b/test/unit/test_policy_deciding.py
new file mode 100644
index 0000000..a360537
--- /dev/null
+++ b/test/unit/test_policy_deciding.py
@@ -0,0 +1,121 @@
+# SPDX-License-Identifier: CC0-1.0
+
+"""
+Haketilo unit tests - determining what to do on a given web page
+"""
+
+# This file is part of Haketilo
+#
+# Copyright (C) 2021, Wojtek Kosior <koszko@koszko.org>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the CC0 1.0 Universal License as published by
+# the Creative Commons Corporation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# CC0 1.0 Universal License for more details.
+
+import re
+from hashlib import sha256
+import pytest
+
+from ..script_loader import load_script
+
+csp_re = re.compile(r'^\S+\s+\S+;(?:\s+\S+\s+\S+;)*$')
+rule_re = re.compile(r'^\s*(?P<src_kind>\S+)\s+(?P<allowed_origins>\S+)$')
+def parse_csp(csp):
+    '''
+    Turn a CSP string into a dict that maps each directive name to its value.
+    Only a simplified CSP format is supported: every rule is exactly two
+    whitespace-free tokens and is terminated with a semicolon.
+    '''
+    assert csp_re.match(csp)
+
+    # The string ends with ';', so the last piece produced by split() is not a
+    # rule and gets discarded.
+    *rules, _after_last_semicolon = csp.split(';')
+
+    parsed_rules = (rule_re.match(rule) for rule in rules)
+    return {parsed.group('src_kind'): parsed.group('allowed_origins')
+            for parsed in parsed_rules}
+
+@pytest.mark.get_page('https://gotmyowndoma.in')
+def test_decide_policy(execute_in_page):
+ """
+ policy.js contains code that, using a Pattern Query Tree instance and a URL,
+ decides what Haketilo should do on a page opened at that URL, i.e. whether
+ it should block or allow script execution and whether it should inject its
+ own scripts and which ones. Test that the policy object gets constructed
+ properly.
+ """
+ execute_in_page(load_script('common/policy.js'))
+
+ # Case 1: empty tree and `true` as the 3rd argument (presumably the
+ # "allow by default" flag - TODO confirm against policy.js). Scripts are
+ # allowed and no other policy properties are set.
+ policy = execute_in_page(
+ '''
+ returnval(decide_policy(pqt.make(), "http://unkno.wn/", true, "abcd"));
+ ''')
+ assert policy['allow'] == True
+ for prop in ('mapping', 'payload', 'nonce', 'csp'):
+ assert prop not in policy
+
+ # Case 2: the URL matches a pattern registered with {allow: true} while the
+ # 3rd argument is `false`. The matched rule wins and its name is reported
+ # under 'mapping'.
+ policy = execute_in_page(
+ '''{
+ const tree = pqt.make();
+ pqt.register(tree, "http://kno.wn", "allowed", {allow: true});
+ returnval(decide_policy(tree, "http://kno.wn/", false, "abcd"));
+ }''')
+ assert policy['allow'] == True
+ assert policy['mapping'] == 'allowed'
+ for prop in ('payload', 'nonce', 'csp'):
+ assert prop not in policy
+
+ # Case 3: empty tree and `false` as the 3rd argument. Scripts are blocked
+ # and the CSP forbids every script source.
+ policy = execute_in_page(
+ '''
+ returnval(decide_policy(pqt.make(), "http://unkno.wn/", false, "abcd"));
+ '''
+ )
+ assert policy['allow'] == False
+ for prop in ('mapping', 'payload', 'nonce'):
+ assert prop not in policy
+ assert parse_csp(policy['csp']) == {
+ 'prefetch-src': "'none'",
+ 'script-src-attr': "'none'",
+ 'script-src': "'none'",
+ 'script-src-elem': "'none'"
+ }
+
+ # Case 4: the URL matches a pattern registered with {allow: false} while the
+ # 3rd argument is `true`. The matched rule wins and scripts are blocked.
+ policy = execute_in_page(
+ '''{
+ const tree = pqt.make();
+ pqt.register(tree, "http://kno.wn", "disallowed", {allow: false});
+ returnval(decide_policy(tree, "http://kno.wn/", true, "abcd"));
+ }''')
+ assert policy['allow'] == False
+ assert policy['mapping'] == 'disallowed'
+ for prop in ('payload', 'nonce'):
+ assert prop not in policy
+ assert parse_csp(policy['csp']) == {
+ 'prefetch-src': "'none'",
+ 'script-src-attr': "'none'",
+ 'script-src': "'none'",
+ 'script-src-elem': "'none'"
+ }
+
+ # Case 5: the URL matches a pattern that carries a payload. Page scripts are
+ # blocked, the payload is reported and script sources are limited to a
+ # nonce.
+ policy = execute_in_page(
+ '''{
+ const tree = pqt.make();
+ pqt.register(tree, "http://kno.wn", "m1", {identifier: "res1"});
+ returnval(decide_policy(tree, "http://kno.wn/", true, "abcd"));
+ }''')
+ assert policy['allow'] == False
+ assert policy['mapping'] == 'm1'
+ assert policy['payload'] == {'identifier': 'res1'}
+
+ # The expected nonce is the hex SHA-256 of
+ # "<mapping>:<payload identifier>:<URL>:<4th decide_policy() argument>".
+ assert policy['nonce'] == \
+ sha256('m1:res1:http://kno.wn/:abcd'.encode()).digest().hex()
+ assert parse_csp(policy['csp']) == {
+ 'prefetch-src': f"'none'",
+ 'script-src-attr': f"'none'",
+ 'script-src': f"'nonce-{policy['nonce']}'",
+ 'script-src-elem': f"'nonce-{policy['nonce']}'"
+ }
diff --git a/test/unit/test_webrequest.py b/test/unit/test_webrequest.py
new file mode 100644
index 0000000..6af2758
--- /dev/null
+++ b/test/unit/test_webrequest.py
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: CC0-1.0
+
+"""
+Haketilo unit tests - modifying requests using webRequest API
+"""
+
+# This file is part of Haketilo
+#
+# Copyright (C) 2021, Wojtek Kosior <koszko@koszko.org>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the CC0 1.0 Universal License as published by
+# the Creative Commons Corporation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# CC0 1.0 Universal License for more details.
+
+import re
+from hashlib import sha256
+import pytest
+
+from ..script_loader import load_script
+
+def webrequest_js():
+ """
+ Build the background script text used by the webRequest test: the result
+ of load_script() for background/webrequest.js followed by JavaScript that
+ replaces `tree` with a tree holding two test patterns, stubs
+ `haketilodb.track.settings` and `stream_filter.apply`, and finally calls
+ start("somesecret").
+ """
+ # NOTE(review): the 2nd load_script() argument looks like an extra import
+ # directive to process together with the script - confirm in
+ # script_loader.py.
+ return (load_script('background/webrequest.js',
+ '#IMPORT common/patterns_query_tree.js AS pqt') +
+ ''';
+ // Mock pattern tree.
+ tree = pqt.make();
+ pqt.register(tree, "https://site.with.scripts.block.ed/***",
+ "disallowed", {allow: false});
+ pqt.register(tree, "https://site.with.paylo.ad/***",
+ "somemapping", {identifier: "someresource"});
+
+ // Mock IndexedDB.
+ haketilodb.track.settings =
+ () => [{}, [{name: "default_allow", value: true}]];
+
+ // Mock stream_filter.
+ stream_filter.apply = (details, headers, policy) => headers;
+
+ // Mock secret and start webrequest operations.
+ start("somesecret");
+ ''')
+
+def are_scripts_allowed(driver, nonce=None):
+ """
+ Check whether an inline script appended to the current page gets executed.
+
+ The injected JavaScript sets `document.scripts_allowed` to false, appends
+ to <head> an inline <script> that would set it to true and returns the
+ flag's final value.
+
+ driver -- object whose execute_script() runs the JavaScript in the page.
+ nonce -- if truthy, assigned to the appended script's "nonce" attribute.
+ """
+ return driver.execute_script(
+ '''
+ document.scripts_allowed = false;
+ const script = document.createElement("script");
+ script.innerHTML = "document.scripts_allowed = true;";
+ if (arguments[0])
+ script.setAttribute("nonce", arguments[0]);
+ document.head.append(script);
+ return document.scripts_allowed;
+ ''',
+ nonce)
+
+@pytest.mark.ext_data({'background_script': webrequest_js})
+@pytest.mark.usefixtures('webextension')
+def test_on_headers_received(driver, execute_in_page):
+ """
+ With the script produced by webrequest_js() as the extension's background
+ script, verify that inline scripts are blocked on the site registered with
+ {allow: false}, allowed on a site with no registered pattern, and on the
+ site with a payload allowed only when they carry the expected nonce.
+ """
+ # The page is loaded up to 10 times until scripts are found to be blocked.
+ # NOTE(review): presumably the retries give the background script time to
+ # start operating - confirm.
+ for attempt in range(10):
+ driver.get('https://site.with.scripts.block.ed/')
+
+ if not are_scripts_allowed(driver):
+ break
+ assert attempt != 9
+
+ driver.get('https://site.with.scripts.allow.ed/')
+ assert are_scripts_allowed(driver)
+
+ driver.get('https://site.with.paylo.ad/')
+ assert not are_scripts_allowed(driver)
+ # The nonce is the hex SHA-256 of "<mapping>:<resource>:<page URL>:<secret>",
+ # built from the values mocked in webrequest_js().
+ source = 'somemapping:someresource:https://site.with.paylo.ad/index.html:somesecret'
+ assert are_scripts_allowed(driver, sha256(source.encode()).digest().hex())
diff --git a/test/world_wide_library.py b/test/world_wide_library.py
index 860c987..43d3512 100644
--- a/test/world_wide_library.py
+++ b/test/world_wide_library.py
@@ -27,13 +27,99 @@ Our helpful little stand-in for the Internet
# file's license. Although I request that you do not make use this code
# in a proprietary program, I am not going to enforce this in court.
+from hashlib import sha256
+from pathlib import Path
+from shutil import rmtree
+from threading import Lock
+
from .misc_constants import here
+# Texts of scripts registered with start_serving_script(), keyed by the hex
+# SHA-256 sum of the script text.
+served_scripts = {}
+# Acquired by the functions below around their accesses to served_scripts.
+served_scripts_lock = Lock()
+
+def start_serving_script(script_text):
+    """
+    Register given script so that it is served at
+    https://serve.scrip.ts/?sha256=<script's_sha256_sum>
+
+    Returns the URL at which script will be served.
+
+    The registry of served scripts is only modified while holding
+    served_scripts_lock, the same lock serve_script() and dump_scripts() take
+    before reading it.
+    """
+    sha256sum = sha256(script_text.encode()).hexdigest()
+
+    # `with` releases the lock even if the assignment raises.
+    with served_scripts_lock:
+        served_scripts[sha256sum] = script_text
+
+    return f'https://serve.scrip.ts/?sha256={sha256sum}'
+
+def serve_script(command, get_params, post_params):
+    """
+    info() callback to pass to request-handling code in server.py. Facilitates
+    serving scripts that have been registered with start_serving_script().
+
+    The script is looked up by the first value of the "sha256" GET parameter.
+    Returns a (status code, headers dict, body) tuple: 200 with the script text
+    when it is registered, 404 with an empty body when it is not or when the
+    request carries no "sha256" parameter.
+    """
+    # A request without a usable "sha256" parameter cannot match any script.
+    # Answer 404 instead of letting KeyError/IndexError escape to the server.
+    try:
+        requested_sum = get_params['sha256'][0]
+    except (KeyError, IndexError):
+        return 404, {}, b''
+
+    with served_scripts_lock:
+        script = served_scripts.get(requested_sum)
+
+    if script is None:
+        return 404, {}, b''
+
+    # NOTE(review): the body is returned as str here while the 404 paths return
+    # bytes - confirm server.py accepts both.
+    return 200, {'Content-Type': 'application/javascript'}, script
+
+def dump_scripts(directory='./injected_scripts'):
+    """
+    Write all scripts that have been registered with start_serving_script()
+    under the provided directory. If the directory already exists, it is wiped
+    beforehand. If it doesn't exist, it is created.
+
+    Each script is written to a file named after the script's hex SHA-256 sum.
+    """
+    directory = Path(directory)
+    rmtree(directory, ignore_errors=True)
+    directory.mkdir(parents=True)
+
+    # `with` makes sure the lock gets released even when writing a file fails;
+    # otherwise later start_serving_script()/serve_script() calls would block
+    # forever.
+    with served_scripts_lock:
+        # The loop variable is deliberately not called `sha256`, which would
+        # shadow the hashlib function imported by this module.
+        for sha256sum, script in served_scripts.items():
+            # Sums are computed from the UTF-8 encoding of the script text, so
+            # dump the same bytes regardless of the platform's default encoding.
+            with open(directory / sha256sum, 'wt', encoding='utf-8') as file:
+                file.write(script)
+
catalog = {
- 'http://gotmyowndoma.in': (302, {'location': 'http://gotmyowndoma.in/index.html'}, None),
- 'http://gotmyowndoma.in/': (302, {'location': 'http://gotmyowndoma.in/index.html'}, None),
- 'http://gotmyowndoma.in/index.html': (200, {}, here / 'data' / 'pages' / 'gotmyowndomain.html'),
- 'https://gotmyowndoma.in': (302, {'location': 'https://gotmyowndoma.in/index.html'}, None),
- 'https://gotmyowndoma.in/': (302, {'location': 'https://gotmyowndoma.in/index.html'}, None),
- 'https://gotmyowndoma.in/index.html': (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html')
+ 'http://gotmyowndoma.in':
+ (302, {'location': 'http://gotmyowndoma.in/index.html'}, None),
+ 'http://gotmyowndoma.in/':
+ (302, {'location': 'http://gotmyowndoma.in/index.html'}, None),
+ 'http://gotmyowndoma.in/index.html':
+ (200, {}, here / 'data' / 'pages' / 'gotmyowndomain.html'),
+
+ 'https://gotmyowndoma.in':
+ (302, {'location': 'https://gotmyowndoma.in/index.html'}, None),
+ 'https://gotmyowndoma.in/':
+ (302, {'location': 'https://gotmyowndoma.in/index.html'}, None),
+ 'https://gotmyowndoma.in/index.html':
+ (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html'),
+
+ 'https://serve.scrip.ts/': serve_script,
+
+ 'https://site.with.scripts.block.ed':
+ (302, {'location': 'https://site.with.scripts.block.ed/index.html'}, None),
+ 'https://site.with.scripts.block.ed/':
+ (302, {'location': 'https://site.with.scripts.block.ed/index.html'}, None),
+ 'https://site.with.scripts.block.ed/index.html':
+ (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html'),
+
+ 'https://site.with.scripts.allow.ed':
+ (302, {'location': 'https://site.with.scripts.allow.ed/index.html'}, None),
+ 'https://site.with.scripts.allow.ed/':
+ (302, {'location': 'https://site.with.scripts.allow.ed/index.html'}, None),
+ 'https://site.with.scripts.allow.ed/index.html':
+ (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html'),
+
+ 'https://site.with.paylo.ad':
+ (302, {'location': 'https://site.with.paylo.ad/index.html'}, None),
+ 'https://site.with.paylo.ad/':
+ (302, {'location': 'https://site.with.paylo.ad/index.html'}, None),
+ 'https://site.with.paylo.ad/index.html':
+ (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html')
}