summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWojtek Kosior <koszko@koszko.org>2021-12-31 14:23:28 +0100
committerWojtek Kosior <koszko@koszko.org>2021-12-31 14:23:28 +0100
commit702eefd252a112375c2da6a9ae4b39915fc2dbf4 (patch)
tree479158ba4f29e12cfb1eb9240b16d4f5d00df492
parent01e977f922ea29cd2994f96c18e4b3f033b1802d (diff)
downloadbrowser-extension-702eefd252a112375c2da6a9ae4b39915fc2dbf4.tar.gz
browser-extension-702eefd252a112375c2da6a9ae4b39915fc2dbf4.zip
utilize Pattern Tree to decide the policy to use and modify HTTP response headers according to that policy
This commit also enhances the build script so that preprocessor conditionals can now use operators '&&' and '||'. The features being developed are not yet included in the actual Haketilo build. Some of the new source files contain similar functionality to other ones already existing in the source tree. At some point the latter will be removed.
-rw-r--r--Makefile.in1
-rw-r--r--background/patterns_query_manager.js27
-rw-r--r--background/policy_injector.js12
-rw-r--r--background/webrequest.js189
-rw-r--r--common/indexeddb.js66
-rw-r--r--common/misc.js11
-rw-r--r--common/patterns_query_tree.js2
-rw-r--r--common/policy.js106
-rwxr-xr-xcompute_scripts.awk42
-rw-r--r--content/main.js12
-rw-r--r--manifest.json4
-rw-r--r--test/extension_crafting.py1
-rwxr-xr-xtest/profiles.py30
-rw-r--r--test/script_loader.py2
-rw-r--r--test/unit/conftest.py73
-rw-r--r--test/unit/test_indexeddb.py193
-rw-r--r--test/unit/test_patterns_query_manager.py39
-rw-r--r--test/unit/test_policy_deciding.py121
-rw-r--r--test/unit/test_webrequest.py77
-rw-r--r--test/world_wide_library.py98
20 files changed, 903 insertions, 203 deletions
diff --git a/Makefile.in b/Makefile.in
index 5291299..bf0fdec 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -83,6 +83,7 @@ clean mostlyclean:
rm -rf test/certs
rm -rf $$(find . -name geckodriver.log)
rm -rf $$(find . -type d -name __pycache__)
+ rm -rf $$(find . -type d -name injected_scripts)
distclean: clean
rm -f Makefile config.status record.conf
diff --git a/background/patterns_query_manager.js b/background/patterns_query_manager.js
index cb14cb1..e364668 100644
--- a/background/patterns_query_manager.js
+++ b/background/patterns_query_manager.js
@@ -45,13 +45,18 @@
#IMPORT common/patterns_query_tree.js AS pqt
#IMPORT common/indexeddb.js AS haketilodb
+#IF MOZILLA || MV3
#FROM common/browser.js IMPORT browser
+#ENDIF
+
+let secret;
const tree = pqt.make();
#EXPORT tree
const current_mappings = new Map();
+#IF MOZILLA || MV3
let registered_script = null;
let script_update_occuring = false;
let script_update_needed;
@@ -67,6 +72,7 @@ async function update_content_script()
script_update_needed = false;
const code = `\
+this.haketilo_secret = ${secret};
this.haketilo_pattern_tree = ${JSON.stringify(tree)};
if (this.haketilo_content_script_main)
haketilo_content_script_main();`;
@@ -89,36 +95,43 @@ if (this.haketilo_content_script_main)
function register_mapping(mapping)
{
- for (const pattern in mapping.payloads)
- pqt.register(tree, pattern, mapping.identifier, mapping);
+ for (const [pattern, resource] of Object.entries(mapping.payloads))
+ pqt.register(tree, pattern, mapping.identifier, resource);
current_mappings.set(mapping.identifier, mapping);
}
+#ENDIF
function mapping_changed(change)
{
console.log('mapping changes!', arguments);
- const old_version = current_mappings.get(change.identifier);
+ const old_version = current_mappings.get(change.key);
if (old_version !== undefined) {
for (const pattern in old_version.payloads)
- pqt.deregister(tree, pattern, change.identifier);
+ pqt.deregister(tree, pattern, change.key);
- current_mappings.delete(change.identifier);
+ current_mappings.delete(change.key);
}
if (change.new_val !== undefined)
register_mapping(change.new_val);
+#IF MOZILLA || MV3
script_update_needed = true;
setTimeout(update_content_script, 0);
+#ENDIF
}
-async function start()
+async function start(secret_)
{
+ secret = secret_;
+
const [tracking, initial_mappings] =
- await haketilodb.track_mappings(mapping_changed);
+ await haketilodb.track.mappings(mapping_changed);
initial_mappings.forEach(register_mapping);
+#IF MOZILLA || MV3
script_update_needed = true;
await update_content_script();
+#ENDIF
}
#EXPORT start
diff --git a/background/policy_injector.js b/background/policy_injector.js
index 2544e8e..b1fc733 100644
--- a/background/policy_injector.js
+++ b/background/policy_injector.js
@@ -43,13 +43,23 @@
* proprietary program, I am not going to enforce this in court.
*/
-#FROM common/misc.js IMPORT make_csp_rule, csp_header_regex
+#FROM common/misc.js IMPORT csp_header_regex
/* Re-enable the import below once nonce stuff here is ready */
#IF NEVER
#FROM common/misc.js IMPORT gen_nonce
#ENDIF
+/* CSP rule that blocks scripts according to policy's needs. */
+function make_csp_rule(policy)
+{
+ let rule = "prefetch-src 'none'; script-src-attr 'none';";
+ const script_src = policy.nonce !== undefined ?
+ `'nonce-${policy.nonce}'` : "'none'";
+ rule += ` script-src ${script_src}; script-src-elem ${script_src};`;
+ return rule;
+}
+
function inject_csp_headers(headers, policy)
{
let csp_headers;
diff --git a/background/webrequest.js b/background/webrequest.js
new file mode 100644
index 0000000..e32947a
--- /dev/null
+++ b/background/webrequest.js
@@ -0,0 +1,189 @@
+/**
+ * This file is part of Haketilo.
+ *
+ * Function: Modify HTTP traffic using webRequest API.
+ *
+ * Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * As additional permission under GNU GPL version 3 section 7, you
+ * may distribute forms of that code without the copy of the GNU
+ * GPL normally required by section 4, provided you include this
+ * license notice and, in case of non-source distribution, a URL
+ * through which recipients can access the Corresponding Source.
+ * If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not
+ * obligated to do so. If you do not wish to do so, delete this
+ * exception statement from your version.
+ *
+ * As a special exception to the GPL, any HTML file which merely
+ * makes function calls to this code, and for that purpose
+ * includes it by reference shall be deemed a separate work for
+ * copyright law purposes. If you modify this code, you may extend
+ * this exception to your version of the code, but you are not
+ * obligated to do so. If you do not wish to do so, delete this
+ * exception statement from your version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
+ * license. Although I request that you do not make use this code in a
+ * proprietary program, I am not going to enforce this in court.
+ */
+
+#IMPORT common/indexeddb.js AS haketilodb
+#IF MOZILLA
+#IMPORT background/stream_filter.js
+#ENDIF
+
+#FROM common/browser.js IMPORT browser
+#FROM common/misc.js IMPORT is_privileged_url, csp_header_regex
+#FROM common/policy.js IMPORT decide_policy
+
+#FROM background/patterns_query_manager.js IMPORT tree
+
+let secret;
+
+let default_allow = false;
+
+async function track_default_allow()
+{
+ const set_val = ch => default_allow = (ch.new_val || {}).value;
+ const [tracking, settings] = await haketilodb.track.settings(set_val);
+ for (const setting of settings) {
+ if (setting.name === "default_allow")
+ default_allow = setting.value;
+ }
+}
+
+function on_headers_received(details)
+{
+ const url = details.url;
+ if (is_privileged_url(details.url))
+ return;
+
+ let headers = details.responseHeaders;
+
+ const policy = decide_policy(tree, details.url, default_allow, secret);
+ if (policy.allow)
+ return;
+
+ if (policy.payload)
+ headers = headers.filter(h => !csp_header_regex.test(h.name));
+
+ headers.push({name: "Content-Security-Policy", value: policy.csp});
+
+#IF MOZILLA
+ let skip = false;
+ for (const header of headers) {
+ if (header.name.toLowerCase().trim() !== "content-disposition")
+ continue;
+
+ if (/^\s*attachment\s*(;.*)$/i.test(header.value)) {
+ skip = true;
+ } else {
+ skip = false;
+ break;
+ }
+ }
+ skip = skip || (details.statusCode >= 300 && details.statusCode < 400);
+
+ if (!skip)
+ headers = stream_filter.apply(details, headers, policy);
+#ENDIF
+
+ return {responseHeaders: headers};
+}
+
+#IF CHROMIUM && MV2
+const request_url_regex = /^[^?]*\?url=(.*)$/;
+const redirect_url_template = browser.runtime.getURL("dummy") + "?settings=";
+
+function on_before_request(details)
+{
+ /*
+ * Content script will make a synchronous XmlHttpRequest to extension's
+ * `dummy` file to query settings for given URL. We smuggle that
+ * information in query parameter of the URL we redirect to.
+ * A risk of fingerprinting arises if a page with script execution allowed
+ * guesses the dummy file URL and makes an AJAX call to it. It is currently
+ * a problem in ManifestV2 Chromium-family port of Haketilo because Chromium
+ * uses predictable URLs for web-accessible resources. We plan to fix it in
+ * the future ManifestV3 port.
+ */
+ if (details.type !== "xmlhttprequest")
+ return {cancel: true};
+
+#IF DEBUG
+ console.debug(`Settings queried using XHR for '${details.url}'.`);
+#ENDIF
+
+ /*
+ * request_url should be of the following format:
+ * <url_for_extension's_dummy_file>?url=<valid_urlencoded_url>
+ */
+ const match = request_url_regex.exec(details.url);
+ if (match) {
+ const queried_url = decodeURIComponent(match[1]);
+
+ if (details.initiator && !queried_url.startsWith(details.initiator)) {
+ console.warn(`Blocked suspicious query of '${url}' by '${details.initiator}'. This might be the result of page fingerprinting the browser.`);
+ return {cancel: true};
+ }
+
+ const policy = decide_policy(tree, details.url, default_allow, secret);
+ if (!policy.error) {
+ const encoded_policy = encodeURIComponent(JSON.stringify(policy));
+ return {redirectUrl: redirect_url_template + encoded_policy};
+ }
+ }
+
+ console.warn(`Bad request! Expected ${browser.runtime.getURL("dummy")}?url=<valid_urlencoded_url>. Got ${request_url}. This might be the result of page fingerprinting the browser.`);
+
+ return {cancel: true};
+}
+
+const all_types = [
+ "main_frame", "sub_frame", "stylesheet", "script", "image", "font",
+ "object", "xmlhttprequest", "ping", "csp_report", "media", "websocket",
+ "other", "main_frame", "sub_frame"
+];
+#ENDIF
+
+async function start(secret_)
+{
+ secret = secret_;
+
+#IF CHROMIUM
+ const extra_opts = ["blocking", "extraHeaders"];
+#ELSE
+ const extra_opts = ["blocking"];
+#ENDIF
+
+ browser.webRequest.onHeadersReceived.addListener(
+ on_headers_received,
+ {urls: ["<all_urls>"], types: ["main_frame", "sub_frame"]},
+ extra_opts.concat("responseHeaders")
+ );
+
+#IF CHROMIUM && MV2
+ browser.webRequest.onBeforeRequest.addListener(
+ on_before_request,
+ {urls: [browser.runtime.getURL("dummy") + "*"], types: all_types},
+ extra_opts
+ );
+#ENDIF
+
+ await track_default_allow();
+}
+#EXPORT start
diff --git a/common/indexeddb.js b/common/indexeddb.js
index 096391a..e54d1ca 100644
--- a/common/indexeddb.js
+++ b/common/indexeddb.js
@@ -62,7 +62,8 @@ const stores = [
["files", {keyPath: "hash_key"}],
["file_uses", {keyPath: "hash_key"}],
["resources", {keyPath: "identifier"}],
- ["mappings", {keyPath: "identifier"}]
+ ["mappings", {keyPath: "identifier"}],
+ ["settings", {keyPath: "name"}]
];
let db = null;
@@ -207,7 +208,7 @@ async function incr_file_uses(context, file_ref, by=1)
const decr_file_uses = (ctx, file_ref) => incr_file_uses(ctx, file_ref, -1);
-async function finalize_items_transaction(context)
+async function finalize_transaction(context)
{
for (const uses of Object.values(context.file_uses)) {
if (uses.uses < 0)
@@ -248,7 +249,7 @@ async function finalize_items_transaction(context)
return context.result;
}
-#EXPORT finalize_items_transaction
+#EXPORT finalize_transaction
/*
* How a sample data argument to the function below might look like:
@@ -304,7 +305,7 @@ async function _save_items(resources, mappings, context)
for (const item of resources.concat(mappings))
await save_item(item, context);
- await finalize_items_transaction(context);
+ await finalize_transaction(context);
}
/*
@@ -314,9 +315,9 @@ async function _save_items(resources, mappings, context)
* object with keys being of the form `sha256-<file's-sha256-sum>`.
*
* context should be one returned from start_items_transaction() and should be
- * later passed to finalize_items_transaction() so that files depended on are
- * added to IndexedDB and files that are no longer depended on after this
- * operation are removed from IndexedDB.
+ * later passed to finalize_transaction() so that files depended on are added to
+ * IndexedDB and files that are no longer depended on after this operation are
+ * removed from IndexedDB.
*/
async function save_item(item, context)
{
@@ -346,9 +347,9 @@ async function _remove_item(store_name, identifier, context)
* Remove definition of a resource/mapping from IndexedDB.
*
* context should be one returned from start_items_transaction() and should be
- * later passed to finalize_items_transaction() so that files depended on are
- * added to IndexedDB and files that are no longer depended on after this
- * operation are removed from IndexedDB.
+ * later passed to finalize_transaction() so that files depended on are added to
+ * IndexedDB and files that are no longer depended on after this operation are
+ * removed from IndexedDB.
*/
async function remove_item(store_name, identifier, context)
{
@@ -363,26 +364,49 @@ const remove_resource = (id, ctx) => remove_item("resources", id, ctx);
const remove_mapping = (id, ctx) => remove_item("mappings", id, ctx);
#EXPORT remove_mapping
+/* A simplified kind of transaction for modifying just the "settings" store. */
+async function start_settings_transaction()
+{
+ const db = await get_db();
+ return make_context(db.transaction("settings", "readwrite"), {});
+}
+
+async function set_setting(name, value)
+{
+ const context = await start_settings_transaction();
+ broadcast.prepare(context.sender, `idb_changes_settings`, name);
+ await idb_put(context.transaction, "settings", {name, value});
+ return finalize_transaction(context);
+}
+#EXPORT set_setting
+
+async function get_setting(name)
+{
+ const transaction = (await get_db()).transaction("settings");
+ return ((await idb_get(transaction, "settings", name)) || {}).value;
+}
+#EXPORT get_setting
+
/* Callback used when listening to broadcasts while tracking db changes. */
-async function track_change(tracking, identifier)
+async function track_change(tracking, key)
{
const transaction = (await get_db()).transaction([tracking.store_name]);
- const new_val = await idb_get(transaction, tracking.store_name, identifier);
+ const new_val = await idb_get(transaction, tracking.store_name, key);
- tracking.onchange({identifier, new_val});
+ tracking.onchange({key, new_val});
}
/*
* Monitor changes to `store_name` IndexedDB object store.
*
- * `store_name` should be either "resources" or "mappings".
+ * `store_name` should be either "resources", "mappings" or "settings".
*
* `onchange` should be a callback that will be called when an item is added,
* modified or removed from the store. The callback will be passed an object
* representing the change as its first argument. This object will have the
* form:
* {
- * identifier: "the identifier of modified resource/mapping",
+ * key: "the identifier of modified resource/mapping or settings key",
* new_val: undefined // `undefined` if item removed, item object otherwise
* }
*
@@ -395,7 +419,7 @@ async function track_change(tracking, identifier)
* actually modified or that it only gets called once after multiple quick
* changes to an item.
*/
-async function track(store_name, onchange)
+async function start_tracking(store_name, onchange)
{
const tracking = {store_name, onchange};
tracking.listener =
@@ -408,12 +432,10 @@ async function track(store_name, onchange)
return [tracking, (await wait_request(all_req)).target.result];
}
-const track_resources = onchange => track("resources", onchange);
-#EXPORT track_resources
-
-const track_mappings = onchange => track("mappings", onchange);
-#EXPORT track_mappings
+const track = {};
+for (const store_name of ["resources", "mappings", "settings"])
+ track[store_name] = onchange => start_tracking(store_name, onchange);
+#EXPORT track
const untrack = tracking => broadcast.close(tracking.listener);
#EXPORT untrack
-
diff --git a/common/misc.js b/common/misc.js
index dc4a598..82f6cbf 100644
--- a/common/misc.js
+++ b/common/misc.js
@@ -67,17 +67,6 @@ function gen_nonce(length=16)
}
#EXPORT gen_nonce
-/* CSP rule that blocks scripts according to policy's needs. */
-function make_csp_rule(policy)
-{
- let rule = "prefetch-src 'none'; script-src-attr 'none';";
- const script_src = policy.nonce !== undefined ?
- `'nonce-${policy.nonce}'` : "'none'";
- rule += ` script-src ${script_src}; script-src-elem ${script_src};`;
- return rule;
-}
-#EXPORT make_csp_rule
-
/* Check if some HTTP header might define CSP rules. */
const csp_header_regex =
/^\s*(content-security-policy|x-webkit-csp|x-content-security-policy)/i;
diff --git a/common/patterns_query_tree.js b/common/patterns_query_tree.js
index 1bbdb39..f8ec405 100644
--- a/common/patterns_query_tree.js
+++ b/common/patterns_query_tree.js
@@ -41,6 +41,8 @@
* proprietary program, I am not going to enforce this in court.
*/
+// TODO! Modify the code to use `Object.create(null)` instead of `{}`.
+
#FROM common/patterns.js IMPORT deconstruct_url
/* "Pattern Tree" is how we refer to the data structure used for querying
diff --git a/common/policy.js b/common/policy.js
new file mode 100644
index 0000000..ebd663f
--- /dev/null
+++ b/common/policy.js
@@ -0,0 +1,106 @@
+/**
+ * This file is part of Haketilo.
+ *
+ * Function: Determining what to do on a given web page.
+ *
+ * Copyright (C) 2021 Wojtek Kosior
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * As additional permission under GNU GPL version 3 section 7, you
+ * may distribute forms of that code without the copy of the GNU
+ * GPL normally required by section 4, provided you include this
+ * license notice and, in case of non-source distribution, a URL
+ * through which recipients can access the Corresponding Source.
+ * If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not
+ * obligated to do so. If you do not wish to do so, delete this
+ * exception statement from your version.
+ *
+ * As a special exception to the GPL, any HTML file which merely
+ * makes function calls to this code, and for that purpose
+ * includes it by reference shall be deemed a separate work for
+ * copyright law purposes. If you modify this code, you may extend
+ * this exception to your version of the code, but you are not
+ * obligated to do so. If you do not wish to do so, delete this
+ * exception statement from your version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
+ * license. Although I request that you do not make use this code in a
+ * proprietary program, I am not going to enforce this in court.
+ */
+
+#IMPORT common/patterns_query_tree.js AS pqt
+
+#FROM common/sha256.js IMPORT sha256
+
+/*
+ * CSP rule that either blocks all scripts or only allows scripts with specified
+ * nonce attached.
+ */
+function make_csp(nonce)
+{
+ const rule = nonce ? `nonce-${nonce}` : "none";
+ const csp_dict = {"prefetch-src": "none", "script-src-attr": "none"};
+ Object.assign(csp_dict, {"script-src": rule, "script-src-elem": rule});
+ return Object.entries(csp_dict).map(([a, b]) => `${a} '${b}';`).join(" ");
+}
+
+function decide_policy(patterns_tree, url, default_allow, secret)
+{
+ const policy = {allow: default_allow};
+
+ try {
+ var payloads = pqt.search(patterns_tree, url).next().value;
+ } catch (e) {
+ console.error(e);
+ policy.allow = false;
+ policy.error = true;
+ }
+
+ if (payloads !== undefined) {
+ policy.mapping = Object.keys(payloads).sort()[0];
+ const payload = payloads[policy.mapping];
+ if (payload.allow !== undefined) {
+ policy.allow = payload.allow;
+ } else /* if (payload.identifier) */ {
+ policy.allow = false;
+ policy.payload = payload;
+ /*
+ * Hash a secret and other values into a string that's unpredictable
+ * to someone who does not know these values. What we produce here
+ * is not a true "nonce" because it might get produced multiple
+ * times given the same url and mapping choice. Nevertheless, this
+ * is reasonably good given the limitations WebExtension APIs and
+ * environments give us. If we were using a true nonce, we'd have no
+ * reliable way of passing it to our content scripts.
+ */
+ const nonce_source = [
+ policy.mapping,
+ policy.payload.identifier,
+ url,
+ secret
+ ];
+ policy.nonce = sha256(nonce_source.join(":"));
+ }
+ }
+
+ if (!policy.allow)
+ policy.csp = make_csp(policy.nonce);
+
+ return policy;
+}
+#EXPORT decide_policy
+
+#EXPORT () => ({allow: false, csp: make_csp()}) AS fallback_policy
diff --git a/compute_scripts.awk b/compute_scripts.awk
index b778934..e17d12c 100755
--- a/compute_scripts.awk
+++ b/compute_scripts.awk
@@ -28,7 +28,12 @@ BEGIN {
path_ext_re = "(\\.[-_.a-zA-Z0-9]*)?"
path_re = "^" path_dir_re identifier_re path_ext_re "$"
- directive_args_patterns["IF"] = "^(NOT[[:space:]]+)?" identifier_re "$"
+ if_clause_re = "!?" identifier_re
+ if_AND_re = "([[:space:]]+&&[[:space:]]+" if_clause_re ")*"
+ if_OR_re = "([[:space:]]+[|][|][[:space:]]+" if_clause_re ")*"
+
+ directive_args_patterns["IF"] = ("^" if_clause_re \
+ "(" if_AND_re "|" if_OR_re ")$")
directive_args_patterns["ENDIF"] = "^$"
directive_args_patterns["ELSE"] = "^$"
directive_args_patterns["ELIF"] = "^(NOT[[:space:]]+)?" identifier_re "$"
@@ -215,8 +220,7 @@ function process_file(path, read_path, mode,
if (directive == "IF") {
if (if_nesting_true == if_nesting) {
- if ((last_token(directive_args) in defines) == \
- (directive_args ~ /^[^[:space:]]+$/))
+ if (if_condition_true(directive_args))
if_nesting_true++
else
if_branch_processed = false
@@ -255,8 +259,7 @@ function process_file(path, read_path, mode,
}
if (if_nesting == if_nesting_true + 1 && !if_branch_processed &&
- (last_token(directive_args) in defines) == \
- (directive_args ~ /^[^[:space:]]+$/)) {
+ if_condition_true(directive_args)) {
if_nesting_true++
} else if (if_nesting == if_nesting_true) {
if_branch_processed = true
@@ -323,6 +326,35 @@ function process_file(path, read_path, mode,
delete reading[read_path]
}
+# Evaluate the arguments of a conditional directive. The arguments are a
+# whitespace-separated sequence of clauses joined by operators. A clause is an
+# identifier, optionally prefixed with '!', and is true when the identifier is
+# (or, if negated, is not) present in `defines`. Once a '||' operator has been
+# seen, all following clauses are OR-ed into the result; otherwise clauses are
+# AND-ed. Parameters listed after `directive_args` are local variables.
+function if_condition_true(directive_args,
+                           result, bool, first_iter, word, negated, alt) {
+    first_iter = true
+
+    while (directive_args) {
+        # Take the next clause, then note and drop the operator that follows.
+        word = first_token(directive_args)
+        sub(/^[^[:space:]]+[[:space:]]*/, "", directive_args)
+        if (directive_args ~ /^[|][|]/)
+            alt = true
+        sub(/^[^[:space:]]+[[:space:]]*/, "", directive_args)
+
+        negated = word ~ /^!/
+        sub(/^!/, "", word)
+        bool = (word in defines) != negated
+
+        if (first_iter) {
+            first_iter = false
+            result = bool
+        } else if (alt) {
+            result = result || bool
+        } else {
+            result = result && bool
+        }
+    }
+
+    return result
+}
+
function include_file(root_path, read_path, included_path, line, verbatim,
read_line, result) {
if (validate_path(read_path, included_path, line))
diff --git a/content/main.js b/content/main.js
index 9e98635..d97747f 100644
--- a/content/main.js
+++ b/content/main.js
@@ -46,9 +46,19 @@
#FROM content/page_actions.js IMPORT handle_page_actions
#FROM common/misc.js IMPORT gen_nonce, is_privileged_url, \
- make_csp_rule, csp_header_regex
+ csp_header_regex
#FROM common/browser.js IMPORT browser
+/* CSP rule that blocks scripts according to policy's needs. */
+function make_csp_rule(policy)
+{
+ let rule = "prefetch-src 'none'; script-src-attr 'none';";
+ const script_src = policy.nonce !== undefined ?
+ `'nonce-${policy.nonce}'` : "'none'";
+ rule += ` script-src ${script_src}; script-src-elem ${script_src};`;
+ return rule;
+}
+
document.content_loaded = document.readyState === "complete";
const wait_loaded = e => e.content_loaded ? Promise.resolve() :
new Promise(c => e.addEventListener("DOMContentLoaded", c, {once: true}));
diff --git a/manifest.json b/manifest.json
index 7a9edd5..ec94c6e 100644
--- a/manifest.json
+++ b/manifest.json
@@ -11,11 +11,9 @@
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// CC0 1.0 Universal License for more details.
-#IF NOT MOZILLA
-#IF NOT CHROMIUM
+#IF !MOZILLA && !CHROMIUM
#ERROR Target browser not selected! Please define 'MOZILLA' or 'CHROMIUM'.
#ENDIF
-#ENDIF
{
#IF MV2
diff --git a/test/extension_crafting.py b/test/extension_crafting.py
index 9b985b3..df45d26 100644
--- a/test/extension_crafting.py
+++ b/test/extension_crafting.py
@@ -58,6 +58,7 @@ def manifest_template():
'<all_urls>',
'unlimitedStorage'
],
+ 'content_security_policy': "default-src 'self'; script-src 'self' https://serve.scrip.ts;",
'web_accessible_resources': ['testpage.html'],
'background': {
'persistent': True,
diff --git a/test/profiles.py b/test/profiles.py
index 795a0db..acdecb6 100755
--- a/test/profiles.py
+++ b/test/profiles.py
@@ -34,22 +34,9 @@ from .misc_constants import *
class HaketiloFirefox(webdriver.Firefox):
"""
- This wrapper class around selenium.webdriver.Firefox adds a `loaded_scripts`
- instance property that gets resetted to an empty array every time the
- `get()` method is called and also facilitates removing the temporary
- profile directory after Firefox quits.
+ This wrapper class around selenium.webdriver.Firefox facilitates removing
+ the temporary profile directory after Firefox quits.
"""
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
- self.reset_loaded_scripts()
-
- def reset_loaded_scripts(self):
- self.loaded_scripts = []
-
- def get(self, *args, **kwargs):
- self.reset_loaded_scripts()
- super().get(*args, **kwargs)
-
def quit(self, *args, **kwargs):
profile_path = self.firefox_profile.path
super().quit(*args, **kwargs)
@@ -71,8 +58,13 @@ def set_profile_proxy(profile, proxy_host, proxy_port):
profile.set_preference(f'network.proxy.backup.{proto}', '')
profile.set_preference(f'network.proxy.backup.{proto}_port', 0)
-def set_profile_console_logging(profile):
- profile.set_preference('devtools.console.stdout.content', True)
+def set_profile_csp_enabled(profile):
+ """
+ By default, Firefox Driver disables CSP. The extension we're testing uses
+ CSP extensively, so we use this function to prepare a Firefox profile that
+ has it enabled.
+ """
+ profile.set_preference('security.csp.enable', True)
# The function below seems not to work for extensions that are
# temporarily-installed in Firefox safe mode. Testing is needed to see if it
@@ -97,7 +89,7 @@ def firefox_safe_mode(firefox_binary=default_firefox_binary,
"""
profile = webdriver.FirefoxProfile()
set_profile_proxy(profile, proxy_host, proxy_port)
- set_profile_console_logging(profile)
+ set_profile_csp_enabled(profile)
options = Options()
options.add_argument('--safe-mode')
@@ -117,7 +109,7 @@ def firefox_with_profile(firefox_binary=default_firefox_binary,
"""
profile = webdriver.FirefoxProfile(profile_dir)
set_profile_proxy(profile, proxy_host, proxy_port)
- set_profile_console_logging(profile)
+ set_profile_csp_enabled(profile)
set_webextension_uuid(profile, default_haketilo_id)
return HaketiloFirefox(firefox_profile=profile,
diff --git a/test/script_loader.py b/test/script_loader.py
index f66f9ae..53de779 100644
--- a/test/script_loader.py
+++ b/test/script_loader.py
@@ -65,7 +65,7 @@ def load_script(path, code_to_add=None):
awk = subprocess.run(['awk', '-f', str(awk_script), '--', '-D', 'MOZILLA',
'-D', 'MV2', '-D', 'TEST', '-D', 'UNIT_TEST',
- '--output=amalgamate-js:' + key],
+ '-D', 'DEBUG', '--output=amalgamate-js:' + key],
stdout=subprocess.PIPE, cwd=script_root, check=True)
script = awk.stdout.decode()
script_cache[key] = script
diff --git a/test/unit/conftest.py b/test/unit/conftest.py
index f9a17f8..beffaf5 100644
--- a/test/unit/conftest.py
+++ b/test/unit/conftest.py
@@ -34,6 +34,7 @@ from selenium.webdriver.support import expected_conditions as EC
from ..profiles import firefox_safe_mode
from ..server import do_an_internet
from ..extension_crafting import make_extension
+from ..world_wide_library import start_serving_script, dump_scripts
@pytest.fixture(scope="package")
def proxy():
@@ -77,55 +78,55 @@ def webextension(driver, request):
driver.uninstall_addon(addon_id)
ext_path.unlink()
-script_injecting_script = '''\
+script_injector_script = '''\
/*
* Selenium by default executes scripts in some weird one-time context. We want
* separately-loaded scripts to be able to access global variables defined
* before, including those declared with `const` or `let`. To achieve that, we
- * run our scripts by injecting them into the page inside a <script> tag. We use
- * custom properties of the `window` object to communicate with injected code.
+ * run our scripts by injecting them into the page with a <script> tag that runs
+ * javascript served by our proxy. We use custom properties of the `window`
+ * object to communicate with injected code.
*/
-
-const script_elem = document.createElement('script');
-script_elem.textContent = arguments[0];
-
-delete window.haketilo_selenium_return_value;
-delete window.haketilo_selenium_exception;
-window.returnval = (val => window.haketilo_selenium_return_value = val);
-window.arguments = arguments[1];
-
-document.body.append(script_elem);
-
-/*
- * To ease debugging, we want this script to signal all exceptions from the
- * injectee.
- */
-try {
+const inject = async () => {
+ delete window.haketilo_selenium_return_value;
+ delete window.haketilo_selenium_exception;
+ window.returnval = val => window.haketilo_selenium_return_value = val;
+
+ const injectee = document.createElement('script');
+ injectee.src = arguments[0];
+ injectee.type = "application/javascript";
+ injectee.async = true;
+ const prom = new Promise(cb => injectee.onload = cb);
+
+ window.arguments = arguments[1];
+ document.body.append(injectee);
+
+ await prom;
+
+ /*
+ * To ease debugging, we want this script to signal all exceptions from the
+ * injectee.
+ */
if (window.haketilo_selenium_exception !== false)
- throw 'Error in injected script! Check your geckodriver.log!';
-} finally {
- script_elem.remove();
-}
+ throw ['haketilo_selenium_error',
+ 'Error in injected script! Check your geckodriver.log and ./injected_scripts/!'];
-return window.haketilo_selenium_return_value;
+ return window.haketilo_selenium_return_value;
+}
+return inject();
'''
def _execute_in_page_context(driver, script, args):
script = script + '\n;\nwindow.haketilo_selenium_exception = false;'
- driver.loaded_scripts.append(script)
+ script_url = start_serving_script(script)
+
try:
- return driver.execute_script(script_injecting_script, script, args)
+ result = driver.execute_script(script_injector_script, script_url, args)
+ if type(result) == list and result[0] == 'haketilo_selenium_error':
+ raise Exception(result[1])
+ return result
except Exception as e:
- import sys
-
- print("Scripts loaded since driver's last get() method call:",
- file=sys.stderr)
-
- for script in driver.loaded_scripts:
- lines = enumerate(script.split('\n'), 1)
- for err_info in [('===',), *lines]:
- print(*err_info, file=sys.stderr)
-
+ dump_scripts()
raise e from None
# Some fixtures here just define functions that operate on driver. We should
diff --git a/test/unit/test_indexeddb.py b/test/unit/test_indexeddb.py
index 476690c..df3df81 100644
--- a/test/unit/test_indexeddb.py
+++ b/test/unit/test_indexeddb.py
@@ -75,26 +75,9 @@ def make_sample_mapping():
def file_ref(file_name):
return {'file': file_name, 'hash_key': sample_files[file_name]['hash_key']}
-@pytest.mark.get_page('https://gotmyowndoma.in')
-def test_haketilodb_save_remove(execute_in_page):
- """
- indexeddb.js facilitates operating on Haketilo's internal database.
- Verify database operations work properly.
- """
- execute_in_page(indexeddb_js())
- # Mock some unwanted imports.
+def clear_indexeddb(execute_in_page):
execute_in_page(
'''{
- const broadcast_mock = {};
- const nop = () => {};
- for (const key in broadcast)
- broadcast_mock[key] = nop;
- broadcast = broadcast_mock;
- }''')
-
- # Start with no database.
- execute_in_page(
- '''
async function delete_db() {
if (db) {
db.close();
@@ -108,12 +91,13 @@ def test_haketilodb_save_remove(execute_in_page):
}
returnval(delete_db());
- '''
+ }'''
)
+def get_db_contents(execute_in_page):
# Facilitate retrieving all IndexedDB contents.
- execute_in_page(
- '''
+ return execute_in_page(
+ '''{
async function get_database_contents()
{
const db = await get_db();
@@ -130,20 +114,45 @@ def test_haketilodb_save_remove(execute_in_page):
store_names_reqs.forEach(([sn, req]) => result[sn] = req.result);
return result;
}
- ''')
+ returnval(get_database_contents());
+ }''')
+
+def mock_broadcast(execute_in_page):
+ execute_in_page(
+ '''{
+ const broadcast_mock = {};
+ const nop = () => {};
+ for (const key in broadcast)
+ broadcast_mock[key] = nop;
+ broadcast = broadcast_mock;
+ }''')
+
+@pytest.mark.get_page('https://gotmyowndoma.in')
+def test_haketilodb_item_modifications(driver, execute_in_page):
+ """
+ indexeddb.js facilitates operating on Haketilo's internal database.
+ Verify database operations on mappings/resources work properly.
+ """
+ execute_in_page(indexeddb_js())
+ mock_broadcast(execute_in_page)
+
+ # Start with no database.
+ clear_indexeddb(execute_in_page)
sample_item = make_sample_resource()
sample_item['source_copyright'][0]['extra_prop'] = True
- database_contents = execute_in_page(
+ execute_in_page(
'''{
const promise = start_items_transaction(["resources"], arguments[1])
.then(ctx => save_item(arguments[0], ctx).then(() => ctx))
- .then(finalize_items_transaction)
- .then(get_database_contents);
+ .then(finalize_transaction);
returnval(promise);
}''',
sample_item, sample_files_by_hash)
+
+ database_contents = get_db_contents(execute_in_page)
+
assert len(database_contents['files']) == 4
assert all([sample_files_by_hash[file['hash_key']] == file['contents']
for file in database_contents['files']])
@@ -162,31 +171,33 @@ def test_haketilodb_save_remove(execute_in_page):
sample_item['scripts'].append(file_ref('combined.js'))
incomplete_files = {**sample_files_by_hash}
incomplete_files.pop(sample_files['combined.js']['hash_key'])
- result = execute_in_page(
+ exception = execute_in_page(
'''{
- const promise = (async () => {
+ const args = arguments;
+ async function try_add_item()
+ {
const context =
- await start_items_transaction(["resources"], arguments[1]);
+ await start_items_transaction(["resources"], args[1]);
try {
- await save_item(arguments[0], context);
- await finalize_items_transaction(context);
- return {};
+ await save_item(args[0], context);
+ await finalize_transaction(context);
+ return;
} catch(e) {
- var exception = e;
+ return e;
}
-
- return {exception, db_contents: await get_database_contents()};
- })();
- returnval(promise);
+ }
+ returnval(try_add_item());
}''',
sample_item, incomplete_files)
- assert result
- assert 'file not present' in result['exception']
+ previous_database_contents = database_contents
+ database_contents = get_db_contents(execute_in_page)
+
+ assert 'file not present' in exception
for key, val in database_contents.items():
keyfun = lambda item: item.get('hash_key') or item['identifier']
- assert sorted(result['db_contents'][key], key=keyfun) \
- == sorted(val, key=keyfun)
+ assert sorted(previous_database_contents[key], key=keyfun) \
+ == sorted(val, key=keyfun)
# See if adding another item that partially uses first's files works OK.
sample_item = make_sample_mapping()
@@ -194,12 +205,13 @@ def test_haketilodb_save_remove(execute_in_page):
'''{
const promise = start_items_transaction(["mappings"], arguments[1])
.then(ctx => save_item(arguments[0], ctx).then(() => ctx))
- .then(finalize_items_transaction)
- .then(get_database_contents);
+ .then(finalize_transaction);
returnval(promise);
}''',
sample_item, sample_files_by_hash)
+ database_contents = get_db_contents(execute_in_page)
+
names = ['README.md', 'report.spdx', 'LICENSES/somelicense.txt', 'hello.js',
'bye.js']
sample_files_list = [sample_files[name] for name in names]
@@ -222,17 +234,18 @@ def test_haketilodb_save_remove(execute_in_page):
# Try removing the items to get an empty database again.
results = [None, None]
for i, item_type in enumerate(['resource', 'mapping']):
- results[i] = execute_in_page(
+ execute_in_page(
f'''{{
const remover = remove_{item_type};
const promise =
start_items_transaction(["{item_type}s"], {{}})
.then(ctx => remover('helloapple', ctx).then(() => ctx))
- .then(finalize_items_transaction)
- .then(get_database_contents);
+ .then(finalize_transaction);
returnval(promise);
}}''')
+ results[i] = get_db_contents(execute_in_page)
+
names = ['README.md', 'report.spdx']
sample_files_list = [sample_files[name] for name in names]
uses_list = [1, 1]
@@ -271,22 +284,48 @@ def test_haketilodb_save_remove(execute_in_page):
},
'files': sample_files_by_hash
}
- database_contents = execute_in_page(
- '''
- initial_data = arguments[0];
- returnval(delete_db().then(() => get_database_contents()));
- ''',
- initial_data)
+
+ clear_indexeddb(execute_in_page)
+ execute_in_page('initial_data = arguments[0];', initial_data)
+ database_contents = get_db_contents(execute_in_page)
+
assert database_contents['resources'] == [sample_resource]
assert database_contents['mappings'] == [sample_mapping]
+@pytest.mark.get_page('https://gotmyowndoma.in')
+def test_haketilodb_settings(driver, execute_in_page):
+ """
+ indexeddb.js facilitates operating on Haketilo's internal database.
+ Verify that assigning and retrieving values of simple "settings" in the
+ database works properly.
+ """
+ execute_in_page(indexeddb_js())
+ mock_broadcast(execute_in_page)
+
+ # Start with no database.
+ clear_indexeddb(execute_in_page)
+
+ assert get_db_contents(execute_in_page)['settings'] == []
+
+ assert execute_in_page('returnval(get_setting("option15"));') == None
+
+ execute_in_page('returnval(set_setting("option15", "disable"));')
+ assert execute_in_page('returnval(get_setting("option15"));') == 'disable'
+
+ execute_in_page('returnval(set_setting("option15", "enable"));')
+ assert execute_in_page('returnval(get_setting("option15"));') == 'enable'
+
test_page_html = '''
<!DOCTYPE html>
<script src="/testpage.js"></script>
+<script>console.log("inline!")</script>
+<script nonce="123456789">console.log("inline nonce!")</script>
<h2>resources</h2>
<ul id="resources"></ul>
<h2>mappings</h2>
<ul id="mappings"></ul>
+<h2>settings</h2>
+<ul id="settings"></ul>
'''
@pytest.mark.ext_data({
@@ -328,15 +367,21 @@ def test_haketilodb_track(driver, execute_in_page, wait_elem_text):
}
for window in reversed(windows):
driver.switch_to.window(window)
- execute_in_page('initial_data = arguments[0];', initial_data)
-
- # See if track_*() functions properly return the already-existing items.
+ try :
+ driver.execute_script('console.log("uuuuuuu");')
+ execute_in_page('initial_data = arguments[0];', initial_data)
+ except:
+ from time import sleep
+ sleep(100000)
+ execute_in_page('returnval(set_setting("option15", "123"));')
+
+ # See if track.*() functions properly return the already-existing items.
execute_in_page(
'''
function update_item(store_name, change)
{
console.log('update', ...arguments);
- const elem_id = `${store_name}_${change.identifier}`;
+ const elem_id = `${store_name}_${change.key}`;
let elem = document.getElementById(elem_id);
elem = elem || document.createElement("li");
elem.id = elem_id;
@@ -348,35 +393,32 @@ def test_haketilodb_track(driver, execute_in_page, wait_elem_text):
let resource_tracking, resource_items, mapping_tracking, mapping_items;
- async function start_tracking()
+ async function start_reporting()
{
- const update_resource = change => update_item("resources", change);
- const update_mapping = change => update_item("mappings", change);
-
- [resource_tracking, resource_items] =
- await track_resources(update_resource);
- [mapping_tracking, mapping_items] =
- await track_mappings(update_mapping);
-
- for (const item of resource_items)
- update_resource({identifier: item.identifier, new_val: item});
- for (const item of mapping_items)
- update_mapping({identifier: item.identifier, new_val: item});
+ for (const store_name of ["resources", "mappings", "settings"]) {
+ [tracking, items] =
+ await track[store_name](ch => update_item(store_name, ch));
+ const prop = store_name === "settings" ? "name" : "identifier";
+ for (const item of items)
+ update_item(store_name, {key: item[prop], new_val: item});
+ }
}
- returnval(start_tracking());
+ returnval(start_reporting());
''')
item_counts = driver.execute_script(
'''
const childcount = id => document.getElementById(id).childElementCount;
- return ["resources", "mappings"].map(childcount);
+ return ["resources", "mappings", "settings"].map(childcount);
''')
- assert item_counts == [1, 1]
+ assert item_counts == [1, 1, 1]
resource_json = driver.find_element_by_id('resources_helloapple').text
mapping_json = driver.find_element_by_id('mappings_helloapple').text
+ setting_json = driver.find_element_by_id('settings_option15').text
assert json.loads(resource_json) == sample_resource
assert json.loads(mapping_json) == sample_mapping
+ assert json.loads(setting_json) == {'name': 'option15', 'value': '123'}
# See if item additions get tracked properly.
driver.switch_to.window(windows[1])
@@ -398,14 +440,17 @@ def test_haketilodb_track(driver, execute_in_page, wait_elem_text):
'files': sample_files_by_hash
}
execute_in_page('returnval(save_items(arguments[0]));', sample_data)
+ execute_in_page('returnval(set_setting("option22", "abc"));')
driver.switch_to.window(windows[0])
driver.implicitly_wait(10)
resource_json = driver.find_element_by_id('resources_helloapple-copy').text
mapping_json = driver.find_element_by_id('mappings_helloapple-copy').text
+ setting_json = driver.find_element_by_id('settings_option22').text
driver.implicitly_wait(0)
assert json.loads(resource_json) == sample_resource2
assert json.loads(mapping_json) == sample_mapping2
+ assert json.loads(setting_json) == {'name': 'option22', 'value': 'abc'}
# See if item deletions get tracked properly.
driver.switch_to.window(windows[1])
@@ -417,7 +462,8 @@ def test_haketilodb_track(driver, execute_in_page, wait_elem_text):
const ctx = await start_items_transaction(store_names, {});
await remove_resource("helloapple", ctx);
await remove_mapping("helloapple-copy", ctx);
- await finalize_items_transaction(ctx);
+ await finalize_transaction(ctx);
+ await set_setting("option22", null);
}
returnval(remove_items());
}''')
@@ -430,7 +476,8 @@ def test_haketilodb_track(driver, execute_in_page, wait_elem_text):
return False
except WebDriverException:
pass
- return True
+ option_text = driver.find_element_by_id('settings_option22').text
+ return json.loads(option_text)['value'] == None
driver.switch_to.window(windows[0])
WebDriverWait(driver, 10).until(condition_items_absent)
diff --git a/test/unit/test_patterns_query_manager.py b/test/unit/test_patterns_query_manager.py
index 8ae7c28..ae1f490 100644
--- a/test/unit/test_patterns_query_manager.py
+++ b/test/unit/test_patterns_query_manager.py
@@ -25,10 +25,9 @@ from selenium.webdriver.support.ui import WebDriverWait
from ..script_loader import load_script
def simple_sample_mapping(patterns, fruit):
- if type(patterns) is list:
- payloads = dict([(p, {'identifier': fruit}) for p in patterns])
- else:
- payloads = {patterns: {'identifier': fruit}}
+ if type(patterns) is not list:
+ patterns = [patterns]
+ payloads = dict([(p, {'identifier': f'{fruit}-{p}'}) for p in patterns])
return {
'source_copyright': [],
'type': 'mapping',
@@ -36,9 +35,13 @@ def simple_sample_mapping(patterns, fruit):
'payloads': payloads
}
-content_script_re = re.compile(r'this.haketilo_pattern_tree = (.*);')
+content_script_tree_re = re.compile(r'this.haketilo_pattern_tree = (.*);')
def extract_tree_data(content_script_text):
- return json.loads(content_script_re.search(content_script_text)[1])
+ return json.loads(content_script_tree_re.search(content_script_text)[1])
+
+content_script_mapping_re = re.compile(r'this.haketilo_mappings = (.*);')
+def extract_mappings_data(content_script_text):
+ return json.loads(content_script_mapping_re.search(content_script_text)[1])
# Fields that are not relevant for testing are omitted from these mapping
# definitions.
@@ -82,7 +85,7 @@ def test_pqm_tree_building(driver, execute_in_page):
return [{}, initial_mappings];
}
- haketilodb.track_mappings = track_mock;
+ haketilodb.track.mappings = track_mock;
let last_script;
let unregister_called = 0;
@@ -104,7 +107,10 @@ def test_pqm_tree_building(driver, execute_in_page):
tree, last_script, unregister_called]);
''',
'https://gotmyowndoma.in/index.html')
- assert found == dict([(m['identifier'], m) for m in sample_mappings[0:2]])
+ best_pattern = 'https://gotmyowndoma.in/index.html'
+ assert found == \
+ dict([(f'inject-{fruit}', {'identifier': f'{fruit}-{best_pattern}'})
+ for fruit in ('banana', 'orange')])
assert tree == extract_tree_data(content_script)
assert deregistrations == 0
@@ -114,12 +120,8 @@ def test_pqm_tree_building(driver, execute_in_page):
execute_in_page(
'''
- for (const mapping of arguments[0]) {
- mappingchange({
- identifier: mapping.identifier,
- new_val: mapping
- });
- }
+ for (const mapping of arguments[0])
+ mappingchange({key: mapping.identifier, new_val: mapping});
''',
sample_mappings[2:])
WebDriverWait(driver, 10).until(condition_mappings_added)
@@ -129,7 +131,8 @@ def test_pqm_tree_building(driver, execute_in_page):
def condition_odd_removed(driver):
last_script = execute_in_page('returnval(last_script);')
- return all([id not in last_script for id in odd])
+ return (all([id not in last_script for id in odd]) and
+ all([id in last_script for id in even]))
def condition_all_removed(driver):
content_script = execute_in_page('returnval(last_script);')
@@ -137,7 +140,7 @@ def test_pqm_tree_building(driver, execute_in_page):
execute_in_page(
'''
- arguments[0].forEach(identifier => mappingchange({identifier}));
+ arguments[0].forEach(identifier => mappingchange({key: identifier}));
''',
odd)
@@ -145,7 +148,7 @@ def test_pqm_tree_building(driver, execute_in_page):
execute_in_page(
'''
- arguments[0].forEach(identifier => mappingchange({identifier}));
+ arguments[0].forEach(identifier => mappingchange({key: identifier}));
''',
even)
@@ -224,7 +227,7 @@ def test_pqm_script_injection(driver, execute_in_page):
const ctx = await start_items_transaction(["mappings"], {});
for (const id of identifiers)
await remove_mapping(id, ctx);
- await finalize_items_transaction(ctx);
+ await finalize_transaction(ctx);
}
returnval(remove_items());
}''',
diff --git a/test/unit/test_policy_deciding.py b/test/unit/test_policy_deciding.py
new file mode 100644
index 0000000..a360537
--- /dev/null
+++ b/test/unit/test_policy_deciding.py
@@ -0,0 +1,121 @@
+# SPDX-License-Identifier: CC0-1.0
+
+"""
+Haketilo unit tests - determining what to do on a given web page
+"""
+
+# This file is part of Haketilo
+#
+# Copyright (C) 2021, Wojtek Kosior <koszko@koszko.org>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the CC0 1.0 Universal License as published by
+# the Creative Commons Corporation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# CC0 1.0 Universal License for more details.
+
+import re
+from hashlib import sha256
+import pytest
+
+from ..script_loader import load_script
+
+csp_re = re.compile(r'^\S+\s+\S+;(?:\s+\S+\s+\S+;)*$')
+rule_re = re.compile(r'^\s*(?P<src_kind>\S+)\s+(?P<allowed_origins>\S+)$')
+def parse_csp(csp):
+ '''
+ Parsing of CSP string into a dict. A simplified format of CSP is assumed.
+ '''
+ assert csp_re.match(csp)
+
+ result = {}
+
+ for rule in csp.split(';')[:-1]:
+ match = rule_re.match(rule)
+ result[match.group('src_kind')] = match.group('allowed_origins')
+
+ return result
+
+@pytest.mark.get_page('https://gotmyowndoma.in')
+def test_decide_policy(execute_in_page):
+ """
+ policy.js contains code that, using a Pattern Query Tree instance and a URL,
+ decides what Haketilo should do on a page opened at that URL, i.e. whether
+ it should block or allow script execution and whether it should inject its
+ own scripts and which ones. Test that the policy object gets constructed
+ properly.
+ """
+ execute_in_page(load_script('common/policy.js'))
+
+ policy = execute_in_page(
+ '''
+ returnval(decide_policy(pqt.make(), "http://unkno.wn/", true, "abcd"));
+ ''')
+ assert policy['allow'] == True
+ for prop in ('mapping', 'payload', 'nonce', 'csp'):
+ assert prop not in policy
+
+ policy = execute_in_page(
+ '''{
+ const tree = pqt.make();
+ pqt.register(tree, "http://kno.wn", "allowed", {allow: true});
+ returnval(decide_policy(tree, "http://kno.wn/", false, "abcd"));
+ }''')
+ assert policy['allow'] == True
+ assert policy['mapping'] == 'allowed'
+ for prop in ('payload', 'nonce', 'csp'):
+ assert prop not in policy
+
+ policy = execute_in_page(
+ '''
+ returnval(decide_policy(pqt.make(), "http://unkno.wn/", false, "abcd"));
+ '''
+ )
+ assert policy['allow'] == False
+ for prop in ('mapping', 'payload', 'nonce'):
+ assert prop not in policy
+ assert parse_csp(policy['csp']) == {
+ 'prefetch-src': "'none'",
+ 'script-src-attr': "'none'",
+ 'script-src': "'none'",
+ 'script-src-elem': "'none'"
+ }
+
+ policy = execute_in_page(
+ '''{
+ const tree = pqt.make();
+ pqt.register(tree, "http://kno.wn", "disallowed", {allow: false});
+ returnval(decide_policy(tree, "http://kno.wn/", true, "abcd"));
+ }''')
+ assert policy['allow'] == False
+ assert policy['mapping'] == 'disallowed'
+ for prop in ('payload', 'nonce'):
+ assert prop not in policy
+ assert parse_csp(policy['csp']) == {
+ 'prefetch-src': "'none'",
+ 'script-src-attr': "'none'",
+ 'script-src': "'none'",
+ 'script-src-elem': "'none'"
+ }
+
+ policy = execute_in_page(
+ '''{
+ const tree = pqt.make();
+ pqt.register(tree, "http://kno.wn", "m1", {identifier: "res1"});
+ returnval(decide_policy(tree, "http://kno.wn/", true, "abcd"));
+ }''')
+ assert policy['allow'] == False
+ assert policy['mapping'] == 'm1'
+ assert policy['payload'] == {'identifier': 'res1'}
+
+ assert policy['nonce'] == \
+ sha256('m1:res1:http://kno.wn/:abcd'.encode()).digest().hex()
+ assert parse_csp(policy['csp']) == {
+ 'prefetch-src': f"'none'",
+ 'script-src-attr': f"'none'",
+ 'script-src': f"'nonce-{policy['nonce']}'",
+ 'script-src-elem': f"'nonce-{policy['nonce']}'"
+ }
diff --git a/test/unit/test_webrequest.py b/test/unit/test_webrequest.py
new file mode 100644
index 0000000..6af2758
--- /dev/null
+++ b/test/unit/test_webrequest.py
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: CC0-1.0
+
+"""
+Haketilo unit tests - modifying requests using webRequest API
+"""
+
+# This file is part of Haketilo
+#
+# Copyright (C) 2021, Wojtek Kosior <koszko@koszko.org>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the CC0 1.0 Universal License as published by
+# the Creative Commons Corporation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# CC0 1.0 Universal License for more details.
+
+import re
+from hashlib import sha256
+import pytest
+
+from ..script_loader import load_script
+
+def webrequest_js():
+ return (load_script('background/webrequest.js',
+ '#IMPORT common/patterns_query_tree.js AS pqt') +
+ ''';
+ // Mock pattern tree.
+ tree = pqt.make();
+ pqt.register(tree, "https://site.with.scripts.block.ed/***",
+ "disallowed", {allow: false});
+ pqt.register(tree, "https://site.with.paylo.ad/***",
+ "somemapping", {identifier: "someresource"});
+
+ // Mock IndexedDB.
+ haketilodb.track.settings =
+ () => [{}, [{name: "default_allow", value: true}]];
+
+ // Mock stream_filter.
+ stream_filter.apply = (details, headers, policy) => headers;
+
+ // Mock secret and start webrequest operations.
+ start("somesecret");
+ ''')
+
+def are_scripts_allowed(driver, nonce=None):
+ return driver.execute_script(
+ '''
+ document.scripts_allowed = false;
+ const script = document.createElement("script");
+ script.innerHTML = "document.scripts_allowed = true;";
+ if (arguments[0])
+ script.setAttribute("nonce", arguments[0]);
+ document.head.append(script);
+ return document.scripts_allowed;
+ ''',
+ nonce)
+
+@pytest.mark.ext_data({'background_script': webrequest_js})
+@pytest.mark.usefixtures('webextension')
+def test_on_headers_received(driver, execute_in_page):
+ for attempt in range(10):
+ driver.get('https://site.with.scripts.block.ed/')
+
+ if not are_scripts_allowed(driver):
+ break
+ assert attempt != 9
+
+ driver.get('https://site.with.scripts.allow.ed/')
+ assert are_scripts_allowed(driver)
+
+ driver.get('https://site.with.paylo.ad/')
+ assert not are_scripts_allowed(driver)
+ source = 'somemapping:someresource:https://site.with.paylo.ad/index.html:somesecret'
+ assert are_scripts_allowed(driver, sha256(source.encode()).digest().hex())
diff --git a/test/world_wide_library.py b/test/world_wide_library.py
index 860c987..43d3512 100644
--- a/test/world_wide_library.py
+++ b/test/world_wide_library.py
@@ -27,13 +27,99 @@ Our helpful little stand-in for the Internet
# file's license. Although I request that you do not make use this code
# in a proprietary program, I am not going to enforce this in court.
+from hashlib import sha256
+from pathlib import Path
+from shutil import rmtree
+from threading import Lock
+
from .misc_constants import here
+served_scripts = {}
+served_scripts_lock = Lock()
+
+def start_serving_script(script_text):
+ """
+ Register given script so that it is served at
+ https://serve.scrip.ts/?sha256=<script's_sha256_sum>
+
+ Returns the URL at which script will be served.
+
+ Registration is guarded by served_scripts_lock, so this function can safely
+ run concurrently with serve_script() and dump_scripts().
+ """
+ sha256sum = sha256(script_text.encode()).digest().hex()
+ served_scripts_lock.acquire()
+ served_scripts[sha256sum] = script_text
+ served_scripts_lock.release()
+
+ return f'https://serve.scrip.ts/?sha256={sha256sum}'
+
+def serve_script(command, get_params, post_params):
+ """
+ info() callback to pass to request-handling code in server.py. Facilitates
+ serving scripts that have been registered with start_serving_script().
+ """
+ served_scripts_lock.acquire()
+ try:
+ script = served_scripts.get(get_params['sha256'][0])
+ finally:
+ served_scripts_lock.release()
+ if script is None:
+ return 404, {}, b''
+
+ return 200, {'Content-Type': 'application/javascript'}, script
+
+def dump_scripts(directory='./injected_scripts'):
+ """
+ Write all scripts that have been registered with start_serving_script()
+ under the provided directory. If the directory already exists, it is wiped
+ beforehand. If it doesn't exist, it is created.
+ """
+ directory = Path(directory)
+ rmtree(directory, ignore_errors=True)
+ directory.mkdir(parents=True)
+
+ served_scripts_lock.acquire()
+ for sha256, script in served_scripts.items():
+ with open(directory / sha256, 'wt') as file:
+ file.write(script)
+ served_scripts_lock.release()
+
catalog = {
- 'http://gotmyowndoma.in': (302, {'location': 'http://gotmyowndoma.in/index.html'}, None),
- 'http://gotmyowndoma.in/': (302, {'location': 'http://gotmyowndoma.in/index.html'}, None),
- 'http://gotmyowndoma.in/index.html': (200, {}, here / 'data' / 'pages' / 'gotmyowndomain.html'),
- 'https://gotmyowndoma.in': (302, {'location': 'https://gotmyowndoma.in/index.html'}, None),
- 'https://gotmyowndoma.in/': (302, {'location': 'https://gotmyowndoma.in/index.html'}, None),
- 'https://gotmyowndoma.in/index.html': (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html')
+ 'http://gotmyowndoma.in':
+ (302, {'location': 'http://gotmyowndoma.in/index.html'}, None),
+ 'http://gotmyowndoma.in/':
+ (302, {'location': 'http://gotmyowndoma.in/index.html'}, None),
+ 'http://gotmyowndoma.in/index.html':
+ (200, {}, here / 'data' / 'pages' / 'gotmyowndomain.html'),
+
+ 'https://gotmyowndoma.in':
+ (302, {'location': 'https://gotmyowndoma.in/index.html'}, None),
+ 'https://gotmyowndoma.in/':
+ (302, {'location': 'https://gotmyowndoma.in/index.html'}, None),
+ 'https://gotmyowndoma.in/index.html':
+ (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html'),
+
+ 'https://serve.scrip.ts/': serve_script,
+
+ 'https://site.with.scripts.block.ed':
+ (302, {'location': 'https://site.with.scripts.block.ed/index.html'}, None),
+ 'https://site.with.scripts.block.ed/':
+ (302, {'location': 'https://site.with.scripts.block.ed/index.html'}, None),
+ 'https://site.with.scripts.block.ed/index.html':
+ (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html'),
+
+ 'https://site.with.scripts.allow.ed':
+ (302, {'location': 'https://site.with.scripts.allow.ed/index.html'}, None),
+ 'https://site.with.scripts.allow.ed/':
+ (302, {'location': 'https://site.with.scripts.allow.ed/index.html'}, None),
+ 'https://site.with.scripts.allow.ed/index.html':
+ (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html'),
+
+ 'https://site.with.paylo.ad':
+ (302, {'location': 'https://site.with.paylo.ad/index.html'}, None),
+ 'https://site.with.paylo.ad/':
+ (302, {'location': 'https://site.with.paylo.ad/index.html'}, None),
+ 'https://site.with.paylo.ad/index.html':
+ (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html')
}