aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- background/broadcast_broker.js 1
-rw-r--r-- background/patterns_query_manager.js 124
-rwxr-xr-x build.sh 2
-rw-r--r-- common/broadcast.js 6
-rw-r--r-- common/indexeddb.js 4
-rw-r--r-- common/message_server.js 61
-rwxr-xr-x compute_scripts.awk 43
-rw-r--r-- test/script_loader.py 16
-rw-r--r-- test/unit/test_indexeddb.py 2
-rw-r--r-- test/unit/test_patterns_query_manager.py 236
10 files changed, 464 insertions, 31 deletions
diff --git a/background/broadcast_broker.js b/background/broadcast_broker.js
index 9847d7e..5af3a7b 100644
--- a/background/broadcast_broker.js
+++ b/background/broadcast_broker.js
@@ -141,7 +141,6 @@ function prepare_timeout_cb(sender_ctx, broadcast_data)
function flush(sender_ctx)
{
- console.log('flushing', sender_ctx.prepared_broadcasts);
sender_ctx.prepared_broadcasts.forEach(nv => broadcast(...nv));
sender_ctx.prepared_broadcasts = new Set();
}
diff --git a/background/patterns_query_manager.js b/background/patterns_query_manager.js
new file mode 100644
index 0000000..cb14cb1
--- /dev/null
+++ b/background/patterns_query_manager.js
@@ -0,0 +1,124 @@
+/**
+ * This file is part of Haketilo.
+ *
+ * Function: Instantiate the Pattern Tree data structure, filled with mappings
+ * from IndexedDB.
+ *
+ * Copyright (C) 2021 Wojtek Kosior
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * As additional permission under GNU GPL version 3 section 7, you
+ * may distribute forms of that code without the copy of the GNU
+ * GPL normally required by section 4, provided you include this
+ * license notice and, in case of non-source distribution, a URL
+ * through which recipients can access the Corresponding Source.
+ * If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not
+ * obligated to do so. If you do not wish to do so, delete this
+ * exception statement from your version.
+ *
+ * As a special exception to the GPL, any HTML file which merely
+ * makes function calls to this code, and for that purpose
+ * includes it by reference shall be deemed a separate work for
+ * copyright law purposes. If you modify this code, you may extend
+ * this exception to your version of the code, but you are not
+ * obligated to do so. If you do not wish to do so, delete this
+ * exception statement from your version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
+ * license. Although I request that you do not make use this code in a
+ * proprietary program, I am not going to enforce this in court.
+ */
+
+#IMPORT common/patterns_query_tree.js AS pqt
+#IMPORT common/indexeddb.js AS haketilodb
+
+#FROM common/browser.js IMPORT browser
+
+const tree = pqt.make();
+#EXPORT tree
+
+const current_mappings = new Map();
+
+let registered_script = null;
+let script_update_occuring = false;
+let script_update_needed;
+
+async function update_content_script()
+{
+ if (script_update_occuring)
+ return;
+
+ script_update_occuring = true;
+
+ while (script_update_needed) {
+ script_update_needed = false;
+
+ const code = `\
+this.haketilo_pattern_tree = ${JSON.stringify(tree)};
+if (this.haketilo_content_script_main)
+ haketilo_content_script_main();`;
+
+ const new_script = await browser.contentScripts.register({
+ "js": [{code}],
+ "matches": ["<all_urls>"],
+ "allFrames": true,
+ "runAt": "document_start"
+ });
+
+ if (registered_script)
+ registered_script.unregister();
+
+ registered_script = new_script;
+ }
+
+ script_update_occuring = false;
+}
+
+function register_mapping(mapping)
+{
+ for (const pattern in mapping.payloads)
+ pqt.register(tree, pattern, mapping.identifier, mapping);
+ current_mappings.set(mapping.identifier, mapping);
+}
+
+function mapping_changed(change)
+{
+ console.log('mapping changes!', arguments);
+ const old_version = current_mappings.get(change.identifier);
+ if (old_version !== undefined) {
+ for (const pattern in old_version.payloads)
+ pqt.deregister(tree, pattern, change.identifier);
+
+ current_mappings.delete(change.identifier);
+ }
+
+ if (change.new_val !== undefined)
+ register_mapping(change.new_val);
+
+ script_update_needed = true;
+ setTimeout(update_content_script, 0);
+}
+
+async function start()
+{
+ const [tracking, initial_mappings] =
+ await haketilodb.track_mappings(mapping_changed);
+
+ initial_mappings.forEach(register_mapping);
+ script_update_needed = true;
+ await update_content_script();
+}
+#EXPORT start
diff --git a/build.sh b/build.sh
index 8d5b97e..1486073 100755
--- a/build.sh
+++ b/build.sh
@@ -24,7 +24,7 @@ print_usage() {
call_awk() {
local BROWSER_UPCASE="$(printf %s "$BROWSER" | tr '[:lower:]' '[:upper:]')"
- nawk -f compute_scripts.awk -- -M manifest.json -D "$BROWSER_UPCASE" \
+ awk -f compute_scripts.awk -- -M manifest.json -D "$BROWSER_UPCASE" \
-D MV2 --output=files-to-copy --write-js-deps --write-html-deps \
--output-dir="$BUILDDIR"
}
diff --git a/common/broadcast.js b/common/broadcast.js
index b69f352..b7743a6 100644
--- a/common/broadcast.js
+++ b/common/broadcast.js
@@ -43,12 +43,12 @@
#IMPORT common/connection_types.js AS CONNECTION_TYPE
-#FROM common/browser.js IMPORT browser
+#FROM common/message_server.js IMPORT connect_to_background
function sender_connection()
{
return {
- port: browser.runtime.connect({name: CONNECTION_TYPE.BROADCAST_SEND})
+ port: connect_to_background(CONNECTION_TYPE.BROADCAST_SEND)
};
}
#EXPORT sender_connection
@@ -94,7 +94,7 @@ function flush(sender_conn)
function listener_connection(cb)
{
const conn = {
- port: browser.runtime.connect({name: CONNECTION_TYPE.BROADCAST_LISTEN})
+ port: connect_to_background(CONNECTION_TYPE.BROADCAST_LISTEN)
};
conn.port.onMessage.addListener(cb);
diff --git a/common/indexeddb.js b/common/indexeddb.js
index c97c115..096391a 100644
--- a/common/indexeddb.js
+++ b/common/indexeddb.js
@@ -45,7 +45,11 @@
#IMPORT common/broadcast.js
let initial_data = (
+#IF UNIT_TEST
+ {}
+#ELSE
#INCLUDE_VERBATIM default_settings.json
+#ENDIF
);
/* Update when changes are made to database schema. Must have 3 elements */
diff --git a/common/message_server.js b/common/message_server.js
index fd609c7..657e140 100644
--- a/common/message_server.js
+++ b/common/message_server.js
@@ -43,23 +43,68 @@
#FROM common/browser.js IMPORT browser
-var listeners = {};
+let listeners = {};
+let listening = false;
-/* magic should be one of the constants from /common/connection_types.js */
+function raw_listen(port)
+{
+ if (listeners[port.name] === undefined)
+ return;
+
+ listeners[port.name](port);
+}
+/* magic should be one of the constants from /common/connection_types.js */
function listen_for_connection(magic, cb)
{
+ if (!listening) {
+ listening = true;
+ browser.runtime.onConnect.addListener(raw_listen);
+ }
listeners[magic] = cb;
}
+#EXPORT listen_for_connection
-function raw_listen(port)
+/*
+ * Messaging the background page from itself might result in messages being
+ * silently discarded. Here we implement an interface (somewhat) compatible with
+ * the one provided by the browser, but one that lets the background page
+ * communicate with itself.
+ */
+function EvTarget()
{
- if (listeners[port.name] === undefined)
- return;
+ this.listeners = new Set();
+ this.addListener = cb => this.listeners.add(cb);
+ this.removeListener = cb => this.listeners.delete(cb);
+ this.dispatch = msg => this.listeners.forEach(l => l(msg));
+}
- listeners[port.name](port);
+function Port(magic)
+{
+ this.name = magic;
+ this.onDisconnect = new EvTarget();
+ this.onMessage = new EvTarget();
+ this.postMessage = msg => this.other.onMessage.dispatch(msg);
+ this.disconnect = () => this.other.onDisconnect.dispatch(this.other);
}
-browser.runtime.onConnect.addListener(raw_listen);
+let bg_page_url;
+function connect_to_background(magic)
+{
+ if (bg_page_url === undefined)
+ bg_page_url = browser.runtime.getURL("_generated_background_page.html");
+ if (typeof document === "undefined" || document.URL !== bg_page_url)
+ return browser.runtime.connect({name: magic});
-#EXPORT listen_for_connection
+ if (!(magic in listeners))
+ throw `no listener for '${magic}'`
+
+ const ports = [new Port(magic), new Port(magic)];
+ ports[0].other = ports[1];
+ ports[1].other = ports[0];
+
+ listeners[magic](ports[0]);
+ return ports[1];
+}
+
+#EXPORT connect_to_background
diff --git a/compute_scripts.awk b/compute_scripts.awk
index 9edc56d..b778934 100755
--- a/compute_scripts.awk
+++ b/compute_scripts.awk
@@ -108,7 +108,8 @@ BEGIN {
function process_file(path, read_path, mode,
line, result, line_part, directive, directive_args,
- if_nesting, if_nesting_true, if_branch_processed) {
+ if_nesting, if_nesting_true, if_branch_processed,
+ additional_line_nr) {
if (path in modes && modes[path] != mode) {
printf "ERROR: File %s used multiple times in different contexts\n",
path > "/dev/stderr"
@@ -151,8 +152,13 @@ function process_file(path, read_path, mode,
printf "ERROR: Could not read %s\n", read_path > "/dev/stderr"
return 1
}
- if (result == 0)
- break
+ if (result == 0) {
+ if (path != js_to_amalgamate || \
+ additional_line_nr == additional_lines_count)
+ break
+
+ line = amalgamation_additional_lines[++additional_line_nr]
+ }
if (line !~ /^#/) {
if (if_nesting_true == if_nesting)
@@ -169,9 +175,15 @@ function process_file(path, read_path, mode,
return 1
}
if (result == 0) {
- printf "ERROR: Unexpected EOF in %s\n",
- read_path > "/dev/stderr"
- return 1
+ if (path == js_to_amalgamate && \
+ additional_line_nr < additional_lines_count) {
+ line_part = \
+ amalgamation_additional_lines[++additional_line_nr]
+ } else {
+ printf "ERROR: Unexpected EOF in %s\n",
+ read_path > "/dev/stderr"
+ return 1
+ }
}
line = line " " line_part
@@ -569,7 +581,7 @@ function print_amalgamation(js_deps, js_deps_count,
}
function print_usage() {
- printf "USAGE: %s compute_scripts.awk -- [-D PREPROCESSOR_DEFINITION]... [-M manifest/to/process/manifest.json]... [-H html/to/process.html]... [-J js/to/process.js]... [--help|-h] [--output-dir=./build] [--write-js-deps] [--write-html-deps] [--output=files-to-copy|--output=amalgamate-js:js/to/process.js]\n",
+ printf "USAGE: %s compute_scripts.awk -- [-D PREPROCESSOR_DEFINITION]... [-M manifest/to/process/manifest.json]... [-H html/to/process.html]... [-J js/to/process.js]... [--help|-h] [--output-dir=./build] [--write-js-deps] [--write-html-deps] [--output=files-to-copy|--output=amalgamate-js:js/to/process.js[:additional-code]]\n",
ARGV[0] > "/dev/stderr"
}
@@ -580,13 +592,16 @@ BEGIN {
option_arg_patterns["J"] = path_re
}
-function main(i, path, letter, dir, max_line_nr, js_deps, js_deps_count) {
+function main(i, path, letter, dir, max_line_nr, js_deps, js_deps_count,
+ amalgamation_additional_code) {
output_dir = "./build"
write_js_deps = false
write_html_deps = false
output = ""
js_to_amalgamate = ""
+ delete amalgamation_additional_lines[0]
+ additional_lines_count = 0
delete main_js_lines[0]
delete manifests_to_process[0]
@@ -629,13 +644,21 @@ function main(i, path, letter, dir, max_line_nr, js_deps, js_deps_count) {
output = "files-to-copy"
} else if (ARGV[i] ~ /^--output=amalgamate-js:/) {
output = "amalgamate-js"
- js_to_amalgamate = ARGV[i]
- sub(/^--output=amalgamate-js:/, "", js_to_amalgamate)
+ amalgamation_additional_code = ARGV[i]
+ sub(/^--output=amalgamate-js:/, "", amalgamation_additional_code)
+ js_to_amalgamate = amalgamation_additional_code
+ sub(/:.*$/, "", js_to_amalgamate)
if (js_to_amalgamate !~ path_re) {
printf "ERROR: amalgamate-js path does not match '%s': %s\n",
path_re, js_to_amalgamate > "/dev/stderr"
return 1
}
+ sub(/^[^:]+:?/, "", amalgamation_additional_code)
+ if (amalgamation_additional_code) {
+ additional_lines_count = split(amalgamation_additional_code,
+ amalgamation_additional_lines,
+ "\n")
+ }
} else {
printf "ERROR: Unknown option '%s'\n", ARGV[i] > "/dev/stderr"
print_usage()
diff --git a/test/script_loader.py b/test/script_loader.py
index edf8143..f66f9ae 100644
--- a/test/script_loader.py
+++ b/test/script_loader.py
@@ -43,10 +43,12 @@ def make_relative_path(path):
script_cache = {}
-def load_script(path):
+def load_script(path, code_to_add=None):
"""
`path` is a .js file path in Haketilo sources. It may be absolute or
- specified relative to Haketilo's project directory.
+ specified relative to Haketilo's project directory. `code_to_add` is
+ optional code to be appended to the end of the main file being imported.
+ It can contain directives like `#IMPORT`.
Return a string containing script from `path` together with all other
scripts it depends on. Dependencies are wrapped in the same way Haketilo's
@@ -57,13 +59,15 @@ def load_script(path):
a dependency to be substituted by a mocked value.
"""
path = make_relative_path(path)
- if str(path) in script_cache:
- return script_cache[str(path)]
+ key = f'{str(path)}:{code_to_add}' if code_to_add is not None else str(path)
+ if key in script_cache:
+ return script_cache[key]
awk = subprocess.run(['awk', '-f', str(awk_script), '--', '-D', 'MOZILLA',
- '-D', 'MV2', '--output=amalgamate-js:' + str(path)],
+ '-D', 'MV2', '-D', 'TEST', '-D', 'UNIT_TEST',
+ '--output=amalgamate-js:' + key],
stdout=subprocess.PIPE, cwd=script_root, check=True)
script = awk.stdout.decode()
- script_cache[str(path)] = script
+ script_cache[key] = script
return script
diff --git a/test/unit/test_indexeddb.py b/test/unit/test_indexeddb.py
index af60e1c..476690c 100644
--- a/test/unit/test_indexeddb.py
+++ b/test/unit/test_indexeddb.py
@@ -85,8 +85,6 @@ def test_haketilodb_save_remove(execute_in_page):
# Mock some unwanted imports.
execute_in_page(
'''{
- initial_data = {};
-
const broadcast_mock = {};
const nop = () => {};
for (const key in broadcast)
diff --git a/test/unit/test_patterns_query_manager.py b/test/unit/test_patterns_query_manager.py
new file mode 100644
index 0000000..8ae7c28
--- /dev/null
+++ b/test/unit/test_patterns_query_manager.py
@@ -0,0 +1,236 @@
+# SPDX-License-Identifier: CC0-1.0
+
+"""
+Haketilo unit tests - building pattern tree and putting it in a content script
+"""
+
+# This file is part of Haketilo
+#
+# Copyright (C) 2021, Wojtek Kosior <koszko@koszko.org>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the CC0 1.0 Universal License as published by
+# the Creative Commons Corporation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# CC0 1.0 Universal License for more details.
+
+import pytest
+import re
+import json
+from selenium.webdriver.support.ui import WebDriverWait
+
+from ..script_loader import load_script
+
+def simple_sample_mapping(patterns, fruit):
+ if type(patterns) is list:
+ payloads = dict([(p, {'identifier': fruit}) for p in patterns])
+ else:
+ payloads = {patterns: {'identifier': fruit}}
+ return {
+ 'source_copyright': [],
+ 'type': 'mapping',
+ 'identifier': f'inject-{fruit}',
+ 'payloads': payloads
+ }
+
+content_script_re = re.compile(r'this.haketilo_pattern_tree = (.*);')
+def extract_tree_data(content_script_text):
+ return json.loads(content_script_re.search(content_script_text)[1])
+
+# Fields that are not relevant for testing are omitted from these mapping
+# definitions.
+sample_mappings = [simple_sample_mapping(pats, fruit) for pats, fruit in [
+ (['https://gotmyowndoma.in/index.html',
+ 'http://gotmyowndoma.in/index.html'], 'banana'),
+ (['https://***.gotmyowndoma.in/index.html',
+ 'https://**.gotmyowndoma.in/index.html',
+ 'https://*.gotmyowndoma.in/index.html',
+ 'https://gotmyowndoma.in/index.html'], 'orange'),
+ ('https://gotmyowndoma.in/index.html/***', 'grape'),
+ ('http://gotmyowndoma.in/index.html/***', 'melon'),
+ ('https://gotmyowndoma.in/index.html', 'peach'),
+ ('https://gotmyowndoma.in/*', 'pear'),
+ ('https://gotmyowndoma.in/**', 'raspberry'),
+ ('https://gotmyowndoma.in/***', 'strawberry'),
+ ('https://***.gotmyowndoma.in/index.html', 'apple'),
+ ('https://***.gotmyowndoma.in/*', 'avocado'),
+ ('https://***.gotmyowndoma.in/**', 'papaya'),
+ ('https://***.gotmyowndoma.in/***', 'kiwi')
+]]
+
+# Even though patterns_query_manager.js is normally meant to run from the
+# background page, it can just as well be tested from an extension's bundled page.
+@pytest.mark.get_page('https://gotmyowndoma.in')
+def test_pqm_tree_building(driver, execute_in_page):
+ """
+ patterns_query_manager.js tracks Haketilo's internal database and builds a
+ constantly-updated pattern tree based on its contents. Mock the database and
+ verify tree building works properly.
+ """
+ execute_in_page(load_script('background/patterns_query_manager.js'))
+ # Mock IndexedDB and build patterns tree.
+ execute_in_page(
+ '''
+ const initial_mappings = arguments[0]
+ let mappingchange;
+ function track_mock(cb)
+ {
+ mappingchange = cb;
+
+ return [{}, initial_mappings];
+ }
+ haketilodb.track_mappings = track_mock;
+
+ let last_script;
+ let unregister_called = 0;
+ async function register_mock(injection)
+ {
+ await new Promise(resolve => setTimeout(resolve, 1));
+ last_script = injection.js[0].code;
+ return {unregister: () => unregister_called++};
+ }
+ browser = {contentScripts: {register: register_mock}};
+
+ returnval(start());
+ ''',
+ sample_mappings[0:2])
+
+ found, tree, content_script, deregistrations = execute_in_page(
+ '''
+ returnval([pqt.search(tree, arguments[0]).next().value,
+ tree, last_script, unregister_called]);
+ ''',
+ 'https://gotmyowndoma.in/index.html')
+ assert found == dict([(m['identifier'], m) for m in sample_mappings[0:2]])
+ assert tree == extract_tree_data(content_script)
+ assert deregistrations == 0
+
+ def condition_mappings_added(driver):
+ last_script = execute_in_page('returnval(last_script);')
+ return all([m['identifier'] in last_script for m in sample_mappings])
+
+ execute_in_page(
+ '''
+ for (const mapping of arguments[0]) {
+ mappingchange({
+ identifier: mapping.identifier,
+ new_val: mapping
+ });
+ }
+ ''',
+ sample_mappings[2:])
+ WebDriverWait(driver, 10).until(condition_mappings_added)
+
+ odd = [m['identifier'] for i, m in enumerate(sample_mappings) if i % 2]
+ even = [m['identifier'] for i, m in enumerate(sample_mappings) if 1 - i % 2]
+
+ def condition_odd_removed(driver):
+ last_script = execute_in_page('returnval(last_script);')
+ return all([id not in last_script for id in odd])
+
+ def condition_all_removed(driver):
+ content_script = execute_in_page('returnval(last_script);')
+ return extract_tree_data(content_script) == {}
+
+ execute_in_page(
+ '''
+ arguments[0].forEach(identifier => mappingchange({identifier}));
+ ''',
+ odd)
+
+ WebDriverWait(driver, 10).until(condition_odd_removed)
+
+ execute_in_page(
+ '''
+ arguments[0].forEach(identifier => mappingchange({identifier}));
+ ''',
+ even)
+
+ WebDriverWait(driver, 10).until(condition_all_removed)
+
+content_js = '''
+let already_run = false;
+this.haketilo_content_script_main = function() {
+ if (already_run)
+ return;
+ already_run = true;
+ document.documentElement.innerHTML = "<body><div id='tree-json'>";
+ document.getElementById("tree-json").innerText =
+ JSON.stringify(this.haketilo_pattern_tree);
+}
+if (this.haketilo_pattern_tree !== undefined)
+ this.haketilo_content_script_main();
+'''
+
+def background_js():
+ pqm_js = load_script('background/patterns_query_manager.js',
+ "#IMPORT background/broadcast_broker.js")
+ return pqm_js + '; broadcast_broker.start(); start();'
+
+@pytest.mark.ext_data({
+ 'content_script': content_js,
+ 'background_script': background_js
+})
+@pytest.mark.usefixtures('webextension')
+def test_pqm_script_injection(driver, execute_in_page):
+ # Let's open a normal page in a second window. Window 0 will be used to make
+ # changes to IndexedDB and window 1 to test the working of content scripts.
+ driver.execute_script('window.open("about:blank", "_blank");')
+ windows = [*driver.window_handles]
+ assert len(windows) == 2
+
+ def run_content_script():
+ driver.switch_to.window(windows[1])
+ driver.get('https://gotmyowndoma.in/index.html')
+ windows[1] = driver.window_handles[1]
+ return driver.execute_script(
+ '''
+ return (document.getElementById("tree-json") || {}).innerText;
+ ''')
+
+ for attempt in range(10):
+ json_txt = run_content_script()
+ if json.loads(json_txt) == {}:
+ break;
+ assert attempt != 9
+
+ driver.switch_to.window(windows[0])
+ execute_in_page(load_script('common/indexeddb.js'))
+
+ sample_data = {
+ 'mappings': dict([(sm['identifier'], {'1.0': sm})
+ for sm in sample_mappings]),
+ 'resources': {},
+ 'files': {}
+ }
+ execute_in_page('returnval(save_items(arguments[0]));', sample_data)
+
+ for attempt in range(10):
+ tree_json = run_content_script()
+ json.loads(tree_json)
+ if all([m['identifier'] in tree_json for m in sample_mappings]):
+ break
+ assert attempt != 9
+
+ driver.switch_to.window(windows[0])
+ execute_in_page(
+ '''{
+ const identifiers = arguments[0];
+ async function remove_items()
+ {
+ const ctx = await start_items_transaction(["mappings"], {});
+ for (const id of identifiers)
+ await remove_mapping(id, ctx);
+ await finalize_items_transaction(ctx);
+ }
+ returnval(remove_items());
+ }''',
+ [sm['identifier'] for sm in sample_mappings])
+
+ for attempt in range(10):
+ if json.loads(run_content_script()) == {}:
+ break
+ assert attempt != 9