From 01e977f922ea29cd2994f96c18e4b3f033b1802d Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Mon, 27 Dec 2021 16:55:28 +0100 Subject: facilitate registering dynamic content scripts with mappings data --- background/broadcast_broker.js | 1 - background/patterns_query_manager.js | 124 ++++++++++++++++ build.sh | 2 +- common/broadcast.js | 6 +- common/indexeddb.js | 4 + common/message_server.js | 61 ++++++-- compute_scripts.awk | 43 ++++-- test/script_loader.py | 16 ++- test/unit/test_indexeddb.py | 2 - test/unit/test_patterns_query_manager.py | 236 +++++++++++++++++++++++++++++++ 10 files changed, 464 insertions(+), 31 deletions(-) create mode 100644 background/patterns_query_manager.js create mode 100644 test/unit/test_patterns_query_manager.py diff --git a/background/broadcast_broker.js b/background/broadcast_broker.js index 9847d7e..5af3a7b 100644 --- a/background/broadcast_broker.js +++ b/background/broadcast_broker.js @@ -141,7 +141,6 @@ function prepare_timeout_cb(sender_ctx, broadcast_data) function flush(sender_ctx) { - console.log('flushing', sender_ctx.prepared_broadcasts); sender_ctx.prepared_broadcasts.forEach(nv => broadcast(...nv)); sender_ctx.prepared_broadcasts = new Set(); } diff --git a/background/patterns_query_manager.js b/background/patterns_query_manager.js new file mode 100644 index 0000000..cb14cb1 --- /dev/null +++ b/background/patterns_query_manager.js @@ -0,0 +1,124 @@ +/** + * This file is part of Haketilo. + * + * Function: Instantiate the Pattern Tree data structure, filled with mappings + * from IndexedDB. + * + * Copyright (C) 2021 Wojtek Kosior + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * As additional permission under GNU GPL version 3 section 7, you + * may distribute forms of that code without the copy of the GNU + * GPL normally required by section 4, provided you include this + * license notice and, in case of non-source distribution, a URL + * through which recipients can access the Corresponding Source. + * If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not + * obligated to do so. If you do not wish to do so, delete this + * exception statement from your version. + * + * As a special exception to the GPL, any HTML file which merely + * makes function calls to this code, and for that purpose + * includes it by reference shall be deemed a separate work for + * copyright law purposes. If you modify this code, you may extend + * this exception to your version of the code, but you are not + * obligated to do so. If you do not wish to do so, delete this + * exception statement from your version. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + * + * I, Wojtek Kosior, thereby promise not to sue for violation of this file's + * license. Although I request that you do not make use of this code in a + * proprietary program, I am not going to enforce this in court. 
+ */ + +#IMPORT common/patterns_query_tree.js AS pqt +#IMPORT common/indexeddb.js AS haketilodb + +#FROM common/browser.js IMPORT browser + +const tree = pqt.make(); +#EXPORT tree + +const current_mappings = new Map(); + +let registered_script = null; +let script_update_occuring = false; +let script_update_needed; + +async function update_content_script() +{ + if (script_update_occuring) + return; + + script_update_occuring = true; + + while (script_update_needed) { + script_update_needed = false; + + const code = `\ +this.haketilo_pattern_tree = ${JSON.stringify(tree)}; +if (this.haketilo_content_script_main) + haketilo_content_script_main();`; + + const new_script = await browser.contentScripts.register({ + "js": [{code}], + "matches": [""], + "allFrames": true, + "runAt": "document_start" + }); + + if (registered_script) + registered_script.unregister(); + + registered_script = new_script; + } + + script_update_occuring = false; +} + +function register_mapping(mapping) +{ + for (const pattern in mapping.payloads) + pqt.register(tree, pattern, mapping.identifier, mapping); + current_mappings.set(mapping.identifier, mapping); +} + +function mapping_changed(change) +{ + console.log('mapping changes!', arguments); + const old_version = current_mappings.get(change.identifier); + if (old_version !== undefined) { + for (const pattern in old_version.payloads) + pqt.deregister(tree, pattern, change.identifier); + + current_mappings.delete(change.identifier); + } + + if (change.new_val !== undefined) + register_mapping(change.new_val); + + script_update_needed = true; + setTimeout(update_content_script, 0); +} + +async function start() +{ + const [tracking, initial_mappings] = + await haketilodb.track_mappings(mapping_changed); + + initial_mappings.forEach(register_mapping); + script_update_needed = true; + await update_content_script(); +} +#EXPORT start diff --git a/build.sh b/build.sh index 8d5b97e..1486073 100755 --- a/build.sh +++ b/build.sh @@ -24,7 +24,7 @@ 
print_usage() { call_awk() { local BROWSER_UPCASE="$(printf %s "$BROWSER" | tr '[:lower:]' '[:upper:]')" - nawk -f compute_scripts.awk -- -M manifest.json -D "$BROWSER_UPCASE" \ + awk -f compute_scripts.awk -- -M manifest.json -D "$BROWSER_UPCASE" \ -D MV2 --output=files-to-copy --write-js-deps --write-html-deps \ --output-dir="$BUILDDIR" } diff --git a/common/broadcast.js b/common/broadcast.js index b69f352..b7743a6 100644 --- a/common/broadcast.js +++ b/common/broadcast.js @@ -43,12 +43,12 @@ #IMPORT common/connection_types.js AS CONNECTION_TYPE -#FROM common/browser.js IMPORT browser +#FROM common/message_server.js IMPORT connect_to_background function sender_connection() { return { - port: browser.runtime.connect({name: CONNECTION_TYPE.BROADCAST_SEND}) + port: connect_to_background(CONNECTION_TYPE.BROADCAST_SEND) }; } #EXPORT sender_connection @@ -94,7 +94,7 @@ function flush(sender_conn) function listener_connection(cb) { const conn = { - port: browser.runtime.connect({name: CONNECTION_TYPE.BROADCAST_LISTEN}) + port: connect_to_background(CONNECTION_TYPE.BROADCAST_LISTEN) }; conn.port.onMessage.addListener(cb); diff --git a/common/indexeddb.js b/common/indexeddb.js index c97c115..096391a 100644 --- a/common/indexeddb.js +++ b/common/indexeddb.js @@ -45,7 +45,11 @@ #IMPORT common/broadcast.js let initial_data = ( +#IF UNIT_TEST + {} +#ELSE #INCLUDE_VERBATIM default_settings.json +#ENDIF ); /* Update when changes are made to database schema. 
Must have 3 elements */ diff --git a/common/message_server.js b/common/message_server.js index fd609c7..657e140 100644 --- a/common/message_server.js +++ b/common/message_server.js @@ -43,23 +43,68 @@ #FROM common/browser.js IMPORT browser -var listeners = {}; +let listeners = {}; +let listening = false; -/* magic should be one of the constants from /common/connection_types.js */ +function raw_listen(port) +{ + if (listeners[port.name] === undefined) + return; + + listeners[port.name](port); +} +/* magic should be one of the constants from /common/connection_types.js */ function listen_for_connection(magic, cb) { + if (!listening) { + listening = true; + browser.runtime.onConnect.addListener(raw_listen); + } listeners[magic] = cb; } +#EXPORT listen_for_connection -function raw_listen(port) +/* + * Messaging background page from itself might result in messages being silently + * discarded. Here we implement an interface (somewhat) compatible with the one + * provided by the browser, but which allows for background page to communicate + * with itself. 
+ */ +function EvTarget() { - if (listeners[port.name] === undefined) - return; + this.listeners = new Set(); + this.addListener = cb => this.listeners.add(cb); + this.removeListener = cb => this.listeners.delete(cb); + this.dispatch = msg => this.listeners.forEach(l => l(msg)); +} - listeners[port.name](port); +function Port(magic) +{ + this.name = magic; + this.onDisconnect = new EvTarget(); + this.onMessage = new EvTarget(); + this.postMessage = msg => this.other.onMessage.dispatch(msg); + this.disconnect = () => this.other.onDisconnect.dispatch(this.other); } -browser.runtime.onConnect.addListener(raw_listen); +let bg_page_url; +function connect_to_background(magic) +{ + if (bg_page_url === undefined) + bg_page_url = browser.runtime.getURL("_generated_background_page.html"); + if (typeof document === "undefined" || document.URL !== bg_page_url) + return browser.runtime.connect({name: magic}); -#EXPORT listen_for_connection + if (!(magic in listeners)) + throw `no listener for '${magic}'` + + const ports = [new Port(magic), new Port(magic)]; + ports[0].other = ports[1]; + ports[1].other = ports[0]; + + listeners[magic](ports[0]); + return ports[1]; +} + +#EXPORT connect_to_background diff --git a/compute_scripts.awk b/compute_scripts.awk index 9edc56d..b778934 100755 --- a/compute_scripts.awk +++ b/compute_scripts.awk @@ -108,7 +108,8 @@ BEGIN { function process_file(path, read_path, mode, line, result, line_part, directive, directive_args, - if_nesting, if_nesting_true, if_branch_processed) { + if_nesting, if_nesting_true, if_branch_processed, + additional_line_nr) { if (path in modes && modes[path] != mode) { printf "ERROR: File %s used multiple times in different contexts\n", path > "/dev/stderr" @@ -151,8 +152,13 @@ function process_file(path, read_path, mode, printf "ERROR: Could not read %s\n", read_path > "/dev/stderr" return 1 } - if (result == 0) - break + if (result == 0) { + if (path != js_to_amalgamate || \ + additional_line_nr == 
additional_lines_count) + break + + line = amalgamation_additional_lines[++additional_line_nr] + } if (line !~ /^#/) { if (if_nesting_true == if_nesting) @@ -169,9 +175,15 @@ function process_file(path, read_path, mode, return 1 } if (result == 0) { - printf "ERROR: Unexpected EOF in %s\n", - read_path > "/dev/stderr" - return 1 + if (path == js_to_amalgamate && \ + additional_line_nr < additional_lines_count) { + line_part = \ + amalgamation_additional_lines[++additional_line_nr] + } else { + printf "ERROR: Unexpected EOF in %s\n", + read_path > "/dev/stderr" + return 1 + } } line = line " " line_part @@ -569,7 +581,7 @@ function print_amalgamation(js_deps, js_deps_count, } function print_usage() { - printf "USAGE: %s compute_scripts.awk -- [-D PREPROCESSOR_DEFINITION]... [-M manifest/to/process/manifest.json]... [-H html/to/process.html]... [-J js/to/process.js]... [--help|-h] [--output-dir=./build] [--write-js-deps] [--write-html-deps] [--output=files-to-copy|--output=amalgamate-js:js/to/process.js]\n", + printf "USAGE: %s compute_scripts.awk -- [-D PREPROCESSOR_DEFINITION]... [-M manifest/to/process/manifest.json]... [-H html/to/process.html]... [-J js/to/process.js]... 
[--help|-h] [--output-dir=./build] [--write-js-deps] [--write-html-deps] [--output=files-to-copy|--output=amalgamate-js:js/to/process.js[:additional-code]]\n", ARGV[0] > "/dev/stderr" } @@ -580,13 +592,16 @@ BEGIN { option_arg_patterns["J"] = path_re } -function main(i, path, letter, dir, max_line_nr, js_deps, js_deps_count) { +function main(i, path, letter, dir, max_line_nr, js_deps, js_deps_count, + amalgamation_additional_code) { output_dir = "./build" write_js_deps = false write_html_deps = false output = "" js_to_amalgamate = "" + delete amalgamation_additional_lines[0] + additional_lines_count = 0 delete main_js_lines[0] delete manifests_to_process[0] @@ -629,13 +644,21 @@ function main(i, path, letter, dir, max_line_nr, js_deps, js_deps_count) { output = "files-to-copy" } else if (ARGV[i] ~ /^--output=amalgamate-js:/) { output = "amalgamate-js" - js_to_amalgamate = ARGV[i] - sub(/^--output=amalgamate-js:/, "", js_to_amalgamate) + amalgamation_additional_code = ARGV[i] + sub(/^--output=amalgamate-js:/, "", amalgamation_additional_code) + js_to_amalgamate = amalgamation_additional_code + sub(/:.*$/, "", js_to_amalgamate) if (js_to_amalgamate !~ path_re) { printf "ERROR: amalgamate-js path does not match '%s': %s\n", path_re, js_to_amalgamate > "/dev/stderr" return 1 } + sub(/^[^:]+:?/, "", amalgamation_additional_code) + if (amalgamation_additional_code) { + additional_lines_count = split(amalgamation_additional_code, + amalgamation_additional_lines, + "\n") + } } else { printf "ERROR: Unknown option '%s'\n", ARGV[i] > "/dev/stderr" print_usage() diff --git a/test/script_loader.py b/test/script_loader.py index edf8143..f66f9ae 100644 --- a/test/script_loader.py +++ b/test/script_loader.py @@ -43,10 +43,12 @@ def make_relative_path(path): script_cache = {} -def load_script(path): +def load_script(path, code_to_add=None): """ `path` is a .js file path in Haketilo sources. It may be absolute or - specified relative to Haketilo's project directory. 
+ specified relative to Haketilo's project directory. `code_to_add` is + optional code to be appended to the end of the main file being imported. + It can contain directives like `#IMPORT`. Return a string containing script from `path` together with all other scripts it depends on. Dependencies are wrapped in the same way Haketilo's @@ -57,13 +59,15 @@ def load_script(path): a dependency to be substituted by a mocked value. """ path = make_relative_path(path) - if str(path) in script_cache: - return script_cache[str(path)] + key = f'{str(path)}:{code_to_add}' if code_to_add is not None else str(path) + if key in script_cache: + return script_cache[key] awk = subprocess.run(['awk', '-f', str(awk_script), '--', '-D', 'MOZILLA', - '-D', 'MV2', '--output=amalgamate-js:' + str(path)], + '-D', 'MV2', '-D', 'TEST', '-D', 'UNIT_TEST', + '--output=amalgamate-js:' + key], stdout=subprocess.PIPE, cwd=script_root, check=True) script = awk.stdout.decode() - script_cache[str(path)] = script + script_cache[key] = script return script diff --git a/test/unit/test_indexeddb.py b/test/unit/test_indexeddb.py index af60e1c..476690c 100644 --- a/test/unit/test_indexeddb.py +++ b/test/unit/test_indexeddb.py @@ -85,8 +85,6 @@ def test_haketilodb_save_remove(execute_in_page): # Mock some unwanted imports. 
execute_in_page( '''{ - initial_data = {}; - const broadcast_mock = {}; const nop = () => {}; for (const key in broadcast) diff --git a/test/unit/test_patterns_query_manager.py b/test/unit/test_patterns_query_manager.py new file mode 100644 index 0000000..8ae7c28 --- /dev/null +++ b/test/unit/test_patterns_query_manager.py @@ -0,0 +1,236 @@ +# SPDX-License-Identifier: CC0-1.0 + +""" +Haketilo unit tests - building pattern tree and putting it in a content script +""" + +# This file is part of Haketilo +# +# Copyright (C) 2021, Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the CC0 1.0 Universal License as published by +# the Creative Commons Corporation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# CC0 1.0 Universal License for more details. + +import pytest +import re +import json +from selenium.webdriver.support.ui import WebDriverWait + +from ..script_loader import load_script + +def simple_sample_mapping(patterns, fruit): + if type(patterns) is list: + payloads = dict([(p, {'identifier': fruit}) for p in patterns]) + else: + payloads = {patterns: {'identifier': fruit}} + return { + 'source_copyright': [], + 'type': 'mapping', + 'identifier': f'inject-{fruit}', + 'payloads': payloads + } + +content_script_re = re.compile(r'this.haketilo_pattern_tree = (.*);') +def extract_tree_data(content_script_text): + return json.loads(content_script_re.search(content_script_text)[1]) + +# Fields that are not relevant for testing are omitted from these mapping +# definitions. 
+sample_mappings = [simple_sample_mapping(pats, fruit) for pats, fruit in [ + (['https://gotmyowndoma.in/index.html', + 'http://gotmyowndoma.in/index.html'], 'banana'), + (['https://***.gotmyowndoma.in/index.html', + 'https://**.gotmyowndoma.in/index.html', + 'https://*.gotmyowndoma.in/index.html', + 'https://gotmyowndoma.in/index.html'], 'orange'), + ('https://gotmyowndoma.in/index.html/***', 'grape'), + ('http://gotmyowndoma.in/index.html/***', 'melon'), + ('https://gotmyowndoma.in/index.html', 'peach'), + ('https://gotmyowndoma.in/*', 'pear'), + ('https://gotmyowndoma.in/**', 'raspberry'), + ('https://gotmyowndoma.in/***', 'strawberry'), + ('https://***.gotmyowndoma.in/index.html', 'apple'), + ('https://***.gotmyowndoma.in/*', 'avocado'), + ('https://***.gotmyowndoma.in/**', 'papaya'), + ('https://***.gotmyowndoma.in/***', 'kiwi') +]] + +# Even though patterns_query_manager.js is normally meant to run from background +# page, tests can be as well performed running it from extension's bundled page. +@pytest.mark.get_page('https://gotmyowndoma.in') +def test_pqm_tree_building(driver, execute_in_page): + """ + patterns_query_manager.js tracks Haketilo's internal database and builds a + constantly-updated pattern tree based on its contents. Mock the database and + verify tree building works properly. + """ + execute_in_page(load_script('background/patterns_query_manager.js')) + # Mock IndexedDB and build patterns tree. 
+ execute_in_page( + ''' + const initial_mappings = arguments[0] + let mappingchange; + function track_mock(cb) + { + mappingchange = cb; + + return [{}, initial_mappings]; + } + haketilodb.track_mappings = track_mock; + + let last_script; + let unregister_called = 0; + async function register_mock(injection) + { + await new Promise(resolve => setTimeout(resolve, 1)); + last_script = injection.js[0].code; + return {unregister: () => unregister_called++}; + } + browser = {contentScripts: {register: register_mock}}; + + returnval(start()); + ''', + sample_mappings[0:2]) + + found, tree, content_script, deregistrations = execute_in_page( + ''' + returnval([pqt.search(tree, arguments[0]).next().value, + tree, last_script, unregister_called]); + ''', + 'https://gotmyowndoma.in/index.html') + assert found == dict([(m['identifier'], m) for m in sample_mappings[0:2]]) + assert tree == extract_tree_data(content_script) + assert deregistrations == 0 + + def condition_mappings_added(driver): + last_script = execute_in_page('returnval(last_script);') + return all([m['identifier'] in last_script for m in sample_mappings]) + + execute_in_page( + ''' + for (const mapping of arguments[0]) { + mappingchange({ + identifier: mapping.identifier, + new_val: mapping + }); + } + ''', + sample_mappings[2:]) + WebDriverWait(driver, 10).until(condition_mappings_added) + + odd = [m['identifier'] for i, m in enumerate(sample_mappings) if i % 2] + even = [m['identifier'] for i, m in enumerate(sample_mappings) if 1 - i % 2] + + def condition_odd_removed(driver): + last_script = execute_in_page('returnval(last_script);') + return all([id not in last_script for id in odd]) + + def condition_all_removed(driver): + content_script = execute_in_page('returnval(last_script);') + return extract_tree_data(content_script) == {} + + execute_in_page( + ''' + arguments[0].forEach(identifier => mappingchange({identifier})); + ''', + odd) + + WebDriverWait(driver, 10).until(condition_odd_removed) + + 
execute_in_page( + ''' + arguments[0].forEach(identifier => mappingchange({identifier})); + ''', + even) + + WebDriverWait(driver, 10).until(condition_all_removed) + +content_js = ''' +let already_run = false; +this.haketilo_content_script_main = function() { + if (already_run) + return; + already_run = true; + document.documentElement.innerHTML = "
"; + document.getElementById("tree-json").innerText = + JSON.stringify(this.haketilo_pattern_tree); +} +if (this.haketilo_pattern_tree !== undefined) + this.haketilo_content_script_main(); +''' + +def background_js(): + pqm_js = load_script('background/patterns_query_manager.js', + "#IMPORT background/broadcast_broker.js") + return pqm_js + '; broadcast_broker.start(); start();' + +@pytest.mark.ext_data({ + 'content_script': content_js, + 'background_script': background_js +}) +@pytest.mark.usefixtures('webextension') +def test_pqm_script_injection(driver, execute_in_page): + # Let's open a normal page in a second window. Window 0 will be used to make + # changed to IndexedDB and window 1 to test the working of content scripts. + driver.execute_script('window.open("about:blank", "_blank");') + windows = [*driver.window_handles] + assert len(windows) == 2 + + def run_content_script(): + driver.switch_to.window(windows[1]) + driver.get('https://gotmyowndoma.in/index.html') + windows[1] = driver.window_handles[1] + return driver.execute_script( + ''' + return (document.getElementById("tree-json") || {}).innerText; + ''') + + for attempt in range(10): + json_txt = run_content_script() + if json.loads(json_txt) == {}: + break; + assert attempt != 9 + + driver.switch_to.window(windows[0]) + execute_in_page(load_script('common/indexeddb.js')) + + sample_data = { + 'mappings': dict([(sm['identifier'], {'1.0': sm}) + for sm in sample_mappings]), + 'resources': {}, + 'files': {} + } + execute_in_page('returnval(save_items(arguments[0]));', sample_data) + + for attempt in range(10): + tree_json = run_content_script() + json.loads(tree_json) + if all([m['identifier'] in tree_json for m in sample_mappings]): + break + assert attempt != 9 + + driver.switch_to.window(windows[0]) + execute_in_page( + '''{ + const identifiers = arguments[0]; + async function remove_items() + { + const ctx = await start_items_transaction(["mappings"], {}); + for (const id of identifiers) + await 
remove_mapping(id, ctx); + await finalize_items_transaction(ctx); + } + returnval(remove_items()); + }''', + [sm['identifier'] for sm in sample_mappings]) + + for attempt in range(10): + if json.loads(run_content_script()) == {}: + break + assert attempt != 9 -- cgit v1.2.3