diff options
-rw-r--r-- | common/patterns.js | 48 | ||||
-rw-r--r-- | compute_scripts.awk | 14 | ||||
-rw-r--r-- | copyright | 5 | ||||
-rw-r--r-- | test/__init__.py | 1 | ||||
-rwxr-xr-x | test/profiles.py | 5 | ||||
-rw-r--r-- | test/script_loader.py | 21 | ||||
-rw-r--r-- | test/unit/__init__.py | 2 | ||||
-rw-r--r-- | test/unit/conftest.py | 109 | ||||
-rw-r--r-- | test/unit/test_basic.py (renamed from test/test_unit.py) | 30 | ||||
-rw-r--r-- | test/unit/test_patterns.py | 91 |
10 files changed, 271 insertions, 55 deletions
diff --git a/common/patterns.js b/common/patterns.js index 625be05..635b128 100644 --- a/common/patterns.js +++ b/common/patterns.js @@ -7,15 +7,17 @@ * Redistribution terms are gathered in the `copyright' file. */ -const MAX_URL_PATH_LEN = 12; -const MAX_URL_PATH_CHARS = 255; -const MAX_DOMAIN_LEN = 7; -const MAX_DOMAIN_CHARS = 100; +const MAX = { + URL_PATH_LEN: 12, + URL_PATH_CHARS: 255, + DOMAIN_LEN: 7, + DOMAIN_CHARS: 100 +}; const proto_regex = /^(\w+):\/\/(.*)$/; const user_re = "[^/?#@]+@" -const domain_re = "[^/?#]+"; +const domain_re = "[.a-zA-Z0-9-]+"; const path_re = "[^?#]*"; const query_re = "\\??[^#]*"; @@ -25,11 +27,17 @@ const file_regex = new RegExp(`^(${path_re}).*`); const ftp_regex = new RegExp(`^(${user_re})?(${domain_re})(${path_re}).*`); -function deconstruct_url(url) +function deconstruct_url(url, use_limits=true) { + const max = MAX; + if (!use_limits) { + for (key in MAX) + max[key] = Infinity; + } + const proto_match = proto_regex.exec(url); if (proto_match === null) - return undefined; + throw `bad url '${url}'`; const deco = {proto: proto_match[1]}; @@ -37,20 +45,22 @@ function deconstruct_url(url) deco.path = file_regex.exec(proto_match[2])[1]; } else if (deco.proto === "ftp") { [deco.domain, deco.path] = ftp_regex.exec(proto_match[2]).slice(2, 4); - } else { + } else if (deco.proto === "http" || deco.proto === "https") { const http_match = http_regex.exec(proto_match[2]); if (!http_match) return undefined; [deco.domain, deco.path, deco.query] = http_match.slice(1, 4); + deco.domain = deco.domain.toLowerCase(); + } else { + throw `unsupported protocol in url '${url}'`; } - const leading_dash = deco.path[0] === "/"; deco.trailing_dash = deco.path[deco.path.length - 1] === "/"; if (deco.domain) { - if (deco.domain.length > MAX_DOMAIN_CHARS) { + if (deco.domain.length > max.DOMAIN_CHARS) { const idx = deco.domain.indexOf(".", deco.domain.length - - MAX_DOMAIN_CHARS); + max.DOMAIN_CHARS); if (idx === -1) deco.domain = []; else @@ -59,7 +69,7 @@ function deconstruct_url(url) deco.domain_truncated = true; } - if (deco.path.length > MAX_URL_PATH_CHARS) { + if (deco.path.length > max.URL_PATH_CHARS) { deco.path = deco.path.substring(0, deco.path.lastIndexOf("/")); deco.path_truncated = true; } @@ -67,16 +77,14 @@ function deconstruct_url(url) if (typeof deco.domain === "string") { deco.domain = deco.domain.split("."); - if (deco.domain.splice(0, deco.domain.length - MAX_DOMAIN_LEN).length + if (deco.domain.splice(0, deco.domain.length - max.DOMAIN_LEN).length > 0) deco.domain_truncated = true; } deco.path = deco.path.split("/").filter(s => s !== ""); - if (deco.domain && deco.path.splice(MAX_URL_PATH_LEN).length > 0) + if (deco.domain && deco.path.splice(max.URL_PATH_LEN).length > 0) deco.path_truncated = true; - if (leading_dash || deco.path.length === 0) - deco.path.unshift(""); return deco; } @@ -98,13 +106,14 @@ function* each_domain_pattern(deco) function* each_path_pattern(deco) { - for (let slice = deco.path.length; slice > 0; slice--) { - const path_part = deco.path.slice(0, slice).join("/"); + for (let slice = deco.path.length; slice >= 0; slice--) { + const path_part = ["", ...deco.path.slice(0, slice)].join("/"); const path_wildcards = []; if (slice === deco.path.length && !deco.path_truncated) { if (deco.trailing_dash) yield path_part + "/"; - yield path_part; + if (slice > 0 || deco.proto !== "file") + yield path_part; } if (slice === deco.path.length - 1 && !deco.path_truncated && deco.path[slice] !== "*") @@ -137,5 +146,6 @@ function* each_url_pattern(url) /* * EXPORTS_START * EXPORT each_url_pattern + * EXPORT deconstruct_url * EXPORTS_END */ diff --git a/compute_scripts.awk b/compute_scripts.awk index 2bad3c5..1f3b11e 100644 --- a/compute_scripts.awk +++ b/compute_scripts.awk @@ -105,8 +105,7 @@ function print_exports_code(filename, i, count, export_name) { } } -function wrap_file(filename) { - print "\"use strict\";\n\n({fun: (function() {\n" +function partially_wrap_file(filename) { print_imports_code(filename) printf "\n\n" @@ -114,6 +113,13 @@ function wrap_file(filename) { printf "\n\n" print_exports_code(filename) +} + +function wrap_file(filename) { + print "\"use strict\";\n\n({fun: (function() {\n" + + partially_wrap_file(filename) + print "\n})}).fun();" } @@ -151,7 +157,7 @@ function compute_dependencies(filename, i, count, import_name, next_file) { } function print_usage() { - printf "usage: %2 compute_scripts.awk script_dependencies|wrapped_code FILENAME[...]\n", + printf "usage: %2 compute_scripts.awk script_dependencies|wrapped_code|partially_wrapped_code FILENAME[...]\n", ARGV[0] > "/dev/stderr" exit 1 } @@ -189,6 +195,8 @@ BEGIN { print("exports_init.js") if (compute_dependencies(root_filename) > 0) exit 1 + } else if (operation == "partially_wrapped_code") { + partially_wrap_file(root_filename) } else if (operation == "wrapped_code") { wrap_file(root_filename) } else { @@ -75,11 +75,12 @@ License: AGPL-3+ Comment: Wojtek Kosior promises not to sue even in case of violations of the license. -Files: test/__init__.py test/test_unit.py test/default_profiles/icecat_empty/extensions.json +Files: test/__init__.py test/unit/* + test/default_profiles/icecat_empty/extensions.json Copyright: 2021 Wojtek Kosior <koszko@koszko.org> License: CC0 -Files: test/profiles.py test/script_loader.py +Files: test/profiles.py test/script_loader.py test/unit/conftest.py Copyright: 2021 Wojtek Kosior <koszko@koszko.org> License: GPL-3+ Comment: Wojtek Kosior promises not to sue even in case of violations diff --git a/test/__init__.py b/test/__init__.py index 19b869e..2b351bb 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -1 +1,2 @@ # SPDX-License-Identifier: CC0-1.0 +# Copyright (C) 2021 Wojtek Kosior diff --git a/test/profiles.py b/test/profiles.py index a833097..d6a4efc 100755 --- a/test/profiles.py +++ b/test/profiles.py @@ -43,11 +43,15 @@ def set_profile_proxy(profile, proxy_host, proxy_port): profile.set_preference(f'network.proxy.backup.{proto}', '') profile.set_preference(f'network.proxy.backup.{proto}_port', 0) +def set_profile_console_logging(profile): + profile.set_preference('devtools.console.stdout.content', True) + def firefox_safe_mode(firefox_binary=default_firefox_binary, proxy_host=default_proxy_host, proxy_port=default_proxy_port): profile = webdriver.FirefoxProfile() set_profile_proxy(profile, proxy_host, proxy_port) + set_profile_console_logging(profile) options = Options() options.add_argument('--safe-mode') @@ -61,6 +65,7 @@ def firefox_with_profile(firefox_binary=default_firefox_binary, proxy_port=default_proxy_port): profile = webdriver.FirefoxProfile(profile_dir) set_profile_proxy(profile, proxy_host, proxy_port) + set_profile_console_logging(profile) return webdriver.Firefox(firefox_profile=profile, firefox_binary=firefox_binary) diff --git a/test/script_loader.py b/test/script_loader.py index 22196c3..15269c7 100644 --- a/test/script_loader.py +++ b/test/script_loader.py @@ -49,14 +49,15 @@ def available_scripts(directory): if script_name_regex.match(script.name): yield script -def get_wrapped_script(script_path): +def wrapped_script(script_path, wrap_partially=True): if script_path == 'exports_init.js': with open(script_root / 'MOZILLA_exports_init.js') as script: return script.read() - awk = subprocess.run(['awk', '-f', str(awk_script), 'wrapped_code', - str(script_path)], - stdout=subprocess.PIPE, cwd=script_root, check=True) + command = 'partially_wrapped_code' if wrap_partially else 'wrapped_code' + awk_command = ['awk', '-f', str(awk_script), command, str(script_path)] + awk = subprocess.run(awk_command, stdout=subprocess.PIPE, cwd=script_root, + check=True) return awk.stdout.decode() @@ -67,8 +68,10 @@ def load_script(path, import_dirs): project directory. Return a string containing script from `path` together with all other - scripts it depends on, wrapped in the same way Haketilo's build system wraps - them, with imports properly satisfied. + scripts it depends. Dependencies are wrapped in the same way Haketilo's + build system wraps them, with imports properly satisfied. The main script + being loaded is wrapped partially - it also has its imports satisfied, but + its code is not placed inside an anonymous function, so the """ path = make_relative_path(path) @@ -79,6 +82,8 @@ def load_script(path, import_dirs): str(path), *[str(s) for s in available]], stdout=subprocess.PIPE, cwd=script_root, check=True) - output = awk.stdout.decode() + to_load = awk.stdout.decode().split() + texts = [wrapped_script(path, wrap_partially=(i == len(to_load) - 1)) + for i, path in enumerate(to_load)] - return '\n'.join([get_wrapped_script(path) for path in output.split()]) + return '\n'.join(texts) diff --git a/test/unit/__init__.py b/test/unit/__init__.py new file mode 100644 index 0000000..2b351bb --- /dev/null +++ b/test/unit/__init__.py @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: CC0-1.0 +# Copyright (C) 2021 Wojtek Kosior diff --git a/test/unit/conftest.py b/test/unit/conftest.py new file mode 100644 index 0000000..6877b7a --- /dev/null +++ b/test/unit/conftest.py @@ -0,0 +1,109 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +""" +Common fixtures for Haketilo unit tests +""" + +# This file is part of Haketilo. +# +# Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this file's +# license. Although I request that you do not make use this code in a +# proprietary program, I am not going to enforce this in court. + +import pytest + +from ..profiles import firefox_safe_mode +from ..server import do_an_internet +from ..script_loader import load_script + +@pytest.fixture(scope="package") +def proxy(): + httpd = do_an_internet() + yield httpd + httpd.shutdown() + +@pytest.fixture(scope="package") +def driver(proxy): + with firefox_safe_mode() as driver: + yield driver + driver.quit() + +script_injecting_script = '''\ +/* + * Selenium by default executes scripts in some weird one-time context. We want + * separately-loaded scripts to be able to access global variables defined + * before, including those declared with `const` or `let`. To achieve that, we + * run our scripts by injecting them into the page inside a <script> tag. We use + * custom properties of the `window` object to communicate with injected code. + */ + +const script_elem = document.createElement('script'); +script_elem.textContent = arguments[0]; + +delete window.haketilo_selenium_return_value; +delete window.haketilo_selenium_exception; +window.returnval = (val => window.haketilo_selenium_return_value = val); +window.arguments = arguments[1]; + +document.body.append(script_elem); + +/* + * To ease debugging, we want this script to forward signal all exceptions from + * the injectee. + */ +try { + if (window.haketilo_selenium_exception !== false) + throw 'Error in injected script! Check your geckodriver.log!'; +} finally { + script_elem.remove(); +} + +return window.haketilo_selenium_return_value; +''' + +def _execute_in_page_context(driver, script, *args): + script = script + '\n;\nwindow.haketilo_selenium_exception = false;' + try: + return driver.execute_script(script_injecting_script, script, args) + except Exception as e: + import sys + lines = enumerate(script.split('\n'), 1) + for err_info in [('Failing script\n',), *lines]: + print(*err_info, file=sys.stderr) + + raise e from None + +@pytest.fixture(scope="package") +def execute_in_page(driver): + def do_execute(script, *args, **kwargs): + if 'page' in kwargs: + driver.get(kwargs['page']) + + return _execute_in_page_context(driver, script, args) + + yield do_execute + +@pytest.fixture(scope="package") +def load_into_page(driver): + def do_load(path, import_dirs, *args, **kwargs): + if 'page' in kwargs: + driver.get(kwargs['page']) + + _execute_in_page_context(driver, load_script(path, import_dirs), args) + + yield do_load diff --git a/test/test_unit.py b/test/unit/test_basic.py index ce46f88..cbe5c8c 100644 --- a/test/test_unit.py +++ b/test/unit/test_basic.py @@ -1,12 +1,11 @@ # SPDX-License-Identifier: CC0-1.0 """ -Haketilo unit tests +Haketilo unit tests - base """ # This file is part of Haketilo # -# Copyright (C) 2021, jahoti # Copyright (C) 2021, Wojtek Kosior # # This program is free software: you can redistribute it and/or modify @@ -19,23 +18,8 @@ Haketilo unit tests # CC0 1.0 Universal License for more details. import pytest -from .profiles import firefox_safe_mode -from .server import do_an_internet -from .script_loader import load_script -@pytest.fixture(scope="module") -def proxy(): - httpd = do_an_internet() - yield httpd - httpd.shutdown() - -@pytest.fixture(scope="module") -def driver(proxy): - with firefox_safe_mode() as driver: - yield driver - driver.quit() - -def test_proxy(driver): +def test_driver(driver): """ A trivial test case that verifies mocked web pages served by proxy can be accessed by the browser driven. @@ -46,12 +30,12 @@ def test_proxy(driver): title = driver.execute_script('return arguments[0].innerText;', element) assert "Schrodinger's Document" in title -def test_script_loader(driver): +def test_script_loader(execute_in_page, load_into_page): """ A trivial test case that verifies Haketilo's .js files can be properly loaded into a test page together with their dependencies. """ - driver.get('http://gotmyowndoma.in') - driver.execute_script(load_script('common/stored_types.js', ['common'])) - get_var_prefix = 'return window.haketilo_exports.TYPE_PREFIX.VAR;' - assert driver.execute_script(get_var_prefix) == '_' + load_into_page('common/stored_types.js', ['common'], + page='https://gotmyowndoma.in') + + assert execute_in_page('returnval(TYPE_PREFIX.VAR);') == '_' diff --git a/test/unit/test_patterns.py b/test/unit/test_patterns.py new file mode 100644 index 0000000..4162fc0 --- /dev/null +++ b/test/unit/test_patterns.py @@ -0,0 +1,91 @@ +# SPDX-License-Identifier: CC0-1.0 + +""" +Haketilo unit tests - URL patterns +""" + +# This file is part of Haketilo +# +# Copyright (C) 2021, Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the CC0 1.0 Universal License as published by +# the Creative Commons Corporation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# CC0 1.0 Universal License for more details. + +import pytest + +from ..script_loader import load_script + +@pytest.fixture(scope="session") +def patterns_code(): + yield load_script('common/patterns.js', ['common']) + +def test_regexes(execute_in_page, patterns_code): + """ + patterns.js contains regexes used for URL parsing. + Verify they work properly. + """ + execute_in_page(patterns_code, page='https://gotmyowndoma.in') + + valid_url = 'https://example.com/a/b?ver=1.2.3#heading2' + valid_url_rest = 'example.com/a/b?ver=1.2.3#heading2' + + # Test matching of URL protocol. + match = execute_in_page('returnval(proto_regex.exec(arguments[0]));', + valid_url) + assert match + assert match[1] == 'https' + assert match[2] == valid_url_rest + + match = execute_in_page('returnval(proto_regex.exec(arguments[0]));', + '://bad-url.missing/protocol') + assert match is None + + # Test matching of http(s) URLs. + match = execute_in_page('returnval(http_regex.exec(arguments[0]));', + valid_url_rest) + assert match + assert match[1] == 'example.com' + assert match[2] == '/a/b' + assert match[3] == '?ver=1.2.3' + + match = execute_in_page('returnval(http_regex.exec(arguments[0]));', + 'another.example.com') + assert match + assert match[1] == 'another.example.com' + assert match[2] == '' + assert match[3] == '' + + match = execute_in_page('returnval(http_regex.exec(arguments[0]));', + '/bad/http/example') + assert match == None + + # Test matching of file URLs. + match = execute_in_page('returnval(file_regex.exec(arguments[0]));', + '/good/file/example') + assert match + assert match[1] == '/good/file/example' + + # Test matching of ftp URLs. + match = execute_in_page('returnval(ftp_regex.exec(arguments[0]));', + 'example.com/a/b#heading2') + assert match + assert match[1] is None + assert match[2] == 'example.com' + assert match[3] == '/a/b' + + match = execute_in_page('returnval(ftp_regex.exec(arguments[0]));', + 'some_user@localhost') + assert match + assert match[1] == 'some_user@' + assert match[2] == 'localhost' + assert match[3] == '' + + match = execute_in_page('returnval(ftp_regex.exec(arguments[0]));', + '@bad.url/') + assert match is None |