aboutsummaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
Diffstat (limited to 'test')
-rw-r--r--test/__init__.py2
-rw-r--r--test/__main__.py59
-rw-r--r--test/data/pages/gotmyowndomain.html35
-rw-r--r--test/data/pages/gotmyowndomain_https.html35
-rw-r--r--test/default_profiles/icecat_empty/extensions.json1
-rw-r--r--test/misc_constants.py70
-rwxr-xr-xtest/profiles.py102
-rw-r--r--test/proxy_core.py141
-rw-r--r--test/script_loader.py89
-rwxr-xr-xtest/server.py108
-rw-r--r--test/unit/__init__.py2
-rw-r--r--test/unit/conftest.py115
-rw-r--r--test/unit/test_basic.py41
-rw-r--r--test/unit/test_patterns.py154
-rw-r--r--test/unit/test_patterns_query_tree.py475
-rw-r--r--test/world_wide_library.py39
16 files changed, 1468 insertions, 0 deletions
diff --git a/test/__init__.py b/test/__init__.py
new file mode 100644
index 0000000..2b351bb
--- /dev/null
+++ b/test/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: CC0-1.0
+# Copyright (C) 2021 Wojtek Kosior
diff --git a/test/__main__.py b/test/__main__.py
new file mode 100644
index 0000000..c3437ea
--- /dev/null
+++ b/test/__main__.py
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+"""
+Run a Firefox-type browser with WebDriver attached and Python console open
+"""
+
+# This file is part of Haketilo.
+#
+# Copyright (C) 2021 jahoti <jahoti@tilde.team>
+# Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+import sys
+import time
+import code
+
+from .server import do_an_internet
+from .misc_constants import *
+from .profiles import firefox_safe_mode
+
+def fail(msg, error_code):
+    """
+    Print `msg` as an error followed by a usage hint, then terminate the
+    process with `error_code` as the exit status.
+    """
+    program_name = sys.argv[0]
+    print('Error:', msg)
+    print('Usage:', program_name, '[certificates_directory] [proxy_port]')
+    sys.exit(error_code)
+
+# Both command-line arguments are optional; the defaults come from
+# misc_constants.
+certdir = Path(sys.argv[1]).resolve() if len(sys.argv) > 1 else default_cert_dir
+if not certdir.is_dir():
+    fail('selected certificate directory does not exist.', 2)
+
+port = sys.argv[2] if len(sys.argv) > 2 else str(default_proxy_port)
+# isdecimal() only accepts strings that int() is able to parse, unlike
+# isnumeric() which also lets through characters such as superscript digits.
+if not port.isdecimal():
+    fail('port must be an integer.', 3)
+
+httpd = do_an_internet(certdir, int(port))
+try:
+    driver = firefox_safe_mode(proxy_port=int(port))
+    try:
+        print("You can now control the browser through 'driver' object")
+
+        # globals() is passed so that `driver` and `httpd` are reachable from
+        # the interactive console.
+        code.InteractiveConsole(locals=globals()).interact()
+    finally:
+        # Also reached when the console is left through exit()/SystemExit.
+        driver.quit()
+finally:
+    # The server runs in a separate thread; without the shutdown it would keep
+    # the process alive after an error.
+    httpd.shutdown()
diff --git a/test/data/pages/gotmyowndomain.html b/test/data/pages/gotmyowndomain.html
new file mode 100644
index 0000000..42c26cc
--- /dev/null
+++ b/test/data/pages/gotmyowndomain.html
@@ -0,0 +1,35 @@
+<!DOCTYPE html>
+<!--
+ SPDX-License-Identifier: AGPL-3.0-or-later
+
+ Sample testing page
+
+ This file is part of Haketilo.
+
+ Copyright (C) 2021 jahoti <jahoti@tilde.team>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as
+ published by the Free Software Foundation, either version 3 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ -->
+<html>
+ <head>
+ <meta name=charset value="latin1">
+ <title>Schrodinger's Document</title>
+ </head>
+ <body>
+ A nice, simple page for testing.
+ <script>
+ document.write('<p><b>Or so you thought...</b></p>');
+ </script>
+ </body>
+</html>
diff --git a/test/data/pages/gotmyowndomain_https.html b/test/data/pages/gotmyowndomain_https.html
new file mode 100644
index 0000000..95c0be4
--- /dev/null
+++ b/test/data/pages/gotmyowndomain_https.html
@@ -0,0 +1,35 @@
+<!DOCTYPE html>
+<!--
+ SPDX-License-Identifier: AGPL-3.0-or-later
+
+ Sample testing page to serve over HTTPS
+
+ This file is part of Haketilo.
+
+ Copyright (C) 2021 jahoti <jahoti@tilde.team>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as
+ published by the Free Software Foundation, either version 3 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ -->
+<html>
+ <head>
+ <meta name=charset value="latin1">
+ <title>Schrodinger's Document</title>
+ </head>
+ <body>
+ A nice, simple page for testing (using HTTPS).
+ <script>
+ document.write('<p><b>Or so you thought...</b></p>');
+ </script>
+ </body>
+</html>
diff --git a/test/default_profiles/icecat_empty/extensions.json b/test/default_profiles/icecat_empty/extensions.json
new file mode 100644
index 0000000..5f74ff3
--- /dev/null
+++ b/test/default_profiles/icecat_empty/extensions.json
@@ -0,0 +1 @@
+{"schemaVersion":25,"addons":[{"id":"jid1-KtlZuoiikVfFew@jetpack","location":"app-global","userDisabled":true,"path":"/usr/lib/icecat/browser/extensions/jid1-KtlZuoiikVfFew@jetpack"},{"id":"uBlock0@raymondhill.net","location":"app-global","userDisabled":true,"path":"/usr/lib/icecat/browser/extensions/uBlock0@raymondhill.net.xpi"},{"id":"SubmitMe@0xbeef.coffee","location":"app-global","userDisabled":true,"path":"/usr/lib/icecat/browser/extensions/SubmitMe@0xbeef.coffee"},{"id":"FreeUSPS@0xbeef.coffee","location":"app-global","userDisabled":true,"path":"/usr/lib/icecat/browser/extensions/FreeUSPS@0xbeef.coffee"},{"id":"tortm-browser-button@jeremybenthum","location":"app-global","userDisabled":true,"path":"/usr/lib/icecat/browser/extensions/tortm-browser-button@jeremybenthum"},{"id":"tprb.addon@searxes.danwin1210.me","location":"app-global","userDisabled":true,"path":"/usr/lib/icecat/browser/extensions/tprb.addon@searxes.danwin1210.me"},{"id":"SimpleSumOfUs@0xbeef.coffee","location":"app-global","userDisabled":true,"path":"/usr/lib/icecat/browser/extensions/SimpleSumOfUs@0xbeef.coffee"}]} \ No newline at end of file
diff --git a/test/misc_constants.py b/test/misc_constants.py
new file mode 100644
index 0000000..22432a6
--- /dev/null
+++ b/test/misc_constants.py
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+"""
+Miscellaneous data that were found useful
+"""
+
+# This file is part of Haketilo.
+#
+# Copyright (C) 2021 jahoti <jahoti@tilde.team>
+# Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+from pathlib import Path
+
+# Directory containing this file; the other default paths are derived from it.
+here = Path(__file__).resolve().parent
+
+default_firefox_binary = '/usr/lib/icecat/icecat'
+# The browser might be loading some globally-installed add-ons by default. They
+# could interfere with the tests, so we'll disable all of them.
+# Note: the bundled clean profile lives under `default_profiles` (plural).
+default_clean_profile_dir = here / 'default_profiles' / 'icecat_empty'
+
+default_proxy_host = '127.0.0.1'
+default_proxy_port = 1337
+
+default_cert_dir = here / 'certs'
+
+# Maps a file name extension (without the leading dot) to the MIME type that
+# should be reported for files with that extension.
+mime_types = {
+ "7z": "application/x-7z-compressed", "oga": "audio/ogg",
+ "abw": "application/x-abiword", "ogv": "video/ogg",
+ "arc": "application/x-freearc", "ogx": "application/ogg",
+ "bin": "application/octet-stream", "opus": "audio/opus",
+ "bz": "application/x-bzip", "otf": "font/otf",
+ "bz2": "application/x-bzip2", "pdf": "application/pdf",
+ "css": "text/css", "png": "image/png",
+ "csv": "text/csv", "sh": "application/x-sh",
+ "gif": "image/gif", "svg": "image/svg+xml",
+ "gz": "application/gzip", "tar": "application/x-tar",
+ "htm": "text/html", "ts": "video/mp2t",
+ "html": "text/html", "ttf": "font/ttf",
+ "ico": "image/vnd.microsoft.icon", "txt": "text/plain",
+ "js": "text/javascript", "wav": "audio/wav",
+ "jpeg": "image/jpeg", "weba": "audio/webm",
+ "jpg": "image/jpeg", "webm": "video/webm",
+ "json": "application/json", "woff": "font/woff",
+ "mjs": "text/javascript", "woff2": "font/woff2",
+ "mp3": "audio/mpeg", "xhtml": "application/xhtml+xml",
+ "mp4": "video/mp4", "zip": "application/zip",
+ "mpeg": "video/mpeg",
+ "odp": "application/vnd.oasis.opendocument.presentation",
+ "ods": "application/vnd.oasis.opendocument.spreadsheet",
+ "odt": "application/vnd.oasis.opendocument.text",
+ "xml": "application/xml" # text/xml if readable from casual users
+}
diff --git a/test/profiles.py b/test/profiles.py
new file mode 100755
index 0000000..1530aea
--- /dev/null
+++ b/test/profiles.py
@@ -0,0 +1,102 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""
+Browser profiles and Selenium driver initialization
+"""
+
+# This file is part of Haketilo.
+#
+# Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this file's
+# license. Although I request that you do not make use this code in a
+# proprietary program, I am not going to enforce this in court.
+
+from selenium import webdriver
+from selenium.webdriver.firefox.options import Options
+import time
+
+from .misc_constants import *
+
+class HaketiloFirefox(webdriver.Firefox):
+    """
+    selenium.webdriver.Firefox subclass that keeps a `loaded_scripts` list on
+    the instance. The list starts out empty and is emptied again on every
+    call to `get()`.
+    """
+    def __init__(self, *args, **kwargs):
+        """Accept the same arguments as selenium.webdriver.Firefox."""
+        super().__init__(*args, **kwargs)
+        self.reset_loaded_scripts()
+
+    def reset_loaded_scripts(self):
+        """Replace `loaded_scripts` with a new, empty list."""
+        self.loaded_scripts = list()
+
+    def get(self, *args, **kwargs):
+        """Forget the scripts recorded so far, then navigate as usual."""
+        self.reset_loaded_scripts()
+        super().get(*args, **kwargs)
+
+def set_profile_proxy(profile, proxy_host, proxy_port):
+    """
+    Set preferences of the Firefox profile `profile` so that the HTTP proxy at
+    `proxy_host`:`proxy_port` is used for all protocols. The profile object
+    is modified in place; nothing is returned.
+    """
+    general_prefs = (
+        # proxy type 1 designates "manual"
+        ('network.proxy.type', 1),
+        ('network.proxy.no_proxies_on', ''),
+        ('network.proxy.share_proxy_settings', True),
+    )
+    for pref_name, pref_value in general_prefs:
+        profile.set_preference(pref_name, pref_value)
+
+    for proto in ('http', 'ftp', 'socks', 'ssl'):
+        primary = f'network.proxy.{proto}'
+        backup = f'network.proxy.backup.{proto}'
+
+        profile.set_preference(primary, proxy_host)
+        profile.set_preference(primary + '_port', proxy_port)
+        profile.set_preference(backup, '')
+        profile.set_preference(backup + '_port', 0)
+
+def set_profile_console_logging(profile):
+    """Turn on the `devtools.console.stdout.content` preference in `profile`."""
+    pref_name = 'devtools.console.stdout.content'
+    profile.set_preference(pref_name, True)
+
+def firefox_safe_mode(firefox_binary=default_firefox_binary,
+                      proxy_host=default_proxy_host,
+                      proxy_port=default_proxy_port):
+    """
+    Launch a selenium-controlled Firefox in safe mode and return its driver.
+
+    A new profile is created for the instance and configured to use the proxy
+    at `proxy_host`:`proxy_port` and to log console output.
+    """
+    safe_mode_options = Options()
+    safe_mode_options.add_argument('--safe-mode')
+
+    fresh_profile = webdriver.FirefoxProfile()
+    set_profile_proxy(fresh_profile, proxy_host, proxy_port)
+    set_profile_console_logging(fresh_profile)
+
+    return HaketiloFirefox(
+        options=safe_mode_options,
+        firefox_profile=fresh_profile,
+        firefox_binary=firefox_binary
+    )
+
+def firefox_with_profile(firefox_binary=default_firefox_binary,
+                         profile_dir=default_clean_profile_dir,
+                         proxy_host=default_proxy_host,
+                         proxy_port=default_proxy_port):
+    """
+    Launch a selenium-controlled Firefox based on the profile stored in
+    `profile_dir` and return its driver.
+
+    `profile_dir` is expected to hold an empty profile (either the default one
+    or the one passed to `configure` script) whose purpose is to make Firefox
+    start with globally-installed extensions disabled.
+    """
+    clean_profile = webdriver.FirefoxProfile(profile_dir)
+    set_profile_proxy(clean_profile, proxy_host, proxy_port)
+    set_profile_console_logging(clean_profile)
+
+    driver_kwargs = {
+        'firefox_profile': clean_profile,
+        'firefox_binary': firefox_binary
+    }
+    return HaketiloFirefox(**driver_kwargs)
diff --git a/test/proxy_core.py b/test/proxy_core.py
new file mode 100644
index 0000000..d31302a
--- /dev/null
+++ b/test/proxy_core.py
@@ -0,0 +1,141 @@
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""
+The core for a "virtual network" proxy.
+"""
+
+# This file is part of Haketilo.
+#
+# Copyright (c) 2015, inaz2
+# Copyright (C) 2021 jahoti <jahoti@tilde.team>
+# Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org>
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of proxy2 nor the names of its contributors may be used to
+# endorse or promote products derived from this software without specific
+# prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this file's
+# license. Although I request that you do not make use this code in a way
+# incompliant with the license, I am not going to enforce this in court.
+
+from pathlib import Path
+import socket, ssl, subprocess, sys, threading
+from http.server import HTTPServer, BaseHTTPRequestHandler
+from socketserver import ThreadingMixIn
+
+# Held while a site certificate is being generated in
+# ProxyRequestHandler.get_cert(), so that only one thread at a time runs the
+# openssl commands.
+lock = threading.Lock()
+
+class ProxyRequestHandler(BaseHTTPRequestHandler):
+    """
+    Handles a network request made to the proxy. Configures SSL encryption when
+    needed.
+    """
+    def __init__(self, *args, **kwargs):
+        """
+        Initialize self. Uses the same arguments as
+        http.server.BaseHTTPRequestHandler's constructor but also expects a
+        `certdir` keyword argument with appropriate path.
+        """
+        # `certdir` must be removed from kwargs and stored before the base
+        # class constructor runs.
+        self.certdir = Path(kwargs.pop('certdir')).resolve()
+        super().__init__(*args, **kwargs)
+
+    def log_error(self, *args, **kwargs):
+        """
+        Like log_error in http.server.BaseHTTPRequestHandler but suppresses
+        "Request timed out: timeout('timed out',)".
+        """
+        # The first positional argument is the message format string; the
+        # timeout exception, if there is one, is among the values that follow
+        # it. Hence all arguments are inspected and not just the first one.
+        if any(isinstance(arg, socket.timeout) for arg in args):
+            return
+
+        super().log_error(*args, **kwargs)
+
+    def get_cert(self, hostname):
+        """
+        If needed, generate a signed x509 certificate for `hostname`. Return
+        paths to certificate's key file and to certificate itself in a tuple.
+
+        The `openssl` command is used for generation; a failing command makes
+        subprocess.CalledProcessError get raised.
+        """
+        root_keyfile = self.certdir / 'rootCA.key'
+        root_certfile = self.certdir / 'rootCA.pem'
+        keyfile = self.certdir / 'site.key'
+        certfile = self.certdir / f'{hostname}.crt'
+        requestfile = self.certdir / f'{hostname}.csr'
+
+        # The lock stops 2 threads from generating the same certificate
+        # simultaneously.
+        with lock:
+            if not certfile.exists():
+                # First create a signing request, then sign it with root CA.
+                subprocess.run([
+                    'openssl', 'req', '-new', '-key', str(keyfile),
+                    '-subj', f'/CN={hostname}', '-out', str(requestfile)
+                ], check=True)
+                subprocess.run([
+                    'openssl', 'x509', '-req', '-in', str(requestfile),
+                    '-CA', str(root_certfile), '-CAkey', str(root_keyfile),
+                    '-CAcreateserial', '-out', str(certfile), '-days', '1024'
+                ], check=True)
+
+        return keyfile, certfile
+
+    def do_CONNECT(self):
+        """Wrap the connection with SSL using on-demand signed certificate."""
+        hostname = self.path.split(':')[0]
+        keyfile, certfile = self.get_cert(hostname)
+
+        self.send_response(200)
+        self.end_headers()
+
+        # The module-level ssl.wrap_socket() is deprecated and absent from
+        # newer Python versions, so an explicit server-side context is used.
+        context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+        context.load_cert_chain(certfile=str(certfile), keyfile=str(keyfile))
+
+        self.connection = context.wrap_socket(self.connection,
+                                              server_side=True)
+        # Subsequent requests are to be read from the encrypted stream.
+        self.rfile = self.connection.makefile('rb', self.rbufsize)
+        self.wfile = self.connection.makefile('wb', self.wbufsize)
+
+        connection_header = self.headers.get('Proxy-Connection', '').lower()
+        self.close_connection = connection_header == 'close'
+
+    def do_GET(self):
+        """
+        Read the request body (if any), make `self.path` an absolute URL and
+        pass the body to `handle_request()`. Used for all supported methods.
+        """
+        content_length = int(self.headers.get('Content-Length', 0))
+        req_body = self.rfile.read(content_length) if content_length else None
+
+        # A path starting with '/' carries no scheme and host; reconstruct
+        # them from the connection type and the Host header.
+        if self.path.startswith('/'):
+            secure = 's' if isinstance(self.connection, ssl.SSLSocket) else ''
+            self.path = f'http{secure}://{self.headers["Host"]}{self.path}'
+
+        self.handle_request(req_body)
+
+    do_OPTIONS = do_DELETE = do_PUT = do_HEAD = do_POST = do_GET
+
+    def handle_request(self, req_body):
+        """Default handler that does nothing. Please override."""
+        pass
+
+
+class ThreadingHTTPServer(ThreadingMixIn, HTTPServer):
+    """The actual proxy server"""
+    address_family, daemon_threads = socket.AF_INET6, True
+
+    def handle_error(self, request, client_address):
+        """
+        Like handle_error in http.server.HTTPServer but suppresses socket/ssl
+        related errors.
+        """
+        cls = sys.exc_info()[0]
+        if cls is None:
+            return super().handle_error(request, client_address)
+
+        # Dropped connections and timeouts are raised as subclasses (e.g.
+        # ConnectionResetError, ssl.SSLEOFError), so an identity comparison
+        # alone would let them through. Plain socket.error is still matched
+        # exactly so that unrelated OS errors keep being reported.
+        suppressed = (ConnectionError, socket.timeout, ssl.SSLError)
+        if cls is socket.error or issubclass(cls, suppressed):
+            return
+
+        return super().handle_error(request, client_address)
diff --git a/test/script_loader.py b/test/script_loader.py
new file mode 100644
index 0000000..15269c7
--- /dev/null
+++ b/test/script_loader.py
@@ -0,0 +1,89 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""
+Loading of parts of Haketilo source for testing in browser
+"""
+
+# This file is part of Haketilo.
+#
+# Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this file's
+# license. Although I request that you do not make use this code in a
+# proprietary program, I am not going to enforce this in court.
+
+from pathlib import Path
+import subprocess, re
+
+from .misc_constants import *
+
+# Parent of the directory this test package lives in, i.e. Haketilo's project
+# directory.
+script_root = here.parent
+# awk script that gets run by the functions below to compute dependencies and
+# wrapped code of scripts.
+awk_script = script_root / 'compute_scripts.awk'
+
+def make_relative_path(path):
+    """
+    Return `path` as a pathlib.Path. An absolute path gets converted to one
+    relative to `script_root`; a relative path is returned as it is.
+    """
+    as_path = Path(path)
+
+    if not as_path.is_absolute():
+        return as_path
+
+    return as_path.relative_to(script_root)
+
+# Matches names of .js files that start with neither '.' nor '#'. Used to
+# ignore hidden files and emacs auto-save files.
+script_name_regex = re.compile(r'^[^.#].*\.js$')
+
+def available_scripts(directory):
+    """
+    Yield paths of .js files found recursively under `directory` (a
+    pathlib.Path), leaving out those rejected by `script_name_regex`.
+    """
+    yield from (candidate for candidate in directory.rglob('*.js')
+                if script_name_regex.match(candidate.name))
+
+def wrapped_script(script_path, wrap_partially=True):
+    """
+    Return, as a string, the code of script `script_path` wrapped by the awk
+    script. `wrap_partially` selects between the awk script's
+    'partially_wrapped_code' and 'wrapped_code' commands.
+
+    'exports_init.js' is a special case: contents of MOZILLA_exports_init.js
+    from the project directory are returned for it without running awk.
+    """
+    if script_path == 'exports_init.js':
+        return (script_root / 'MOZILLA_exports_init.js').read_text()
+
+    if wrap_partially:
+        command = 'partially_wrapped_code'
+    else:
+        command = 'wrapped_code'
+
+    # awk is run from the project directory, so relative script paths work.
+    result = subprocess.run(
+        ['awk', '-f', str(awk_script), command, str(script_path)],
+        stdout=subprocess.PIPE, cwd=script_root, check=True
+    )
+
+    return result.stdout.decode()
+
+def load_script(path, import_dirs):
+    """
+    `path` and `import_dirs` are .js file path and a list of directory paths,
+    respectively. They may be absolute or specified relative to Haketilo's
+    project directory.
+
+    Return a string containing script from `path` together with all other
+    scripts it depends on. Dependencies are wrapped in the same way Haketilo's
+    build system wraps them, with imports properly satisfied. The main script
+    being loaded is wrapped partially - it also has its imports satisfied, but
+    its code is not placed inside an anonymous function, so the names it
+    defines at the top level remain accessible to code executed later.
+    """
+    path = make_relative_path(path)
+
+    import_dirs = [make_relative_path(import_dir) for import_dir in import_dirs]
+    # Directories are searched relative to the project directory (the same
+    # directory awk is run in) instead of the current working directory, so
+    # the result does not depend on where the tests were started from.
+    available = [found.relative_to(script_root)
+                 for import_dir in import_dirs
+                 for found in available_scripts(script_root / import_dir)]
+
+    awk = subprocess.run(['awk', '-f', str(awk_script), 'script_dependencies',
+                          str(path), *[str(s) for s in available]],
+                         stdout=subprocess.PIPE, cwd=script_root, check=True)
+
+    to_load = awk.stdout.decode().split()
+    # Only the last script in awk's output gets wrapped partially.
+    last_index = len(to_load) - 1
+    texts = [wrapped_script(script, wrap_partially=(i == last_index))
+             for i, script in enumerate(to_load)]
+
+    return '\n'.join(texts)
diff --git a/test/server.py b/test/server.py
new file mode 100755
index 0000000..6013955
--- /dev/null
+++ b/test/server.py
@@ -0,0 +1,108 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+"""
+A modular "virtual network" proxy,
+wrapping the classes in proxy_core.py
+"""
+
+# This file is part of Haketilo.
+#
+# Copyright (C) 2021 jahoti <jahoti@tilde.team>
+# Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+from pathlib import Path
+from urllib.parse import parse_qs
+from threading import Thread
+
+from .proxy_core import ProxyRequestHandler, ThreadingHTTPServer
+from .misc_constants import *
+from .world_wide_library import catalog as internet
+
+class RequestHijacker(ProxyRequestHandler):
+    """
+    Request handler that answers requests with responses taken from the
+    `internet` catalog.
+    """
+    def handle_request(self, req_body):
+        """
+        Send a response for the current request. `req_body` is the request
+        body as bytes or None.
+
+        The catalog is looked up by URL with the query string removed. An
+        entry may be a (status_code, headers, body_file) tuple or a function
+        called with the request method, GET parameters and POST parameters. A
+        URL missing from the catalog results in 404 and any exception raised
+        while preparing the response results in 500.
+        """
+        path_components = self.path.split('?', maxsplit=1)
+        path = path_components[0]
+        try:
+            # Response format: (status_code, headers (dict. of strings),
+            #       body as bytes or filename containing body as string)
+            if path in internet:
+                info = internet[path]
+                if type(info) is tuple:
+                    status_code, headers, body_file = info
+                    # Work on a copy - the dict stored in the catalog is
+                    # shared between requests and must not be modified.
+                    headers = dict(headers)
+                    resp_body = b''
+                    if body_file is not None:
+                        if 'Content-Type' not in headers:
+                            ext = body_file.suffix[1:]
+                            if ext and ext in mime_types:
+                                headers['Content-Type'] = mime_types[ext]
+
+                        with open(body_file, mode='rb') as f:
+                            resp_body = f.read()
+                else:
+                    # A function to evaluate to get the response
+                    get_params, post_params = {}, {}
+                    if len(path_components) == 2:
+                        get_params = parse_qs(path_components[1])
+
+                    # Parse POST parameters; currently only supports
+                    # application/x-www-form-urlencoded
+                    if req_body:
+                        # The body arrives as bytes; decode it so that POST
+                        # parameters are strings just like GET parameters.
+                        post_params = parse_qs(req_body.decode())
+
+                    status_code, headers, resp_body = info(self.command, get_params, post_params)
+                    if type(resp_body) == str:
+                        resp_body = resp_body.encode()
+
+                if type(status_code) != int or status_code <= 0:
+                    raise Exception('Invalid status code %r' % status_code)
+
+                for header, header_value in headers.items():
+                    if type(header) != str:
+                        raise Exception('Invalid header key %r' % header)
+
+                    elif type(header_value) != str:
+                        raise Exception('Invalid header value %r' % header_value)
+            else:
+                status_code, headers = 404, {'Content-Type': 'text/plain'}
+                resp_body = b'Handler for this URL not found.'
+
+        except Exception as e:
+            status_code, headers, resp_body = 500, {'Content-Type': 'text/plain'}, b'Internal Error:\n' + repr(e).encode()
+
+        # Build a new dict so that a headers dict owned by the catalog entry
+        # is left untouched.
+        headers = {**headers, 'Content-Length': str(len(resp_body))}
+        self.send_response(status_code)
+        for header, header_value in headers.items():
+            self.send_header(header, header_value)
+
+        self.end_headers()
+        if resp_body:
+            self.wfile.write(resp_body)
+
+def do_an_internet(certdir=default_cert_dir, port=default_proxy_port):
+    """
+    Start up the proxy/server on `port`, serving requests in a separate
+    thread. `certdir` is passed to every request handler. Return the server
+    object; call its shutdown() method to stop it.
+    """
+    class RequestHijackerWithCertdir(RequestHijacker):
+        # The server instantiates handlers itself, so `certdir` is supplied
+        # through this subclass.
+        def __init__(self, *args, **kwargs):
+            super().__init__(*args, certdir=certdir, **kwargs)
+
+    listen_address = ('', port)
+    httpd = ThreadingHTTPServer(listen_address, RequestHijackerWithCertdir)
+
+    serving_thread = Thread(target=httpd.serve_forever)
+    serving_thread.start()
+
+    return httpd
diff --git a/test/unit/__init__.py b/test/unit/__init__.py
new file mode 100644
index 0000000..2b351bb
--- /dev/null
+++ b/test/unit/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: CC0-1.0
+# Copyright (C) 2021 Wojtek Kosior
diff --git a/test/unit/conftest.py b/test/unit/conftest.py
new file mode 100644
index 0000000..1500006
--- /dev/null
+++ b/test/unit/conftest.py
@@ -0,0 +1,115 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""
+Common fixtures for Haketilo unit tests
+"""
+
+# This file is part of Haketilo.
+#
+# Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this file's
+# license. Although I request that you do not make use this code in a
+# proprietary program, I am not going to enforce this in court.
+
+import pytest
+
+from ..profiles import firefox_safe_mode
+from ..server import do_an_internet
+from ..script_loader import load_script
+
+@pytest.fixture(scope="package")
+def proxy():
+    """Package-wide proxy server, shut down after the tests have run."""
+    server = do_an_internet()
+    yield server
+    server.shutdown()
+
+@pytest.fixture(scope="package")
+def driver(proxy):
+    """
+    Package-wide browser driver. Depends on the `proxy` fixture so that the
+    proxy server is running while the browser is in use.
+    """
+    # Leaving the `with` block already makes selenium quit the driver, so no
+    # explicit (second) quit() call is made here.
+    with firefox_safe_mode() as driver:
+        yield driver
+
+# JavaScript code passed to the driver's execute_script() by
+# _execute_in_page_context(). It receives the script to inject as its first
+# argument and that script's arguments as the second one.
+script_injecting_script = '''\
+/*
+ * Selenium by default executes scripts in some weird one-time context. We want
+ * separately-loaded scripts to be able to access global variables defined
+ * before, including those declared with `const` or `let`. To achieve that, we
+ * run our scripts by injecting them into the page inside a <script> tag. We use
+ * custom properties of the `window` object to communicate with injected code.
+ */
+
+const script_elem = document.createElement('script');
+script_elem.textContent = arguments[0];
+
+delete window.haketilo_selenium_return_value;
+delete window.haketilo_selenium_exception;
+window.returnval = (val => window.haketilo_selenium_return_value = val);
+window.arguments = arguments[1];
+
+document.body.append(script_elem);
+
+/*
+ * To ease debugging, we want this script to forward signal all exceptions from
+ * the injectee.
+ */
+try {
+ if (window.haketilo_selenium_exception !== false)
+ throw 'Error in injected script! Check your geckodriver.log!';
+} finally {
+ script_elem.remove();
+}
+
+return window.haketilo_selenium_return_value;
+'''
+
+def _execute_in_page_context(driver, script, args):
+    """
+    Run `script` in the page through `script_injecting_script`, passing it
+    `args`, and return the result.
+
+    The script is recorded in `driver.loaded_scripts`. If execution fails,
+    all recorded scripts are printed to stderr with line numbers and the
+    exception is raised again.
+    """
+    # The appended statement is only reached if the script does not throw.
+    script = script + '\n;\nwindow.haketilo_selenium_exception = false;'
+    driver.loaded_scripts.append(script)
+    try:
+        return driver.execute_script(script_injecting_script, script, args)
+    except Exception as e:
+        import sys
+
+        print("Scripts loaded since driver's last get() method call:",
+              file=sys.stderr)
+
+        for loaded in driver.loaded_scripts:
+            print('===', file=sys.stderr)
+            for line_number, line in enumerate(loaded.split('\n'), 1):
+                print(line_number, line, file=sys.stderr)
+
+        raise e from None
+
+@pytest.fixture(scope="package")
+def execute_in_page(driver):
+    """
+    Provide a function that executes a script in the page's context and
+    returns its result. Positional arguments after the script are passed to
+    it; an optional `page` keyword argument names a URL to navigate to first.
+    """
+    def do_execute(script, *args, **kwargs):
+        navigate_first = 'page' in kwargs
+        if navigate_first:
+            driver.get(kwargs['page'])
+
+        result = _execute_in_page_context(driver, script, args)
+        return result
+
+    yield do_execute
+
+@pytest.fixture(scope="package")
+def load_into_page(driver):
+    """
+    Provide a function that loads the script at `path`, as assembled by
+    load_script() using `import_dirs`, into the page's context. An optional
+    `page` keyword argument names a URL to navigate to first.
+    """
+    def do_load(path, import_dirs, *args, **kwargs):
+        navigate_first = 'page' in kwargs
+        if navigate_first:
+            driver.get(kwargs['page'])
+
+        code_to_load = load_script(path, import_dirs)
+        _execute_in_page_context(driver, code_to_load, args)
+
+    yield do_load
diff --git a/test/unit/test_basic.py b/test/unit/test_basic.py
new file mode 100644
index 0000000..cbe5c8c
--- /dev/null
+++ b/test/unit/test_basic.py
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: CC0-1.0
+
+"""
+Haketilo unit tests - base
+"""
+
+# This file is part of Haketilo
+#
+# Copyright (C) 2021, Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the CC0 1.0 Universal License as published by
+# the Creative Commons Corporation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# CC0 1.0 Universal License for more details.
+
+import pytest
+
+def test_driver(driver):
+    """
+    A trivial test case that verifies the browser being driven can access
+    mocked web pages served by the proxy, over both HTTP and HTTPS.
+    """
+    domain = 'gotmyowndoma.in'
+    for url in ('http://' + domain, 'https://' + domain):
+        driver.get(url)
+        title_element = driver.find_element_by_tag_name('title')
+        title_text = driver.execute_script('return arguments[0].innerText;',
+                                           title_element)
+        assert "Schrodinger's Document" in title_text
+
+def test_script_loader(execute_in_page, load_into_page):
+    """
+    A trivial test case that verifies a Haketilo .js file, along with its
+    dependencies, can be loaded into a test page and then used from there.
+    """
+    test_page = 'https://gotmyowndoma.in'
+    load_into_page('common/stored_types.js', ['common'], page=test_page)
+
+    prefix = execute_in_page('returnval(TYPE_PREFIX.VAR);')
+    assert prefix == '_'
diff --git a/test/unit/test_patterns.py b/test/unit/test_patterns.py
new file mode 100644
index 0000000..802bf4e
--- /dev/null
+++ b/test/unit/test_patterns.py
@@ -0,0 +1,154 @@
+# SPDX-License-Identifier: CC0-1.0
+
+"""
+Haketilo unit tests - URL patterns
+"""
+
+# This file is part of Haketilo
+#
+# Copyright (C) 2021, Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the CC0 1.0 Universal License as published by
+# the Creative Commons Corporation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# CC0 1.0 Universal License for more details.
+
+import pytest
+
+from ..script_loader import load_script
+
+@pytest.fixture(scope="session")
+def patterns_code():
+    """
+    Provide the result of load_script() for common/patterns.js, computed once
+    per test session.
+    """
+    script = load_script('common/patterns.js', ['common'])
+    yield script
+
+def test_regexes(execute_in_page, patterns_code):
+    """
+    patterns.js contains regexes used for URL parsing.
+    Verify they work properly.
+    """
+    execute_in_page(patterns_code, page='https://gotmyowndoma.in')
+
+    def regex_exec(regex_name, text):
+        # Apply the named page-side regex to `text` and return what the driver
+        # hands back: a list of groups, or None when nothing matched.
+        script = 'returnval(' + regex_name + '.exec(arguments[0]));'
+        return execute_in_page(script, text)
+
+    valid_url = 'https://example.com/a/b?ver=1.2.3#heading2'
+    valid_url_rest = 'example.com/a/b?ver=1.2.3#heading2'
+
+    # Test matching of URL protocol.
+    match = regex_exec('proto_regex', valid_url)
+    assert match
+    assert match[1] == 'https'
+    assert match[2] == valid_url_rest
+
+    match = regex_exec('proto_regex', '://bad-url.missing/protocol')
+    assert match is None
+
+    # Test matching of http(s) URLs.
+    match = regex_exec('http_regex', valid_url_rest)
+    assert match
+    assert match[1] == 'example.com'
+    assert match[2] == '/a/b'
+    assert match[3] == '?ver=1.2.3'
+
+    match = regex_exec('http_regex', 'another.example.com')
+    assert match
+    assert match[1] == 'another.example.com'
+    assert match[2] == ''
+    assert match[3] == ''
+
+    match = regex_exec('http_regex', '/bad/http/example')
+    assert match is None
+
+    # Test matching of file URLs.
+    match = regex_exec('file_regex', '/good/file/example')
+    assert match
+    assert match[1] == '/good/file/example'
+
+    # Test matching of ftp URLs.
+    match = regex_exec('ftp_regex', 'example.com/a/b#heading2')
+    assert match
+    # There is no "user@" part here, so its group comes back as None.
+    assert match[1] is None
+    assert match[2] == 'example.com'
+    assert match[3] == '/a/b'
+
+    match = regex_exec('ftp_regex', 'some_user@localhost')
+    assert match
+    assert match[1] == 'some_user@'
+    assert match[2] == 'localhost'
+    assert match[3] == ''
+
+    match = regex_exec('ftp_regex', '@bad.url/')
+    assert match is None
+
+def test_deconstruct_url(execute_in_page, patterns_code):
+    """
+    patterns.js contains deconstruct_url() function that handles URL parsing.
+    Verify it works properly.
+    """
+    execute_in_page(patterns_code, page='https://gotmyowndoma.in')
+
+    def deconstruct(url):
+        # Call deconstruct_url() inside the page and return its result.
+        return execute_in_page('returnval(deconstruct_url(arguments[0]));',
+                               url)
+
+    # Domain labels are expected lower-cased; query and fragment must not end
+    # up in the path.
+    deco = deconstruct('https://eXaMpLe.com/a/b?ver=1.2.3#heading2')
+    assert deco
+    assert deco['trailing_slash'] is False
+    assert deco['proto'] == 'https'
+    assert deco['domain'] == ['example', 'com']
+    assert deco['path'] == ['a', 'b']
+
+    # Wildcard labels are kept as ordinary domain labels.
+    deco = deconstruct('http://**.example.com/')
+    assert deco
+    assert deco['trailing_slash'] is True
+    assert deco['proto'] == 'http'
+    assert deco['domain'] == ['**', 'example', 'com']
+    assert deco['path'] == []
+
+    # The user name is not part of the domain and repeated slashes do not
+    # produce empty path segments.
+    deco = deconstruct('ftp://user@ftp.example.com/all///passwords.txt/')
+    assert deco
+    assert deco['trailing_slash'] is True
+    assert deco['proto'] == 'ftp'
+    assert deco['domain'] == ['ftp', 'example', 'com']
+    assert deco['path'] == ['all', 'passwords.txt']
+
+    deco = deconstruct('ftp://mirror.edu.pl.eu.org')
+    assert deco
+    assert deco['trailing_slash'] is False
+    assert deco['proto'] == 'ftp'
+    assert deco['domain'] == ['mirror', 'edu', 'pl', 'eu', 'org']
+    assert deco['path'] == []
+
+    # File URLs carry no domain at all.
+    deco = deconstruct('file:///mnt/parabola_chroot///etc/passwd')
+    assert deco
+    assert deco['trailing_slash'] is False
+    assert deco['proto'] == 'file'
+    assert deco['path'] == ['mnt', 'parabola_chroot', 'etc', 'passwd']
+    assert 'domain' not in deco
+
+    for bad_url in [
+            '://bad-url.missing/protocol',
+            'http:/example.com/a/b',
+            'unknown://example.com/a/b',
+            'idontfancypineapple',
+            'ftp://@example.org/',
+            'https:///some/path/',
+            'file://non-absolute/path'
+    ]:
+        # Malformed URLs must make the injected script fail.
+        with pytest.raises(Exception, match=r'Error in injected script'):
+            deconstruct(bad_url)
+
+    # at some point we might also consider testing url deconstruction with
+    # length limits...
diff --git a/test/unit/test_patterns_query_tree.py b/test/unit/test_patterns_query_tree.py
new file mode 100644
index 0000000..e282592
--- /dev/null
+++ b/test/unit/test_patterns_query_tree.py
@@ -0,0 +1,475 @@
+# SPDX-License-Identifier: CC0-1.0
+
+"""
+Haketilo unit tests - URL patterns query tree
+"""
+
+# This file is part of Haketilo
+#
+# Copyright (C) 2021, Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the CC0 1.0 Universal License as published by
+# the Creative Commons Corporation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# CC0 1.0 Universal License for more details.
+
+import pytest
+
+from ..script_loader import load_script
+
+@pytest.fixture(scope="session")
+def patterns_tree_code():
+    """
+    Provide the result of load_script() for common/patterns_query_tree.js,
+    computed once per test session.
+    """
+    script = load_script('common/patterns_query_tree.js', ['common'])
+    yield script
+
+def test_modify_branch(execute_in_page, patterns_tree_code):
+    """
+    patterns_query_tree.js contains Pattern Tree data structure that allows
+    arrays of string labels to be mapped to items.
+    Verify operations modifying a single branch of such tree work properly.
+    """
+    execute_in_page(patterns_tree_code, page='https://gotmyowndoma.in')
+
+    # Define page-side helpers. item_adder(item) and item_remover() produce
+    # the callbacks given to modify_sequence(); each resets a counter
+    # (items_added / items_removed) that the callback then bumps, so the test
+    # can check how many times the callback did its work.
+    execute_in_page(
+        '''
+ let items_added;
+ let items_removed;
+
+ function _item_adder(item, array)
+ {
+ items_added++;
+ return [...(array || []), item];
+ }
+
+ function item_adder(item)
+ {
+ items_added = 0;
+ return array => _item_adder(item, array);
+ }
+
+ function _item_remover(array)
+ {
+ if (array !== null) {
+ items_removed++;
+ array.pop();
+ }
+ return (array && array.length > 0) ? array : null;
+ }
+
+ function item_remover()
+ {
+ items_removed = 0;
+ return _item_remover;
+ }''')
+
+    # Let's construct some tree branch while checking that each addition gives
+    # the right result.
+    branch = execute_in_page(
+        '''{
+ const branch = empty_node();
+ modify_sequence(branch, ['com', 'example'], item_adder('some_item'));
+ returnval(branch);
+ }''')
+    assert branch == {
+        'literal_match': None,
+        'wildcard_matches': [None, None, None],
+        'children': {
+            'com': {
+                'literal_match': None,
+                'wildcard_matches': [None, None, None],
+                'children': {
+                    'example': {
+                        'literal_match': ['some_item'],
+                        'wildcard_matches': [None, None, None],
+                        'children': {
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    # The branch travels between Python and the page as plain data: it is
+    # passed in as an argument and the modified copy is returned.
+    branch, items_added = execute_in_page(
+        '''{
+ const branch = arguments[0];
+ modify_sequence(branch, ['com', 'example'], item_adder('other_item'));
+ returnval([branch, items_added]);
+ }''', branch)
+    assert items_added == 1
+    assert branch['children']['com']['children']['example']['literal_match'] \
+        == ['some_item', 'other_item']
+
+    # A sequence ending with a wildcard label ('*', '**' or '***') is stored
+    # in two places: in the parent's 'wildcard_matches' slot and under a child
+    # node named after the wildcard. Hence 2 adder calls per addition.
+    for i in range(3):
+        wildcard = '*' * (i + 1)
+        for expected_array in [['third_item'], ['third_item', '4th_item']]:
+            branch, items_added = execute_in_page(
+                '''{
+ const branch = arguments[0];
+ modify_sequence(branch, ['com', 'sample', arguments[1]],
+ item_adder(arguments[2]));
+ returnval([branch, items_added]);
+ }''',
+                branch, wildcard, expected_array[-1])
+            assert items_added == 2
+            sample = branch['children']['com']['children']['sample']
+            assert sample['wildcard_matches'][i] == expected_array
+            assert sample['children'][wildcard]['literal_match'] \
+                == expected_array
+
+    # A wildcard label in the middle of a sequence is stored once only.
+    branch, items_added = execute_in_page(
+        '''{
+ const branch = arguments[0];
+ modify_sequence(branch, ['org', 'koszko', '***', '123'],
+ item_adder('5th_item'));
+ returnval([branch, items_added]);
+ }''',
+        branch)
+    assert items_added == 1
+    koszko = branch['children']['org']['children']['koszko']
+    assert koszko['children']['***']['children']['123']['literal_match'] \
+        == ['5th_item']
+
+    # Let's verify that removing a nonexistent element doesn't modify the tree.
+    branch2, items_removed = execute_in_page(
+        '''{
+ const branch = arguments[0];
+ modify_sequence(branch, ['com', 'not', 'registered', '*'],
+ item_remover());
+ returnval([branch, items_removed]);
+ }''',
+        branch)
+    assert branch == branch2
+    assert items_removed == 0
+
+    # Let's remove all elements in the tree branch while checking that each
+    # removal gives the right result.
+    branch, items_removed = execute_in_page(
+        '''{
+ const branch = arguments[0];
+ modify_sequence(branch, ['org', 'koszko', '***', '123'],
+ item_remover());
+ returnval([branch, items_removed]);
+ }''',
+        branch)
+    assert items_removed == 1
+    # Nodes left without any items are expected to be dropped from the tree.
+    assert 'org' not in branch['children']
+
+    for i in range(3):
+        wildcard = '*' * (i + 1)
+        for expected_array in [['third_item'], None]:
+            branch, items_removed = execute_in_page(
+                '''{
+ const branch = arguments[0];
+ modify_sequence(branch, ['com', 'sample', arguments[1]],
+ item_remover());
+ returnval([branch, items_removed]);
+ }''',
+                branch, wildcard)
+            assert items_removed == 2
+            # Once emptied, the wildcard child (and eventually the whole
+            # 'sample' node) is gone, so look everything up with defaults that
+            # correspond to "nothing stored".
+            sample = branch['children']['com']['children'].get('sample', {})
+            assert sample.get('wildcard_matches', [None, None, None])[i] \
+                == expected_array
+            assert sample.get('children', {}).get(wildcard, {})\
+                .get('literal_match') == expected_array
+
+    for i in range(2):
+        branch, items_removed = execute_in_page(
+            '''{
+ const branch = arguments[0];
+ modify_sequence(branch, ['com', 'example'], item_remover());
+ returnval([branch, items_removed]);
+ }''',
+            branch)
+        assert items_removed == 1
+        if i == 0:
+            assert branch['children']['com']['children']['example']\
+                ['literal_match'] == ['some_item']
+        else:
+            # With the last item removed the branch is an empty node again.
+            assert branch == {
+                'literal_match': None,
+                'wildcard_matches': [None, None, None],
+                'children': {
+                }
+            }
+
+def test_search_branch(execute_in_page, patterns_tree_code):
+    """
+    patterns_query_tree.js contains Pattern Tree data structure that allows
+    arrays of string labels to be mapped to items.
+    Verify searching a single branch of such tree works properly.
+    """
+    execute_in_page(patterns_tree_code, page='https://gotmyowndoma.in')
+    execute_in_page(
+        '''
+ const item_adder = item => (array => [...(array || []), item]);
+ ''')
+
+    # Let's construct some tree branch to test on. Every item is named after
+    # the sequence it is stored under; 'A...' items share their sequences with
+    # the corresponding 'a...' items so that some nodes hold 2 items.
+    execute_in_page(
+        '''
+ var branch = empty_node();
+
+ for (const [item, sequence] of [
+ ['(root)', []],
+ ['***', ['***']],
+ ['**', ['**']],
+ ['*', ['*']],
+
+ ['a', ['a']],
+ ['A', ['a']],
+ ['b', ['b']],
+
+ ['a/***', ['a', '***']],
+ ['A/***', ['a', '***']],
+ ['a/**', ['a', '**']],
+ ['A/**', ['a', '**']],
+ ['a/*', ['a', '*']],
+ ['A/*', ['a', '*']],
+ ['a/sth', ['a', 'sth']],
+ ['A/sth', ['a', 'sth']],
+
+ ['b/***', ['b', '***']],
+ ['b/**', ['b', '**']],
+ ['b/*', ['b', '*']],
+ ['b/sth', ['b', 'sth']],
+ ])
+ modify_sequence(branch, sequence, item_adder(item));
+ ''')
+
+    # Each case is (queried sequence, expected results). The search yields
+    # arrays of items; their order matters, the order of items inside a
+    # single array does not - hence a list of sets.
+    cases = [
+        ([], [{'(root)'}, {'***'}]),
+        (['a'], [{'a', 'A'}, {'a/***', 'A/***'}, {'*'}, {'***'}]),
+        (['b'], [{'b'}, {'b/***'}, {'*'}, {'***'}]),
+        (['c'], [{'*'}, {'***'}]),
+        (['***'], [{'***'}, {'*'}]),
+        (['**'], [{'**'}, {'*'}, {'***'}]),
+        (['*'], [{'*'}, {'***'}]),
+
+        (['a', 'sth'], [{'a/sth', 'A/sth'}, {'a/*', 'A/*'},
+                        {'a/***', 'A/***'}, {'**'}, {'***'}]),
+        (['b', 'sth'], [{'b/sth'}, {'b/*'}, {'b/***'}, {'**'}, {'***'}]),
+        (['a', 'hts'], [{'a/*', 'A/*'}, {'a/***', 'A/***'}, {'**'}, {'***'}]),
+        (['b', 'hts'], [{'b/*'}, {'b/***'}, {'**'}, {'***'}]),
+        (['a', '***'], [{'a/***', 'A/***'}, {'a/*', 'A/*'}, {'**'}, {'***'}]),
+        (['b', '***'], [{'b/***'}, {'b/*'}, {'**'}, {'***'}]),
+        (['a', '**'], [{'a/**', 'A/**'}, {'a/*', 'A/*'},
+                       {'a/***', 'A/***'}, {'**'}, {'***'}]),
+        (['b', '**'], [{'b/**'}, {'b/*'}, {'b/***'}, {'**'}, {'***'}]),
+        (['a', '*'], [{'a/*', 'A/*'}, {'a/***', 'A/***'}, {'**'}, {'***'}]),
+        (['b', '*'], [{'b/*'}, {'b/***'}, {'**'}, {'***'}]),
+
+        (['a', 'c', 'd'], [{'a/**', 'A/**'}, {'a/***', 'A/***'},
+                           {'**'}, {'***'}]),
+        (['b', 'c', 'd'], [{'b/**'}, {'b/***'}, {'**'}, {'***'}])
+    ]
+
+    # Let's make the actual searches on our testing branch.
+    for sequence, expected in cases:
+        result = execute_in_page(
+            '''
+ returnval([...search_sequence(branch, arguments[0])]);
+ ''',
+            sequence)
+
+        # Attach the failing case to every assertion so that it shows up in
+        # the test report.
+        details = 'sequence: {}\nexpected: {}\nresult: {}'\
+            .format(sequence, expected, result)
+
+        assert len(result) == len(expected), details
+
+        for expected_set, result_array in zip(expected, result):
+            # The length check catches items reported more than once, which
+            # the set comparison alone would hide.
+            assert len(expected_set) == len(result_array), details
+            assert expected_set == set(result_array), details
+
+def test_pattern_tree(execute_in_page, patterns_tree_code):
+    """
+    patterns_query_tree.js contains Pattern Tree data structure that allows
+    arrays of string labels to be mapped to items.
+    Verify operations on entire such tree work properly.
+    """
+    execute_in_page(patterns_tree_code, page='https://gotmyowndoma.in')
+
+    # Script shared by both scenarios below. It builds a fresh tree, registers
+    # every given pattern (arguments[0] and arguments[1]) both without and
+    # with a trailing slash, using the pattern string itself as the item, and
+    # returns the tree along with the search results for URL arguments[2].
+    register_and_search = '''{
+ const tree = pattern_tree.make();
+ for (const pattern of arguments[0].concat(arguments[1])) {
+ pattern_tree.register(tree, pattern, 'key', pattern);
+ pattern_tree.register(tree, pattern + '/', 'key', pattern + '/');
+ }
+ returnval([tree, [...pattern_tree.search(tree, arguments[2])]]);
+ }'''
+
+    # Perform tests with all possible patterns for a simple URL.
+    url = 'https://example.com'
+    patterns = [
+        'https://example.com',
+        'https://example.com/***',
+        'https://***.example.com',
+        'https://***.example.com/***'
+    ]
+    bad_patterns = [
+        'http://example.com',
+        'https://a.example.com',
+        'https://*.example.com',
+        'https://**.example.com',
+        'https://example.com/a',
+        'https://example.com/*',
+        'https://example.com/**',
+    ]
+
+    # The URL has no trailing slash, so only the slash-less registrations are
+    # expected, in the order the patterns are listed above.
+    expected = [{'key': p} for p in patterns]
+
+    tree, result = execute_in_page(register_and_search,
+                                   patterns, bad_patterns, url)
+    assert expected == result
+
+    # Also verify that deregistering half of the good patterns works correctly.
+    patterns_removed = patterns[1::2]
+    patterns = patterns[::2]
+    expected = [{'key': p} for p in patterns]
+    tree, result = execute_in_page(
+        '''{
+ const tree = arguments[0];
+ for (const pattern of arguments[1]) {
+ pattern_tree.deregister(tree, pattern, 'key');
+ pattern_tree.deregister(tree, pattern + '/', 'key');
+ }
+ returnval([tree, [...pattern_tree.search(tree, arguments[2])]]);
+ }''',
+        tree, patterns_removed, url)
+    assert expected == result
+
+    # Also verify that deregistering all the patterns works correctly.
+    tree = execute_in_page(
+        '''{
+ const tree = arguments[0];
+ for (const pattern of arguments[1].concat(arguments[2])) {
+ pattern_tree.deregister(tree, pattern, 'key');
+ pattern_tree.deregister(tree, pattern + '/', 'key');
+ }
+ returnval(tree);
+ }''',
+        tree, patterns, bad_patterns)
+    assert tree == {}
+
+    # Perform tests with all possible patterns for a complex URL.
+    url = 'http://settings.query.example.com/google/tries/destroy/adblockers//'
+
+    # The matching patterns are every combination of a matching domain part
+    # with a matching path part. Both lists are ordered the way the search is
+    # expected to report them, with the domain being the more significant
+    # criterion.
+    domains = [
+        'settings.query.example.com',
+        '***.settings.query.example.com',
+        '*.query.example.com',
+        '***.query.example.com',
+        '**.example.com',
+        '***.example.com'
+    ]
+    paths = [
+        '/google/tries/destroy/adblockers',
+        '/google/tries/destroy/adblockers/***',
+        '/google/tries/destroy/*',
+        '/google/tries/destroy/***',
+        '/google/tries/**',
+        '/google/tries/***',
+        '/google/**',
+        '/google/***',
+        '/**',
+        '/***'
+    ]
+    patterns = ['http://' + domain + path
+                for domain in domains for path in paths]
+    bad_patterns = [
+        'https://settings.query.example.com/google/tries/destroy/adblockers',
+        'http://settings.query.example.com/google/tries/destroy/adblockers/a',
+        'http://settings.query.example.com/google/tries/destroy/adblockers/*',
+        'http://settings.query.example.com/google/tries/destroy/adblockers/**',
+        'http://settings.query.example.com/google/tries/destroy/a',
+        'http://settings.query.example.com/google/tries/destroy/**',
+        'http://settings.query.example.com/google/tries/*',
+        'http://a.settings.query.example.com/google/tries/destroy/adblockers',
+        'http://*.settings.query.example.com/google/tries/destroy/adblockers',
+        'http://**.settings.query.example.com/google/tries/destroy/adblockers',
+        'http://a.query.example.com/google/tries/destroy/adblockers',
+        'http://**.query.example.com/google/tries/destroy/adblockers',
+        'http://*.example.com/google/tries/destroy/adblockers'
+    ]
+
+    # This URL ends with a slash, so for each pattern both registrations are
+    # expected: the one with a trailing slash first, then the one without.
+    expected = [{'key': p + s} for p in patterns for s in ['/', '']]
+
+    tree, result = execute_in_page(register_and_search,
+                                   patterns, bad_patterns, url)
+    assert expected == result
+
+    # Also verify that deregistering all patterns with trailing slash works
+    # correctly.
+    expected = [{'key': p} for p in patterns]
+    tree, result = execute_in_page(
+        '''{
+ const tree = arguments[0];
+ for (const pattern of arguments[1])
+ pattern_tree.deregister(tree, pattern + '/', 'key');
+ returnval([tree, [...pattern_tree.search(tree, arguments[2])]]);
+ }''',
+        tree, patterns, url)
+    assert expected == result
+
+    # Also verify that deregistering all the patterns works correctly.
+    # The good patterns only have their slash-less variants left by now; the
+    # bad ones still have both.
+    tree = execute_in_page(
+        '''{
+ const tree = arguments[0];
+ for (const pattern of arguments[1])
+ pattern_tree.deregister(tree, pattern, 'key');
+ for (const pattern of arguments[2]) {
+ pattern_tree.deregister(tree, pattern, 'key');
+ pattern_tree.deregister(tree, pattern + '/', 'key');
+ }
+ returnval(tree);
+ }''',
+        tree, patterns, bad_patterns)
+    assert tree == {}
diff --git a/test/world_wide_library.py b/test/world_wide_library.py
new file mode 100644
index 0000000..860c987
--- /dev/null
+++ b/test/world_wide_library.py
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+"""
+Our helpful little stand-in for the Internet
+"""
+
+# This file is part of Haketilo.
+#
+# Copyright (C) 2021 jahoti <jahoti@tilde.team>
+# Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+from .misc_constants import here
+
+# Canned responses of the mocked websites, keyed by URL. Each value is a
+# 3-tuple; judging by the entries it is (HTTP status, response headers, path
+# of the file holding the body or None) - confirm against the code that
+# consumes this catalog.
+catalog = {
+    url: response
+    for origin, page in [
+        ('http://gotmyowndoma.in', 'gotmyowndomain.html'),
+        ('https://gotmyowndoma.in', 'gotmyowndomain_https.html')
+    ]
+    for url, response in [
+        # Both spellings of the site root redirect to the index page.
+        (origin, (302, {'location': origin + '/index.html'}, None)),
+        (origin + '/', (302, {'location': origin + '/index.html'}, None)),
+        (origin + '/index.html', (200, {}, here / 'data' / 'pages' / page))
+    ]
+}