diff options
Diffstat (limited to 'test')
-rw-r--r-- | test/__init__.py | 2 | ||||
-rw-r--r-- | test/__main__.py | 59 | ||||
-rw-r--r-- | test/data/pages/gotmyowndomain.html | 27 | ||||
-rw-r--r-- | test/data/pages/gotmyowndomain_https.html | 27 | ||||
-rw-r--r-- | test/default_profiles/icecat_empty/extensions.json | 1 | ||||
-rwxr-xr-x | test/gorilla.py | 92 | ||||
-rwxr-xr-x | test/init.sh | 16 | ||||
-rw-r--r-- | test/misc_constants.py | 42 | ||||
-rwxr-xr-x | test/profiles.py | 71 | ||||
-rw-r--r-- | test/proxy_core.py | 191 | ||||
-rw-r--r-- | test/script_loader.py | 89 | ||||
-rwxr-xr-x | test/server.py | 195 | ||||
-rw-r--r-- | test/unit/__init__.py | 2 | ||||
-rw-r--r-- | test/unit/conftest.py | 109 | ||||
-rw-r--r-- | test/unit/test_basic.py | 41 | ||||
-rw-r--r-- | test/unit/test_patterns.py | 91 | ||||
-rw-r--r-- | test/world_wide_library.py | 40 |
17 files changed, 814 insertions, 281 deletions
diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..2b351bb --- /dev/null +++ b/test/__init__.py @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: CC0-1.0 +# Copyright (C) 2021 Wojtek Kosior diff --git a/test/__main__.py b/test/__main__.py new file mode 100644 index 0000000..c3437ea --- /dev/null +++ b/test/__main__.py @@ -0,0 +1,59 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + +""" +Run a Firefox-type browser with WebDriver attached and Python console open +""" + +# This file is part of Haketilo. +# +# Copyright (C) 2021 jahoti <jahoti@tilde.team> +# Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +import sys +import time +import code + +from .server import do_an_internet +from .misc_constants import * +from .profiles import firefox_safe_mode + +def fail(msg, error_code): + print('Error:', msg) + print('Usage:', sys.argv[0], '[certificates_directory] [proxy_port]') + sys.exit(error_code) + +certdir = Path(sys.argv[1]).resolve() if len(sys.argv) > 1 else default_cert_dir +if not certdir.is_dir(): + fail('selected certificate directory does not exist.', 2) + +port = sys.argv[2] if len(sys.argv) > 2 else str(default_proxy_port) +if not port.isnumeric(): + fail('port must be an integer.', 3) + +httpd = do_an_internet(certdir, int(port)) +driver = firefox_safe_mode(proxy_port=int(port)) + +print("You can now control the browser through 'driver' object") + +code.InteractiveConsole(locals=globals()).interact() + +driver.quit() +httpd.shutdown() diff --git a/test/data/pages/gotmyowndomain.html b/test/data/pages/gotmyowndomain.html index daa9ab7..42c26cc 100644 --- a/test/data/pages/gotmyowndomain.html +++ b/test/data/pages/gotmyowndomain.html @@ -1,9 +1,30 @@ +<!DOCTYPE html> +<!-- + SPDX-License-Identifier: AGPL-3.0-or-later + + Sample testig page + + This file is part of Haketilo. + + Copyright (C) 2021 jahoti <jahoti@tilde.team> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as + published by the Free Software Foundation, either version 3 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + --> <html> <head> <meta name=charset value="latin1"> - <title> - Schrodinger's Document - </title> + <title>Schrodinger's Document</title> </head> <body> A nice, simple page for testing. diff --git a/test/data/pages/gotmyowndomain_https.html b/test/data/pages/gotmyowndomain_https.html index ec2aa1f..95c0be4 100644 --- a/test/data/pages/gotmyowndomain_https.html +++ b/test/data/pages/gotmyowndomain_https.html @@ -1,9 +1,30 @@ +<!DOCTYPE html> +<!-- + SPDX-License-Identifier: AGPL-3.0-or-later + + Sample testig page to serve over HTTPS + + This file is part of Haketilo. + + Copyright (C) 2021 jahoti <jahoti@tilde.team> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as + published by the Free Software Foundation, either version 3 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + --> <html> <head> <meta name=charset value="latin1"> - <title> - Schrodinger's Document - </title> + <title>Schrodinger's Document</title> </head> <body> A nice, simple page for testing (using HTTPS). diff --git a/test/default_profiles/icecat_empty/extensions.json b/test/default_profiles/icecat_empty/extensions.json new file mode 100644 index 0000000..5f74ff3 --- /dev/null +++ b/test/default_profiles/icecat_empty/extensions.json @@ -0,0 +1 @@ +{"schemaVersion":25,"addons":[{"id":"jid1-KtlZuoiikVfFew@jetpack","location":"app-global","userDisabled":true,"path":"/usr/lib/icecat/browser/extensions/jid1-KtlZuoiikVfFew@jetpack"},{"id":"uBlock0@raymondhill.net","location":"app-global","userDisabled":true,"path":"/usr/lib/icecat/browser/extensions/uBlock0@raymondhill.net.xpi"},{"id":"SubmitMe@0xbeef.coffee","location":"app-global","userDisabled":true,"path":"/usr/lib/icecat/browser/extensions/SubmitMe@0xbeef.coffee"},{"id":"FreeUSPS@0xbeef.coffee","location":"app-global","userDisabled":true,"path":"/usr/lib/icecat/browser/extensions/FreeUSPS@0xbeef.coffee"},{"id":"tortm-browser-button@jeremybenthum","location":"app-global","userDisabled":true,"path":"/usr/lib/icecat/browser/extensions/tortm-browser-button@jeremybenthum"},{"id":"tprb.addon@searxes.danwin1210.me","location":"app-global","userDisabled":true,"path":"/usr/lib/icecat/browser/extensions/tprb.addon@searxes.danwin1210.me"},{"id":"SimpleSumOfUs@0xbeef.coffee","location":"app-global","userDisabled":true,"path":"/usr/lib/icecat/browser/extensions/SimpleSumOfUs@0xbeef.coffee"}]}
\ No newline at end of file diff --git a/test/gorilla.py b/test/gorilla.py deleted file mode 100755 index 5bf64f5..0000000 --- a/test/gorilla.py +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright (C) 2021 jahoti <jahoti@tilde.team> -# Licensing information is collated in the `copyright` file - -""" -A partial re-implementation of Hydrilla in Python, for testing purposes. - -This will eventually be replaced with a build of the actual thing. -""" - -import json, os, sys - -def load_db(path): - DB = {'script': {}, 'bag': {}, 'page': {}} - if path[-1] != os.sep: - path += os.sep - - DB['path'] = path - for file in os.listdir(path): - subdir = path + file - - if (os.st.S_IFMT(os.stat(subdir).st_mode) & os.st.S_IFDIR) == 0: - continue - - with open(subdir + os.sep + 'index.json') as f: - data = json.load(f) - - type_, data['file'] = data['type'], file - name_key = 'pattern' if type_ == 'page' else 'name' - - DB[type_][data[name_key]] = data - del data['type'], data[name_key] - if type_ == 'script': - with open(path + file + os.sep + data['location']) as f: - data['text'] = f.read() - - return DB - - -def mkhachette(patterns): - patterns = set(patterns) - if '*' in patterns: - patterns.remove('*') - patterns.update(DB['page'].keys()) - - scripts, bags, pages, new_bags = {}, {}, {}, [] - for pattern in patterns: - pages[pattern] = data = DB['page'][pattern] - type_, name = data['payload'] - if type_ == 'script': - scripts[name] = DB['script'][name] - else: - new_bags.append(name) - - while new_bags: - name = new_bags.pop(0) - if name in bags: - continue - - bags[name] = data = DB['bag'][name]['components'] - for type_, name in data: - if type_ == 'script': - scripts[name] = DB['script'][name] - else: - new_bags.append(name) - - items, path = [], DB['path'] - for script, data in scripts.items(): - item = {'hash': data['sha256']} - with open(path + data['file'] + os.sep + data['location']) as f: - item['text'] = f.read() - - items.append({'s' + script: item}) - - for bag, data in bags.items(): - items.append({'b' + bag: [[type_[0], name] for type_, name in data]}) - - for page, data in pages.items(): - type_, name = data['payload'] - items.append({'p' + page: {'components': [type_[0], name]}}) - - return items - - -if __name__ == '__main__': - if len(sys.argv) < 3 or not os.path.isdir(sys.argv[1]): - sys.stderr.write('Usage: %s [hydrilla content path] (page pattern (page pattern (...)))' % sys.argv[0]) - sys.exit(1) - - DB = load_db(sys.argv[1]) - print(json.dumps(mkhachette(sys.argv[2:]))) diff --git a/test/init.sh b/test/init.sh deleted file mode 100755 index c0e7647..0000000 --- a/test/init.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/sh -# -# Copyright (c) 2015, inaz2 -# Copyright (C) 2021 jahoti <jahoti@tilde.team> -# Licensing information is collated in the `copyright` file - -# Initialize the root certificate for the tests proxy server -# Make sure this is run in the directory where they will be put! - - -if [ -n "$1" ]; then - cd "$1" -fi -openssl genrsa -out ca.key 2048 -openssl genrsa -out cert.key 2048 -openssl req -new -x509 -days 3650 -key ca.key -out ca.crt -subj "/CN=Hachette Test" diff --git a/test/misc_constants.py b/test/misc_constants.py index c6c134f..22432a6 100644 --- a/test/misc_constants.py +++ b/test/misc_constants.py @@ -1,10 +1,46 @@ -# Copyright (C) 2021 jahoti <jahoti@tilde.team> -# Licensing information is collated in the `copyright` file +# SPDX-License-Identifier: AGPL-3.0-or-later """ Miscellaneous data that were found useful """ +# This file is part of Haketilo. +# +# Copyright (C) 2021 jahoti <jahoti@tilde.team> +# Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +from pathlib import Path + +here = Path(__file__).resolve().parent + +default_firefox_binary = '/usr/lib/icecat/icecat' +# The browser might be loading some globally-installed add-ons by default. They +# could interfere with the tests, so we'll disable all of them. +default_clean_profile_dir = here / 'default_profile' / 'icecat_empty' + +default_proxy_host = '127.0.0.1' +default_proxy_port = 1337 + +default_cert_dir = here / 'certs' + mime_types = { "7z": "application/x-7z-compressed", "oga": "audio/ogg", "abw": "application/x-abiword", "ogv": "video/ogg", @@ -22,7 +58,7 @@ mime_types = { "js": "text/javascript", "wav": "audio/wav", "jpeg": "image/jpeg", "weba": "audio/webm", "jpg": "image/jpeg", "webm": "video/webm", - "json": "application/json", "woff": "font/woff", + "json": "application/json", "woff": "font/woff", "mjs": "text/javascript", "woff2": "font/woff2", "mp3": "audio/mpeg", "xhtml": "application/xhtml+xml", "mp4": "video/mp4", "zip": "application/zip", diff --git a/test/profiles.py b/test/profiles.py new file mode 100755 index 0000000..d6a4efc --- /dev/null +++ b/test/profiles.py @@ -0,0 +1,71 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +""" +Browser profiles and Selenium driver initialization +""" + +# This file is part of Haketilo. +# +# Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this file's +# license. Although I request that you do not make use this code in a +# proprietary program, I am not going to enforce this in court. + +from selenium import webdriver +from selenium.webdriver.firefox.options import Options +import time + +from .misc_constants import * + +def set_profile_proxy(profile, proxy_host, proxy_port): + # proxy type 1 designates "manual" + profile.set_preference('network.proxy.type', 1) + profile.set_preference('network.proxy.no_proxies_on', '') + profile.set_preference('network.proxy.share_proxy_settings', True) + + for proto in ['http', 'ftp', 'socks', 'ssl']: + profile.set_preference(f'network.proxy.{proto}', proxy_host) + profile.set_preference(f'network.proxy.{proto}_port', proxy_port) + profile.set_preference(f'network.proxy.backup.{proto}', '') + profile.set_preference(f'network.proxy.backup.{proto}_port', 0) + +def set_profile_console_logging(profile): + profile.set_preference('devtools.console.stdout.content', True) + +def firefox_safe_mode(firefox_binary=default_firefox_binary, + proxy_host=default_proxy_host, + proxy_port=default_proxy_port): + profile = webdriver.FirefoxProfile() + set_profile_proxy(profile, proxy_host, proxy_port) + set_profile_console_logging(profile) + + options = Options() + options.add_argument('--safe-mode') + + return webdriver.Firefox(options=options, firefox_profile=profile, + firefox_binary=firefox_binary) + +def firefox_with_profile(firefox_binary=default_firefox_binary, + profile_dir=default_clean_profile_dir, + proxy_host=default_proxy_host, + proxy_port=default_proxy_port): + profile = webdriver.FirefoxProfile(profile_dir) + set_profile_proxy(profile, proxy_host, proxy_port) + set_profile_console_logging(profile) + + return webdriver.Firefox(firefox_profile=profile, + firefox_binary=firefox_binary) diff --git a/test/proxy_core.py b/test/proxy_core.py index dd4225d..d31302a 100644 --- a/test/proxy_core.py +++ b/test/proxy_core.py @@ -1,74 +1,141 @@ -# Copyright (c) 2015, inaz2 -# Copyright (C) 2021 jahoti <jahoti@tilde.team> -# Licensing information is collated in the `copyright` file +# SPDX-License-Identifier: BSD-3-Clause """ -The core for a "virtual network" proxy - -Be sure to set certdir to your intended certificates directory before running. +The core for a "virtual network" proxy. """ -import os, socket, ssl, subprocess, sys, threading, time +# This file is part of Haketilo. +# +# Copyright (c) 2015, inaz2 +# Copyright (C) 2021 jahoti <jahoti@tilde.team> +# Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org> +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of proxy2 nor the names of its contributors may be used to +# endorse or promote products derived from this software without specific +# prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this file's +# license. Although I request that you do not make use this code in a way +# incompliant with the license, I am not going to enforce this in court. + +from pathlib import Path +import socket, ssl, subprocess, sys, threading from http.server import HTTPServer, BaseHTTPRequestHandler from socketserver import ThreadingMixIn -gen_cert_req, lock = 'openssl req -new -key %scert.key -subj /CN=%s', threading.Lock() -sign_cert_req = 'openssl x509 -req -days 3650 -CA %sca.crt -CAkey %sca.key -set_serial %d -out %s' - -def popen(command, *args, **kwargs): - return subprocess.Popen((command % args).split(' '), **kwargs) +lock = threading.Lock() class ProxyRequestHandler(BaseHTTPRequestHandler): - """Handles a network request made to the proxy""" - def log_error(self, format, *args): - # suppress "Request timed out: timeout('timed out',)" - if isinstance(args[0], socket.timeout): - return - - self.log_message(format, *args) - - def do_CONNECT(self): - hostname = self.path.split(':')[0] - certpath = '%s%s.crt' % (certdir, hostname if hostname != 'ca' else 'CA') - - with lock: - if not os.path.isfile(certpath): - p1 = popen(gen_cert_req, certdir, hostname, stdout=subprocess.PIPE).stdout - popen(sign_cert_req, certdir, certdir, time.time() * 1000, certpath, stdin=p1, stderr=subprocess.PIPE).communicate() - - self.send_response(200) - self.end_headers() - - self.connection = ssl.wrap_socket(self.connection, keyfile=certdir+'cert.key', certfile=certpath, server_side=True) - self.rfile = self.connection.makefile('rb', self.rbufsize) - self.wfile = self.connection.makefile('wb', self.wbufsize) - - self.close_connection = int(self.headers.get('Proxy-Connection', '').lower() == 'close') - - def proxy(self): - content_length = int(self.headers.get('Content-Length', 0)) - req_body = self.rfile.read(content_length) if content_length else None - - if self.path[0] == '/': - if isinstance(self.connection, ssl.SSLSocket): - self.path = 'https://%s%s' % (self.headers['Host'], self.path) - else: - self.path = 'http://%s%s' % (self.headers['Host'], self.path) - - self.handle_request(req_body) - - do_OPTIONS = do_DELETE = do_PUT = do_HEAD = do_POST = do_GET = proxy - - def handle_request(self, req_body): - pass + """ + Handles a network request made to the proxy. Configures SSL encryption when + needed. + """ + def __init__(self, *args, **kwargs): + """ + Initialize self. Uses the same arguments as + http.server.BaseHTTPRequestHandler's constructor but also expect a + `certdir` keyword argument with appropriate path. + """ + self.certdir = Path(kwargs.pop('certdir')).resolve() + super().__init__(*args, **kwargs) + + def log_error(self, *args, **kwargs): + """ + Like log_error in http.server.BaseHTTPRequestHandler but suppresses + "Request timed out: timeout('timed out',)". + """ + if not isinstance(args[0], socket.timeout): + super().log_error(*args, **kwargs) + + def get_cert(self, hostname): + """ + If needed, generate a signed x509 certificate for `hostname`. Return + paths to certificate's key file and to certificate itself in a tuple. + """ + root_keyfile = self.certdir / 'rootCA.key' + root_certfile = self.certdir / 'rootCA.pem' + keyfile = self.certdir / 'site.key' + certfile = self.certdir / f'{hostname}.crt' + + with lock: + requestfile = self.certdir / f'{hostname}.csr' + if not certfile.exists(): + subprocess.run([ + 'openssl', 'req', '-new', '-key', str(keyfile), + '-subj', f'/CN={hostname}', '-out', str(requestfile) + ], check=True) + subprocess.run([ + 'openssl', 'x509', '-req', '-in', str(requestfile), + '-CA', str(root_certfile), '-CAkey', str(root_keyfile), + '-CAcreateserial', '-out', str(certfile), '-days', '1024' + ], check=True) + + return keyfile, certfile + + def do_CONNECT(self): + """Wrap the connection with SSL using on-demand signed certificate.""" + hostname = self.path.split(':')[0] + sslargs = {'server_side': True} + sslargs['keyfile'], sslargs['certfile'] = self.get_cert(hostname) + + self.send_response(200) + self.end_headers() + + self.connection = ssl.wrap_socket(self.connection, **sslargs) + self.rfile = self.connection.makefile('rb', self.rbufsize) + self.wfile = self.connection.makefile('wb', self.wbufsize) + + connection_header = self.headers.get('Proxy-Connection', '').lower() + self.close_connection = int(connection_header == 'close') + + def do_GET(self): + content_length = int(self.headers.get('Content-Length', 0)) + req_body = self.rfile.read(content_length) if content_length else None + + if self.path[0] == '/': + secure = 's' if isinstance(self.connection, ssl.SSLSocket) else '' + self.path = f'http{secure}://{self.headers["Host"]}{self.path}' + + self.handle_request(req_body) + + do_OPTIONS = do_DELETE = do_PUT = do_HEAD = do_POST = do_GET + + def handle_request(self, req_body): + """Default handler that does nothing. Please override.""" + pass class ThreadingHTTPServer(ThreadingMixIn, HTTPServer): - """The actual proxy server""" - address_family, daemon_threads = socket.AF_INET6, True - - def handle_error(self, request, client_address): - # suppress socket/ssl related errors - cls, e = sys.exc_info()[:2] - if not (cls is socket.error or cls is ssl.SSLError): - return HTTPServer.handle_error(self, request, client_address) + """The actual proxy server""" + address_family, daemon_threads = socket.AF_INET6, True + + def handle_error(self, request, client_address): + """ + Like handle_error in http.server.HTTPServer but suppresses socket/ssl + related errors. + """ + cls, e = sys.exc_info()[:2] + if not (cls is socket.error or cls is ssl.SSLError): + return super().handle_error(request, client_address) diff --git a/test/script_loader.py b/test/script_loader.py new file mode 100644 index 0000000..15269c7 --- /dev/null +++ b/test/script_loader.py @@ -0,0 +1,89 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +""" +Loading of parts of Haketilo source for testing in browser +""" + +# This file is part of Haketilo. +# +# Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this file's +# license. Although I request that you do not make use this code in a +# proprietary program, I am not going to enforce this in court. + +from pathlib import Path +import subprocess, re + +from .misc_constants import * + +script_root = here.parent +awk_script = script_root / 'compute_scripts.awk' + +def make_relative_path(path): + path = Path(path) + + if path.is_absolute(): + path = path.relative_to(script_root) + + return path + +"""Used to ignore hidden files and emacs auto-save files.""" +script_name_regex = re.compile(r'^[^.#].*\.js$') + +def available_scripts(directory): + for script in directory.rglob('*.js'): + if script_name_regex.match(script.name): + yield script + +def wrapped_script(script_path, wrap_partially=True): + if script_path == 'exports_init.js': + with open(script_root / 'MOZILLA_exports_init.js') as script: + return script.read() + + command = 'partially_wrapped_code' if wrap_partially else 'wrapped_code' + awk_command = ['awk', '-f', str(awk_script), command, str(script_path)] + awk = subprocess.run(awk_command, stdout=subprocess.PIPE, cwd=script_root, + check=True) + + return awk.stdout.decode() + +def load_script(path, import_dirs): + """ + `path` and `import_dirs` are .js file path and a list of directory paths, + respectively. They may be absolute or specified relative to Haketilo's + project directory. + + Return a string containing script from `path` together with all other + scripts it depends. Dependencies are wrapped in the same way Haketilo's + build system wraps them, with imports properly satisfied. The main script + being loaded is wrapped partially - it also has its imports satisfied, but + its code is not placed inside an anonymous function, so the + """ + path = make_relative_path(path) + + import_dirs = [make_relative_path(dir) for dir in import_dirs] + available = [s for dir in import_dirs for s in available_scripts(dir)] + + awk = subprocess.run(['awk', '-f', str(awk_script), 'script_dependencies', + str(path), *[str(s) for s in available]], + stdout=subprocess.PIPE, cwd=script_root, check=True) + + to_load = awk.stdout.decode().split() + texts = [wrapped_script(path, wrap_partially=(i == len(to_load) - 1)) + for i, path in enumerate(to_load)] + + return '\n'.join(texts) diff --git a/test/server.py b/test/server.py index 58a84bd..6013955 100755 --- a/test/server.py +++ b/test/server.py @@ -1,101 +1,108 @@ -#!/usr/bin/env python3 -# -# Copyright (C) 2021 jahoti <jahoti@tilde.team> -# Licensing information is collated in the `copyright` file +# SPDX-License-Identifier: AGPL-3.0-or-later """ A modular "virtual network" proxy, wrapping the classes in proxy_core.py """ -import proxy_core +# This file is part of Haketilo. +# +# Copyright (C) 2021 jahoti <jahoti@tilde.team> +# Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +from pathlib import Path from urllib.parse import parse_qs -from misc_constants import * -from world_wide_library import catalog as internet - -class RequestHijacker(proxy_core.ProxyRequestHandler): - def handle_request(self, req_body): - path_components = self.path.split('?', maxsplit=1) - path = path_components[0] - try: - # Response format: (status_code, headers (dict. of strings), - # body as bytes or filename containing body as string) - if path in internet: - info = internet[path] - if type(info) == tuple: - status_code, headers, body_file = info - if type(body_file) == str: - if 'Content-Type' not in headers and '.' in body_file: - ext = body_file.rsplit('.', maxsplit=1)[-1] - if ext in mime_types: - headers['Content-Type'] = mime_types[ext] - - with open(body_file, mode='rb') as f: - body_file = f.read() - - else: - # A function to evaluate to get the response - get_params, post_params = {}, {} - if len(path_components) == 2: - get_params = parse_qs(path_components[1]) - - # Parse POST parameters; currently only supports - # application/x-www-form-urlencoded - if req_body: - post_params = parse_qs(req_body.encode()) - - status_code, headers, body_file = info(self.command, get_params, post_params) - if type(body_file) == str: - body_file = body_file.encode() - - if type(status_code) != int or status_code <= 0: - raise Exception('Invalid status code %r' % status_code) - - for header, header_value in headers.items(): - if type(header) != str: - raise Exception('Invalid header key %r' % header) - - elif type(header_value) != str: - raise Exception('Invalid header value %r' % header_value) - else: - status_code, headers = 404, {'Content-Type': 'text/plain'} - body_file = b'Handler for this URL not found.' - - except Exception as e: - status_code, headers, body_file = 500, {'Content-Type': 'text/plain'}, b'Internal Error:\n' + repr(e).encode() - - headers['Content-Length'] = str(len(body_file)) - self.send_response(status_code) - for header, header_value in headers.items(): - self.send_header(header, header_value) - - self.end_headers() - self.wfile.write(body_file) - - - -def do_an_internet(certdir, port): - """Start up the proxy/server""" - proxy_core.certdir = certdir - httpd = proxy_core.ThreadingHTTPServer(('', port), RequestHijacker) - httpd.serve_forever() - -if __name__ == '__main__': - import sys - def fail(msg, error_code): - print('Error:', msg) - print('Usage:', sys.argv[0], '[certificates directory] (port)') - sys.exit(error_code) - - if len(sys.argv) < 2: - fail('missing required argument "certificates directory".', 1) - - certdir = sys.argv[1] - if not proxy_core.os.path.isdir(certdir): - fail('selected certificate directory does not exist.', 2) - - port = sys.argv[2] if len(sys.argv) > 2 else '1337' - if not port.isnumeric(): - fail('port must be an integer.', 3) - - do_an_internet(certdir, int(port)) +from threading import Thread + +from .proxy_core import ProxyRequestHandler, ThreadingHTTPServer +from .misc_constants import * +from .world_wide_library import catalog as internet + +class RequestHijacker(ProxyRequestHandler): + def handle_request(self, req_body): + path_components = self.path.split('?', maxsplit=1) + path = path_components[0] + try: + # Response format: (status_code, headers (dict. of strings), + # body as bytes or filename containing body as string) + if path in internet: + info = internet[path] + if type(info) is tuple: + status_code, headers, body_file = info + resp_body = b'' + if body_file is not None: + if 'Content-Type' not in headers: + ext = body_file.suffix[1:] + if ext and ext in mime_types: + headers['Content-Type'] = mime_types[ext] + + with open(body_file, mode='rb') as f: + resp_body = f.read() + else: + # A function to evaluate to get the response + get_params, post_params = {}, {} + if len(path_components) == 2: + get_params = parse_qs(path_components[1]) + + # Parse POST parameters; currently only supports + # application/x-www-form-urlencoded + if req_body: + post_params = parse_qs(req_body.encode()) + + status_code, headers, resp_body = info(self.command, get_params, post_params) + if type(resp_body) == str: + resp_body = resp_body.encode() + + if type(status_code) != int or status_code <= 0: + raise Exception('Invalid status code %r' % status_code) + + for header, header_value in headers.items(): + if type(header) != str: + raise Exception('Invalid header key %r' % header) + + elif type(header_value) != str: + raise Exception('Invalid header value %r' % header_value) + else: + status_code, headers = 404, {'Content-Type': 'text/plain'} + resp_body = b'Handler for this URL not found.' + + except Exception as e: + status_code, headers, resp_body = 500, {'Content-Type': 'text/plain'}, b'Internal Error:\n' + repr(e).encode() + + headers['Content-Length'] = str(len(resp_body)) + self.send_response(status_code) + for header, header_value in headers.items(): + self.send_header(header, header_value) + + self.end_headers() + if resp_body: + self.wfile.write(resp_body) + +def do_an_internet(certdir=default_cert_dir, port=default_proxy_port): + """Start up the proxy/server""" + class RequestHijackerWithCertdir(RequestHijacker): + def __init__(self, *args, **kwargs): + super().__init__(*args, certdir=certdir, **kwargs) + + httpd = ThreadingHTTPServer(('', port), RequestHijackerWithCertdir) + Thread(target=httpd.serve_forever).start() + + return httpd diff --git a/test/unit/__init__.py b/test/unit/__init__.py new file mode 100644 index 0000000..2b351bb --- /dev/null +++ b/test/unit/__init__.py @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: CC0-1.0 +# Copyright (C) 2021 Wojtek Kosior diff --git a/test/unit/conftest.py b/test/unit/conftest.py new file mode 100644 index 0000000..6877b7a --- /dev/null +++ b/test/unit/conftest.py @@ -0,0 +1,109 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +""" +Common fixtures for Haketilo unit tests +""" + +# This file is part of Haketilo. +# +# Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this file's +# license. Although I request that you do not make use this code in a +# proprietary program, I am not going to enforce this in court. + +import pytest + +from ..profiles import firefox_safe_mode +from ..server import do_an_internet +from ..script_loader import load_script + +@pytest.fixture(scope="package") +def proxy(): + httpd = do_an_internet() + yield httpd + httpd.shutdown() + +@pytest.fixture(scope="package") +def driver(proxy): + with firefox_safe_mode() as driver: + yield driver + driver.quit() + +script_injecting_script = '''\ +/* + * Selenium by default executes scripts in some weird one-time context. We want + * separately-loaded scripts to be able to access global variables defined + * before, including those declared with `const` or `let`. To achieve that, we + * run our scripts by injecting them into the page inside a <script> tag. We use + * custom properties of the `window` object to communicate with injected code. + */ + +const script_elem = document.createElement('script'); +script_elem.textContent = arguments[0]; + +delete window.haketilo_selenium_return_value; +delete window.haketilo_selenium_exception; +window.returnval = (val => window.haketilo_selenium_return_value = val); +window.arguments = arguments[1]; + +document.body.append(script_elem); + +/* + * To ease debugging, we want this script to forward signal all exceptions from + * the injectee. + */ +try { + if (window.haketilo_selenium_exception !== false) + throw 'Error in injected script! Check your geckodriver.log!'; +} finally { + script_elem.remove(); +} + +return window.haketilo_selenium_return_value; +''' + +def _execute_in_page_context(driver, script, *args): + script = script + '\n;\nwindow.haketilo_selenium_exception = false;' + try: + return driver.execute_script(script_injecting_script, script, args) + except Exception as e: + import sys + lines = enumerate(script.split('\n'), 1) + for err_info in [('Failing script\n',), *lines]: + print(*err_info, file=sys.stderr) + + raise e from None + +@pytest.fixture(scope="package") +def execute_in_page(driver): + def do_execute(script, *args, **kwargs): + if 'page' in kwargs: + driver.get(kwargs['page']) + + return _execute_in_page_context(driver, script, args) + + yield do_execute + +@pytest.fixture(scope="package") +def load_into_page(driver): + def do_load(path, import_dirs, *args, **kwargs): + if 'page' in kwargs: + driver.get(kwargs['page']) + + _execute_in_page_context(driver, load_script(path, import_dirs), args) + + yield do_load diff --git a/test/unit/test_basic.py b/test/unit/test_basic.py new file mode 100644 index 0000000..cbe5c8c --- /dev/null +++ b/test/unit/test_basic.py @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: CC0-1.0 + +""" +Haketilo unit tests - base +""" + +# This file is part of Haketilo +# +# Copyright (C) 2021, Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the CC0 1.0 Universal License as published by +# the Creative Commons Corporation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# CC0 1.0 Universal License for more details. + +import pytest + +def test_driver(driver): + """ + A trivial test case that verifies mocked web pages served by proxy can be + accessed by the browser driven. + """ + for proto in ['http://', 'https://']: + driver.get(proto + 'gotmyowndoma.in') + element = driver.find_element_by_tag_name('title') + title = driver.execute_script('return arguments[0].innerText;', element) + assert "Schrodinger's Document" in title + +def test_script_loader(execute_in_page, load_into_page): + """ + A trivial test case that verifies Haketilo's .js files can be properly + loaded into a test page together with their dependencies. + """ + load_into_page('common/stored_types.js', ['common'], + page='https://gotmyowndoma.in') + + assert execute_in_page('returnval(TYPE_PREFIX.VAR);') == '_' diff --git a/test/unit/test_patterns.py b/test/unit/test_patterns.py new file mode 100644 index 0000000..4162fc0 --- /dev/null +++ b/test/unit/test_patterns.py @@ -0,0 +1,91 @@ +# SPDX-License-Identifier: CC0-1.0 + +""" +Haketilo unit tests - URL patterns +""" + +# This file is part of Haketilo +# +# Copyright (C) 2021, Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the CC0 1.0 Universal License as published by +# the Creative Commons Corporation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# CC0 1.0 Universal License for more details. + +import pytest + +from ..script_loader import load_script + +@pytest.fixture(scope="session") +def patterns_code(): + yield load_script('common/patterns.js', ['common']) + +def test_regexes(execute_in_page, patterns_code): + """ + patterns.js contains regexes used for URL parsing. + Verify they work properly. + """ + execute_in_page(patterns_code, page='https://gotmyowndoma.in') + + valid_url = 'https://example.com/a/b?ver=1.2.3#heading2' + valid_url_rest = 'example.com/a/b?ver=1.2.3#heading2' + + # Test matching of URL protocol. + match = execute_in_page('returnval(proto_regex.exec(arguments[0]));', + valid_url) + assert match + assert match[1] == 'https' + assert match[2] == valid_url_rest + + match = execute_in_page('returnval(proto_regex.exec(arguments[0]));', + '://bad-url.missing/protocol') + assert match is None + + # Test matching of http(s) URLs. + match = execute_in_page('returnval(http_regex.exec(arguments[0]));', + valid_url_rest) + assert match + assert match[1] == 'example.com' + assert match[2] == '/a/b' + assert match[3] == '?ver=1.2.3' + + match = execute_in_page('returnval(http_regex.exec(arguments[0]));', + 'another.example.com') + assert match + assert match[1] == 'another.example.com' + assert match[2] == '' + assert match[3] == '' + + match = execute_in_page('returnval(http_regex.exec(arguments[0]));', + '/bad/http/example') + assert match == None + + # Test matching of file URLs. + match = execute_in_page('returnval(file_regex.exec(arguments[0]));', + '/good/file/example') + assert match + assert match[1] == '/good/file/example' + + # Test matching of ftp URLs. + match = execute_in_page('returnval(ftp_regex.exec(arguments[0]));', + 'example.com/a/b#heading2') + assert match + assert match[1] is None + assert match[2] == 'example.com' + assert match[3] == '/a/b' + + match = execute_in_page('returnval(ftp_regex.exec(arguments[0]));', + 'some_user@localhost') + assert match + assert match[1] == 'some_user@' + assert match[2] == 'localhost' + assert match[3] == '' + + match = execute_in_page('returnval(ftp_regex.exec(arguments[0]));', + '@bad.url/') + assert match is None diff --git a/test/world_wide_library.py b/test/world_wide_library.py index fc9e095..860c987 100644 --- a/test/world_wide_library.py +++ b/test/world_wide_library.py @@ -1,15 +1,39 @@ -# Copyright (C) 2021 jahoti <jahoti@tilde.team> -# Licensing information is collated in the `copyright` file +# SPDX-License-Identifier: AGPL-3.0-or-later """ Our helpful little stand-in for the Internet """ +# This file is part of Haketilo. +# +# Copyright (C) 2021 jahoti <jahoti@tilde.team> +# Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +from .misc_constants import here + catalog = { - 'http://gotmyowndoma.in': (302, {'location': 'http://gotmyowndoma.in/index.html'}, b''), - 'http://gotmyowndoma.in/': (302, {'location': 'http://gotmyowndoma.in/index.html'}, b''), - 'http://gotmyowndoma.in/index.html': (200, {}, 'data/pages/gotmyowndomain.html'), - 'https://gotmyowndoma.in': (302, {'location': 'https://gotmyowndoma.in/index.html'}, b''), - 'https://gotmyowndoma.in/': (302, {'location': 'https://gotmyowndoma.in/index.html'}, b''), - 'https://gotmyowndoma.in/index.html': (200, {}, 'data/pages/gotmyowndomain_https.html') + 'http://gotmyowndoma.in': (302, {'location': 'http://gotmyowndoma.in/index.html'}, None), + 'http://gotmyowndoma.in/': (302, {'location': 'http://gotmyowndoma.in/index.html'}, None), + 'http://gotmyowndoma.in/index.html': (200, {}, here / 'data' / 'pages' / 'gotmyowndomain.html'), + 'https://gotmyowndoma.in': (302, {'location': 'https://gotmyowndoma.in/index.html'}, None), + 'https://gotmyowndoma.in/': (302, {'location': 'https://gotmyowndoma.in/index.html'}, None), + 'https://gotmyowndoma.in/index.html': (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html') } |