From b1444d9c9ea065d7c97d5809c3ec5259cb01a1da Mon Sep 17 00:00:00 2001 From: jahoti Date: Mon, 6 Sep 2021 00:00:00 +0000 Subject: Incorporate test suite from jahoti branch --- test/data/pages/gotmyowndomain.html | 14 +++++ test/data/pages/gotmyowndomain_https.html | 14 +++++ test/gorilla.py | 92 +++++++++++++++++++++++++++ test/init.sh | 19 ++++++ test/misc_constants.py | 34 ++++++++++ test/proxy_core.py | 74 ++++++++++++++++++++++ test/server.py | 101 ++++++++++++++++++++++++++++++ test/world_wide_library.py | 15 +++++ 8 files changed, 363 insertions(+) create mode 100644 test/data/pages/gotmyowndomain.html create mode 100644 test/data/pages/gotmyowndomain_https.html create mode 100755 test/gorilla.py create mode 100755 test/init.sh create mode 100644 test/misc_constants.py create mode 100644 test/proxy_core.py create mode 100755 test/server.py create mode 100644 test/world_wide_library.py (limited to 'test') diff --git a/test/data/pages/gotmyowndomain.html b/test/data/pages/gotmyowndomain.html new file mode 100644 index 0000000..daa9ab7 --- /dev/null +++ b/test/data/pages/gotmyowndomain.html @@ -0,0 +1,14 @@ + + + + + Schrodinger's Document + + + + A nice, simple page for testing. + + + diff --git a/test/data/pages/gotmyowndomain_https.html b/test/data/pages/gotmyowndomain_https.html new file mode 100644 index 0000000..ec2aa1f --- /dev/null +++ b/test/data/pages/gotmyowndomain_https.html @@ -0,0 +1,14 @@ + + + + + Schrodinger's Document + + + + A nice, simple page for testing (using HTTPS). + + + diff --git a/test/gorilla.py b/test/gorilla.py new file mode 100755 index 0000000..5bf64f5 --- /dev/null +++ b/test/gorilla.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2021 jahoti +# Licensing information is collated in the `copyright` file + +""" +A partial re-implementation of Hydrilla in Python, for testing purposes. + +This will eventually be replaced with a build of the actual thing. 
+""" + +import json, os, sys + +def load_db(path): + DB = {'script': {}, 'bag': {}, 'page': {}} + if path[-1] != os.sep: + path += os.sep + + DB['path'] = path + for file in os.listdir(path): + subdir = path + file + + if (os.st.S_IFMT(os.stat(subdir).st_mode) & os.st.S_IFDIR) == 0: + continue + + with open(subdir + os.sep + 'index.json') as f: + data = json.load(f) + + type_, data['file'] = data['type'], file + name_key = 'pattern' if type_ == 'page' else 'name' + + DB[type_][data[name_key]] = data + del data['type'], data[name_key] + if type_ == 'script': + with open(path + file + os.sep + data['location']) as f: + data['text'] = f.read() + + return DB + + +def mkhachette(patterns): + patterns = set(patterns) + if '*' in patterns: + patterns.remove('*') + patterns.update(DB['page'].keys()) + + scripts, bags, pages, new_bags = {}, {}, {}, [] + for pattern in patterns: + pages[pattern] = data = DB['page'][pattern] + type_, name = data['payload'] + if type_ == 'script': + scripts[name] = DB['script'][name] + else: + new_bags.append(name) + + while new_bags: + name = new_bags.pop(0) + if name in bags: + continue + + bags[name] = data = DB['bag'][name]['components'] + for type_, name in data: + if type_ == 'script': + scripts[name] = DB['script'][name] + else: + new_bags.append(name) + + items, path = [], DB['path'] + for script, data in scripts.items(): + item = {'hash': data['sha256']} + with open(path + data['file'] + os.sep + data['location']) as f: + item['text'] = f.read() + + items.append({'s' + script: item}) + + for bag, data in bags.items(): + items.append({'b' + bag: [[type_[0], name] for type_, name in data]}) + + for page, data in pages.items(): + type_, name = data['payload'] + items.append({'p' + page: {'components': [type_[0], name]}}) + + return items + + +if __name__ == '__main__': + if len(sys.argv) < 3 or not os.path.isdir(sys.argv[1]): + sys.stderr.write('Usage: %s [hydrilla content path] (page pattern (page pattern (...)))' % sys.argv[0]) + 
# Initialize the root certificate for the tests proxy server
# Make sure this is run in the directory where they will be put!

# Generate a 4096-bit RSA private key "<name>.key" for every name given.
gen_keys () {
	while [ -n "$1" ]; do
		openssl genrsa -out "${1}".key 4096 || return 1
		# Move on to the next name; without this the loop never terminates.
		shift
	done
}

# Optional first argument: directory to create the keys and certificate in.
if [ -n "$1" ]; then
	cd "$1" || exit 1
fi
gen_keys ca cert || exit 1
# Optional second argument: validity of the CA certificate in days.
openssl req -new -x509 -days "${2:-183}" -key ca.key -out ca.crt -subj "/CN=Hachette Test"
"video/mp4", "zip": "application/zip", + "mpeg": "video/mpeg", + "odp": "application/vnd.oasis.opendocument.presentation", + "ods": "application/vnd.oasis.opendocument.spreadsheet", + "odt": "application/vnd.oasis.opendocument.text", + "xml": "application/xml" # text/xml if readable from casual users +} diff --git a/test/proxy_core.py b/test/proxy_core.py new file mode 100644 index 0000000..dd4225d --- /dev/null +++ b/test/proxy_core.py @@ -0,0 +1,74 @@ +# Copyright (c) 2015, inaz2 +# Copyright (C) 2021 jahoti +# Licensing information is collated in the `copyright` file + +""" +The core for a "virtual network" proxy + +Be sure to set certdir to your intended certificates directory before running. +""" + +import os, socket, ssl, subprocess, sys, threading, time +from http.server import HTTPServer, BaseHTTPRequestHandler +from socketserver import ThreadingMixIn + +gen_cert_req, lock = 'openssl req -new -key %scert.key -subj /CN=%s', threading.Lock() +sign_cert_req = 'openssl x509 -req -days 3650 -CA %sca.crt -CAkey %sca.key -set_serial %d -out %s' + +def popen(command, *args, **kwargs): + return subprocess.Popen((command % args).split(' '), **kwargs) + +class ProxyRequestHandler(BaseHTTPRequestHandler): + """Handles a network request made to the proxy""" + def log_error(self, format, *args): + # suppress "Request timed out: timeout('timed out',)" + if isinstance(args[0], socket.timeout): + return + + self.log_message(format, *args) + + def do_CONNECT(self): + hostname = self.path.split(':')[0] + certpath = '%s%s.crt' % (certdir, hostname if hostname != 'ca' else 'CA') + + with lock: + if not os.path.isfile(certpath): + p1 = popen(gen_cert_req, certdir, hostname, stdout=subprocess.PIPE).stdout + popen(sign_cert_req, certdir, certdir, time.time() * 1000, certpath, stdin=p1, stderr=subprocess.PIPE).communicate() + + self.send_response(200) + self.end_headers() + + self.connection = ssl.wrap_socket(self.connection, keyfile=certdir+'cert.key', certfile=certpath, 
server_side=True) + self.rfile = self.connection.makefile('rb', self.rbufsize) + self.wfile = self.connection.makefile('wb', self.wbufsize) + + self.close_connection = int(self.headers.get('Proxy-Connection', '').lower() == 'close') + + def proxy(self): + content_length = int(self.headers.get('Content-Length', 0)) + req_body = self.rfile.read(content_length) if content_length else None + + if self.path[0] == '/': + if isinstance(self.connection, ssl.SSLSocket): + self.path = 'https://%s%s' % (self.headers['Host'], self.path) + else: + self.path = 'http://%s%s' % (self.headers['Host'], self.path) + + self.handle_request(req_body) + + do_OPTIONS = do_DELETE = do_PUT = do_HEAD = do_POST = do_GET = proxy + + def handle_request(self, req_body): + pass + + +class ThreadingHTTPServer(ThreadingMixIn, HTTPServer): + """The actual proxy server""" + address_family, daemon_threads = socket.AF_INET6, True + + def handle_error(self, request, client_address): + # suppress socket/ssl related errors + cls, e = sys.exc_info()[:2] + if not (cls is socket.error or cls is ssl.SSLError): + return HTTPServer.handle_error(self, request, client_address) diff --git a/test/server.py b/test/server.py new file mode 100755 index 0000000..58a84bd --- /dev/null +++ b/test/server.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2021 jahoti +# Licensing information is collated in the `copyright` file + +""" +A modular "virtual network" proxy, +wrapping the classes in proxy_core.py +""" + +import proxy_core +from urllib.parse import parse_qs +from misc_constants import * +from world_wide_library import catalog as internet + +class RequestHijacker(proxy_core.ProxyRequestHandler): + def handle_request(self, req_body): + path_components = self.path.split('?', maxsplit=1) + path = path_components[0] + try: + # Response format: (status_code, headers (dict. 
class RequestHijacker(proxy_core.ProxyRequestHandler):
    """Answers every proxied request from the local catalog of fake sites."""

    def _resolve(self, path, query, req_body):
        """
        Look `path` up in the catalog and return (status, headers, body).

        `query` is the raw query string or None; `req_body` is the request
        body as bytes or None.  The returned headers are always a fresh dict
        and the body is always bytes.  Raises on any malformed catalog entry
        or handler result.
        """
        if path not in internet:
            return 404, {'Content-Type': 'text/plain'}, b'Handler for this URL not found.'

        # Response format: (status_code, headers (dict. of strings),
        #       body as bytes or filename containing body as string)
        info = internet[path]
        if isinstance(info, tuple):
            status_code, headers, body = info
            # Copy so the shared catalog entry is never modified.
            headers = dict(headers)
            if isinstance(body, str):
                if 'Content-Type' not in headers and '.' in body:
                    ext = body.rsplit('.', maxsplit=1)[-1]
                    if ext in mime_types:
                        headers['Content-Type'] = mime_types[ext]

                with open(body, mode='rb') as f:
                    body = f.read()
        else:
            # A function to evaluate to get the response
            get_params = parse_qs(query) if query is not None else {}

            # Parse POST parameters; currently only supports
            # application/x-www-form-urlencoded.  The body arrives as bytes
            # and is decoded so both parameter dicts use str keys and values.
            post_params = parse_qs(req_body.decode()) if req_body else {}

            status_code, headers, body = info(self.command, get_params, post_params)
            headers = dict(headers)
            if isinstance(body, str):
                body = body.encode()

        # Exact type check on purpose: bool is an int subclass but no status.
        if type(status_code) is not int or status_code <= 0:
            raise Exception('Invalid status code %r' % status_code)

        for header, header_value in headers.items():
            if not isinstance(header, str):
                raise Exception('Invalid header key %r' % header)

            if not isinstance(header_value, str):
                raise Exception('Invalid header value %r' % header_value)

        if not isinstance(body, bytes):
            raise Exception('Invalid body %r' % body)

        return status_code, headers, body

    def handle_request(self, req_body):
        """Send the catalog's response for self.path, or a 404/500 page."""
        path_components = self.path.split('?', maxsplit=1)
        path = path_components[0]
        query = path_components[1] if len(path_components) == 2 else None
        try:
            status_code, headers, body_file = self._resolve(path, query, req_body)
        except Exception as e:
            # Deliberately broad: any failure becomes a visible 500 response.
            status_code, headers, body_file = 500, {'Content-Type': 'text/plain'}, b'Internal Error:\n' + repr(e).encode()

        headers['Content-Length'] = str(len(body_file))
        self.send_response(status_code)
        for header, header_value in headers.items():
            self.send_header(header, header_value)

        self.end_headers()
        self.wfile.write(body_file)



def do_an_internet(certdir, port):
    """Start up the proxy/server"""
    # proxy_core builds file names by concatenation, so make sure the
    # directory ends in a path separator.
    proxy_core.certdir = proxy_core.os.path.join(certdir, '')
    httpd = proxy_core.ThreadingHTTPServer(('', port), RequestHijacker)
    httpd.serve_forever()

if __name__ == '__main__':
    import sys
    def fail(msg, error_code):
        print('Error:', msg)
        print('Usage:', sys.argv[0], '[certificates directory] (port)')
        sys.exit(error_code)

    if len(sys.argv) < 2:
        fail('missing required argument "certificates directory".', 1)

    certdir = sys.argv[1]
    if not proxy_core.os.path.isdir(certdir):
        fail('selected certificate directory does not exist.', 2)

    port = sys.argv[2] if len(sys.argv) > 2 else '1337'
    # isdecimal() rather than isnumeric(): the latter also accepts
    # characters such as superscripts, which int() rejects.
    if not port.isdecimal():
        fail('port must be an integer.', 3)

    do_an_internet(certdir, int(port))
'data/pages/gotmyowndomain_https.html') +} -- cgit v1.2.3