# SPDX-License-Identifier: AGPL-3.0-or-later

"""
Our helpful little stand-in for the Internet
"""

# This file is part of Haketilo.
#
# Copyright (C) 2021 jahoti <jahoti@tilde.team>
# Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
#
# I, Wojtek Kosior, thereby promise not to sue for violation of this
# file's license. Although I request that you do not make use of this code
# in a proprietary program, I am not going to enforce this in court.

from hashlib import sha256
from pathlib import Path
from shutil import rmtree
from threading import Lock
from uuid import uuid4
import json
import functools as ft
import operator as op

from .misc_constants import here
from .unit.utils import * # sample repo data

# TODO: instead of having the entire catalog defined here, make it possible to
# add catalog items from within individual test files.

# Maps the hex sha256 sum of a script's UTF-8 text to that text. Every access
# goes through served_scripts_lock.
served_scripts = {}
served_scripts_lock = Lock()

def start_serving_script(script_text):
    """
    Register given script so that it is served at
    https://serve.scrip.ts/?sha256=<script's_sha256_sum>

    The sum is computed over the UTF-8 encoding of script_text. Registering the
    same text again simply overwrites the identical entry.

    Returns the URL at which script will be served.

    The registry update is performed while holding served_scripts_lock.
    """
    sha256sum = sha256(script_text.encode()).digest().hex()

    # 'with' guarantees the lock is released even if the update raises.
    with served_scripts_lock:
        served_scripts[sha256sum] = script_text

    return f'https://serve.scrip.ts/?sha256={sha256sum}'

def serve_script(command, get_params, post_params):
    """
    info() callback to pass to request-handling code in server.py. Facilitates
    serving scripts that have been registered with start_serving_script().

    Looks the script up by the first value of the 'sha256' GET parameter.
    Returns a (status, headers, body) tuple: 404 with an empty body when no
    script is registered under that sum, 200 with the script text otherwise.
    """
    with served_scripts_lock:
        script = served_scripts.get(get_params['sha256'][0])

    if script is None:
        return 404, {}, b''

    return 200, {'Content-Type': 'application/javascript'}, script

def dump_scripts(directory=(Path.cwd() / 'injected_scripts')):
    """
    Write all scripts that have been registered with start_serving_script()
    under the provided directory. If the directory already exists, it is wiped
    beforehand. If it doesn't exist, it is created.

    Each script is written to a file named after its sha256 sum. Note that the
    default directory is computed once, when this module is imported.
    """
    directory = Path(directory)
    rmtree(directory, ignore_errors=True)
    directory.mkdir(parents=True)

    # Hold the lock through a context manager so that a failed write cannot
    # leave it acquired forever and block every later registration or request.
    with served_scripts_lock:
        for sha256sum, script in served_scripts.items():
            # The sum was computed over UTF-8 bytes; write with the same
            # encoding instead of relying on the locale's default.
            with open(directory / sha256sum, 'wt', encoding='utf-8') as file:
                file.write(script)

some_data = '{"some": "data"}'

# used by handler function of https://counterdoma.in
request_counter = 0

def serve_counter(command, get_params, post_params):
    """
    Request handler that increments the module-level request_counter on every
    call and responds with its new value as JSON, with caching disabled.
    """
    global request_counter
    request_counter += 1
    return (
        200,
        {'Cache-Control': 'private, max-age=0, no-store'},
        json.dumps({'counter': request_counter})
    )

# Mock a Hydrilla repository.

def make_handler(txt):
    """
    Return a request handler that ignores its 3 arguments and always responds
    with status 200, no extra headers and txt as the body.
    """
    return lambda c, g, p: (200, {}, txt)

# Mock files in the repository.
# Nine distinct text bodies; each one is served under the URL derived from the
# hex sha256 sum of its UTF-8 encoding.
sample_contents = [
    f'Mi povas manĝi vitron, ĝi ne damaĝas min {i}'
    for i in range(9)
]
sample_hashes = [
    sha256(content.encode()).hexdigest()
    for content in sample_contents
]

# Prepends the file URL prefix to a given hash.
file_url = ft.partial(op.concat, 'https://hydril.la/file/sha256/')

sample_files_catalog = {
    file_url(file_hash): make_handler(content)
    for file_hash, content in zip(sample_hashes, sample_contents)
}

# Mock resources and mappings in the repository.
sample_resource_templates = []

# Three templates that each depend on four single-letter resources; the number
# of files equals the first index of the group.
for deps in [(0, 1, 2, 3), (3, 4, 5, 6), (6, 7, 8, 9)]:
    letters = [chr(ord('a') + i) for i in deps]
    template = {
        'id_suffix': ''.join(letters),
        'files_count': deps[0],
        'dependencies': [{'identifier': f'resource-{l}'} for l in letters]
    }
    sample_resource_templates.append(template)

# One template depending on all three multi-letter resources defined so far.
suffixes = [srt['id_suffix'] for srt in sample_resource_templates]
sample_resource_templates.append({
    'id_suffix': '-'.join(suffixes),
    'files_count': 2,
    'dependencies': [{'identifier': f'resource-{suf}'} for suf in suffixes]
})

# Ten dependency-free single-letter templates, 'a' through 'j', with 0 to 9
# files respectively.
sample_resource_templates.extend([
    {
        'id_suffix': chr(ord('a') + i),
        'files_count': i,
        'dependencies': []
    }
    for i in range(10)
])

# The one below will generate items with schema still at version 1, so required
# mappings will be ignored.
sample_resource_templates.append({
    'id_suffix': 'a-w-required-mapping-v1',
    'files_count': 1,
    'dependencies': [],
    'required_mappings': [{'identifier': 'mapping-a'}],
    'include_in_query': False
})

# Variant of the previous template that additionally declares schema version 2.
sample_resource_templates.append({
    'id_suffix': 'a-w-required-mapping-v2',
    'files_count': 1,
    'dependencies': [],
    'required_mappings': [{'identifier': 'mapping-a'}],
    'schema_ver': '2',
    'include_in_query': False
})

# URL -> handler maps for generated items, plus a map from a queried URL to the
# list of mapping references returned for it.
sample_resources_catalog = {}
sample_mappings_catalog = {}
sample_queries = {}

# Turn every template into one resource and one mapping, each of them served
# in 2 versions.
for srt in sample_resource_templates:
    id_suffix = srt['id_suffix']

    resource = make_sample_resource()
    resource['identifier'] = f'resource-{id_suffix}'
    resource['long_name'] = resource['identifier'].upper()
    resource['uuid'] = str(uuid4())
    resource['dependencies'] = srt['dependencies']
    resource['source_copyright'] = []
    resource['scripts'] = []

    # Even-numbered files go to 'source_copyright', odd-numbered to 'scripts'.
    for file_no in range(srt['files_count']):
        file_kind = 'scripts' if file_no & 1 else 'source_copyright'
        resource[file_kind].append({
            'file': f'file_{file_no}',
            'sha256': sample_hashes[file_no]
        })

    # Second version is the first one with its last component incremented.
    newer_resource_version = resource['version'].copy()
    newer_resource_version[-1] += 1
    resource_versions = [resource['version'], newer_resource_version]

    mapping = make_sample_mapping()
    mapping['identifier'] = f'mapping-{id_suffix}'
    mapping['long_name'] = mapping['identifier'].upper()
    mapping['uuid'] = str(uuid4())
    # Same list object as in the resource, not a copy.
    mapping['source_copyright'] = resource['source_copyright']

    newer_mapping_version = mapping['version'].copy()
    newer_mapping_version[-1] += 1
    mapping_versions = [mapping['version'], newer_mapping_version]

    # One pattern for the whole suffix and one per alphabetic character of it;
    # duplicates are collapsed by the set.
    sufs = [id_suffix, *[l for l in id_suffix if l.isalpha()]]
    patterns = [f'https://example_{suf}.com/*' for suf in set(sufs)]

    include_in_query = srt.get('include_in_query', True)
    mapping['payloads'] = {}

    for pat in patterns:
        mapping['payloads'][pat] = {'identifier': resource['identifier']}

        if include_in_query:
            queried_url = pat.replace('*', 'something')
            sample_queries.setdefault(queried_url, []).append({
                'identifier': mapping['identifier'],
                'long_name': mapping['long_name'],
                'version': mapping_versions[1]
            })

    for item in (resource, mapping):
        if 'required_mappings' in srt:
            item['required_mappings'] = srt['required_mappings']
        if 'schema_ver' in srt:
            item['$schema'] = \
                item['$schema'].replace('1', srt['schema_ver'])

    item_groups = [
        (resource, resource_versions, sample_resources_catalog),
        (mapping, mapping_versions, sample_mappings_catalog)
    ]
    for item, versions, target_catalog in item_groups:
        fmt = f'https://hydril.la/{item["type"]}/{item["identifier"]}%s'
        # Make 2 versions of each item so that we can test updates. The
        # '.json' URL is written for both and ends up serving the later one.
        for ver in versions:
            item['version'] = ver
            for fmt_arg in ('.json', '/' + item_version_string(item)):
                serialized = json.dumps(item)
                target_catalog[fmt % fmt_arg] = make_handler(serialized)

def serve_query(command, get_params, post_params):
    """
    Request handler for mapping queries. Responds with a JSON query result
    listing the entries of sample_queries stored under the first value of the
    'url' GET parameter.
    """
    queried_url = get_params['url'][0]
    generated_by = {
        'name': 'human',
        'version': 'sapiens-0.8.15'
    }
    response = {
        '$schema': 'https://hydrilla.koszko.org/schemas/api_query_result-1.schema.json',
        'generated_by': generated_by,
        'mappings': sample_queries[queried_url]
    }

    return (200, {}, json.dumps(response))

# The same query handler is exposed at the root and under 4 numbered prefixes.
sample_queries_catalog = {
    f'https://hydril.la/{suf}query': serve_query
    for suf in ('', '1/', '2/', '3/', '4/')
}

pages_dir = here / 'data' / 'pages'

# Maps a URL either to a (status, headers, body) tuple or to a handler function
# called with (command, get_params, post_params).
catalog = {
    'http://gotmyowndoma.in':
    (302, {'location': 'http://gotmyowndoma.in/index.html'}, None),
    'http://gotmyowndoma.in/':
    (302, {'location': 'http://gotmyowndoma.in/index.html'}, None),
    'http://gotmyowndoma.in/index.html':
    (200, {}, pages_dir / 'gotmyowndomain.html'),

    'https://gotmyowndoma.in':
    (302, {'location': 'https://gotmyowndoma.in/index.html'}, None),
    'https://gotmyowndoma.in/':
    (302, {'location': 'https://gotmyowndoma.in/index.html'}, None),
    'https://gotmyowndoma.in/index.html':
    (200, {}, pages_dir / 'gotmyowndomain_https.html'),

    'https://gotmyowndoma.in/scripts_to_block_1.html':
    (200, {}, pages_dir / 'scripts_to_block_1.html'),
    'https://gotmyowndoma.in/scripts_to_block_2.xml':
    (200, {}, pages_dir / 'scripts_to_block_2.xml'),

    'https://anotherdoma.in/resource/blocked/by/CORS.json':
    lambda command, get_params, post_params: (200, {}, some_data),

    'https://counterdoma.in/': serve_counter,

    'https://serve.scrip.ts/': serve_script,

    'https://site.with.scripts.block.ed':
    (302, {'location': 'https://site.with.scripts.block.ed/index.html'}, None),
    'https://site.with.scripts.block.ed/':
    (302, {'location': 'https://site.with.scripts.block.ed/index.html'}, None),
    'https://site.with.scripts.block.ed/index.html':
    (200, {}, pages_dir / 'gotmyowndomain_https.html'),

    'https://site.with.scripts.allow.ed':
    (302, {'location': 'https://site.with.scripts.allow.ed/index.html'}, None),
    'https://site.with.scripts.allow.ed/':
    (302, {'location': 'https://site.with.scripts.allow.ed/index.html'}, None),
    'https://site.with.scripts.allow.ed/index.html':
    (200, {}, pages_dir / 'gotmyowndomain_https.html'),

    'https://site.with.paylo.ad':
    (302, {'location': 'https://site.with.paylo.ad/index.html'}, None),
    'https://site.with.paylo.ad/':
    (302, {'location': 'https://site.with.paylo.ad/index.html'}, None),
    'https://site.with.paylo.ad/index.html':
    (200, {}, pages_dir / 'gotmyowndomain_https.html'),

    **sample_files_catalog,
    **sample_resources_catalog,
    **sample_mappings_catalog,
    **sample_queries_catalog
}