From 52d12a4fa124daa1595529e3e7008276a7986d95 Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Mon, 13 Jun 2022 11:06:49 +0200 Subject: unfinished partial work --- src/hydrilla/__init__.py | 10 +- src/hydrilla/builder/__init__.py | 7 + src/hydrilla/builder/__main__.py | 9 + src/hydrilla/builder/_version.py | 5 + src/hydrilla/builder/build.py | 485 +++++++++++++++++++++ src/hydrilla/builder/common_errors.py | 65 +++ src/hydrilla/builder/local_apt.py | 432 ++++++++++++++++++ src/hydrilla/builder/piggybacking.py | 117 +++++ src/hydrilla/exceptions.py | 40 ++ src/hydrilla/item_infos.py | 344 +++++++++++++++ src/hydrilla/json_instances.py | 207 +++++++++ src/hydrilla/locales/en_US/LC_MESSAGES/messages.po | 252 +++++++++++ src/hydrilla/locales/pl_PL/LC_MESSAGES/messages.po | 258 +++++++++++ src/hydrilla/mitmproxy_launcher/__main__.py | 11 + src/hydrilla/mitmproxy_launcher/launch.py | 77 ++++ src/hydrilla/pattern_tree.py | 339 ++++++++++++++ src/hydrilla/proxy/__init__.py | 5 + src/hydrilla/proxy/addon.py | 177 ++++++++ src/hydrilla/proxy/flow_handlers.py | 383 ++++++++++++++++ src/hydrilla/proxy/policies.py | 76 ++++ src/hydrilla/proxy/state.py | 73 ++++ src/hydrilla/proxy/store.py | 40 ++ src/hydrilla/py.typed | 5 + src/hydrilla/schemas/1.x | 1 + src/hydrilla/schemas/2.x | 1 + src/hydrilla/server/config.json | 3 - src/hydrilla/server/config.py | 6 +- .../locales/en_US/LC_MESSAGES/hydrilla-messages.po | 151 ------- src/hydrilla/server/serve.py | 406 ++++------------- src/hydrilla/translations.py | 104 +++++ src/hydrilla/url_patterns.py | 181 ++++++++ src/hydrilla/versions.py | 59 +++ 32 files changed, 3848 insertions(+), 481 deletions(-) create mode 100644 src/hydrilla/builder/__init__.py create mode 100644 src/hydrilla/builder/__main__.py create mode 100644 src/hydrilla/builder/_version.py create mode 100644 src/hydrilla/builder/build.py create mode 100644 src/hydrilla/builder/common_errors.py create mode 100644 src/hydrilla/builder/local_apt.py create mode 100644 
src/hydrilla/builder/piggybacking.py create mode 100644 src/hydrilla/exceptions.py create mode 100644 src/hydrilla/item_infos.py create mode 100644 src/hydrilla/json_instances.py create mode 100644 src/hydrilla/locales/en_US/LC_MESSAGES/messages.po create mode 100644 src/hydrilla/locales/pl_PL/LC_MESSAGES/messages.po create mode 100644 src/hydrilla/mitmproxy_launcher/__main__.py create mode 100644 src/hydrilla/mitmproxy_launcher/launch.py create mode 100644 src/hydrilla/pattern_tree.py create mode 100644 src/hydrilla/proxy/__init__.py create mode 100644 src/hydrilla/proxy/addon.py create mode 100644 src/hydrilla/proxy/flow_handlers.py create mode 100644 src/hydrilla/proxy/policies.py create mode 100644 src/hydrilla/proxy/state.py create mode 100644 src/hydrilla/proxy/store.py create mode 100644 src/hydrilla/py.typed create mode 160000 src/hydrilla/schemas/1.x create mode 160000 src/hydrilla/schemas/2.x delete mode 100644 src/hydrilla/server/locales/en_US/LC_MESSAGES/hydrilla-messages.po create mode 100644 src/hydrilla/translations.py create mode 100644 src/hydrilla/url_patterns.py create mode 100644 src/hydrilla/versions.py (limited to 'src/hydrilla') diff --git a/src/hydrilla/__init__.py b/src/hydrilla/__init__.py index 6aeb276..d382ead 100644 --- a/src/hydrilla/__init__.py +++ b/src/hydrilla/__init__.py @@ -1,7 +1,5 @@ -# SPDX-License-Identifier: 0BSD +# SPDX-License-Identifier: CC0-1.0 -# Copyright (C) 2013-2020, PyPA - -# https://packaging.python.org/en/latest/guides/packaging-namespace-packages/#pkgutil-style-namespace-packages - -__path__ = __import__('pkgutil').extend_path(__path__, __name__) +# Copyright (C) 2022 Wojtek Kosior +# +# Available under the terms of Creative Commons Zero v1.0 Universal. 
diff --git a/src/hydrilla/builder/__init__.py b/src/hydrilla/builder/__init__.py new file mode 100644 index 0000000..73dc579 --- /dev/null +++ b/src/hydrilla/builder/__init__.py @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: CC0-1.0 + +# Copyright (C) 2022 Wojtek Kosior +# +# Available under the terms of Creative Commons Zero v1.0 Universal. + +from .build import Build diff --git a/src/hydrilla/builder/__main__.py b/src/hydrilla/builder/__main__.py new file mode 100644 index 0000000..87dc9e2 --- /dev/null +++ b/src/hydrilla/builder/__main__.py @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: CC0-1.0 + +# Copyright (C) 2022 Wojtek Kosior +# +# Available under the terms of Creative Commons Zero v1.0 Universal. + +from . import build + +build.perform() diff --git a/src/hydrilla/builder/_version.py b/src/hydrilla/builder/_version.py new file mode 100644 index 0000000..2feb153 --- /dev/null +++ b/src/hydrilla/builder/_version.py @@ -0,0 +1,5 @@ +# coding: utf-8 +# file generated by setuptools_scm +# don't change, don't track in version control +version = '1.1b1' +version_tuple = (1, '1b1') diff --git a/src/hydrilla/builder/build.py b/src/hydrilla/builder/build.py new file mode 100644 index 0000000..acc6576 --- /dev/null +++ b/src/hydrilla/builder/build.py @@ -0,0 +1,485 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Building Hydrilla packages. +# +# This file is part of Hydrilla +# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +# Enable using with Python 3.7. +from __future__ import annotations + +import json +import re +import zipfile +import subprocess +from pathlib import Path, PurePosixPath +from hashlib import sha256 +from sys import stderr +from contextlib import contextmanager +from tempfile import TemporaryDirectory, TemporaryFile +from typing import Optional, Iterable, Iterator, Union + +import jsonschema # type: ignore +import click + +from .. import _version, json_instances, versions +from ..translations import smart_gettext as _ +from . import local_apt +from .piggybacking import Piggybacked +from .common_errors import * + +here = Path(__file__).resolve().parent + +schemas_root = 'https://hydrilla.koszko.org/schemas' + +generated_by = { + 'name': 'hydrilla.builder', + 'version': _version.version +} + +class ReuseError(SubprocessError): + """ + Exception used to report various problems when calling the REUSE tool. + """ + +def generate_spdx_report(root: Path) -> bytes: + """ + Use REUSE tool to generate an SPDX report for sources under 'root' and + return the report's contents as 'bytes'. + + In case the directory tree under 'root' does not constitute a + REUSE-compliant package, as exception is raised with linting report + included in it. + + In case the reuse tool is not installed, an exception is also raised. 
+ """ + for command in [ + ['reuse', '--root', str(root), 'lint'], + ['reuse', '--root', str(root), 'spdx'] + ]: + try: + cp = subprocess.run(command, capture_output=True, text=True) + except FileNotFoundError: + msg = _('couldnt_execute_{}_is_it_installed').format('reuse') + raise ReuseError(msg) + + if cp.returncode != 0: + msg = _('command_{}_failed').format(' '.join(command)) + raise ReuseError(msg, cp) + + return cp.stdout.encode() + +class FileRef: + """Represent reference to a file in the package.""" + def __init__(self, path: PurePosixPath, contents: bytes) -> None: + """Initialize FileRef.""" + self.include_in_distribution = False + self.include_in_source_archive = True + self.path = path + self.contents = contents + + self.contents_hash = sha256(contents).digest().hex() + + def make_ref_dict(self) -> dict[str, str]: + """ + Represent the file reference through a dict that can be included in JSON + defintions. + """ + return { + 'file': str(self.path), + 'sha256': self.contents_hash + } + +@contextmanager +def piggybacked_system(piggyback_def: Optional[dict], + piggyback_files: Optional[Path]) \ + -> Iterator[Piggybacked]: + """ + Resolve resources from a foreign software packaging system. Optionally, use + package files (.deb's, etc.) from a specified directory instead of resolving + and downloading them. + """ + if piggyback_def is None: + yield Piggybacked() + else: + # apt is the only supported system right now + assert piggyback_def['system'] == 'apt' + + with local_apt.piggybacked_system(piggyback_def, piggyback_files) \ + as piggybacked: + yield piggybacked + +class Build: + """ + Build a Hydrilla package. + """ + def __init__(self, srcdir: Path, index_json_path: Path, + piggyback_files: Optional[Path]=None): + """ + Initialize a build. All files to be included in a distribution package + are loaded into memory, all data gets validated and all necessary + computations (e.g. preparing of hashes) are performed. 
+ """ + self.srcdir = srcdir.resolve() + self.piggyback_files = piggyback_files + if piggyback_files is None: + piggyback_default_path = \ + srcdir.parent / f'{srcdir.name}.foreign-packages' + if piggyback_default_path.exists(): + self.piggyback_files = piggyback_default_path + + self.files_by_path: dict[PurePosixPath, FileRef] = {} + self.resource_list: list[dict] = [] + self.mapping_list: list[dict] = [] + + if not index_json_path.is_absolute(): + index_json_path = (self.srcdir / index_json_path) + + index_obj = json_instances.read_instance(index_json_path) + schema_fmt = 'package_source-{}.schema.json' + major = json_instances.validate_instance(index_obj, schema_fmt) + + index_desired_path = PurePosixPath('index.json') + self.files_by_path[index_desired_path] = \ + FileRef(index_desired_path, index_json_path.read_bytes()) + + self._process_index_json(index_obj, major) + + def _process_file(self, filename: Union[str, PurePosixPath], + piggybacked: Piggybacked, + include_in_distribution: bool=True): + """ + Resolve 'filename' relative to srcdir, load it to memory (if not loaded + before), compute its hash and store its information in + 'self.files_by_path'. + + 'filename' shall represent a relative path withing package directory. + + if 'include_in_distribution' is True it shall cause the file to not only + be included in the source package's zipfile, but also written as one of + built package's files. + + For each file an attempt is made to resolve it using 'piggybacked' + object. If a file is found and pulled from foreign software packaging + system this way, it gets automatically excluded from inclusion in + Hydrilla source package's zipfile. + + Return file's reference object that can be included in JSON defintions + of various kinds. + """ + include_in_source_archive = True + + desired_path = PurePosixPath(filename) + if '..' 
in desired_path.parts: + msg = _('path_contains_double_dot_{}').format(filename) + raise FileReferenceError(msg) + + path = piggybacked.resolve_file(desired_path) + if path is None: + path = (self.srcdir / desired_path).resolve() + if not path.is_relative_to(self.srcdir): + raise FileReferenceError(_('loading_{}_outside_package_dir') + .format(filename)) + + if str(path.relative_to(self.srcdir)) == 'index.json': + raise FileReferenceError(_('loading_reserved_index_json')) + else: + include_in_source_archive = False + + file_ref = self.files_by_path.get(desired_path) + if file_ref is None: + if not path.is_file(): + msg = _('referenced_file_{}_missing').format(desired_path) + raise FileReferenceError(msg) + + file_ref = FileRef(desired_path, path.read_bytes()) + self.files_by_path[desired_path] = file_ref + + if include_in_distribution: + file_ref.include_in_distribution = True + + if not include_in_source_archive: + file_ref.include_in_source_archive = False + + return file_ref.make_ref_dict() + + def _prepare_source_package_zip(self, source_name: str, + piggybacked: Piggybacked) -> str: + """ + Create and store in memory a .zip archive containing files needed to + build this source package. + + 'src_dir_name' shall not contain any slashes ('/'). + + Return zipfile's sha256 sum's hexstring. 
+ """ + tf = TemporaryFile() + source_dir_path = PurePosixPath(source_name) + piggybacked_dir_path = PurePosixPath(f'{source_name}.foreign-packages') + + with zipfile.ZipFile(tf, 'w') as zf: + for file_ref in self.files_by_path.values(): + if file_ref.include_in_source_archive: + zf.writestr(str(source_dir_path / file_ref.path), + file_ref.contents) + + for desired_path, real_path in piggybacked.archive_files(): + zf.writestr(str(piggybacked_dir_path / desired_path), + real_path.read_bytes()) + + tf.seek(0) + self.source_zip_contents = tf.read() + + return sha256(self.source_zip_contents).digest().hex() + + def _process_item(self, as_what: str, item_def: dict, + piggybacked: Piggybacked): + """ + Process 'item_def' as definition of a resource or mapping (determined by + 'as_what' param) and store in memory its processed form and files used + by it. + + Return a minimal item reference suitable for using in source + description. + """ + resulting_schema_version = [1] + + copy_props = ['identifier', 'long_name', 'description', + *filter(lambda p: p in item_def, ('comment', 'uuid'))] + + new_item_obj: dict = {} + + if as_what == 'resource': + item_list = self.resource_list + + copy_props.append('revision') + + script_file_refs = [self._process_file(f['file'], piggybacked) + for f in item_def.get('scripts', [])] + + deps = [{'identifier': res_ref['identifier']} + for res_ref in item_def.get('dependencies', [])] + + new_item_obj['dependencies'] = \ + [*piggybacked.resource_must_depend, *deps] + new_item_obj['scripts'] = script_file_refs + else: + item_list = self.mapping_list + + payloads = {} + for pat, res_ref in item_def.get('payloads', {}).items(): + payloads[pat] = {'identifier': res_ref['identifier']} + + new_item_obj['payloads'] = payloads + + new_item_obj['version'] = \ + versions.normalize_version(item_def['version']) + + if as_what == 'mapping' and item_def['type'] == "mapping_and_resource": + new_item_obj['version'].append(item_def['revision']) + + if 
self.source_schema_ver >= [2]: + # handle 'required_mappings' field + required = [{'identifier': map_ref['identifier']} + for map_ref in item_def.get('required_mappings', [])] + if required: + resulting_schema_version = max(resulting_schema_version, [2]) + new_item_obj['required_mappings'] = required + + # handle 'permissions' field + permissions = item_def.get('permissions', {}) + processed_permissions = {} + + if permissions.get('cors_bypass'): + processed_permissions['cors_bypass'] = True + if permissions.get('eval'): + processed_permissions['eval'] = True + + if processed_permissions: + new_item_obj['permissions'] = processed_permissions + resulting_schema_version = max(resulting_schema_version, [2]) + + # handle '{min,max}_haketilo_version' fields + for minmax, default in ('min', [1]), ('max', [65536]): + constraint = item_def.get(f'{minmax}_haketilo_version') + if constraint in (None, default): + continue + + copy_props.append(f'{minmax}_haketilo_version') + resulting_schema_version = max(resulting_schema_version, [2]) + + new_item_obj.update((p, item_def[p]) for p in copy_props) + + new_item_obj['$schema'] = ''.join([ + schemas_root, + f'/api_{as_what}_description', + '-', + versions.version_string(resulting_schema_version), + '.schema.json' + ]) + new_item_obj['type'] = as_what + new_item_obj['source_copyright'] = self.copyright_file_refs + new_item_obj['source_name'] = self.source_name + new_item_obj['generated_by'] = generated_by + + item_list.append(new_item_obj) + + props_in_ref = ('type', 'identifier', 'version', 'long_name') + return dict([(prop, new_item_obj[prop]) for prop in props_in_ref]) + + def _process_index_json(self, index_obj: dict, + major_schema_version: int) -> None: + """ + Process 'index_obj' as contents of source package's index.json and store + in memory this source package's zipfile as well as package's individual + files and computed definitions of the source package and items defined + in it. 
+ """ + self.source_schema_ver = \ + versions.normalize_version(get_schema_version(index_obj)) + + out_schema = f'{schemas_root}/api_source_description-1.schema.json' + + self.source_name = index_obj['source_name'] + + generate_spdx = index_obj.get('reuse_generate_spdx_report', False) + if generate_spdx: + contents = generate_spdx_report(self.srcdir) + spdx_path = PurePosixPath('report.spdx') + spdx_ref = FileRef(spdx_path, contents) + + spdx_ref.include_in_source_archive = False + self.files_by_path[spdx_path] = spdx_ref + + piggyback_def = None + if self.source_schema_ver >= [2] and 'piggyback_on' in index_obj: + piggyback_def = index_obj['piggyback_on'] + + with piggybacked_system(piggyback_def, self.piggyback_files) \ + as piggybacked: + copyright_to_process = [ + *(file_ref['file'] for file_ref in index_obj['copyright']), + *piggybacked.package_license_files + ] + self.copyright_file_refs = [self._process_file(f, piggybacked) + for f in copyright_to_process] + + if generate_spdx and not spdx_ref.include_in_distribution: + raise FileReferenceError(_('report_spdx_not_in_copyright_list')) + + item_refs = [] + for item_def in index_obj['definitions']: + if 'mapping' in item_def['type']: + ref = self._process_item('mapping', item_def, piggybacked) + item_refs.append(ref) + if 'resource' in item_def['type']: + ref = self._process_item('resource', item_def, piggybacked) + item_refs.append(ref) + + for file_ref in index_obj.get('additional_files', []): + self._process_file(file_ref['file'], piggybacked, + include_in_distribution=False) + + zipfile_sha256 = self._prepare_source_package_zip\ + (self.source_name, piggybacked) + + source_archives_obj = {'zip' : {'sha256': zipfile_sha256}} + + self.source_description = { + '$schema': out_schema, + 'source_name': self.source_name, + 'source_copyright': self.copyright_file_refs, + 'upstream_url': index_obj['upstream_url'], + 'definitions': item_refs, + 'source_archives': source_archives_obj, + 'generated_by': generated_by + 
} + + if 'comment' in index_obj: + self.source_description['comment'] = index_obj['comment'] + + def write_source_package_zip(self, dstpath: Path): + """ + Create a .zip archive containing files needed to build this source + package and write it at 'dstpath'. + """ + with open(dstpath, 'wb') as output: + output.write(self.source_zip_contents) + + def write_package_files(self, dstpath: Path): + """Write package files under 'dstpath' for distribution.""" + file_dir_path = (dstpath / 'file' / 'sha256').resolve() + file_dir_path.mkdir(parents=True, exist_ok=True) + + for file_ref in self.files_by_path.values(): + if file_ref.include_in_distribution: + file_path = file_dir_path / file_ref.contents_hash + file_path.write_bytes(file_ref.contents) + + source_dir_path = (dstpath / 'source').resolve() + source_dir_path.mkdir(parents=True, exist_ok=True) + source_name = self.source_description["source_name"] + + with open(source_dir_path / f'{source_name}.json', 'wt') as out_str: + json.dump(self.source_description, out_str) + + with open(source_dir_path / f'{source_name}.zip', 'wb') as out_bin: + out_bin.write(self.source_zip_contents) + + for item_type, item_list in [ + ('resource', self.resource_list), + ('mapping', self.mapping_list) + ]: + item_type_dir_path = (dstpath / item_type).resolve() + + for item_def in item_list: + item_dir_path = item_type_dir_path / item_def['identifier'] + item_dir_path.mkdir(parents=True, exist_ok=True) + + version = '.'.join([str(n) for n in item_def['version']]) + with open(item_dir_path / version, 'wt') as output: + json.dump(item_def, output) + +dir_type = click.Path(exists=True, file_okay=False, resolve_path=True) + +@click.command(help=_('build_package_from_srcdir_to_dstdir')) +@click.option('-s', '--srcdir', default='./', type=dir_type, show_default=True, + help=_('source_directory_to_build_from')) +@click.option('-i', '--index-json', default='index.json', type=click.Path(), + help=_('path_instead_of_index_json')) +@click.option('-p', 
'--piggyback-files', type=click.Path(), + help=_('path_instead_for_piggyback_files')) +@click.option('-d', '--dstdir', type=dir_type, required=True, + help=_('built_package_files_destination')) +@click.version_option(version=_version.version, prog_name='Hydrilla builder', + message=_('%(prog)s_%(version)s_license'), + help=_('version_printing')) +def perform(srcdir, index_json, piggyback_files, dstdir): + """ + Execute Hydrilla builder to turn source package into a distributable one. + + This command is meant to be the entry point of hydrilla-builder command + exported by this package. + """ + build = Build(Path(srcdir), Path(index_json), + piggyback_files and Path(piggyback_files)) + build.write_package_files(Path(dstdir)) diff --git a/src/hydrilla/builder/common_errors.py b/src/hydrilla/builder/common_errors.py new file mode 100644 index 0000000..ed4d0d2 --- /dev/null +++ b/src/hydrilla/builder/common_errors.py @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Error classes. +# +# This file is part of Hydrilla +# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. 
+ +""" +This module defines error types for use in other parts of Hydrilla builder. +""" + +# Enable using with Python 3.7. +from __future__ import annotations + +from pathlib import Path +from typing import Optional +from subprocess import CompletedProcess as CP + +from ..translations import smart_gettext as _ + +class DistroError(Exception): + """ + Exception used to report problems when resolving an OS distribution. + """ + +class FileReferenceError(Exception): + """ + Exception used to report various problems concerning files referenced from + source package. + """ + +class SubprocessError(Exception): + """ + Exception used to report problems related to execution of external + processes, includes. various problems when calling apt-* and dpkg-* + commands. + """ + def __init__(self, msg: str, cp: Optional[CP]=None) -> None: + """Initialize this SubprocessError""" + if cp and cp.stdout: + msg = '\n\n'.join([msg, _('STDOUT_OUTPUT_heading'), cp.stdout]) + + if cp and cp.stderr: + msg = '\n\n'.join([msg, _('STDERR_OUTPUT_heading'), cp.stderr]) + + super().__init__(msg) diff --git a/src/hydrilla/builder/local_apt.py b/src/hydrilla/builder/local_apt.py new file mode 100644 index 0000000..bdfc76f --- /dev/null +++ b/src/hydrilla/builder/local_apt.py @@ -0,0 +1,432 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Using a local APT. +# +# This file is part of Hydrilla +# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +# Enable using with Python 3.7. +from __future__ import annotations + +import zipfile +import shutil +import re +import subprocess +CP = subprocess.CompletedProcess +from pathlib import Path, PurePosixPath +from tempfile import TemporaryDirectory, NamedTemporaryFile +from hashlib import sha256 +from urllib.parse import unquote +from contextlib import contextmanager +from typing import Optional, Iterable, Iterator + +from ..translations import smart_gettext as _ +from .piggybacking import Piggybacked +from .common_errors import * + +here = Path(__file__).resolve().parent + +""" +Default cache directory to save APT configurations and downloaded GPG keys in. +""" +default_apt_cache_dir = Path.home() / '.cache' / 'hydrilla' / 'builder' / 'apt' + +""" +Default keyserver to use. +""" +default_keyserver = 'hkps://keyserver.ubuntu.com:443' + +""" +Default keys to download when using a local APT. +""" +default_keys = [ + # Trisquel + 'E6C27099CA21965B734AEA31B4EFB9F38D8AEBF1', + '60364C9869F92450421F0C22B138CA450C05112F', + # Ubuntu + '630239CC130E1A7FD81A27B140976EAF437D05B5', + '790BC7277767219C42C86F933B4FE6ACC0B21F32', + 'F6ECB3762474EDA9D21B7022871920D1991BC93C', + # Debian + '6D33866EDD8FFA41C0143AEDDCC9EFBF77E11517', + '80D15823B7FD1561F9F7BCDDDC30D7C23CBBABEE', + 'AC530D520F2F3269F5E98313A48449044AAD5C5D' +] + +"""sources.list file contents for known distros.""" +default_lists = { + 'nabia': [f'{type} http://archive.trisquel.info/trisquel/ nabia{suf} main' + for type in ('deb', 'deb-src') + for suf in ('', '-updates', '-security')] +} + +class GpgError(Exception): + """ + Exception used to report various problems when calling GPG. 
+ """ + +class AptError(SubprocessError): + """ + Exception used to report various problems when calling apt-* and dpkg-* + commands. + """ + +def run(command, **kwargs): + """A wrapped around subprocess.run that sets some default options.""" + return subprocess.run(command, **kwargs, env={'LANG': 'en_US'}, + capture_output=True, text=True) + +class Apt: + """ + This class represents an APT instance and can be used to call apt-get + commands with it. + """ + def __init__(self, apt_conf: str) -> None: + """Initialize this Apt object.""" + self.apt_conf = apt_conf + + def get(self, *args: str, **kwargs) -> CP: + """ + Run apt-get with the specified arguments and raise a meaningful AptError + when something goes wrong. + """ + command = ['apt-get', '-c', self.apt_conf, *args] + try: + cp = run(command, **kwargs) + except FileNotFoundError: + msg = _('couldnt_execute_{}_is_it_installed').format('apt-get') + raise AptError(msg) + + if cp.returncode != 0: + msg = _('command_{}_failed').format(' '.join(command)) + raise AptError(msg, cp) + + return cp + +def cache_dir() -> Path: + """ + Return the directory used to cache data (APT configurations, keyrings) to + speed up repeated operations. + + This function first ensures the directory exists. + """ + default_apt_cache_dir.mkdir(parents=True, exist_ok=True) + return default_apt_cache_dir + +class SourcesList: + """Representation of apt's sources.list contents.""" + def __init__(self, list: list[str]=[], + codename: Optional[str]=None) -> None: + """Initialize this SourcesList.""" + self.codename = None + self.list = [*list] + self.has_extra_entries = bool(self.list) + + if codename is not None: + if codename not in default_lists: + raise DistroError(_('distro_{}_unknown').format(codename)) + + self.codename = codename + self.list.extend(default_lists[codename]) + + def identity(self) -> str: + """ + Produce a string that uniquely identifies this sources.list contents. 
+ """ + if self.codename and not self.has_extra_entries: + return self.codename + + return sha256('\n'.join(sorted(self.list)).encode()).digest().hex() + +def apt_conf(directory: Path) -> str: + """ + Given local APT's directory, produce a configuration suitable for running + APT there. + + 'directory' must not contain any special characters including quotes and + spaces. + """ + return f''' +Architecture "amd64"; +Dir "{directory}"; +Dir::State "{directory}/var/lib/apt"; +Dir::State::status "{directory}/var/lib/dpkg/status"; +Dir::Etc::SourceList "{directory}/etc/apt.sources.list"; +Dir::Etc::SourceParts ""; +Dir::Cache "{directory}/var/cache/apt"; +pkgCacheGen::Essential "none"; +Dir::Etc::Trusted "{directory}/etc/trusted.gpg"; +''' + +def apt_keyring(keys: list[str]) -> bytes: + """ + Download the requested keys if necessary and export them as a keyring + suitable for passing to APT. + + The keyring is returned as a bytes value that should be written to a file. + """ + try: + from gnupg import GPG # type: ignore + except ModuleNotFoundError: + raise GpgError(_('couldnt_import_{}_is_it_installed').format('gnupg')) + + gpg = GPG(keyring=str(cache_dir() / 'master_keyring.gpg')) + for key in keys: + if gpg.list_keys(keys=[key]) != []: + continue + + if gpg.recv_keys(default_keyserver, key).imported == 0: + raise GpgError(_('gpg_couldnt_recv_key_{}').format(key)) + + return gpg.export_keys(keys, armor=False, minimal=True) + +def cache_apt_root(apt_root: Path, destination_zip: Path) -> None: + """ + Zip an APT root directory for later use and move the zipfile to the + requested destination. 
+ """ + temporary_zip_path = None + try: + tmpfile = NamedTemporaryFile(suffix='.zip', prefix='tmp_', + dir=cache_dir(), delete=False) + temporary_zip_path = Path(tmpfile.name) + + to_skip = {Path('etc') / 'apt.conf', Path('etc') / 'trusted.gpg'} + + with zipfile.ZipFile(tmpfile, 'w') as zf: + for member in apt_root.rglob('*'): + relative = member.relative_to(apt_root) + if relative not in to_skip: + # This call will also properly add empty folders to zip file + zf.write(member, relative, zipfile.ZIP_DEFLATED) + + shutil.move(temporary_zip_path, destination_zip) + finally: + if temporary_zip_path is not None and temporary_zip_path.exists(): + temporary_zip_path.unlink() + +def setup_local_apt(directory: Path, list: SourcesList, keys: list[str]) -> Apt: + """ + Create files and directories necessary for running APT without root rights + inside 'directory'. + + 'directory' must not contain any special characters including quotes and + spaces and must be empty. + + Return an Apt object that can be used to call apt-get commands. 
+ """ + apt_root = directory / 'apt_root' + + conf_text = apt_conf(apt_root) + keyring_bytes = apt_keyring(keys) + + apt_zipfile = cache_dir() / f'apt_{list.identity()}.zip' + if apt_zipfile.exists(): + with zipfile.ZipFile(apt_zipfile) as zf: + zf.extractall(apt_root) + + for to_create in ( + apt_root / 'var' / 'lib' / 'apt' / 'partial', + apt_root / 'var' / 'lib' / 'apt' / 'lists', + apt_root / 'var' / 'cache' / 'apt' / 'archives' / 'partial', + apt_root / 'etc' / 'apt' / 'preferences.d', + apt_root / 'var' / 'lib' / 'dpkg', + apt_root / 'var' / 'log' / 'apt' + ): + to_create.mkdir(parents=True, exist_ok=True) + + conf_path = apt_root / 'etc' / 'apt.conf' + trusted_path = apt_root / 'etc' / 'trusted.gpg' + status_path = apt_root / 'var' / 'lib' / 'dpkg' / 'status' + list_path = apt_root / 'etc' / 'apt.sources.list' + + conf_path.write_text(conf_text) + trusted_path.write_bytes(keyring_bytes) + status_path.touch() + list_path.write_text('\n'.join(list.list)) + + apt = Apt(str(conf_path)) + apt.get('update') + + cache_apt_root(apt_root, apt_zipfile) + + return apt + +@contextmanager +def local_apt(list: SourcesList, keys: list[str]) -> Iterator[Apt]: + """ + Create a temporary directory with proper local APT configuration in it. + Yield an Apt object that can be used to issue apt-get commands. + + This function returns a context manager that will remove the directory on + close. + """ + with TemporaryDirectory() as td_str: + td = Path(td_str) + yield setup_local_apt(td, list, keys) + +def download_apt_packages(list: SourcesList, keys: list[str], + packages: list[str], destination_dir: Path, + with_deps: bool) -> list[str]: + """ + Set up a local APT, update it using the specified sources.list configuration + and use it to download the specified packages. 
+ + This function downloads .deb files of packages matching the amd64 + architecture (which includes packages with architecture 'all') as well as + all their corresponding source package files and (if requested) the debs + and source files of all their declared dependencies. + + Return value is a list of names of all downloaded files. + """ + install_line_regex = re.compile(r'^Inst (?P<name>\S+) \((?P<version>\S+) ') + + with local_apt(list, keys) as apt: + if with_deps: + cp = apt.get('install', '--yes', '--just-print', *packages) + + lines = cp.stdout.split('\n') + matches = [install_line_regex.match(l) for l in lines] + packages = [f'{m.group("name")}={m.group("version")}' + for m in matches if m] + + if not packages: + raise AptError(_('apt_install_output_not_understood'), cp) + + # Download .debs indirectly to destination_dir by first placing them + # in a temporary subdirectory. + with TemporaryDirectory(dir=destination_dir) as td_str: + td = Path(td_str) + cp = apt.get('download', *packages, cwd=td) + + deb_name_regex = re.compile( + r''' + ^ + (?P<name>[^_]+) + _ + (?P<ver>[^_]+) + _ + .+ # architecture (or 'all') + \.deb + $ + ''', + re.VERBOSE) + + names_vers = [] + downloaded = [] + for deb_file in td.iterdir(): + match = deb_name_regex.match(deb_file.name) + if match is None: + msg = _('apt_download_gave_bad_filename_{}')\ + .format(deb_file.name) + raise AptError(msg, cp) + + names_vers.append(( + unquote(match.group('name')), + unquote(match.group('ver')) + )) + downloaded.append(deb_file.name) + + apt.get('source', '--download-only', + *[f'{n}={v}' for n, v in names_vers], cwd=td) + + for source_file in td.iterdir(): + if source_file.name in downloaded: + continue + + downloaded.append(source_file.name) + + for filename in downloaded: + shutil.move(td / filename, destination_dir / filename) + + return downloaded + +@contextmanager +def piggybacked_system(piggyback_def: dict, foreign_packages: Optional[Path]) \ + -> Iterator[Piggybacked]: + """ + Resolve resources from APT. 
Optionally, use package files (.deb's, etc.) + from a specified directory instead of resolving and downloading them. + + The directories and files created for the yielded Piggybacked object shall + be deleted when this context manager gets closed. + """ + assert piggyback_def['system'] == 'apt' + + with TemporaryDirectory() as td_str: + td = Path(td_str) + root = td / 'root' + root.mkdir() + + if foreign_packages is None: + archives = td / 'archives' + archives.mkdir() + else: + archives = foreign_packages / 'apt' + archives.mkdir(exist_ok=True) + + if [*archives.glob('*.deb')] == []: + sources_list = SourcesList(piggyback_def.get('sources_list', []), + piggyback_def.get('distribution')) + packages = piggyback_def['packages'] + with_deps = piggyback_def['dependencies'] + pgp_keys = [ + *default_keys, + *piggyback_def.get('trusted_keys', []) + ] + + download_apt_packages( + list=sources_list, + keys=pgp_keys, + packages=packages, + destination_dir=archives, + with_deps=with_deps + ) + + for deb in archives.glob('*.deb'): + command = ['dpkg-deb', '-x', str(deb), str(root)] + try: + cp = run(command) + except FileNotFoundError: + msg = _('couldnt_execute_{}_is_it_installed'.format('dpkg-deb')) + raise AptError(msg) + + if cp.returncode != 0: + msg = _('command_{}_failed').format(' '.join(command)) + raise AptError(msg, cp) + + docs_dir = root / 'usr' / 'share' / 'doc' + copyright_paths = [p / 'copyright' for p in docs_dir.iterdir()] \ + if docs_dir.exists() else [] + copyright_pure_paths = [PurePosixPath('.apt-root') / p.relative_to(root) + for p in copyright_paths if p.exists()] + + standard_depends = piggyback_def.get('depend_on_base_packages', True) + must_depend = [{'identifier': 'apt-common-licenses'}] \ + if standard_depends else [] + + yield Piggybacked( + archives={'apt': archives}, + roots={'.apt-root': root}, + package_license_files=copyright_pure_paths, + resource_must_depend=must_depend + ) diff --git a/src/hydrilla/builder/piggybacking.py 
b/src/hydrilla/builder/piggybacking.py new file mode 100644 index 0000000..5813509 --- /dev/null +++ b/src/hydrilla/builder/piggybacking.py @@ -0,0 +1,117 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Handling of software packaged for other distribution systems. +# +# This file is part of Hydrilla +# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +""" +This module contains definitions that may be reused by multiple piggybacked +software system backends. +""" + +# Enable using with Python 3.7. +from __future__ import annotations + +from pathlib import Path, PurePosixPath +from typing import Optional, Iterable + +from ..translations import smart_gettext as _ +from .common_errors import * + +here = Path(__file__).resolve().parent + +class Piggybacked: + """ + Store information about foreign resources in use. + + Public attributes: + 'resource_must_depend' (read-only) + 'package_license_files' (read-only) + """ + def __init__(self, archives: dict[str, Path]={}, roots: dict[str, Path]={}, + package_license_files: list[PurePosixPath]=[], + resource_must_depend: list[dict]=[]): + """ + Initialize this Piggybacked object. 
+ + 'archives' maps piggybacked system names to directories that contain + package(s)' archive files. An 'archives' object may look like + {'apt': PosixPath('/path/to/dir/with/debs/and/tarballs')}. + + 'roots' associates directory names to be virtually inserted under + Hydrilla source package directory with paths to real filesystem + directories that hold their desired contents, i.e. unpacked foreign + packages. + + 'package_license_files' lists paths to license files that should be + included with the Haketilo package that will be produced. The paths are + to be resolved using 'roots' dictionary. + + 'resource_must_depend' lists names of Haketilo packages that the + produced resources will additionally depend on. This is meant to help + distribute common licenses with a separate Haketilo package. + """ + self.archives = archives + self.roots = roots + self.package_license_files = package_license_files + self.resource_must_depend = resource_must_depend + + def resolve_file(self, file_ref_name: PurePosixPath) -> Optional[Path]: + """ + 'file_ref_name' is a path as may appear in an index.json file. Check if + the file belongs to one of the roots we have and return either a path + to the relevant file under this root or None. + + It is not being checked whether the file actually exists in the + filesystem. + """ + parts = file_ref_name.parts + if not parts: + return None + + root_path = self.roots.get(parts[0]) + if root_path is None: + return None + + path = root_path + + for part in parts[1:]: + path = path / part + + path = path.resolve() + + if not path.is_relative_to(root_path): + raise FileReferenceError(_('loading_{}_outside_piggybacked_dir') + .format(file_ref_name)) + + return path + + def archive_files(self) -> Iterable[tuple[PurePosixPath, Path]]: + """ + Yield all archive files in use. Each yielded tuple holds file's desired + path relative to the piggybacked archives directory to be created and + its current real path. 
+ """ + for system, real_dir in self.archives.items(): + for path in real_dir.rglob('*'): + yield PurePosixPath(system) / path.relative_to(real_dir), path diff --git a/src/hydrilla/exceptions.py b/src/hydrilla/exceptions.py new file mode 100644 index 0000000..112d98c --- /dev/null +++ b/src/hydrilla/exceptions.py @@ -0,0 +1,40 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Custom exceptions and logging. +# +# This file is part of Hydrilla&Haketilo. +# +# Copyright (C) 2021, 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +""" +This module contains the base exception type used by Haketilo code. +""" + +# Enable using with Python 3.7. +from __future__ import annotations + +class HaketiloException(Exception): + """ + Type used for exceptions generated by Haketilo code. + Instances of this type are expected to have their error messages + localized. + """ + pass diff --git a/src/hydrilla/item_infos.py b/src/hydrilla/item_infos.py new file mode 100644 index 0000000..c366ab5 --- /dev/null +++ b/src/hydrilla/item_infos.py @@ -0,0 +1,344 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Reading resources, mappings and other JSON documents from the filesystem. 
+# +# This file is part of Hydrilla&Haketilo +# +# Copyright (C) 2021, 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +""" +..... +""" + +# Enable using with Python 3.7. +from __future__ import annotations + +import typing as t +import dataclasses as dc + +from pathlib import Path, PurePath + +from immutables import Map, MapMutation + +from . import versions, json_instances +from .url_patterns import parse_pattern, ParsedUrl +from .exceptions import HaketiloException +from .translations import smart_gettext as _ + +VerTuple = t.Tuple[int, ...] 
+ +@dc.dataclass(frozen=True, unsafe_hash=True) +class ItemRef: + """....""" + identifier: str + +RefObjs = t.Sequence[t.Mapping[str, t.Any]] + +def make_item_refs_seq(ref_objs: RefObjs) -> tuple[ItemRef, ...]: + """....""" + return tuple(ItemRef(ref['identifier']) for ref in ref_objs) + +def make_required_mappings(refs_objs: t.Any, schema_compat: int) \ + -> tuple[ItemRef, ...]: + """....""" + if schema_compat < 2: + return () + + return make_item_refs_seq(refs_objs) + +@dc.dataclass(frozen=True, unsafe_hash=True) +class FileRef: + """....""" + name: str + sha256: str + +def make_file_refs_seq(ref_objs: RefObjs) -> tuple[FileRef, ...]: + """....""" + return tuple(FileRef(ref['file'], ref['sha256']) for ref in ref_objs) + +@dc.dataclass(frozen=True, unsafe_hash=True) +class GeneratedBy: + """....""" + name: str + version: t.Optional[str] + + @staticmethod + def make(generated_obj: t.Optional[t.Mapping[str, t.Any]]) -> \ + t.Optional['GeneratedBy']: + """....""" + if generated_obj is None: + return None + + return GeneratedBy( + name = generated_obj['name'], + version = generated_obj.get('version') + ) + +@dc.dataclass(frozen=True, unsafe_hash=True) +class ItemInfoBase: + """....""" + repository: str # repository used in __hash__() + source_name: str = dc.field(hash=False) + source_copyright: tuple[FileRef, ...] = dc.field(hash=False) + version: VerTuple # version used in __hash__() + identifier: str # identifier used in __hash__() + uuid: t.Optional[str] = dc.field(hash=False) + long_name: str = dc.field(hash=False) + required_mappings: tuple[ItemRef, ...] = dc.field(hash=False) + generated_by: t.Optional[GeneratedBy] = dc.field(hash=False) + + def path_relative_to_type(self) -> str: + """ + Get a relative path to this item's JSON definition with respect to + directory containing items of this type. 
+ """ + return f'{self.identifier}/{versions.version_string(self.version)}' + + def path(self) -> str: + """ + Get a relative path to this item's JSON definition with respect to + malcontent directory containing loadable items. + """ + return f'{self.type_name}/{self.path_relative_to_type()}' + + @property + def versioned_identifier(self): + """....""" + return f'{self.identifier}-{versions.version_string(self.version)}' + + @staticmethod + def _get_base_init_kwargs( + item_obj: t.Mapping[str, t.Any], + schema_compat: int, + repository: str + ) -> t.Mapping[str, t.Any]: + """....""" + source_copyright = make_file_refs_seq(item_obj['source_copyright']) + + version = versions.normalize_version(item_obj['version']) + + required_mappings = make_required_mappings( + item_obj.get('required_mappings', []), + schema_compat + ) + + generated_by = GeneratedBy.make(item_obj.get('generated_by')) + + return Map( + repository = repository, + source_name = item_obj['source_name'], + source_copyright = source_copyright, + version = version, + identifier = item_obj['identifier'], + uuid = item_obj.get('uuid'), + long_name = item_obj['long_name'], + required_mappings = required_mappings, + generated_by = generated_by + ) + + # class property + type_name = '!INVALID!' + +InstanceOrPath = t.Union[Path, str, dict[str, t.Any]] + +@dc.dataclass(frozen=True, unsafe_hash=True) +class ResourceInfo(ItemInfoBase): + """....""" + revision: int = dc.field(hash=False) + dependencies: tuple[ItemRef, ...] = dc.field(hash=False) + scripts: tuple[FileRef, ...] 
= dc.field(hash=False) + + @property + def versioned_identifier(self): + """....""" + return f'{super().versioned_identifier()}-{self.revision}' + + @staticmethod + def make( + item_obj: t.Mapping[str, t.Any], + schema_compat: int, + repository: str + ) -> 'ResourceInfo': + """....""" + base_init_kwargs = ItemInfoBase._get_base_init_kwargs( + item_obj, + schema_compat, + repository + ) + + return ResourceInfo( + **base_init_kwargs, + + revision = item_obj['revision'], + dependencies = make_item_refs_seq(item_obj.get('dependencies', [])), + scripts = make_file_refs_seq(item_obj.get('scripts', [])), + ) + + @staticmethod + def load(instance_or_path: 'InstanceOrPath', repository: str) \ + -> 'ResourceInfo': + """....""" + return _load_item_info(ResourceInfo, instance_or_path, repository) + + # class property + type_name = 'resource' + +def make_payloads(payloads_obj: t.Mapping[str, t.Any]) \ + -> t.Mapping[ParsedUrl, ItemRef]: + """....""" + mapping: list[tuple[ParsedUrl, ItemRef]] = [] + + for pattern, ref_obj in payloads_obj.items(): + ref = ItemRef(ref_obj['identifier']) + mapping.extend((parsed, ref) for parsed in parse_pattern(pattern)) + + return Map(mapping) + +@dc.dataclass(frozen=True, unsafe_hash=True) +class MappingInfo(ItemInfoBase): + """....""" + payloads: t.Mapping[ParsedUrl, ItemRef] = dc.field(hash=False) + + @staticmethod + def make( + item_obj: t.Mapping[str, t.Any], + schema_compat: int, + repository: str + ) -> 'MappingInfo': + """....""" + base_init_kwargs = ItemInfoBase._get_base_init_kwargs( + item_obj, + schema_compat, + repository + ) + + return MappingInfo( + **base_init_kwargs, + + payloads = make_payloads(item_obj.get('payloads', {})) + ) + + @staticmethod + def load(instance_or_path: 'InstanceOrPath', repository: str) \ + -> 'MappingInfo': + """....""" + return _load_item_info(MappingInfo, instance_or_path, repository) + + # class property + type_name = 'mapping' + + +LoadedType = t.TypeVar('LoadedType', ResourceInfo, MappingInfo) + +def 
_load_item_info( + info_type: t.Type[LoadedType], + instance_or_path: InstanceOrPath, + repository: str +) -> LoadedType: + """Read, validate and autocomplete a mapping/resource description.""" + instance = json_instances.read_instance(instance_or_path) + + schema_fmt = f'api_{info_type.type_name}_description-{{}}.schema.json' + + schema_compat = json_instances.validate_instance(instance, schema_fmt) + + # We know from successful validation that instance is a dict. + return info_type.make( + t.cast('dict[str, t.Any]', instance), + schema_compat, + repository + ) + + +VersionedType = t.TypeVar('VersionedType', ResourceInfo, MappingInfo) + +@dc.dataclass(frozen=True) +class VersionedItemInfo(t.Generic[VersionedType]): + """Stores data of multiple versions of given resource/mapping.""" + uuid: t.Optional[str] = None + identifier: str = '' + _by_version: Map[VerTuple, VersionedType] = Map() + _initialized: bool = False + + def register(self, item_info: VersionedType) -> 'VersionedInfoSelfType': + """ + Make item info queryable by version. Perform sanity checks for uuid. 
+ """ + identifier = item_info.identifier + if self._initialized: + assert identifier == self.identifier + + if self.uuid is not None: + uuid: t.Optional[str] = self.uuid + if item_info.uuid is not None and self.uuid != item_info.uuid: + raise HaketiloException(_('uuid_mismatch_{identifier}') + .format(identifier=identifier)) + else: + uuid = item_info.uuid + + by_version = self._by_version.set(item_info.version, item_info) + + return VersionedItemInfo( + identifier = identifier, + uuid = uuid, + _by_version = by_version, + _initialized = True + ) + + def unregister(self, version: VerTuple) -> 'VersionedInfoSelfType': + """....""" + try: + by_version = self._by_version.delete(version) + except KeyError: + by_version = self._by_version + + return dc.replace(self, _by_version=by_version) + + def is_empty(self) -> bool: + """....""" + return len(self._by_version) == 0 + + def newest_version(self) -> VerTuple: + """....""" + assert not self.is_empty() + + return max(self._by_version.keys()) + + def get_newest(self) -> VersionedType: + """Find and return info of the newest version of item.""" + newest = self._by_version[self.newest_version()] + assert newest is not None + return newest + + def get_by_ver(self, ver: t.Iterable[int]) -> t.Optional[VersionedType]: + """ + Find and return info of the specified version of the item (or None if + absent). + """ + return self._by_version.get(tuple(ver)) + + def get_all(self) -> t.Iterator[VersionedType]: + """Generate item info for all its versions, from oldest to newest.""" + for version in sorted(self._by_version.keys()): + yield self._by_version[version] + +# Below we define 1 type used by recursively-typed VersionedItemInfo. 
+VersionedInfoSelfType = VersionedItemInfo[VersionedType] diff --git a/src/hydrilla/json_instances.py b/src/hydrilla/json_instances.py new file mode 100644 index 0000000..40b213b --- /dev/null +++ b/src/hydrilla/json_instances.py @@ -0,0 +1,207 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Handling JSON objects. +# +# This file is part of Hydrilla&Haketilo. +# +# Copyright (C) 2021, 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +""" +This module contains utilities for reading and validation of JSON instances. +""" + +# Enable using with Python 3.7. 
+from __future__ import annotations + +import re +import json +import os +import typing as t + +from pathlib import Path, PurePath + +from jsonschema import RefResolver, Draft7Validator # type: ignore + +from .translations import smart_gettext as _ +from .exceptions import HaketiloException +from .versions import parse_version + +here = Path(__file__).resolve().parent + +_strip_comment_re = re.compile(r''' +^ # match from the beginning of each line +( # catch the part before '//' comment + (?: # this group matches either a string or a single out-of-string character + [^"/] | + " + (?: # this group matches any in-a-string character + [^"\\] | # match any normal character + \\[^u] | # match any escaped character like '\f' or '\n' + \\u[a-fA-F0-9]{4} # match an escape + )* + " + )* +) +# expect either end-of-line or a comment: +# * unterminated strings will cause matching to fail +# * bad comment (with '/' instead of '//') will be indicated by second group +# having length 1 instead of 2 or 0 +(//?|$) +''', re.VERBOSE) + +def strip_json_comments(text: str) -> str: + """ + Accept JSON text with optional C++-style ('//') comments and return the text + with comments removed. Consecutive slashes inside strings are handled + properly. A spurious single slash ('/') shall generate an error. Errors in + JSON itself shall be ignored. 
+ """ + stripped_text = [] + for line_num, line in enumerate(text.split('\n'), start=1): + match = _strip_comment_re.match(line) + + if match is None: # unterminated string + # ignore this error, let the json module report it + stripped = line + elif len(match[2]) == 1: + msg_fmt = _('bad_json_comment_line_{line_num}_char_{char_num}') + + raise HaketiloException(msg_fmt.format( + line_num = line_num, + char_num = len(match[1]) + 1 + )) + else: + stripped = match[1] + + stripped_text.append(stripped) + + return '\n'.join(stripped_text) + +_schema_name_re = re.compile(r''' +(?P<name_base>[^/]*) +- +(?P<ver> + (?P<major>[1-9][0-9]*) + (?: # this repeated group matches the remaining version numbers + \. + (?:[1-9][0-9]*|0) + )* +) +\.schema\.json +$ +''', re.VERBOSE) + +schema_paths: dict[str, Path] = {} +for path in (here / 'schemas').rglob('*.schema.json'): + match = _schema_name_re.match(path.name) + assert match is not None + + schema_name_base = match.group('name_base') + schema_ver_list = match.group('ver').split('.') + + for i in range(len(schema_ver_list)): + schema_ver = '.'.join(schema_ver_list[:i+1]) + schema_paths[f'{schema_name_base}-{schema_ver}.schema.json'] = path + +schema_paths.update([(f'https://hydrilla.koszko.org/schemas/{name}', path) + for name, path in schema_paths.items()]) + +schemas: dict[Path, dict[str, t.Any]] = {} + +def _get_schema(schema_name: str) -> dict[str, t.Any]: + """Return loaded JSON of the requested schema. Cache results.""" + path = schema_paths.get(schema_name) + if path is None: + raise HaketiloException(_('unknown_schema_{}').format(schema_name)) + + if path not in schemas: + schemas[path] = json.loads(path.read_text()) + + return schemas[path] + +def validator_for(schema: t.Union[str, dict[str, t.Any]]) -> Draft7Validator: + """ + Prepare a validator for the provided schema. + + Other schemas under '../schemas' can be referenced. 
+ """ + if isinstance(schema, str): + schema = _get_schema(schema) + + resolver = RefResolver( + base_uri=schema['$id'], + referrer=schema, + handlers={'https': _get_schema} + ) + + return Draft7Validator(schema, resolver=resolver) + +def parse_instance(text: str) -> object: + """Parse 'text' as JSON with additional '//' comments support.""" + return json.loads(strip_json_comments(text)) + +InstanceOrPath = t.Union[Path, str, dict[str, t.Any]] + +def read_instance(instance_or_path: InstanceOrPath) -> object: + """....""" + if isinstance(instance_or_path, dict): + return instance_or_path + + with open(instance_or_path, 'rt') as handle: + text = handle.read() + + try: + return parse_instance(text) + except: + raise HaketiloException(_('text_in_{}_not_valid_json')\ + .format(instance_or_path)) + +def get_schema_version(instance: object) -> tuple[int, ...]: + """ + Parse passed object's "$schema" property and return the schema version tuple. + """ + ver_str: t.Optional[str] = None + + if isinstance(instance, dict) and type(instance.get('$schema')) is str: + match = _schema_name_re.search(instance['$schema']) + ver_str = match.group('ver') if match else None + + if ver_str is not None: + return parse_version(ver_str) + else: + raise HaketiloException(_('no_schema_number_in_instance')) + +def get_schema_major_number(instance: object) -> int: + """ + Parse passed object's "$schema" property and return the major number of + schema version. 
+ """ + return get_schema_version(instance)[0] + +def validate_instance(instance: object, schema_name_fmt: str) -> int: + """....""" + major = get_schema_major_number(instance) + schema_name = schema_name_fmt.format(major) + validator = validator_for(schema_name) + + validator.validate(instance) + + return major diff --git a/src/hydrilla/locales/en_US/LC_MESSAGES/messages.po b/src/hydrilla/locales/en_US/LC_MESSAGES/messages.po new file mode 100644 index 0000000..12abee5 --- /dev/null +++ b/src/hydrilla/locales/en_US/LC_MESSAGES/messages.po @@ -0,0 +1,252 @@ +# SPDX-License-Identifier: CC0-1.0 +# +# English (United States) translations for hydrilla. +# Copyright (C) 2021, 2022 Wojtek Kosior +# Available under the terms of Creative Commons Zero v1.0 Universal. +msgid "" +msgstr "" +"Project-Id-Version: hydrilla 2.0\n" +"Report-Msgid-Bugs-To: koszko@koszko.org\n" +"POT-Creation-Date: 2022-06-07 10:23+0200\n" +"PO-Revision-Date: 2022-02-12 00:00+0000\n" +"Last-Translator: Wojtek Kosior \n" +"Language: en_US\n" +"Language-Team: en_US \n" +"Plural-Forms: nplurals=2; plural=(n != 1)\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.8.0\n" + +#: src/hydrilla/builder/build.py:83 src/hydrilla/builder/local_apt.py:116 +#: src/hydrilla/builder/local_apt.py:408 +msgid "couldnt_execute_{}_is_it_installed" +msgstr "Could not execute '{}'. Is the tool installed and reachable via PATH?" + +#: src/hydrilla/builder/build.py:87 src/hydrilla/builder/local_apt.py:120 +#: src/hydrilla/builder/local_apt.py:412 +msgid "command_{}_failed" +msgstr "The following command finished execution with a non-zero exit status: {}" + +#: src/hydrilla/builder/build.py:160 +msgid "unknown_schema_package_source_{}" +msgstr "" +"The provided JSON at '{}' does not use any of the known package source " +"JSON schemas." 
+ +#: src/hydrilla/builder/build.py:196 +msgid "path_contains_double_dot_{}" +msgstr "" +"Attempt to load '{}' which includes a forbidden parent reference ('..') " +"in the path." + +#: src/hydrilla/builder/build.py:203 +msgid "loading_{}_outside_package_dir" +msgstr "Attempt to load '{}' which lies outside package source directory." + +#: src/hydrilla/builder/build.py:207 +msgid "loading_reserved_index_json" +msgstr "Attempt to load 'index.json' which is a reserved filename." + +#: src/hydrilla/builder/build.py:214 +msgid "referenced_file_{}_missing" +msgstr "Referenced file '{}' is missing." + +#: src/hydrilla/builder/build.py:396 +msgid "report_spdx_not_in_copyright_list" +msgstr "" +"Told to generate 'report.spdx' but 'report.spdx' is not listed among " +"copyright files. Refusing to proceed." + +#: src/hydrilla/builder/build.py:473 +msgid "build_package_from_srcdir_to_dstdir" +msgstr "" +"Build Hydrilla package from `srcdir` and write the resulting files under " +"`dstdir`." + +#: src/hydrilla/builder/build.py:475 +msgid "source_directory_to_build_from" +msgstr "Source directory to build from." + +#: src/hydrilla/builder/build.py:477 +msgid "path_instead_of_index_json" +msgstr "" +"Path to file to be processed instead of index.json (if not absolute, " +"resolved relative to srcdir)." + +#: src/hydrilla/builder/build.py:479 +msgid "path_instead_for_piggyback_files" +msgstr "" +"Path to a non-standard directory with foreign packages' archive files to " +"use." + +#: src/hydrilla/builder/build.py:481 +msgid "built_package_files_destination" +msgstr "Destination directory to write built package files to." 
+ +#: src/hydrilla/builder/build.py:483 src/hydrilla/server/serve.py:582 +#: src/hydrilla/server/serve.py:604 src/hydrilla/server/serve.py:647 +#, python-format +msgid "%(prog)s_%(version)s_license" +msgstr "" +"%(prog)s %(version)s\n" +"Copyright (C) 2021,2022 Wojtek Kosior and contributors.\n" +"License GPLv3+: GNU AGPL version 3 or later " +"\n" +"This is free software: you are free to change and redistribute it.\n" +"There is NO WARRANTY, to the extent permitted by law." + +#: src/hydrilla/builder/build.py:484 src/hydrilla/server/serve.py:605 +#: src/hydrilla/server/serve.py:648 +msgid "version_printing" +msgstr "Print version information and exit." + +#: src/hydrilla/builder/common_errors.py:58 +msgid "STDOUT_OUTPUT_heading" +msgstr "## Command's standard output ##" + +#: src/hydrilla/builder/common_errors.py:61 +msgid "STDERR_OUTPUT_heading" +msgstr "## Command's standard error output ##" + +#: src/hydrilla/builder/local_apt.py:145 +msgid "distro_{}_unknown" +msgstr "Attempt to use an unknown software distribution '{}'." + +#: src/hydrilla/builder/local_apt.py:189 +msgid "couldnt_import_{}_is_it_installed" +msgstr "" +"Could not import '{}'. Is the module installed and visible to this Python" +" instance?" + +#: src/hydrilla/builder/local_apt.py:197 +msgid "gpg_couldnt_recv_key_{}" +msgstr "Could not import PGP key '{}'." + +#: src/hydrilla/builder/local_apt.py:311 +msgid "apt_install_output_not_understood" +msgstr "The output of an 'apt-get install' command was not understood." + +#: src/hydrilla/builder/local_apt.py:337 +msgid "apt_download_gave_bad_filename_{}" +msgstr "The 'apt-get download' command produced a file with unexpected name '{}'." + +#: src/hydrilla/builder/piggybacking.py:100 +msgid "loading_{}_outside_piggybacked_dir" +msgstr "" +"Attempt to load '{}' which lies outside piggybacked packages files root " +"directory." 
+ +#: src/hydrilla/server/serve.py:126 +#, python-brace-format +msgid "uuid_mismatch_{identifier}" +msgstr "Two different uuids were specified for item '{identifier}'." + +#: src/hydrilla/server/serve.py:133 +#, python-brace-format +msgid "version_clash_{identifier}_{version}" +msgstr "Version '{version}' specified more than once for item '{identifier}'." + +#: src/hydrilla/server/serve.py:249 src/hydrilla/server/serve.py:261 +msgid "invalid_URL_{}" +msgstr "Invalid URL/pattern: '{}'." + +#: src/hydrilla/server/serve.py:253 +msgid "disallowed_protocol_{}" +msgstr "Disallowed protocol: '{}'." + +#: src/hydrilla/server/serve.py:306 +msgid "malcontent_dir_path_not_dir_{}" +msgstr "Provided 'malcontent_dir' path does not name a directory: {}" + +#: src/hydrilla/server/serve.py:325 +msgid "couldnt_load_item_from_{}" +msgstr "Couldn't load item from {}." + +#: src/hydrilla/server/serve.py:350 +msgid "item_{item}_in_file_{file}" +msgstr "Item {item} incorrectly present under {file}." + +#: src/hydrilla/server/serve.py:356 +msgid "item_version_{ver}_in_file_{file}" +msgstr "Item version {ver} incorrectly present under {file}." + +#: src/hydrilla/server/serve.py:379 +msgid "no_dep_{resource}_{ver}_{dep}" +msgstr "Unknown dependency '{dep}' of resource '{resource}', version '{ver}'." + +#: src/hydrilla/server/serve.py:390 +msgid "no_payload_{mapping}_{ver}_{payload}" +msgstr "Unknown payload '{payload}' of mapping '{mapping}', version '{ver}'." + +#: src/hydrilla/server/serve.py:402 +msgid "no_mapping_{required_by}_{ver}_{required}" +msgstr "Unknown mapping '{required}' required by '{required_by}', version '{ver}'." + +#: src/hydrilla/server/serve.py:429 +msgid "couldnt_register_{mapping}_{ver}_{pattern}" +msgstr "" +"Couldn't register mapping '{mapping}', version '{ver}' (pattern " +"'{pattern}')." 
+ +#: src/hydrilla/server/serve.py:590 +msgid "serve_hydrilla_packages_explain_wsgi_considerations" +msgstr "" +"Serve Hydrilla packages.\n" +"\n" +"This command is meant to be a quick way to run a local or development " +"Hydrilla instance. For better performance, consider deployment using " +"WSGI." + +#: src/hydrilla/server/serve.py:593 +msgid "directory_to_serve_from_overrides_config" +msgstr "" +"Directory to serve files from. Overrides value from the config file (if " +"any)." + +#: src/hydrilla/server/serve.py:595 +msgid "project_url_to_display_overrides_config" +msgstr "" +"Project url to display on generated HTML pages. Overrides value from the " +"config file (if any)." + +#: src/hydrilla/server/serve.py:597 +msgid "tcp_port_to_listen_on_overrides_config" +msgstr "" +"TCP port number to listen on (0-65535). Overrides value from the config " +"file (if any)." + +#: src/hydrilla/server/serve.py:600 +msgid "path_to_config_file_explain_default" +msgstr "" +"Path to Hydrilla server configuration file (optional, by default Hydrilla" +" loads its own config file, which in turn tries to load " +"/etc/hydrilla/config.json)." + +#: src/hydrilla/server/serve.py:637 +msgid "config_option_{}_not_supplied" +msgstr "Missing configuration option '{}'." + +#: src/hydrilla/server/serve.py:641 +msgid "serve_hydrilla_packages_wsgi_help" +msgstr "" +"Serve Hydrilla packages.\n" +"\n" +"This program is a WSGI script that runs Hydrilla repository behind an " +"HTTP server like Apache2 or Nginx. You can configure Hydrilla through the" +" /etc/hydrilla/config.json file." + +#. 'hydrilla' as a title +#: src/hydrilla/server/templates/base.html:99 +#: src/hydrilla/server/templates/base.html:105 +msgid "hydrilla" +msgstr "Hydrilla" + +#: src/hydrilla/server/templates/index.html:29 +msgid "hydrilla_welcome" +msgstr "Welcome to Hydrilla!" 
+ +#: src/hydrilla/util/_util.py:86 +msgid "bad_comment" +msgstr "bad comment" + diff --git a/src/hydrilla/locales/pl_PL/LC_MESSAGES/messages.po b/src/hydrilla/locales/pl_PL/LC_MESSAGES/messages.po new file mode 100644 index 0000000..57cca4d --- /dev/null +++ b/src/hydrilla/locales/pl_PL/LC_MESSAGES/messages.po @@ -0,0 +1,258 @@ +# SPDX-License-Identifier: CC0-1.0 +# +# Polish (Poland) translations for hydrilla. +# Copyright (C) 2021, 2022 Wojtek Kosior +# Available under the terms of Creative Commons Zero v1.0 Universal. +msgid "" +msgstr "" +"Project-Id-Version: hydrilla 2.0\n" +"Report-Msgid-Bugs-To: koszko@koszko.org\n" +"POT-Creation-Date: 2022-06-07 10:23+0200\n" +"PO-Revision-Date: 2022-02-12 00:00+0000\n" +"Last-Translator: Wojtek Kosior \n" +"Language: pl_PL\n" +"Language-Team: pl_PL \n" +"Plural-Forms: nplurals=3; plural=(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.8.0\n" + +#: src/hydrilla/builder/build.py:83 src/hydrilla/builder/local_apt.py:116 +#: src/hydrilla/builder/local_apt.py:408 +msgid "couldnt_execute_{}_is_it_installed" +msgstr "Could not execute '{}'. Is the tool installed and reachable via PATH?" + +#: src/hydrilla/builder/build.py:87 src/hydrilla/builder/local_apt.py:120 +#: src/hydrilla/builder/local_apt.py:412 +msgid "command_{}_failed" +msgstr "The following command finished execution with a non-zero exit status: {}" + +#: src/hydrilla/builder/build.py:160 +msgid "unknown_schema_package_source_{}" +msgstr "" +"The provided JSON at '{}' does not use any of the known package source " +"JSON schemas." + +#: src/hydrilla/builder/build.py:196 +msgid "path_contains_double_dot_{}" +msgstr "" +"Attempt to load '{}' which includes a forbidden parent reference ('..') " +"in the path." + +#: src/hydrilla/builder/build.py:203 +msgid "loading_{}_outside_package_dir" +msgstr "Attempt to load '{}' which lies outside package source directory." 
+ +#: src/hydrilla/builder/build.py:207 +msgid "loading_reserved_index_json" +msgstr "Attempt to load 'index.json' which is a reserved filename." + +#: src/hydrilla/builder/build.py:214 +msgid "referenced_file_{}_missing" +msgstr "Referenced file '{}' is missing." + +#: src/hydrilla/builder/build.py:396 +msgid "report_spdx_not_in_copyright_list" +msgstr "" +"Told to generate 'report.spdx' but 'report.spdx' is not listed among " +"copyright files. Refusing to proceed." + +#: src/hydrilla/builder/build.py:473 +msgid "build_package_from_srcdir_to_dstdir" +msgstr "" +"Build Hydrilla package from `srcdir` and write the resulting files under " +"`dstdir`." + +#: src/hydrilla/builder/build.py:475 +msgid "source_directory_to_build_from" +msgstr "Source directory to build from." + +#: src/hydrilla/builder/build.py:477 +msgid "path_instead_of_index_json" +msgstr "" +"Path to file to be processed instead of index.json (if not absolute, " +"resolved relative to srcdir)." + +#: src/hydrilla/builder/build.py:479 +msgid "path_instead_for_piggyback_files" +msgstr "" +"Path to a non-standard directory with foreign packages' archive files to " +"use." + +#: src/hydrilla/builder/build.py:481 +msgid "built_package_files_destination" +msgstr "Destination directory to write built package files to." + +#: src/hydrilla/builder/build.py:483 src/hydrilla/server/serve.py:582 +#: src/hydrilla/server/serve.py:604 src/hydrilla/server/serve.py:647 +#, python-format +msgid "%(prog)s_%(version)s_license" +msgstr "" +"%(prog)s %(version)s\n" +"Copyright (C) 2021,2022 Wojtek Kosior and contributors.\n" +"License GPLv3+: GNU AGPL version 3 or later " +"\n" +"This is free software: you are free to change and redistribute it.\n" +"There is NO WARRANTY, to the extent permitted by law." + +#: src/hydrilla/builder/build.py:484 src/hydrilla/server/serve.py:605 +#: src/hydrilla/server/serve.py:648 +msgid "version_printing" +msgstr "Print version information and exit." 
+ +#: src/hydrilla/builder/common_errors.py:58 +msgid "STDOUT_OUTPUT_heading" +msgstr "## Command's standard output ##" + +#: src/hydrilla/builder/common_errors.py:61 +msgid "STDERR_OUTPUT_heading" +msgstr "## Command's standard error output ##" + +#: src/hydrilla/builder/local_apt.py:145 +msgid "distro_{}_unknown" +msgstr "Attempt to use an unknown software distribution '{}'." + +#: src/hydrilla/builder/local_apt.py:189 +msgid "couldnt_import_{}_is_it_installed" +msgstr "" +"Could not import '{}'. Is the module installed and visible to this Python" +" instance?" + +#: src/hydrilla/builder/local_apt.py:197 +msgid "gpg_couldnt_recv_key_{}" +msgstr "Could not import PGP key '{}'." + +#: src/hydrilla/builder/local_apt.py:311 +msgid "apt_install_output_not_understood" +msgstr "The output of an 'apt-get install' command was not understood." + +#: src/hydrilla/builder/local_apt.py:337 +msgid "apt_download_gave_bad_filename_{}" +msgstr "The 'apt-get download' command produced a file with unexpected name '{}'." + +#: src/hydrilla/builder/piggybacking.py:100 +msgid "loading_{}_outside_piggybacked_dir" +msgstr "" +"Attempt to load '{}' which lies outside piggybacked packages files root " +"directory." + +#: src/hydrilla/server/serve.py:126 +#, python-brace-format +msgid "uuid_mismatch_{identifier}" +msgstr "Two different uuids were specified for item '{identifier}'." + +#: src/hydrilla/server/serve.py:133 +#, python-brace-format +msgid "version_clash_{identifier}_{version}" +msgstr "Version '{version}' specified more than once for item '{identifier}'." + +#: src/hydrilla/server/serve.py:249 src/hydrilla/server/serve.py:261 +msgid "invalid_URL_{}" +msgstr "Invalid URL/pattern: '{}'." + +#: src/hydrilla/server/serve.py:253 +msgid "disallowed_protocol_{}" +msgstr "Disallowed protocol: '{}'." 
+ +#: src/hydrilla/server/serve.py:306 +msgid "malcontent_dir_path_not_dir_{}" +msgstr "Provided 'malcontent_dir' path does not name a directory: {}" + +#: src/hydrilla/server/serve.py:325 +msgid "couldnt_load_item_from_{}" +msgstr "Couldn't load item from {}." + +#: src/hydrilla/server/serve.py:350 +msgid "item_{item}_in_file_{file}" +msgstr "Item {item} incorrectly present under {file}." + +#: src/hydrilla/server/serve.py:356 +msgid "item_version_{ver}_in_file_{file}" +msgstr "Item version {ver} incorrectly present under {file}." + +#: src/hydrilla/server/serve.py:379 +msgid "no_dep_{resource}_{ver}_{dep}" +msgstr "Unknown dependency '{dep}' of resource '{resource}', version '{ver}'." + +#: src/hydrilla/server/serve.py:390 +msgid "no_payload_{mapping}_{ver}_{payload}" +msgstr "Unknown payload '{payload}' of mapping '{mapping}', version '{ver}'." + +#: src/hydrilla/server/serve.py:402 +msgid "no_mapping_{required_by}_{ver}_{required}" +msgstr "Unknown mapping '{required}' required by '{required_by}', version '{ver}'." + +#: src/hydrilla/server/serve.py:429 +msgid "couldnt_register_{mapping}_{ver}_{pattern}" +msgstr "" +"Couldn't register mapping '{mapping}', version '{ver}' (pattern " +"'{pattern}')." + +#: src/hydrilla/server/serve.py:590 +msgid "serve_hydrilla_packages_explain_wsgi_considerations" +msgstr "" +"Serve Hydrilla packages.\n" +"\n" +"This command is meant to be a quick way to run a local or development " +"Hydrilla instance. For better performance, consider deployment using " +"WSGI." + +#: src/hydrilla/server/serve.py:593 +msgid "directory_to_serve_from_overrides_config" +msgstr "" +"Directory to serve files from. Overrides value from the config file (if " +"any)." + +#: src/hydrilla/server/serve.py:595 +msgid "project_url_to_display_overrides_config" +msgstr "" +"Project url to display on generated HTML pages. Overrides value from the " +"config file (if any)." 
+ +#: src/hydrilla/server/serve.py:597 +msgid "tcp_port_to_listen_on_overrides_config" +msgstr "" +"TCP port number to listen on (0-65535). Overrides value from the config " +"file (if any)." + +#: src/hydrilla/server/serve.py:600 +msgid "path_to_config_file_explain_default" +msgstr "" +"Path to Hydrilla server configuration file (optional, by default Hydrilla" +" loads its own config file, which in turn tries to load " +"/etc/hydrilla/config.json)." + +#: src/hydrilla/server/serve.py:602 +msgid "language_to_use_overrides_config" +msgstr "" +"Language to use (also affects served HTML files). Overrides value from " +"the config file (if any)." + +#: src/hydrilla/server/serve.py:637 +msgid "config_option_{}_not_supplied" +msgstr "Missing configuration option '{}'." + +#: src/hydrilla/server/serve.py:641 +msgid "serve_hydrilla_packages_wsgi_help" +msgstr "" +"Serve Hydrilla packages.\n" +"\n" +"This program is a WSGI script that runs Hydrilla repository behind an " +"HTTP server like Apache2 or Nginx. You can configure Hydrilla through the" +" /etc/hydrilla/config.json file." + +#. 'hydrilla' as a title +#: src/hydrilla/server/templates/base.html:99 +#: src/hydrilla/server/templates/base.html:105 +msgid "hydrilla" +msgstr "Hydrilla po polsku" + +#: src/hydrilla/server/templates/index.html:29 +msgid "hydrilla_welcome" +msgstr "Welcome to Hydrilla!" + +#: src/hydrilla/util/_util.py:86 +msgid "bad_comment" +msgstr "bad comment" + diff --git a/src/hydrilla/mitmproxy_launcher/__main__.py b/src/hydrilla/mitmproxy_launcher/__main__.py new file mode 100644 index 0000000..f2ec78a --- /dev/null +++ b/src/hydrilla/mitmproxy_launcher/__main__.py @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: CC0-1.0 + +# Copyright (C) 2022 Wojtek Kosior +# +# Available under the terms of Creative Commons Zero v1.0 Universal. + +import sys + +from . 
@click.command(help=_('cli_help.haketilo'))
@click.option('-p', '--port', default=8080, type=click.IntRange(0, 65535),
              help=_('cli_opt.haketilo.port'))
@click.option('-d', '--directory', default='~/.haketilo/',
              type=click.Path(file_okay=False),
              help=_('cli_opt.haketilo.dir'))
@click.version_option(version=_version.version, prog_name='Haketilo proxy',
                      message=_('%(prog)s_%(version)s_license'),
                      help=_('cli_opt.haketilo.version'))
def launch(port: int, directory: str):
    """
    Run mitmdump as a child process with the Haketilo addon loaded.

    The data directory (with '~' expanded) is created if missing, a small
    addon script is written into it and then `mitmdump` is spawned with that
    script. This function does not return: the process exits with mitmdump's
    exit status.

    :param port: TCP port for the proxy to listen on.
    :param directory: Haketilo data directory; also holds mitmproxy's confdir.
    """
    directory_path = Path(os.path.expanduser(directory)).resolve()

    directory_path.mkdir(parents=True, exist_ok=True)

    script_path = directory_path / 'addon.py'

    # The addon lives in hydrilla.proxy.addon as HaketiloAddon; mitmproxy picks
    # it up from the module-level `addons` list of the script it is given.
    script_path.write_text('''
from hydrilla.proxy.addon import HaketiloAddon

addons = [HaketiloAddon()]
''')

    # Every option and its value must be a separate argv element. Without the
    # commas after the f-strings, adjacent literals would be concatenated and
    # mitmdump would receive e.g. 'confdir=/path--set' as a single argument.
    code = sp.call([
        'mitmdump',
        '-p', str(port),
        '--set', f'confdir={directory_path / "mitmproxy"}',
        '--set', 'upstream_cert=false',
        '--set', f'haketilo_dir={directory_path}',
        '--scripts', str(script_path)
    ])

    sys.exit(code)
WrapperStoredType = t.TypeVar('WrapperStoredType', bound=t.Hashable)

@dc.dataclass(frozen=True, unsafe_hash=True)
class StoredTreeItem(t.Generic[WrapperStoredType]):
    """
    In the Pattern Tree, each item is stored together with the pattern used to
    register it.

    Instances are immutable and hashable, so they can be kept in (frozen) sets.
    Two instances compare equal when both their pattern and their item are
    equal.
    """
    # Parsed URL pattern under which the item was registered.
    pattern: ParsedUrl
    # The registered object itself.
    item: WrapperStoredType
NodeStoredType = t.TypeVar('NodeStoredType')

@dc.dataclass(frozen=True)
class PatternTreeNode(t.Generic[NodeStoredType]):
    """
    Immutable node of a pattern tree branch.

    A node may hold an item registered for exactly the sequence of segments
    that leads to it (`literal_match`) and maps the next segment to the child
    node reached through that segment (`children`). All "modifying" methods
    return a new node and leave this one untouched.
    """
    children: 'NodeChildrenType' = Map()
    literal_match: t.Optional[NodeStoredType] = None

    def is_empty(self) -> bool:
        """Tell whether the node has neither children nor a stored item."""
        return len(self.children) == 0 and self.literal_match is None

    def update_literal_match(
            self,
            new_match_item: t.Optional[NodeStoredType]
    ) -> 'NodeSelfType':
        """Return a copy of this node with `literal_match` replaced."""
        return dc.replace(self, literal_match=new_match_item)

    def get_child(self, child_key: str) -> t.Optional['NodeSelfType']:
        """Return the child stored under `child_key` or None if absent."""
        return self.children.get(child_key)

    def remove_child(self, child_key: str) -> 'NodeSelfType':
        """
        Return a copy of this node without the child stored under
        `child_key`. Removing a child that does not exist is not an error.
        """
        try:
            children = self.children.delete(child_key)
        except KeyError:
            # Nothing registered under this key - keep the mapping as it is.
            children = self.children

        return dc.replace(self, children=children)

    def set_child(self, child_key: str, child: 'NodeSelfType') \
        -> 'NodeSelfType':
        """Return a copy of this node with `child` stored under `child_key`."""
        return dc.replace(self, children=self.children.set(child_key, child))

# Below we define 2 types used by recursively-typed PatternTreeNode.
NodeSelfType = PatternTreeNode[NodeStoredType]
NodeChildrenType = Map[str, NodeSelfType]


BranchStoredType = t.TypeVar('BranchStoredType')

ItemUpdater = t.Callable[
    [t.Optional[BranchStoredType]],
    t.Optional[BranchStoredType]
]

@dc.dataclass(frozen=True)
class PatternTreeBranch(t.Generic[BranchStoredType]):
    """
    Immutable tree of PatternTreeNode objects keyed by sequences of string
    segments. The segments '*', '**' and '***' stored in the tree act as
    wildcards during search().
    """
    root_node: PatternTreeNode[BranchStoredType] = PatternTreeNode()

    def is_empty(self) -> bool:
        """Tell whether nothing is stored anywhere in the branch."""
        return self.root_node.is_empty()

    def update(self, segments: t.Iterable[str], item_updater: ItemUpdater) \
        -> 'BranchSelfType':
        """
        Return a branch in which the item stored under `segments` has been
        replaced with `item_updater(current_item)`; `current_item` is None if
        nothing is stored there yet. When the updater returns None the item
        is removed and nodes left empty by that are pruned from the tree.
        """
        node = self.root_node
        nodes_segments = []

        # Walk down, remembering each visited parent together with the segment
        # that leads from it to the next node. Missing nodes are created empty.
        for segment in segments:
            next_node = node.get_child(segment)

            nodes_segments.append((node, segment))

            node = PatternTreeNode() if next_node is None else next_node

        node = node.update_literal_match(item_updater(node.literal_match))

        # Walk back up, re-attaching the changed node to a copy of each parent
        # (or detaching it if it ended up empty).
        while nodes_segments:
            prev_node, segment = nodes_segments.pop()

            if node.is_empty():
                node = prev_node.remove_child(segment)
            else:
                node = prev_node.set_child(segment, node)

        return dc.replace(self, root_node=node)

    def search(self, segments: t.Sequence[str]) -> t.Iterable[BranchStoredType]:
        """
        Yields all matches of this segments sequence against the tree. Results
        are produced in order from greatest to lowest pattern specificity.
        """
        nodes = [self.root_node]

        # Follow the literal path as deep as the tree allows.
        for segment in segments:
            next_node = nodes[-1].get_child(segment)
            if next_node is None:
                break

            nodes.append(next_node)

        nsegments = len(segments)

        # The conditions are evaluated after `nodes.pop()` below, so at that
        # point len(nodes) equals the number of segments consumed to reach the
        # popped node.
        cond_literal = lambda: len(nodes) == nsegments
        cond_wildcard = [
            # '*': exactly one segment remains; a literal '*' segment is
            # reported through the literal path instead.
            lambda: len(nodes) + 1 == nsegments and segments[-1] != '*',
            # '**': at least two segments remain.
            lambda: len(nodes) + 1 <  nsegments,
            # '***': any number of segments remains; a sole remaining literal
            # '***' segment is reported through the literal path instead.
            lambda: len(nodes) + 1 != nsegments or segments[-1] != '***'
        ]

        while nodes:
            node = nodes.pop()

            wildcard_matches = [node.get_child(wc) for wc in ('*', '**', '***')]

            for match_node, condition in [
                    (node, cond_literal),
                    *zip(wildcard_matches, cond_wildcard)
            ]:
                if match_node is None or match_node.literal_match is None:
                    continue

                if condition():
                    yield match_node.literal_match

# Below we define 1 type used by recursively-typed PatternTreeBranch.
BranchSelfType = PatternTreeBranch[BranchStoredType]


FilterStoredType = t.TypeVar('FilterStoredType', bound=t.Hashable)
FilterWrappedType = StoredTreeItem[FilterStoredType]

def filter_by_trailing_slash(
        items: t.Iterable[FilterWrappedType],
        with_slash: bool
) -> t.FrozenSet[FilterWrappedType]:
    """
    Return those of `items` whose pattern's `has_trailing_slash` flag equals
    `with_slash`.
    """
    return frozenset(wrapped for wrapped in items
                     if with_slash == wrapped.pattern.has_trailing_slash)

TreeStoredType = t.TypeVar('TreeStoredType', bound=t.Hashable)

# These aliases are evaluated at import time, so the typing module's generics
# are used instead of subscripting builtins (which needs Python 3.9+).
StoredSet = t.FrozenSet[StoredTreeItem[TreeStoredType]]
PathBranch = PatternTreeBranch[StoredSet]
DomainBranch = PatternTreeBranch[PathBranch]
TreeRoot = Map[t.Tuple[str, int], DomainBranch]

@dc.dataclass(frozen=True)
class PatternTree(t.Generic[TreeStoredType]):
    """
    "Pattern Tree" is how we refer to the data structure used for querying
    Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal
    is to make it possible to quickly retrieve all known patterns that match
    a given URL.

    The tree is immutable; register() and deregister() return a new tree.
    """
    _by_scheme_and_port: TreeRoot = Map()

    def _register(
            self,
            parsed_pattern: ParsedUrl,
            item: TreeStoredType,
            register: bool = True
    ) -> 'TreeSelfType':
        """
        Make an item wrapped in StoredTreeItem object queryable through the
        Pattern Tree by the given parsed URL pattern. With `register` set to
        False the item is instead removed from under that pattern.
        """
        wrapped_item = StoredTreeItem(parsed_pattern, item)

        def item_updater(item_set: t.Optional[StoredSet]) \
            -> t.Optional[StoredSet]:
            """Add/remove the wrapped item; None stands for an empty set."""
            if item_set is None:
                item_set = frozenset()

            if register:
                item_set = item_set.union((wrapped_item,))
            else:
                item_set = item_set.difference((wrapped_item,))

            return None if len(item_set) == 0 else item_set

        def path_branch_updater(path_branch: t.Optional[PathBranch]) \
            -> t.Optional[PathBranch]:
            """Update the path tree stored under the pattern's domain."""
            if path_branch is None:
                path_branch = PatternTreeBranch()

            path_branch = path_branch.update(
                parsed_pattern.path_segments,
                item_updater
            )

            return None if path_branch.is_empty() else path_branch

        key = (parsed_pattern.scheme, parsed_pattern.port)
        domain_tree = self._by_scheme_and_port.get(key, PatternTreeBranch())

        new_domain_tree = domain_tree.update(
            parsed_pattern.domain_labels,
            path_branch_updater
        )

        if new_domain_tree.is_empty():
            # Do not keep empty domain trees around.
            try:
                new_root = self._by_scheme_and_port.delete(key)
            except KeyError:
                new_root = self._by_scheme_and_port
        else:
            new_root = self._by_scheme_and_port.set(key, new_domain_tree)

        return dc.replace(self, _by_scheme_and_port=new_root)

    def register(self, parsed_pattern: ParsedUrl, item: TreeStoredType) \
        -> 'TreeSelfType':
        """
        Make item queryable through the Pattern Tree by the given URL pattern.
        """
        return self._register(parsed_pattern, item)

    def deregister(self, parsed_pattern: ParsedUrl, item: TreeStoredType) \
        -> 'TreeSelfType':
        """
        Make item no longer queryable through the Pattern Tree by the given URL
        pattern.
        """
        return self._register(parsed_pattern, item, register=False)

    def search(self, url: t.Union[ParsedUrl, str]) -> t.Iterable[StoredSet]:
        """
        Yield non-empty sets of items registered under patterns that match
        `url`, which may be given as a string or already parsed. Each yielded
        set holds items that share the same trailing-slash flag.
        """
        parsed_url = parse_url(url) if isinstance(url, str) else url

        key = (parsed_url.scheme, parsed_url.port)
        domain_tree = self._by_scheme_and_port.get(key)
        if domain_tree is None:
            return

        # A URL with a trailing slash is also matched against patterns
        # registered without one; those results come second.
        if parsed_url.has_trailing_slash:
            slash_options = [True, False]
        else:
            slash_options = [False]

        for path_tree in domain_tree.search(parsed_url.domain_labels):
            for item_set in path_tree.search(parsed_url.path_segments):
                for with_slash in slash_options:
                    items = filter_by_trailing_slash(item_set, with_slash)
                    if len(items) > 0:
                        yield items

# Below we define 1 type used by recursively-typed PatternTree.
TreeSelfType = PatternTree[TreeStoredType]
+# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +""" +This module contains the definition of a mitmproxy addon that gets instantiated +from addon script. +""" + +# Enable using with Python 3.7. 
# Evaluated at import time, hence typing.Dict rather than dict[...] (the
# latter requires Python 3.9+).
FlowHandlers = t.Dict[int, FlowHandler]

StateUpdater = t.Callable[[HaketiloState], None]

HTTPHandlerFun = t.Callable[
    ['HaketiloAddon', http.HTTPFlow],
    t.Optional[StateUpdater]
]

def http_event_handler(handler_fun: HTTPHandlerFun):
    """
    Decorator for HaketiloAddon methods that handle mitmproxy HTTP events.

    The wrapped method runs with the addon's `configured_lock` held. If it
    returns a state updater, the updater is called with the addon's state
    before the lock is released.
    """
    import functools

    # mitmproxy's @concurrent decorator inspects the `__name__` of the function
    # it receives and rejects names that are not event hooks, so the wrapper
    # has to carry the name of the method it wraps.
    @functools.wraps(handler_fun)
    def wrapped_handler(self: 'HaketiloAddon', flow: http.HTTPFlow) -> None:
        """Run the handler and apply the state update it requests, if any."""
        with self.configured_lock:
            assert self.configured

            assert self.state is not None

            state_updater = handler_fun(self, flow)

            if state_updater is not None:
                state_updater(self.state)

    return wrapped_handler

@dc.dataclass
class HaketiloAddon:
    """
    mitmproxy addon that selects a policy for every HTTP flow and delegates
    handling of the flow's events to a flow handler made for that policy.
    """
    # Set once configure() has created `state`.
    configured: bool = False
    configured_lock: Lock = dc.field(default_factory=Lock)

    state: t.Optional[HaketiloState] = None

    # Handlers of flows in progress, keyed by id() of the flow object.
    flow_handlers: FlowHandlers = dc.field(default_factory=dict)
    handlers_lock: Lock = dc.field(default_factory=Lock)

    def load(self, loader: addonmanager.Loader) -> None:
        """Declare the 'haketilo_dir' option to mitmproxy."""
        loader.add_option(
            name     = 'haketilo_dir',
            typespec = str,
            default  = '~/.haketilo/',
            help     = "Point to a Haketilo data directory to use",
        )

    def configure(self, updated: t.Set[str]) -> None:
        """
        Create the Haketilo state once the 'haketilo_dir' option gets set.
        Later changes of the option are ignored with a warning.
        """
        if 'haketilo_dir' not in updated:
            return

        with self.configured_lock:
            if self.configured:
                ctx.log.warn(_('haketilo_dir_already_configured'))
                return

            # The option's default starts with '~', which Path does not
            # expand on its own.
            haketilo_dir = Path(ctx.options.haketilo_dir).expanduser()
            self.state = HaketiloState(haketilo_dir / 'store')

            # Event handlers assert this flag before touching `state`.
            self.configured = True

    def assign_handler(self, flow: http.HTTPFlow, flow_handler: FlowHandler) \
        -> None:
        """Remember the handler to use for later events of `flow`."""
        with self.handlers_lock:
            self.flow_handlers[id(flow)] = flow_handler

    def lookup_handler(self, flow: http.HTTPFlow) -> FlowHandler:
        """
        Return the handler assigned to `flow`. Raises KeyError if there is
        none.
        """
        with self.handlers_lock:
            return self.flow_handlers[id(flow)]

    def forget_handler(self, flow: http.HTTPFlow) -> None:
        """Drop the handler assigned to `flow`, if any."""
        with self.handlers_lock:
            self.flow_handlers.pop(id(flow), None)

    @concurrent
    @http_event_handler
    def requestheaders(self, flow: http.HTTPFlow) -> t.Optional[StateUpdater]:
        """
        Select a policy for the requested URL, create the flow's handler and
        let it process the request headers.
        """
        assert self.state is not None

        policy = self.state.select_policy(flow.request.url)

        flow_handler = make_flow_handler(flow, policy)

        self.assign_handler(flow, flow_handler)

        return flow_handler.on_requestheaders()

    @concurrent
    @http_event_handler
    def request(self, flow: http.HTTPFlow) -> t.Optional[StateUpdater]:
        """Let the flow's handler process the request."""
        return self.lookup_handler(flow).on_request()

    @concurrent
    @http_event_handler
    def responseheaders(self, flow: http.HTTPFlow) -> t.Optional[StateUpdater]:
        """Let the flow's handler process the response headers."""
        return self.lookup_handler(flow).on_responseheaders()

    @concurrent
    @http_event_handler
    def response(self, flow: http.HTTPFlow) -> t.Optional[StateUpdater]:
        """
        Let the flow's handler process the response. This is the last event
        of a successful flow, so the handler is dropped afterwards.
        """
        try:
            return self.lookup_handler(flow).on_response()
        finally:
            # Drop the handler even if processing failed so that entries do
            # not pile up in `flow_handlers`.
            self.forget_handler(flow)

    @http_event_handler
    def error(self, flow: http.HTTPFlow) -> None:
        """Drop the handler of a flow that ended with an error."""
        self.forget_handler(flow)

addons = [
    HaketiloAddon()
]
StateUpdater = t.Callable[['HaketiloState'], None]

@dc.dataclass(frozen=True)
class FlowHandler:
    """
    Base handler of a single HTTP flow. The `on_*` methods are called for the
    respective mitmproxy events and may return a function that updates
    Haketilo's state. This base class only turns on body streaming when asked
    to.
    """
    flow: http.HTTPFlow
    policy: policies.Policy

    stream_request: bool = False
    stream_response: bool = False

    def on_requestheaders(self) -> t.Optional[StateUpdater]:
        """Enable request body streaming if `stream_request` is set."""
        if self.stream_request:
            self.flow.request.stream = True

        return None

    def on_request(self) -> t.Optional[StateUpdater]:
        """Do nothing with the request."""
        return None

    def on_responseheaders(self) -> t.Optional[StateUpdater]:
        """Enable response body streaming if `stream_response` is set."""
        assert self.flow.response is not None

        if self.stream_response:
            self.flow.response.stream = True

        return None

    def on_response(self) -> t.Optional[StateUpdater]:
        """Do nothing with the response."""
        return None

@dc.dataclass(frozen=True)
class FlowHandlerAllowScripts(FlowHandler):
    """Handler that streams both bodies and otherwise leaves the flow alone."""
    policy: policies.AllowPolicy

    stream_request: bool = True
    stream_response: bool = True

# Names of HTTP headers that carry a Content Security Policy (including the
# legacy prefixed ones) and whether the policy in them is enforced or only
# reported.
csp_header_names_and_dispositions = (
    ('content-security-policy',               'enforce'),
    ('content-security-policy-report-only',   'report'),
    ('x-content-security-policy',             'enforce'),
    ('x-content-security-policy-report-only', 'report'),
    ('x-webkit-csp',                          'enforce'),
    ('x-webkit-csp-report-only',              'report')
)

csp_enforce_header_names_set = {
    name for name, disposition in csp_header_names_and_dispositions
    if disposition == 'enforce'
}

@dc.dataclass
class ContentSecurityPolicy:
    """
    A single parsed Content Security Policy together with the name of the
    header it came from and its disposition ('enforce' or 'report').
    """
    # Lowercased directive name -> list of the directive's value tokens.
    directives: dict[str, list[str]]
    header_name: str
    disposition: str

    @staticmethod
    def deserialize(
            serialized: str,
            header_name: str,
            disposition: str = 'enforce'
    ) -> 'ContentSecurityPolicy':
        """
        Parse one serialized policy. Non-ASCII and empty directives are
        skipped; of repeated directives only the first one is kept.
        """
        # For more info, see:
        # https://www.w3.org/TR/CSP3/#parse-serialized-policy
        directives = {}

        for serialized_directive in serialized.split(';'):
            if not serialized_directive.isascii():
                continue

            tokens = serialized_directive.split()
            if not tokens:
                continue

            directive_name, *directive_value = tokens
            directive_name = directive_name.lower()

            # Specs mention giving warnings for duplicate directive names but
            # from our proxy's perspective this is not important right now.
            if directive_name in directives:
                continue

            directives[directive_name] = directive_value

        return ContentSecurityPolicy(directives, header_name, disposition)

    def serialize(self) -> str:
        """Return the policy in the form used as HTTP header value."""
        # Joining the name with the values (rather than formatting them as
        # "name values") avoids a trailing space after value-less directives.
        return ';'.join(
            ' '.join((name, *value_list))
            for name, value_list in self.directives.items()
        )

def extract_csp(headers: Headers) -> tuple[ContentSecurityPolicy, ...]:
    """
    Parse all policies found in the CSP headers of `headers`. Policies
    without any directive are left out.
    """
    csp_policies = []

    for header_name, disposition in csp_header_names_and_dispositions:
        # A header may occur many times and each occurrence may hold several
        # comma-separated policies.
        for serialized_list in headers.get_all(header_name):
            for serialized in serialized_list.split(','):
                policy = ContentSecurityPolicy.deserialize(
                    serialized,
                    header_name,
                    disposition
                )

                if policy.directives != {}:
                    csp_policies.append(policy)

    return tuple(csp_policies)

csp_script_directive_names = (
    'script-src',
    'script-src-elem',
    'script-src-attr'
)

@dc.dataclass(frozen=True)
class FlowHandlerBlockScripts(FlowHandler):
    """Handler that makes the browser block all scripts of the page."""
    policy: policies.BlockPolicy

    stream_request: bool = True
    stream_response: bool = True

    def on_responseheaders(self) -> t.Optional[StateUpdater]:
        """
        Rewrite the response's CSP headers: enforced policies are kept (with
        reporting directives removed), report-only ones are dropped and
        a policy forbidding all scripts is added.
        """
        super().on_responseheaders()

        assert self.flow.response is not None

        headers = self.flow.response.headers

        csp_policies = extract_csp(headers)

        for header_name, _disposition in csp_header_names_and_dispositions:
            # Most responses carry only some (or none) of these headers.
            headers.pop(header_name, None)

        for policy in csp_policies:
            if policy.disposition != 'enforce':
                continue

            # Reporting directives are optional, so they may be absent.
            policy.directives.pop('report-to', None)
            policy.directives.pop('report-uri', None)

            headers.add(policy.header_name, policy.serialize())

        extra_csp = ';'.join(
            f"{name} 'none'" for name in csp_script_directive_names
        )

        headers.add('Content-Security-Policy', extra_csp)

        return None

# For details of 'Content-Type' header's structure, see:
# https://datatracker.ietf.org/doc/html/rfc7231#section-3.1.1.1
content_type_reg = re.compile(r'''
^
(?P<mime>[\w-]+/[\w-]+)
\s*
(?:
    ;
    (?:[^;]*;)* # match possible parameter other than "charset"
)
\s*
charset=        # no whitespace allowed in parameter as per RFC
(?P<encoding>
    [\w-]+
    |
    "[\w-]+"    # quotes are optional per RFC
)
(?:;[^;]+)*     # match possible parameter other than "charset"
$               # forbid possible dangling characters after closing '"'
''', re.VERBOSE | re.IGNORECASE)

def deduce_content_type(headers: Headers) \
    -> tuple[t.Optional[str], t.Optional[str]]:
    """
    Return the (mime type, lowercased charset) pair declared in the
    'content-type' header. (None, None) is returned when the header is
    missing or does not match `content_type_reg` (which, as written, requires
    a charset parameter to be present).
    """
    content_type = headers.get('content-type')
    if content_type is None:
        return (None, None)

    match = content_type_reg.match(content_type)
    if match is None:
        return (None, None)

    mime, encoding = match.group('mime'), match.group('encoding')

    if encoding is not None:
        # The charset may have been given as a quoted string.
        encoding = encoding.strip('"').lower()

    return mime, encoding

# Byte order marks and the encodings they indicate.
UTF8_BOM = b'\xEF\xBB\xBF'
BOMs = (
    (UTF8_BOM,    'utf-8'),
    (b'\xFE\xFF', 'utf-16be'),
    (b'\xFF\xFE', 'utf-16le')
)

def block_attr(element: bs4.PageElement, atrr_name: str) -> None:
    """Not implemented yet; currently a no-op."""
    # TODO: implement
    pass
"script-src-attr 'none'" + )) + + def on_responseheaders(self) -> t.Optional[StateUpdater]: + """....""" + super().on_responseheaders() + + assert self.flow.response is not None + + for header_name, _ in csp_header_names_and_dispositions: + del self.flow.response.headers[header_name] + + self.flow.response.headers.add('Content-Security-Policy', self.new_csp) + + return None + + def on_response(self) -> t.Optional[StateUpdater]: + """....""" + super().on_response() + + assert self.flow.response is not None + + if self.flow.response.content is None: + return None + + mime, encoding = deduce_content_type(self.flow.response.headers) + if mime is None or 'html' not in mime: + return None + + # A UTF BOM overrides encoding specified by the header. + for bom, encoding_name in BOMs: + if self.flow.response.content.startswith(bom): + encoding = encoding_name + + soup = bs4.BeautifulSoup( + markup = self.flow.response.content, + from_encoding = encoding, + features = 'html5lib' + ) + + # Inject scripts. + script_parent = soup.find('body') or soup.find('html') + if script_parent is None: + return None + + for url in self.policy.script_urls(): + script_parent.append(bs4.Tag(name='script', attrs={'src': url})) + + # Remove Content Security Policy that could possibly block injected + # scripts. + for meta in soup.select('head meta[http-equiv]'): + header_name = meta.attrs.get('http-equiv', '').lower().strip() + if header_name in csp_enforce_header_names_set: + block_attr(meta, 'http-equiv') + block_attr(meta, 'content') + + # Appending a three-byte Byte Order Mark (BOM) will force the browser to + # decode this as UTF-8 regardless of the 'Content-Type' header. 
See: + # https://www.w3.org/International/tests/repository/html5/the-input-byte-stream/results-basics#precedence + self.flow.response.content = UTF8_BOM + soup.encode() + + return None + +@dc.dataclass(frozen=True) +class FlowHandlerMetaResource(FlowHandler): + """....""" + policy: policies.MetaResourcePolicy + + def on_request(self) -> t.Optional[StateUpdater]: + """....""" + super().on_request() + # TODO: implement + #self.flow.response = .... + + return None + +def make_flow_handler(flow: http.HTTPFlow, policy: policies.Policy) \ + -> FlowHandler: + """....""" + if isinstance(policy, policies.BlockPolicy): + return FlowHandlerBlockScripts(flow, policy) + + if isinstance(policy, policies.AllowPolicy): + return FlowHandlerAllowScripts(flow, policy) + + if isinstance(policy, policies.PayloadPolicy): + return FlowHandlerInjectPayload(flow, policy) + + assert isinstance(policy, policies.MetaResourcePolicy) + # def response_creator(request: http.HTTPRequest) -> http.HTTPResponse: + # """....""" + # replacement_details = make_replacement_resource( + # policy.replacement, + # request.path + # ) + + # return http.HTTPResponse.make( + # replacement_details.status_code, + # replacement_details.content, + # replacement_details.content_type + # ) + return FlowHandlerMetaResource(flow, policy) diff --git a/src/hydrilla/proxy/policies.py b/src/hydrilla/proxy/policies.py new file mode 100644 index 0000000..5e9451b --- /dev/null +++ b/src/hydrilla/proxy/policies.py @@ -0,0 +1,76 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Various policies for altering HTTP requests. +# +# This file is part of Hydrilla&Haketilo. +# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
class Policy(ABC):
    """Common ancestor of every policy type defined in this module."""


class PayloadPolicy(Policy):
    """Policy that names scripts; all its answers are placeholders so far."""

    def assets_base_url(self) -> str:
        """Return the (currently hard-coded) base URL of assets."""
        return 'https://example.com/static/'

    def script_urls(self) -> t.Sequence[str]:
        """Return the (currently hard-coded) sequence of script URLs."""
        # TODO: implement
        return ('https://example.com/static/somescript.js',)

    def is_eval_allowed(self) -> bool:
        """Tell whether eval is allowed; always true for now."""
        # TODO: implement
        return True


class MetaResourcePolicy(Policy):
    """Marker type for meta resource policies; carries no data yet."""


class AllowPolicy(Policy):
    """Ancestor of all allow policies."""


@dc.dataclass
class RuleAllowPolicy(AllowPolicy):
    """Allow policy that remembers the pattern it was made for."""
    pattern: str


class FallbackAllowPolicy(AllowPolicy):
    """Allow policy variant that carries no data."""


class BlockPolicy(Policy):
    """Ancestor of all block policies."""


@dc.dataclass
class RuleBlockPolicy(BlockPolicy):
    """Block policy that remembers the pattern it was made for."""
    pattern: str


class FallbackBlockPolicy(BlockPolicy):
    """Block policy variant that carries no data."""


@dc.dataclass
class ErrorBlockPolicy(BlockPolicy):
    """Block policy that keeps hold of an exception."""
    error: Exception
+# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +""" +This module contains logic for keeping track of all settings, rules, mappings +and resources. +""" + +# Enable using with Python 3.7. +from __future__ import annotations + +import typing as t +import dataclasses as dc + +from threading import Lock +from pathlib import Path + +from ..pattern_tree import PatternTree +from .store import HaketiloStore +from . 
def make_pattern_tree_with_builtin_policies() -> PatternTree[policies.Policy]:
    """Create the pattern tree a new state starts with.

    Nothing gets registered in it yet, so a bare ``PatternTree()`` is
    returned.
    """
    # TODO: implement
    return PatternTree()


# Shared field spec; gives every state instance its own freshly made tree.
tree_field = dc.field(default_factory=make_pattern_tree_with_builtin_policies)


@dc.dataclass
class HaketiloState(HaketiloStore):
    """Store extended with the pattern tree, a default and a lock."""
    pattern_tree:  PatternTree[policies.Policy] = tree_field
    default_allow: bool = False

    state_lock: Lock = dc.field(default_factory=Lock)

    def select_policy(self, url: str, allow_disabled: bool = False) \
        -> policies.Policy:
        """Choose the policy to apply to ``url``.

        Matching is unfinished: the search results are walked but not used, so
        the outcome is a ``FallbackBlockPolicy``, or an ``ErrorBlockPolicy``
        wrapping whatever exception got raised on the way.
        """
        # Only the read of the tree reference happens under the lock.
        with self.state_lock:
            tree = self.pattern_tree

        # NOTE(review): neither allow_disabled nor default_allow is consulted
        # yet - presumably they are meant to affect the choice; confirm.
        try:
            for _policy_set in tree.search(url):
                # if policy.enabled or allow_disabled:
                #     return policy
                continue

            return policies.FallbackBlockPolicy()
        except Exception as error:
            return policies.ErrorBlockPolicy(error)
@dc.dataclass
class HaketiloStore:
    """Holder of the on-disk store location; storage logic is yet to come."""
    # Directory the store is kept in.
    store_dir: Path
    # TODO: implement
import json_instances config_schema = { '$schema': 'http://json-schema.org/draft-07/schema#', @@ -92,7 +92,7 @@ def load(config_paths: list[Path]=[here / 'config.json'], continue raise e from None - new_config = json.loads(util.strip_json_comments(json_text)) + new_config = json_instances.strip_json_comments(json_text) jsonschema.validate(new_config, config_schema) config.update(new_config) diff --git a/src/hydrilla/server/locales/en_US/LC_MESSAGES/hydrilla-messages.po b/src/hydrilla/server/locales/en_US/LC_MESSAGES/hydrilla-messages.po deleted file mode 100644 index 1998f89..0000000 --- a/src/hydrilla/server/locales/en_US/LC_MESSAGES/hydrilla-messages.po +++ /dev/null @@ -1,151 +0,0 @@ -# SPDX-License-Identifier: CC0-1.0 -# -# English (United States) translations for hydrilla. -# Copyright (C) 2021, 2022 Wojtek Kosior -# Available under the terms of Creative Commons Zero v1.0 Universal. -msgid "" -msgstr "" -"Project-Id-Version: hydrilla.builder 0.1\n" -"Report-Msgid-Bugs-To: koszko@koszko.org\n" -"POT-Creation-Date: 2022-05-31 18:21+0200\n" -"PO-Revision-Date: 2022-02-12 00:00+0000\n" -"Last-Translator: Wojtek Kosior \n" -"Language: en_US\n" -"Language-Team: en_US \n" -"Plural-Forms: nplurals=2; plural=(n != 1)\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel 2.8.0\n" - -#: src/hydrilla/server/serve.py:127 -#, python-brace-format -msgid "uuid_mismatch_{identifier}" -msgstr "Two different uuids were specified for item '{identifier}'." - -#: src/hydrilla/server/serve.py:134 -#, python-brace-format -msgid "version_clash_{identifier}_{version}" -msgstr "Version '{version}' specified more than once for item '{identifier}'." - -#: src/hydrilla/server/serve.py:250 src/hydrilla/server/serve.py:262 -msgid "invalid_URL_{}" -msgstr "Invalid URL/pattern: '{}'." - -#: src/hydrilla/server/serve.py:254 -msgid "disallowed_protocol_{}" -msgstr "Disallowed protocol: '{}'." 
- -#: src/hydrilla/server/serve.py:307 -msgid "malcontent_dir_path_not_dir_{}" -msgstr "Provided 'malcontent_dir' path does not name a directory: {}" - -#: src/hydrilla/server/serve.py:326 -msgid "couldnt_load_item_from_{}" -msgstr "Couldn't load item from {}." - -#: src/hydrilla/server/serve.py:351 -msgid "item_{item}_in_file_{file}" -msgstr "Item {item} incorrectly present under {file}." - -#: src/hydrilla/server/serve.py:357 -msgid "item_version_{ver}_in_file_{file}" -msgstr "Item version {ver} incorrectly present under {file}." - -#: src/hydrilla/server/serve.py:380 -msgid "no_dep_{resource}_{ver}_{dep}" -msgstr "Unknown dependency '{dep}' of resource '{resource}', version '{ver}'." - -#: src/hydrilla/server/serve.py:391 -msgid "no_payload_{mapping}_{ver}_{payload}" -msgstr "Unknown payload '{payload}' of mapping '{mapping}', version '{ver}'." - -#: src/hydrilla/server/serve.py:403 -msgid "no_mapping_{required_by}_{ver}_{required}" -msgstr "Unknown mapping '{required}' required by '{required_by}', version '{ver}'." - -#: src/hydrilla/server/serve.py:430 -msgid "couldnt_register_{mapping}_{ver}_{pattern}" -msgstr "" -"Couldn't register mapping '{mapping}', version '{ver}' (pattern " -"'{pattern}')." - -#: src/hydrilla/server/serve.py:583 src/hydrilla/server/serve.py:606 -#: src/hydrilla/server/serve.py:650 -#, python-format -msgid "%(prog)s_%(version)s_license" -msgstr "" -"%(prog)s %(version)s\n" -"Copyright (C) 2021,2022 Wojtek Kosior and contributors.\n" -"License GPLv3+: GNU AGPL version 3 or later " -"\n" -"This is free software: you are free to change and redistribute it.\n" -"There is NO WARRANTY, to the extent permitted by law." - -#: src/hydrilla/server/serve.py:592 -msgid "serve_hydrilla_packages_explain_wsgi_considerations" -msgstr "" -"Serve Hydrilla packages.\n" -"\n" -"This command is meant to be a quick way to run a local or development " -"Hydrilla instance. For better performance, consider deployment using " -"WSGI." 
- -#: src/hydrilla/server/serve.py:595 -msgid "directory_to_serve_from_overrides_config" -msgstr "" -"Directory to serve files from. Overrides value from the config file (if " -"any)." - -#: src/hydrilla/server/serve.py:597 -msgid "project_url_to_display_overrides_config" -msgstr "" -"Project url to display on generated HTML pages. Overrides value from the " -"config file (if any)." - -#: src/hydrilla/server/serve.py:599 -msgid "tcp_port_to_listen_on_overrides_config" -msgstr "" -"TCP port number to listen on (0-65535). Overrides value from the config " -"file (if any)." - -#: src/hydrilla/server/serve.py:602 -msgid "path_to_config_file_explain_default" -msgstr "" -"Path to Hydrilla server configuration file (optional, by default Hydrilla" -" loads its own config file, which in turn tries to load " -"/etc/hydrilla/config.json)." - -#: src/hydrilla/server/serve.py:604 -msgid "language_to_use_overrides_config" -msgstr "" -"Language to use (also affects served HTML files). Overrides value from " -"the config file (if any)." - -#: src/hydrilla/server/serve.py:607 src/hydrilla/server/serve.py:651 -msgid "version_printing" -msgstr "Print version information and exit." - -#: src/hydrilla/server/serve.py:640 -msgid "config_option_{}_not_supplied" -msgstr "Missing configuration option '{}'." - -#: src/hydrilla/server/serve.py:644 -msgid "serve_hydrilla_packages_wsgi_help" -msgstr "" -"Serve Hydrilla packages.\n" -"\n" -"This program is a WSGI script that runs Hydrilla repository behind an " -"HTTP server like Apache2 or Nginx. You can configure Hydrilla through the" -" /etc/hydrilla/config.json file." - -#. 'hydrilla' as a title -#: src/hydrilla/server/templates/base.html:99 -#: src/hydrilla/server/templates/base.html:105 -msgid "hydrilla" -msgstr "Hydrilla" - -#: src/hydrilla/server/templates/index.html:29 -msgid "hydrilla_welcome" -msgstr "Welcome to Hydrilla!" 
- diff --git a/src/hydrilla/server/serve.py b/src/hydrilla/server/serve.py index 779f3d2..8f0d557 100644 --- a/src/hydrilla/server/serve.py +++ b/src/hydrilla/server/serve.py @@ -36,16 +36,18 @@ import logging from pathlib import Path from hashlib import sha256 from abc import ABC, abstractmethod -from typing import Optional, Union, Iterable +from typing import Optional, Union, Iterable, TypeVar, Generic import click import flask from werkzeug import Response -from .. import util +from .. import _version, versions, json_instances +from ..item_infos import ResourceInfo, MappingInfo, VersionedItemInfo +from ..translations import smart_gettext as _, translation as make_translation +#from ..url_patterns import PatternTree from . import config -from . import _version here = Path(__file__).resolve().parent @@ -54,243 +56,20 @@ generated_by = { 'version': _version.version } -class ItemInfo(ABC): - """Shortened data of a resource/mapping.""" - def __init__(self, item_obj: dict, major_schema_version: int): - """Initialize ItemInfo using item definition read from JSON.""" - self.version = util.normalize_version(item_obj['version']) - self.identifier = item_obj['identifier'] - self.uuid = item_obj.get('uuid') - self.long_name = item_obj['long_name'] - - self.required_mappings = [] - if major_schema_version >= 2: - self.required_mappings = [map_ref['identifier'] for map_ref in - item_obj.get('required_mappings', [])] - - def path(self) -> str: - """ - Get a relative path to this item's JSON definition with respect to - directory containing items of this type. 
- """ - return f'{self.identifier}/{util.version_string(self.version)}' - -class ResourceInfo(ItemInfo): - """Shortened data of a resource.""" - def __init__(self, resource_obj: dict, major_schema_version: int): - """Initialize ResourceInfo using resource definition read from JSON.""" - super().__init__(resource_obj, major_schema_version) - - dependencies = resource_obj.get('dependencies', []) - self.dependencies = [res_ref['identifier'] for res_ref in dependencies] - -class MappingInfo(ItemInfo): - """Shortened data of a mapping.""" - def __init__(self, mapping_obj: dict, major_schema_version: int): - """Initialize MappingInfo using mapping definition read from JSON.""" - super().__init__(mapping_obj, major_schema_version) - - self.payloads = {} - for pattern, res_ref in mapping_obj.get('payloads', {}).items(): - self.payloads[pattern] = res_ref['identifier'] - - def as_query_result(self) -> str: - """ - Produce a json.dump()-able object describing this mapping as one of a - collection of query results. - """ - return { - 'version': self.version, - 'identifier': self.identifier, - 'long_name': self.long_name - } - -class VersionedItemInfo: - """Stores data of multiple versions of given resource/mapping.""" - def __init__(self): - self.uuid = None - self.identifier = None - self.by_version = {} - self.known_versions = [] - - def register(self, item_info: ItemInfo) -> None: - """ - Make item info queryable by version. Perform sanity checks for uuid. 
- """ - if self.identifier is None: - self.identifier = item_info.identifier - - if self.uuid is None: - self.uuid = item_info.uuid - - if self.uuid is not None and self.uuid != item_info.uuid: - raise ValueError(f_('uuid_mismatch_{identifier}') - .format(identifier=self.identifier)) - - ver = item_info.version - ver_str = util.version_string(ver) - - if ver_str in self.by_version: - raise ValueError(f_('version_clash_{identifier}_{version}') - .format(identifier=self.identifier, - version=ver_str)) - - self.by_version[ver_str] = item_info - self.known_versions.append(ver) - - def get_by_ver(self, ver: Optional[list[int]]=None) -> Optional[ItemInfo]: - """ - Find and return info of the newest version of item. - - If ver is specified, instead find and return info of that version of the - item (or None if absent). - """ - ver = util.version_string(ver or self.known_versions[-1]) - - return self.by_version.get(ver) - - def get_all(self) -> list[ItemInfo]: - """ - Return a list of item info for all its versions, from oldest ot newest. - """ - return [self.by_version[util.version_string(ver)] - for ver in self.known_versions] - -class PatternTreeNode: - """ - "Pattern Tree" is how we refer to the data structure used for querying - Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal - is to make it possible for given URL to quickly retrieve all known patterns - that match it. - """ - def __init__(self): - self.wildcard_matches = [None, None, None] - self.literal_match = None - self.children = {} - - def search(self, segments): - """ - Yields all matches of this segments sequence against the tree that - starts at this node. Results are produces in order from greatest to - lowest pattern specificity. 
- """ - nodes = [self] - - for segment in segments: - next_node = nodes[-1].children.get(segment) - if next_node is None: - break - - nodes.append(next_node) - - nsegments = len(segments) - cond_literal = lambda: len(nodes) == nsegments - cond_wildcard = [ - lambda: len(nodes) + 1 == nsegments and segments[-1] != '*', - lambda: len(nodes) + 1 < nsegments, - lambda: len(nodes) + 1 != nsegments or segments[-1] != '***' - ] - - while nodes: - node = nodes.pop() - - for item, condition in [(node.literal_match, cond_literal), - *zip(node.wildcard_matches, cond_wildcard)]: - if item is not None and condition(): - yield item - - def add(self, segments, item_instantiator): - """ - Make item queryable through (this branch of) the Pattern Tree. If there - was not yet any item associated with the tree path designated by - segments, create a new one using item_instantiator() function. Return - all items matching this path (both the ones that existed and the ones - just created). - """ - node = self - segment = None - - for segment in segments: - wildcards = node.wildcard_matches - - child = node.children.get(segment) or PatternTreeNode() - node.children[segment] = child - node = child - - if node.literal_match is None: - node.literal_match = item_instantiator() - - if segment not in ('*', '**', '***'): - return [node.literal_match] - - if wildcards[len(segment) - 1] is None: - wildcards[len(segment) - 1] = item_instantiator() - - return [node.literal_match, wildcards[len(segment) - 1]] - -proto_regex = re.compile(r'^(?P\w+)://(?P.*)$') -user_re = r'[^/?#@]+@' # r'(?P[^/?#@]+)@' # discarded for now -query_re = r'\??[^#]*' # r'\??(?P[^#]*)' # discarded for now -domain_re = r'(?P[^/?#]+)' -path_re = r'(?P[^?#]*)' -http_regex = re.compile(f'{domain_re}{path_re}{query_re}.*') -ftp_regex = re.compile(f'(?:{user_re})?{domain_re}{path_re}.*') - -class UrlError(ValueError): - """Used to report a URL or URL pattern that is invalid or unsupported.""" - pass - -class DeconstructedUrl: - 
"""Represents a deconstructed URL or URL pattern""" - def __init__(self, url): - self.url = url - - match = proto_regex.match(url) - if not match: - raise UrlError(f_('invalid_URL_{}').format(url)) - - self.proto = match.group('proto') - if self.proto not in ('http', 'https', 'ftp'): - raise UrlError(f_('disallowed_protocol_{}').format(proto)) - - if self.proto == 'ftp': - match = ftp_regex.match(match.group('rest')) - elif self.proto in ('http', 'https'): - match = http_regex.match(match.group('rest')) - - if not match: - raise UrlError(f_('invalid_URL_{}').format(url)) - - self.domain = match.group('domain').split('.') - self.domain.reverse() - self.path = [*filter(None, match.group('path').split('/'))] - -class PatternMapping: - """ - A mapping info, together with one of its patterns, as stored in Pattern - Tree. - """ - def __init__(self, pattern: str, mapping_info: MappingInfo): - self.pattern = pattern - self.mapping_info = mapping_info - - def register(self, pattern_tree: dict): - """ - Make self queryable through the Pattern Tree passed in the argument. - """ - deco = DeconstructedUrl(self.pattern) - - domain_tree = pattern_tree.get(deco.proto) or PatternTreeNode() - pattern_tree[deco.proto] = domain_tree - - for path_tree in domain_tree.add(deco.domain, PatternTreeNode): - for match_list in path_tree.add(deco.path, list): - match_list.append(self) + # def as_query_result(self) -> dict[str, Union[str, list[int]]]: + # """ + # Produce a json.dump()-able object describing this mapping as one of a + # collection of query results. + # """ + # return { + # 'version': self.version, + # 'identifier': self.identifier, + # 'long_name': self.long_name + # } class Malcontent: """ - Instance of this class represents a directory with files that can be loaded - and served by Hydrilla. + Represent a directory with files that can be loaded and served by Hydrilla. 
""" def __init__(self, malcontent_dir_path: Path): """ @@ -298,13 +77,15 @@ class Malcontent: malcontent_dir_path for serveable site-modifying packages and loads them into its data structures. """ - self.infos = {'resource': {}, 'mapping': {}} - self.pattern_tree = {} + self.resource_infos: dict[str, VersionedItemInfo[ResourceInfo]] = {} + self.mapping_infos: dict[str, VersionedItemInfo[MappingInfo]] = {} + + self.pattern_tree: PatternTree[MappingInfo] = PatternTree() self.malcontent_dir_path = malcontent_dir_path if not self.malcontent_dir_path.is_dir(): - raise ValueError(f_('malcontent_dir_path_not_dir_{}') + raise ValueError(_('malcontent_dir_path_not_dir_{}') .format(malcontent_dir_path)) for item_type in ('mapping', 'resource'): @@ -323,18 +104,27 @@ class Malcontent: if flask.current_app._hydrilla_werror: raise e from None - msg = f_('couldnt_load_item_from_{}').format(ver_file) + msg = _('couldnt_load_item_from_{}').format(ver_file) logging.error(msg, exc_info=True) self._report_missing() self._finalize() + @staticmethod + def _register_info(infos: dict[str, VersionedItemInfo[VersionedType]], + identifier: str, item_info: VersionedType) -> None: + """ + ........... + """ + infos.setdefault(identifier, VersionedItemInfo())\ + .register(item_info) + def _load_item(self, item_type: str, ver_file: Path) -> None: """ Reads, validates and autocompletes serveable mapping/resource definition, then registers information from it in data structures. """ - version = util.parse_version(ver_file.name) + version = versions.parse_version(ver_file.name) identifier = ver_file.parent.name item_json, major = util.load_instance_from_file(ver_file) @@ -342,32 +132,35 @@ class Malcontent: util.validator_for(f'api_{item_type}_description-{major}.schema.json')\ .validate(item_json) - if item_type == 'resource': - item_info = ResourceInfo(item_json, major) - else: - item_info = MappingInfo(item_json, major) + # Assertion needed for mypy. If validation passed, this should not fail. 
+ assert major is not None + + item_info: ItemInfo = ResourceInfo(item_json, major) \ + if item_type == 'resource' else MappingInfo(item_json, major) if item_info.identifier != identifier: - msg = f_('item_{item}_in_file_{file}')\ + msg = _('item_{item}_in_file_{file}')\ .format({'item': item_info.identifier, 'file': ver_file}) raise ValueError(msg) if item_info.version != version: ver_str = util.version_string(item_info.version) - msg = f_('item_version_{ver}_in_file_{file}')\ + msg = _('item_version_{ver}_in_file_{file}')\ .format({'ver': ver_str, 'file': ver_file}) raise ValueError(msg) - versioned_info = self.infos[item_type].get(identifier) - if versioned_info is None: - versioned_info = VersionedItemInfo() - self.infos[item_type][identifier] = versioned_info + if isinstance(item_info, ResourceInfo): + self._register_info(self.resource_infos, identifier, item_info) + elif isinstance(item_info, MappingInfo): + self._register_info(self.mapping_infos, identifier, item_info) - versioned_info.register(item_info) - - def _all_of_type(self, item_type: str) -> Iterable[ItemInfo]: - """Iterator over all registered versions of all mappings/resources.""" - for versioned_info in self.infos[item_type].values(): + @staticmethod + def _all_infos(infos: dict[str, VersionedItemInfo[VersionedType]]) \ + -> Iterable[VersionedType]: + """ + ........... + """ + for versioned_info in infos.values(): for item_info in versioned_info.by_version.values(): yield item_info @@ -377,38 +170,38 @@ class Malcontent: were not loaded. 
""" def report_missing_dependency(info: ResourceInfo, dep: str) -> None: - msg = f_('no_dep_{resource}_{ver}_{dep}')\ + msg = _('no_dep_{resource}_{ver}_{dep}')\ .format(dep=dep, resource=info.identifier, ver=util.version_string(info.version)) logging.error(msg) - for resource_info in self._all_of_type('resource'): + for resource_info in self._all_infos(self.resource_infos): for dep in resource_info.dependencies: - if dep not in self.infos['resource']: + if dep not in self.resource_infos: report_missing_dependency(resource_info, dep) def report_missing_payload(info: MappingInfo, payload: str) -> None: - msg = f_('no_payload_{mapping}_{ver}_{payload}')\ + msg = _('no_payload_{mapping}_{ver}_{payload}')\ .format(mapping=info.identifier, payload=payload, ver=util.version_string(info.version)) logging.error(msg) - for mapping_info in self._all_of_type('mapping'): + for mapping_info in self._all_infos(self.mapping_infos): for payload in mapping_info.payloads.values(): - if payload not in self.infos['resource']: + if payload not in self.resource_infos: report_missing_payload(mapping_info, payload) - def report_missing_mapping(info: Union[MappingInfo, ResourceInfo], + def report_missing_mapping(info: ItemInfo, required_mapping: str) -> None: msg = _('no_mapping_{required_by}_{ver}_{required}')\ .format(required_by=info.identifier, required=required_mapping, ver=util.version_string(info.version)) logging.error(msg) - for item_info in (*self._all_of_type('mapping'), - *self._all_of_type('resource')): + for item_info in (*self._all_infos(self.mapping_infos), + *self._all_infos(self.resource_infos)): for required in item_info.required_mappings: - if required not in self.infos['mapping']: + if required not in self.mapping_infos: report_missing_mapping(item_info, required) def _finalize(self): @@ -416,18 +209,19 @@ class Malcontent: Initialize structures needed to serve queries. Called once after all data gets loaded. 
""" - for infos_dict in self.infos.values(): - for versioned_info in infos_dict.values(): + for versioned_info in (*self.mapping_infos.values(), + *self.resource_infos.values()): versioned_info.known_versions.sort() - for info in self._all_of_type('mapping'): + for info in self._all_infos(self.mapping_infos): for pattern in info.payloads: try: - PatternMapping(pattern, info).register(self.pattern_tree) + self.pattern_tree = \ + self.pattern_tree.register(pattern, info) except Exception as e: if flask.current_app._hydrilla_werror: raise e from None - msg = f_('couldnt_register_{mapping}_{ver}_{pattern}')\ + msg = _('couldnt_register_{mapping}_{ver}_{pattern}')\ .format(mapping=info.identifier, pattern=pattern, ver=util.version_string(info.version)) logging.error(msg) @@ -439,27 +233,16 @@ class Malcontent: If multiple versions of a mapping are applicable, only the most recent is included in the result. """ - deco = DeconstructedUrl(url) - - collected = {} - - domain_tree = self.pattern_tree.get(deco.proto) or PatternTreeNode() - - def process_mapping(pattern_mapping: PatternMapping) -> None: - if url[-1] != '/' and pattern_mapping.pattern[-1] == '/': - return - - info = pattern_mapping.mapping_info + collected: dict[str, MappingInfo] = {} + for result_set in self.pattern_tree.search(url): + for wrapped_mapping_info in result_set: + info = wrapped_mapping_info.item + previous = collected.get(info.identifier) + if previous and previous.version > info.version: + continue - if info.identifier not in collected or \ - info.version > collected[info.identifier].version: collected[info.identifier] = info - for path_tree in domain_tree.search(deco.domain): - for matches_list in path_tree.search(deco.path): - for pattern_mapping in matches_list: - process_mapping(pattern_mapping) - return list(collected.values()) bp = flask.Blueprint('bp', __package__) @@ -484,8 +267,6 @@ class HydrillaApp(flask.Flask): ] } - self._hydrilla_translation = \ - util.translation(here / 'locales', 
hydrilla_config['language']) self._hydrilla_project_url = hydrilla_config['hydrilla_project_url'] self._hydrilla_port = hydrilla_config['port'] self._hydrilla_werror = hydrilla_config.get('werror', False) @@ -506,8 +287,8 @@ class HydrillaApp(flask.Flask): 'hydrilla_project_url' global variable and to install proper translations. """ - env = super().create_jinja_environment(*args, **kwargs) - env.install_gettext_translations(self._hydrilla_translation) + env = super().create_jinja_environment(*args, **kwargs) # type: ignore + env.install_gettext_translations(make_translation()) env.globals['hydrilla_project_url'] = self._hydrilla_project_url return env @@ -519,9 +300,6 @@ class HydrillaApp(flask.Flask): """ return super().run(*args, port=self._hydrilla_port, **kwargs) -def f_(text_key): - return flask.current_app._hydrilla_translation.gettext(text_key) - def malcontent(): return flask.current_app._hydrilla_malcontent @@ -542,7 +320,12 @@ def get_resource_or_mapping(item_type: str, identifier: str) -> Response: identifier = match.group(1) - versioned_info = malcontent().infos[item_type].get(identifier) + if item_type == 'resource': + infos = malcontent().resource_infos + else: + infos = malcontent().mapping_infos + + versioned_info = infos.get(identifier) info = versioned_info and versioned_info.get_by_ver() if info is None: @@ -586,9 +369,6 @@ default_config_path = Path('/etc/hydrilla/config.json') default_malcontent_dir = '/var/lib/hydrilla/malcontent' default_project_url = 'https://hydrillabugs.koszko.org/projects/hydrilla/wiki' -console_gettext = util.translation(here / 'locales').gettext -_ = console_gettext - @click.command(help=_('serve_hydrilla_packages_explain_wsgi_considerations')) @click.option('-m', '--malcontent-dir', type=click.Path(exists=True, file_okay=False), @@ -600,24 +380,21 @@ _ = console_gettext @click.option('-c', '--config', 'config_path', type=click.Path(exists=True, dir_okay=False, resolve_path=True), 
help=_('path_to_config_file_explain_default')) -@click.option('-l', '--language', type=click.STRING, - help=_('language_to_use_overrides_config')) @click.version_option(version=_version.version, prog_name='Hydrilla', message=_('%(prog)s_%(version)s_license'), help=_('version_printing')) def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str], - port: Optional[int], config_path: Optional[str], - language: Optional[str]) -> None: + port: Optional[int], config_path: Optional[str]) -> None: """ Run a development Hydrilla server. This command is meant to be the entry point of hydrilla command exported by this package. """ - config_load_opts = {} if config_path is None \ - else {'config_path': [Path(config_path)]} - - hydrilla_config = config.load(**config_load_opts) + if config_path is None: + hydrilla_config = config.load() + else: + hydrilla_config = config.load(config_paths=[Path(config_path)]) if malcontent_dir is not None: hydrilla_config['malcontent_dir'] = str(Path(malcontent_dir).resolve()) @@ -628,14 +405,7 @@ def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str], if port is not None: hydrilla_config['port'] = port - if language is not None: - hydrilla_config['language'] = language - - lang = hydrilla_config.get('language') - _ = console_gettext if lang is None else \ - util.translation(here / 'locales', lang).gettext - - for opt in ('malcontent_dir', 'hydrilla_project_url', 'port', 'language'): + for opt in ('malcontent_dir', 'hydrilla_project_url', 'port'): if opt not in hydrilla_config: raise ValueError(_('config_option_{}_not_supplied').format(opt)) @@ -649,7 +419,7 @@ def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str], @click.version_option(version=_version.version, prog_name='Hydrilla', message=_('%(prog)s_%(version)s_license'), help=_('version_printing')) -def start_wsgi() -> None: +def start_wsgi() -> flask.Flask: """ Create application object for use in WSGI deployment. 
diff --git a/src/hydrilla/translations.py b/src/hydrilla/translations.py new file mode 100644 index 0000000..a963e82 --- /dev/null +++ b/src/hydrilla/translations.py @@ -0,0 +1,104 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Handling of gettext for Hydrilla. +# +# This file is part of Hydrilla +# +# Copyright (C) 2021, 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +# Enable using with Python 3.7. +from __future__ import annotations + +import locale as lcl +import gettext + +from pathlib import Path +from typing import Optional + +here = Path(__file__).resolve().parent + +localedir = here / 'locales' + +supported_locales = [f.name for f in localedir.iterdir() if f.is_dir()] + +default_locale = 'en_US' + +def select_best_locale() -> str: + """ + .... + + Otherwise, try to determine system's default language and use that. + """ + # TODO: Stop referenceing flask here. Instead, allow other code to register + # custom locale resolvers and register flask-aware resolver during + # runtime from within the flask-related part(s) of the application. 
+ try: + import flask + use_flask = flask.has_request_context() + except ModuleNotFoundError: + use_flask = False + + if use_flask: + return flask.request.accept_languages.best_match( + supported_locales, + default=default_locale + ) + + # https://stackoverflow.com/questions/3425294/how-to-detect-the-os-default-language-in-python + # I am not going to surrender to Microbugs' nonfree, crappy OS to test it, + # so the lines inside try: block may actually fail. + locale: Optional[str] = lcl.getdefaultlocale()[0] + try: + from ctypes.windll import kernel32 as windll # type: ignore + locale = lcl.windows_locale[windll.GetUserDefaultUILanguage()] + except: + pass + + return locale if locale in supported_locales else default_locale + +translations: dict[str, gettext.NullTranslations] = {} + +def translation(locale: Optional[str] = None) -> gettext.NullTranslations: + """ + Configure translations for domain 'messages' and return the object that + represents them. If the requested locale is not available, fall back to + 'en_US'. + """ + if locale is None: + locale = select_best_locale() + + if not (localedir / locale).is_dir(): + locale = 'en_US' + + if locale not in translations: + translations[locale] = gettext.translation( + 'messages', + localedir=localedir, + languages=[locale] + ) + + return translations[locale] + +def smart_gettext(msg: str, locale: Optional[str] = None) -> str: + """....""" + return translation(locale).gettext(msg) + +_ = smart_gettext diff --git a/src/hydrilla/url_patterns.py b/src/hydrilla/url_patterns.py new file mode 100644 index 0000000..8e80379 --- /dev/null +++ b/src/hydrilla/url_patterns.py @@ -0,0 +1,181 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Data structure for querying URL patterns. +# +# This file is part of Hydrilla&Haketilo. 
+# +# Copyright (C) 2021, 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +""" +This module contains functions for deconstruction and construction of URLs and +Haketilo URL patterns. + +Data structures for querying data using URL patterns are also defined there. +""" + +# Enable using with Python 3.7. +from __future__ import annotations + +import re +import urllib.parse as up +import typing as t +import dataclasses as dc + +from immutables import Map + +from hydrilla.translations import smart_gettext as _ +from hydrilla.exceptions import HaketiloException + +default_ports: t.Mapping[str, int] = Map(http=80, https=443, ftp=21) + +@dc.dataclass(frozen=True, unsafe_hash=True) +class ParsedUrl: + """....""" + orig_url: str # orig_url used in __hash__() + scheme: str = dc.field(hash=False) + domain_labels: tuple[str, ...] = dc.field(hash=False) + path_segments: tuple[str, ...] 
= dc.field(hash=False) + has_trailing_slash: bool = dc.field(hash=False) + port: int = dc.field(hash=False) + + # def reconstruct_url(self) -> str: + # """....""" + # scheme = self.orig_scheme + + # netloc = '.'.join(reversed(self.domain_labels)) + # if scheme == self.scheme and \ + # self.port is not None and \ + # default_ports[scheme] != self.port: + # netloc += f':{self.port}' + + # path = '/'.join(('', *self.path_segments)) + # if self.has_trailing_slash: + # path += '/' + + # return f'{scheme}://{netloc}{path}' + +# URLs with those schemes will be recognized but not all of them have to be +# actually supported by Hydrilla server and Haketilo proxy. +supported_schemes = 'http', 'https', 'ftp', 'file' + +def _parse_pattern_or_url(url: str, orig_url: str, is_pattern: bool = False) \ + -> ParsedUrl: + """....""" + if not is_pattern: + assert orig_url == url + + parse_result = up.urlparse(url) + + # Verify the parsed URL is valid + has_hostname = parse_result.hostname is not None + if not parse_result.scheme or \ + (parse_result.scheme == 'file' and parse_result.port is not None) or \ + (parse_result.scheme == 'file' and has_hostname) or \ + (parse_result.scheme != 'file' and not has_hostname): + if is_pattern: + msg = _('err.url_pattern_{}.bad').format(orig_url) + raise HaketiloException(msg) + else: + raise HaketiloException(_('err.url_{}.bad') .format(url)) + + # Verify the URL uses a known scheme and extract it. + scheme = parse_result.scheme + + if parse_result.scheme not in supported_schemes: + if is_pattern: + msg = _('err.url_pattern_{}.bad_scheme').format(orig_url) + raise HaketiloException(msg) + else: + raise HaketiloException(_('err.url_{}.bad_scheme').format(url)) + + # Extract and keep information about special pattern schemas used. 
+ if is_pattern and orig_url.startswith('http*:'): + if parse_result.port: + fmt = _('err.url_pattern_{}.special_scheme_port') + raise HaketiloException(fmt.format(orig_url)) + + # Extract URL's explicit port or deduce the port based on URL's protocol. + try: + explicit_port = parse_result.port + port_out_of_range = explicit_port == 0 + except ValueError: + port_out_of_range = True + + if port_out_of_range: + if is_pattern: + msg = _('err.url_pattern_{}.bad_port').format(orig_url) + raise HaketiloException(msg) + else: + raise HaketiloException(_('err.url_{}.bad_port').format(url)) + + port = t.cast(int, explicit_port or default_ports.get(parse_result.scheme)) + + # Make URL's hostname into a list of labels in reverse order. E.g. + # 'https://a.bc..de.fg.com/h/i/' -> ['com', 'fg', 'de', 'bc', 'a'] + hostname = parse_result.hostname or '' + domain_labels_with_empty = reversed(hostname.split('.')) + domain_labels = tuple(lbl for lbl in domain_labels_with_empty if lbl) + + # Make URL's path into a list of segments. E.g. + # 'https://ab.cd/e//f/g/' -> ['e', 'f', 'g'] + path_segments_with_empty = parse_result.path.split('/') + path_segments = tuple(sgmt for sgmt in path_segments_with_empty if sgmt) + + # Record whether a trailing '/' is present in the URL. + has_trailing_slash = parse_result.path.endswith('/') + + # Perform some additional sanity checks and return the result. 
+ if is_pattern: + if parse_result.query: + msg = _('err.url_pattern_{}.has_query').format(orig_url) + raise HaketiloException(msg) + + if parse_result.fragment: + msg = _('err.url_pattern_{}.has_frag').format(orig_url) + raise HaketiloException(msg) + + return ParsedUrl( + orig_url = orig_url, + scheme = scheme, + port = port, + domain_labels = domain_labels, + path_segments = path_segments, + has_trailing_slash = has_trailing_slash + ) + +replace_scheme_regex = re.compile(r'^[^:]*') + +def parse_pattern(url_pattern: str) -> t.Sequence[ParsedUrl]: + """....""" + if url_pattern.startswith('http*:'): + patterns = [ + replace_scheme_regex.sub('http', url_pattern), + replace_scheme_regex.sub('https', url_pattern) + ] + else: + patterns = [url_pattern] + + return tuple(_parse_pattern_or_url(pat, url_pattern, True) + for pat in patterns) + +def parse_url(url: str) -> ParsedUrl: + """....""" + return _parse_pattern_or_url(url, url) diff --git a/src/hydrilla/versions.py b/src/hydrilla/versions.py new file mode 100644 index 0000000..a7a9f29 --- /dev/null +++ b/src/hydrilla/versions.py @@ -0,0 +1,59 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Functions to operate on version numbers. +# +# This file is part of Hydrilla&Haketilo. +# +# Copyright (C) 2021, 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +""" +This module contains functions for deconstruction and construction of version +strings and version tuples. +""" + +# Enable using with Python 3.7. +from __future__ import annotations + +import typing as t + +def normalize_version(ver: t.Sequence[int]) -> tuple[int, ...]: + """Strip right-most zeroes from 'ver'. The original list is not modified.""" + new_len = 0 + for i, num in enumerate(ver): + if num != 0: + new_len = i + 1 + + return tuple(ver[:new_len]) + +def parse_version(ver_str: str) -> tuple[int, ...]: + """ + Convert 'ver_str' into an array representation, e.g. for ver_str="4.6.13.0" + return [4, 6, 13, 0]. + """ + return tuple(int(num) for num in ver_str.split('.')) + +def version_string(ver: t.Sequence[int], rev: t.Optional[int] = None) -> str: + """ + Produce version's string representation (optionally with revision), like: + 1.2.3-5 + No version normalization is performed. + """ + return '.'.join(str(n) for n in ver) + ('' if rev is None else f'-{rev}') -- cgit v1.2.3