summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorWojtek Kosior <koszko@koszko.org>2022-06-13 11:06:49 +0200
committerWojtek Kosior <koszko@koszko.org>2022-07-16 16:31:44 +0200
commit52d12a4fa124daa1595529e3e7008276a7986d95 (patch)
tree9b56fe2d28ff0242f8511aca570be455112ad3df /src
parent9dcbfdfe8620cc417438d1727aa1e0c89846e9bf (diff)
downloadhaketilo-hydrilla-52d12a4fa124daa1595529e3e7008276a7986d95.tar.gz
haketilo-hydrilla-52d12a4fa124daa1595529e3e7008276a7986d95.zip
unfinished partial work
Diffstat (limited to 'src')
-rw-r--r--src/hydrilla/__init__.py10
-rw-r--r--src/hydrilla/builder/__init__.py7
-rw-r--r--src/hydrilla/builder/__main__.py9
-rw-r--r--src/hydrilla/builder/_version.py5
-rw-r--r--src/hydrilla/builder/build.py485
-rw-r--r--src/hydrilla/builder/common_errors.py65
-rw-r--r--src/hydrilla/builder/local_apt.py432
-rw-r--r--src/hydrilla/builder/piggybacking.py117
-rw-r--r--src/hydrilla/exceptions.py40
-rw-r--r--src/hydrilla/item_infos.py344
-rw-r--r--src/hydrilla/json_instances.py207
-rw-r--r--src/hydrilla/locales/en_US/LC_MESSAGES/messages.po252
-rw-r--r--src/hydrilla/locales/pl_PL/LC_MESSAGES/messages.po258
-rw-r--r--src/hydrilla/mitmproxy_launcher/__main__.py11
-rw-r--r--src/hydrilla/mitmproxy_launcher/launch.py77
-rw-r--r--src/hydrilla/pattern_tree.py339
-rw-r--r--src/hydrilla/proxy/__init__.py5
-rw-r--r--src/hydrilla/proxy/addon.py177
-rw-r--r--src/hydrilla/proxy/flow_handlers.py383
-rw-r--r--src/hydrilla/proxy/policies.py76
-rw-r--r--src/hydrilla/proxy/state.py73
-rw-r--r--src/hydrilla/proxy/store.py40
-rw-r--r--src/hydrilla/py.typed5
m---------src/hydrilla/schemas/1.x0
m---------src/hydrilla/schemas/2.x0
-rw-r--r--src/hydrilla/server/config.json3
-rw-r--r--src/hydrilla/server/config.py6
-rw-r--r--src/hydrilla/server/locales/en_US/LC_MESSAGES/hydrilla-messages.po151
-rw-r--r--src/hydrilla/server/serve.py406
-rw-r--r--src/hydrilla/translations.py104
-rw-r--r--src/hydrilla/url_patterns.py181
-rw-r--r--src/hydrilla/versions.py59
32 files changed, 3846 insertions, 481 deletions
diff --git a/src/hydrilla/__init__.py b/src/hydrilla/__init__.py
index 6aeb276..d382ead 100644
--- a/src/hydrilla/__init__.py
+++ b/src/hydrilla/__init__.py
@@ -1,7 +1,5 @@
-# SPDX-License-Identifier: 0BSD
+# SPDX-License-Identifier: CC0-1.0
-# Copyright (C) 2013-2020, PyPA
-
-# https://packaging.python.org/en/latest/guides/packaging-namespace-packages/#pkgutil-style-namespace-packages
-
-__path__ = __import__('pkgutil').extend_path(__path__, __name__)
+# Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org>
+#
+# Available under the terms of Creative Commons Zero v1.0 Universal.
diff --git a/src/hydrilla/builder/__init__.py b/src/hydrilla/builder/__init__.py
new file mode 100644
index 0000000..73dc579
--- /dev/null
+++ b/src/hydrilla/builder/__init__.py
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: CC0-1.0
+
+# Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org>
+#
+# Available under the terms of Creative Commons Zero v1.0 Universal.
+
+from .build import Build
diff --git a/src/hydrilla/builder/__main__.py b/src/hydrilla/builder/__main__.py
new file mode 100644
index 0000000..87dc9e2
--- /dev/null
+++ b/src/hydrilla/builder/__main__.py
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: CC0-1.0
+
+# Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org>
+#
+# Available under the terms of Creative Commons Zero v1.0 Universal.
+
+from . import build
+
+build.perform()
diff --git a/src/hydrilla/builder/_version.py b/src/hydrilla/builder/_version.py
new file mode 100644
index 0000000..2feb153
--- /dev/null
+++ b/src/hydrilla/builder/_version.py
@@ -0,0 +1,5 @@
+# coding: utf-8
+# file generated by setuptools_scm
+# don't change, don't track in version control
+version = '1.1b1'
+version_tuple = (1, '1b1')
diff --git a/src/hydrilla/builder/build.py b/src/hydrilla/builder/build.py
new file mode 100644
index 0000000..acc6576
--- /dev/null
+++ b/src/hydrilla/builder/build.py
@@ -0,0 +1,485 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Building Hydrilla packages.
+#
+# This file is part of Hydrilla
+#
+# Copyright (C) 2022 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+# Enable using with Python 3.7.
+from __future__ import annotations
+
+import json
+import re
+import zipfile
+import subprocess
+from pathlib import Path, PurePosixPath
+from hashlib import sha256
+from sys import stderr
+from contextlib import contextmanager
+from tempfile import TemporaryDirectory, TemporaryFile
+from typing import Optional, Iterable, Iterator, Union
+
+import jsonschema # type: ignore
+import click
+
+from .. import _version, json_instances, versions
+from ..translations import smart_gettext as _
+from . import local_apt
+from .piggybacking import Piggybacked
+from .common_errors import *
+
+here = Path(__file__).resolve().parent  # directory that contains this module
+
+schemas_root = 'https://hydrilla.koszko.org/schemas'  # URL prefix of every '$schema' value written by the builder
+
+generated_by = {  # stored under 'generated_by' in each item and source description produced
+ 'name': 'hydrilla.builder',
+ 'version': _version.version
+}
+
class ReuseError(SubprocessError):
    """Raised when invoking the REUSE tool fails for any reason."""
+
def generate_spdx_report(root: Path) -> bytes:
    """
    Run the REUSE tool over the sources under 'root' and return the SPDX
    report it produces as 'bytes'.

    'reuse lint' is executed first and 'reuse spdx' second. If either command
    exits with a non-zero status, a ReuseError that carries the command's
    captured output is raised. A ReuseError is also raised when the 'reuse'
    executable cannot be started at all.
    """
    root_arg = str(root)
    completed = None

    for subcommand in ('lint', 'spdx'):
        argv = ['reuse', '--root', root_arg, subcommand]

        try:
            completed = subprocess.run(argv, capture_output=True, text=True)
        except FileNotFoundError:
            raise ReuseError(
                _('couldnt_execute_{}_is_it_installed').format('reuse')
            )

        if completed.returncode != 0:
            raise ReuseError(_('command_{}_failed').format(' '.join(argv)),
                             completed)

    # Only the output of the last command ('spdx') is of interest.
    return completed.stdout.encode()
+
class FileRef:
    """
    Represent reference to a file in the package.

    Attributes:
    'path' - the file's path relative to the package root
    'contents' - the file's raw bytes
    'contents_hash' - lowercase hex SHA-256 digest of 'contents'
    'include_in_distribution' - whether the file should be written out as
        one of the built package's files (False until requested)
    'include_in_source_archive' - whether the file should go into the source
        zipfile (True until excluded)
    """
    def __init__(self, path: PurePosixPath, contents: bytes) -> None:
        """Initialize FileRef and compute the hash of 'contents'."""
        self.include_in_distribution = False
        self.include_in_source_archive = True
        self.path = path
        self.contents = contents

        self.contents_hash = sha256(contents).hexdigest()

    def make_ref_dict(self) -> dict[str, str]:
        """
        Represent the file reference through a dict that can be included in
        JSON definitions.
        """
        return {
            'file': str(self.path),
            'sha256': self.contents_hash
        }
+
@contextmanager
def piggybacked_system(piggyback_def: Optional[dict],
                       piggyback_files: Optional[Path]) \
        -> Iterator[Piggybacked]:
    """
    Context manager yielding a Piggybacked object that resolves resources
    from a foreign software packaging system.

    When 'piggyback_def' is None, an empty Piggybacked object is yielded.
    Otherwise the work is delegated to the APT backend, which may use package
    files (.deb's, etc.) found under 'piggyback_files' instead of resolving
    and downloading them.
    """
    if piggyback_def is not None:
        # apt is the only supported system right now
        assert piggyback_def['system'] == 'apt'

        with local_apt.piggybacked_system(piggyback_def, piggyback_files) \
                as foreign:
            yield foreign

        return

    yield Piggybacked()
+
class Build:
    """
    Build a Hydrilla package.

    Constructing the object performs the whole build in memory; the
    'write_*' methods then store the results on disk.
    """
    def __init__(self, srcdir: Path, index_json_path: Path,
                 piggyback_files: Optional[Path]=None):
        """
        Initialize a build. All files to be included in a distribution package
        are loaded into memory, all data gets validated and all necessary
        computations (e.g. preparing of hashes) are performed.

        'index_json_path', when relative, is interpreted relative to 'srcdir'.

        When 'piggyback_files' is None, a sibling directory of the source
        directory named '<srcdir name>.foreign-packages' is used if it exists.
        """
        self.srcdir = srcdir.resolve()
        self.piggyback_files = piggyback_files
        if piggyback_files is None:
            # Derive the default from the *resolved* path. A relative 'srcdir'
            # such as Path('.') has an empty name and would otherwise yield
            # './.foreign-packages'.
            piggyback_default_path = \
                self.srcdir.parent / f'{self.srcdir.name}.foreign-packages'
            if piggyback_default_path.exists():
                self.piggyback_files = piggyback_default_path

        self.files_by_path: dict[PurePosixPath, FileRef] = {}
        self.resource_list: list[dict] = []
        self.mapping_list: list[dict] = []

        if not index_json_path.is_absolute():
            index_json_path = (self.srcdir / index_json_path)

        index_obj = json_instances.read_instance(index_json_path)
        schema_fmt = 'package_source-{}.schema.json'
        major = json_instances.validate_instance(index_obj, schema_fmt)

        # The index file is always stored as 'index.json', whatever its
        # actual name on disk is.
        index_desired_path = PurePosixPath('index.json')
        self.files_by_path[index_desired_path] = \
            FileRef(index_desired_path, index_json_path.read_bytes())

        self._process_index_json(index_obj, major)

    def _process_file(self, filename: Union[str, PurePosixPath],
                      piggybacked: Piggybacked,
                      include_in_distribution: bool=True):
        """
        Resolve 'filename' relative to srcdir, load it to memory (if not loaded
        before), compute its hash and store its information in
        'self.files_by_path'.

        'filename' shall represent a relative path within package directory.

        If 'include_in_distribution' is True it shall cause the file to not
        only be included in the source package's zipfile, but also written as
        one of built package's files.

        For each file an attempt is made to resolve it using 'piggybacked'
        object. If a file is found and pulled from foreign software packaging
        system this way, it gets automatically excluded from inclusion in
        Hydrilla source package's zipfile.

        Return file's reference object that can be included in JSON
        definitions of various kinds.

        Raise FileReferenceError when the path contains '..', leads outside
        the package directory, names the reserved 'index.json' or does not
        point to an existing file.
        """
        include_in_source_archive = True

        desired_path = PurePosixPath(filename)
        if '..' in desired_path.parts:
            msg = _('path_contains_double_dot_{}').format(filename)
            raise FileReferenceError(msg)

        path = piggybacked.resolve_file(desired_path)
        if path is None:
            path = (self.srcdir / desired_path).resolve()

            # Path.is_relative_to() only exists since Python 3.9 while this
            # module is meant to work with Python 3.7; relative_to() raising
            # ValueError carries the same information.
            try:
                relative_path = path.relative_to(self.srcdir)
            except ValueError:
                raise FileReferenceError(_('loading_{}_outside_package_dir')
                                         .format(filename)) from None

            if str(relative_path) == 'index.json':
                raise FileReferenceError(_('loading_reserved_index_json'))
        else:
            include_in_source_archive = False

        file_ref = self.files_by_path.get(desired_path)
        if file_ref is None:
            if not path.is_file():
                msg = _('referenced_file_{}_missing').format(desired_path)
                raise FileReferenceError(msg)

            file_ref = FileRef(desired_path, path.read_bytes())
            self.files_by_path[desired_path] = file_ref

        # Flags only ever move in one direction so that a file referenced
        # several times keeps the strongest requirement.
        if include_in_distribution:
            file_ref.include_in_distribution = True

        if not include_in_source_archive:
            file_ref.include_in_source_archive = False

        return file_ref.make_ref_dict()

    def _prepare_source_package_zip(self, source_name: str,
                                    piggybacked: Piggybacked) -> str:
        """
        Create and store in memory (as 'self.source_zip_contents') a .zip
        archive containing files needed to build this source package.

        'source_name' shall not contain any slashes ('/').

        Package files are placed under '<source_name>/' and the archive files
        of piggybacked packages under '<source_name>.foreign-packages/'.

        Return zipfile's sha256 sum's hexstring.
        """
        source_dir_path = PurePosixPath(source_name)
        piggybacked_dir_path = PurePosixPath(f'{source_name}.foreign-packages')

        # The context manager makes sure the temporary file gets closed (and
        # thus removed) even if writing the archive fails.
        with TemporaryFile() as tf:
            with zipfile.ZipFile(tf, 'w') as zf:
                for file_ref in self.files_by_path.values():
                    if file_ref.include_in_source_archive:
                        zf.writestr(str(source_dir_path / file_ref.path),
                                    file_ref.contents)

                for desired_path, real_path in piggybacked.archive_files():
                    zf.writestr(str(piggybacked_dir_path / desired_path),
                                real_path.read_bytes())

            tf.seek(0)
            self.source_zip_contents = tf.read()

        return sha256(self.source_zip_contents).hexdigest()

    def _process_item(self, as_what: str, item_def: dict,
                      piggybacked: Piggybacked):
        """
        Process 'item_def' as definition of a resource or mapping (determined by
        'as_what' param) and store in memory its processed form and files used
        by it.

        Return a minimal item reference suitable for using in source
        description.
        """
        # Bumped to [2] below only when the item uses a schema-2 feature.
        resulting_schema_version = [1]

        copy_props = ['identifier', 'long_name', 'description',
                      *filter(lambda p: p in item_def, ('comment', 'uuid'))]

        new_item_obj: dict = {}

        if as_what == 'resource':
            item_list = self.resource_list

            copy_props.append('revision')

            script_file_refs = [self._process_file(f['file'], piggybacked)
                                for f in item_def.get('scripts', [])]

            deps = [{'identifier': res_ref['identifier']}
                    for res_ref in item_def.get('dependencies', [])]

            new_item_obj['dependencies'] = \
                [*piggybacked.resource_must_depend, *deps]
            new_item_obj['scripts'] = script_file_refs
        else:
            item_list = self.mapping_list

            payloads = {}
            for pat, res_ref in item_def.get('payloads', {}).items():
                payloads[pat] = {'identifier': res_ref['identifier']}

            new_item_obj['payloads'] = payloads

        new_item_obj['version'] = \
            versions.normalize_version(item_def['version'])

        # The mapping half of a combined definition gets the revision number
        # appended to its version.
        if as_what == 'mapping' and item_def['type'] == "mapping_and_resource":
            new_item_obj['version'].append(item_def['revision'])

        if self.source_schema_ver >= [2]:
            # handle 'required_mappings' field
            required = [{'identifier': map_ref['identifier']}
                        for map_ref in item_def.get('required_mappings', [])]
            if required:
                resulting_schema_version = max(resulting_schema_version, [2])
                new_item_obj['required_mappings'] = required

            # handle 'permissions' field
            permissions = item_def.get('permissions', {})
            processed_permissions = {}

            if permissions.get('cors_bypass'):
                processed_permissions['cors_bypass'] = True
            if permissions.get('eval'):
                processed_permissions['eval'] = True

            if processed_permissions:
                new_item_obj['permissions'] = processed_permissions
                resulting_schema_version = max(resulting_schema_version, [2])

            # handle '{min,max}_haketilo_version' fields
            for minmax, default in ('min', [1]), ('max', [65536]):
                constraint = item_def.get(f'{minmax}_haketilo_version')
                if constraint in (None, default):
                    continue

                copy_props.append(f'{minmax}_haketilo_version')
                resulting_schema_version = max(resulting_schema_version, [2])

        new_item_obj.update((p, item_def[p]) for p in copy_props)

        new_item_obj['$schema'] = ''.join([
            schemas_root,
            f'/api_{as_what}_description',
            '-',
            versions.version_string(resulting_schema_version),
            '.schema.json'
        ])
        new_item_obj['type'] = as_what
        new_item_obj['source_copyright'] = self.copyright_file_refs
        new_item_obj['source_name'] = self.source_name
        new_item_obj['generated_by'] = generated_by

        item_list.append(new_item_obj)

        props_in_ref = ('type', 'identifier', 'version', 'long_name')
        return {prop: new_item_obj[prop] for prop in props_in_ref}

    def _process_index_json(self, index_obj: dict,
                            major_schema_version: int) -> None:
        """
        Process 'index_obj' as contents of source package's index.json and store
        in memory this source package's zipfile as well as package's individual
        files and computed definitions of the source package and items defined
        in it.

        'major_schema_version' is accepted but not consulted here; the schema
        version is read from 'index_obj' itself.
        """
        self.source_schema_ver = \
            versions.normalize_version(get_schema_version(index_obj))

        out_schema = f'{schemas_root}/api_source_description-1.schema.json'

        self.source_name = index_obj['source_name']

        generate_spdx = index_obj.get('reuse_generate_spdx_report', False)
        if generate_spdx:
            contents = generate_spdx_report(self.srcdir)
            spdx_path = PurePosixPath('report.spdx')
            spdx_ref = FileRef(spdx_path, contents)

            # The report is generated, so it does not belong in the sources.
            spdx_ref.include_in_source_archive = False
            self.files_by_path[spdx_path] = spdx_ref

        piggyback_def = None
        if self.source_schema_ver >= [2] and 'piggyback_on' in index_obj:
            piggyback_def = index_obj['piggyback_on']

        # Everything that needs files of piggybacked packages has to happen
        # while the context manager is still open.
        with piggybacked_system(piggyback_def, self.piggyback_files) \
                as piggybacked:
            copyright_to_process = [
                *(file_ref['file'] for file_ref in index_obj['copyright']),
                *piggybacked.package_license_files
            ]
            self.copyright_file_refs = [self._process_file(f, piggybacked)
                                        for f in copyright_to_process]

            # Processing the copyright list marks 'report.spdx' for
            # distribution only if it was listed there.
            if generate_spdx and not spdx_ref.include_in_distribution:
                raise FileReferenceError(_('report_spdx_not_in_copyright_list'))

            item_refs = []
            for item_def in index_obj['definitions']:
                # A 'mapping_and_resource' definition matches both checks.
                if 'mapping' in item_def['type']:
                    ref = self._process_item('mapping', item_def, piggybacked)
                    item_refs.append(ref)
                if 'resource' in item_def['type']:
                    ref = self._process_item('resource', item_def, piggybacked)
                    item_refs.append(ref)

            for file_ref in index_obj.get('additional_files', []):
                self._process_file(file_ref['file'], piggybacked,
                                   include_in_distribution=False)

            zipfile_sha256 = self._prepare_source_package_zip\
                (self.source_name, piggybacked)

        source_archives_obj = {'zip' : {'sha256': zipfile_sha256}}

        self.source_description = {
            '$schema':            out_schema,
            'source_name':        self.source_name,
            'source_copyright':   self.copyright_file_refs,
            'upstream_url':       index_obj['upstream_url'],
            'definitions':        item_refs,
            'source_archives':    source_archives_obj,
            'generated_by':       generated_by
        }

        if 'comment' in index_obj:
            self.source_description['comment'] = index_obj['comment']

    def write_source_package_zip(self, dstpath: Path):
        """
        Write the previously prepared .zip archive, containing files needed
        to build this source package, at 'dstpath'.
        """
        with open(dstpath, 'wb') as output:
            output.write(self.source_zip_contents)

    def write_package_files(self, dstpath: Path):
        """
        Write package files under 'dstpath' for distribution.

        Files are stored under 'file/sha256/<hash>', the source description
        and source zipfile under 'source/' and every item definition under
        '<item type>/<identifier>/<version>'.
        """
        file_dir_path = (dstpath / 'file' / 'sha256').resolve()
        file_dir_path.mkdir(parents=True, exist_ok=True)

        for file_ref in self.files_by_path.values():
            if file_ref.include_in_distribution:
                file_path = file_dir_path / file_ref.contents_hash
                file_path.write_bytes(file_ref.contents)

        source_dir_path = (dstpath / 'source').resolve()
        source_dir_path.mkdir(parents=True, exist_ok=True)
        source_name = self.source_description["source_name"]

        with open(source_dir_path / f'{source_name}.json', 'wt') as out_str:
            json.dump(self.source_description, out_str)

        with open(source_dir_path / f'{source_name}.zip', 'wb') as out_bin:
            out_bin.write(self.source_zip_contents)

        for item_type, item_list in [
                ('resource', self.resource_list),
                ('mapping', self.mapping_list)
        ]:
            item_type_dir_path = (dstpath / item_type).resolve()

            for item_def in item_list:
                item_dir_path = item_type_dir_path / item_def['identifier']
                item_dir_path.mkdir(parents=True, exist_ok=True)

                version = '.'.join([str(n) for n in item_def['version']])
                with open(item_dir_path / version, 'wt') as output:
                    json.dump(item_def, output)
+
# Option type for arguments that must name an existing directory; click also
# turns the value into an absolute, symlink-free path.
dir_type = click.Path(exists=True, file_okay=False, resolve_path=True)

@click.command(help=_('build_package_from_srcdir_to_dstdir'))
@click.option('-s', '--srcdir', default='./', type=dir_type, show_default=True,
              help=_('source_directory_to_build_from'))
@click.option('-i', '--index-json', default='index.json', type=click.Path(),
              help=_('path_instead_of_index_json'))
@click.option('-p', '--piggyback-files', type=click.Path(),
              help=_('path_instead_for_piggyback_files'))
@click.option('-d', '--dstdir', type=dir_type, required=True,
              help=_('built_package_files_destination'))
@click.version_option(version=_version.version, prog_name='Hydrilla builder',
                      message=_('%(prog)s_%(version)s_license'),
                      help=_('version_printing'))
def perform(srcdir, index_json, piggyback_files, dstdir):
    """
    Execute Hydrilla builder to turn source package into a distributable one.

    This command is meant to be the entry point of hydrilla-builder command
    exported by this package.
    """
    # Only a missing option maps to None. Testing truthiness instead would let
    # an empty string reach Build un-converted, where a Path is expected.
    piggyback_path = None if piggyback_files is None else Path(piggyback_files)

    build = Build(Path(srcdir), Path(index_json), piggyback_path)
    build.write_package_files(Path(dstdir))
diff --git a/src/hydrilla/builder/common_errors.py b/src/hydrilla/builder/common_errors.py
new file mode 100644
index 0000000..ed4d0d2
--- /dev/null
+++ b/src/hydrilla/builder/common_errors.py
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Error classes.
+#
+# This file is part of Hydrilla
+#
+# Copyright (C) 2022 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+"""
+This module defines error types for use in other parts of Hydrilla builder.
+"""
+
+# Enable using with Python 3.7.
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Optional
+from subprocess import CompletedProcess as CP
+
+from ..translations import smart_gettext as _
+
class DistroError(Exception):
    """Raised when an OS distribution cannot be resolved."""
+
class FileReferenceError(Exception):
    """
    Raised for any problem with a file that a source package refers to.
    """
+
class SubprocessError(Exception):
    """
    Raised when running an external process goes wrong. This includes the
    various problems that occur when calling apt-* and dpkg-* commands.
    """
    def __init__(self, msg: str, cp: Optional[CP]=None) -> None:
        """
        Initialize this SubprocessError.

        If 'cp' is given, its non-empty stdout and stderr are appended to
        'msg', each under its own heading and separated by blank lines.
        """
        sections = [msg]

        if cp and cp.stdout:
            sections += [_('STDOUT_OUTPUT_heading'), cp.stdout]

        if cp and cp.stderr:
            sections += [_('STDERR_OUTPUT_heading'), cp.stderr]

        super().__init__('\n\n'.join(sections))
diff --git a/src/hydrilla/builder/local_apt.py b/src/hydrilla/builder/local_apt.py
new file mode 100644
index 0000000..bdfc76f
--- /dev/null
+++ b/src/hydrilla/builder/local_apt.py
@@ -0,0 +1,432 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Using a local APT.
+#
+# This file is part of Hydrilla
+#
+# Copyright (C) 2022 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+# Enable using with Python 3.7.
+from __future__ import annotations
+
+import zipfile
+import shutil
+import re
+import subprocess
+CP = subprocess.CompletedProcess
+from pathlib import Path, PurePosixPath
+from tempfile import TemporaryDirectory, NamedTemporaryFile
+from hashlib import sha256
+from urllib.parse import unquote
+from contextlib import contextmanager
+from typing import Optional, Iterable, Iterator
+
+from ..translations import smart_gettext as _
+from .piggybacking import Piggybacked
+from .common_errors import *
+
+here = Path(__file__).resolve().parent  # directory that contains this module
+
+"""
+Default cache directory to save APT configurations and downloaded GPG keys in.
+"""
+default_apt_cache_dir = Path.home() / '.cache' / 'hydrilla' / 'builder' / 'apt'  # created on demand by cache_dir()
+
+"""
+Default keyserver to use.
+"""
+default_keyserver = 'hkps://keyserver.ubuntu.com:443'  # passed to gpg.recv_keys() in apt_keyring()
+
+"""
+Default keys to download when using a local APT.
+"""
+default_keys = [  # key identifiers; piggybacked_system() puts these before any 'trusted_keys'
+ # Trisquel
+ 'E6C27099CA21965B734AEA31B4EFB9F38D8AEBF1',
+ '60364C9869F92450421F0C22B138CA450C05112F',
+ # Ubuntu
+ '630239CC130E1A7FD81A27B140976EAF437D05B5',
+ '790BC7277767219C42C86F933B4FE6ACC0B21F32',
+ 'F6ECB3762474EDA9D21B7022871920D1991BC93C',
+ # Debian
+ '6D33866EDD8FFA41C0143AEDDCC9EFBF77E11517',
+ '80D15823B7FD1561F9F7BCDDDC30D7C23CBBABEE',
+ 'AC530D520F2F3269F5E98313A48449044AAD5C5D'
+]
+
+"""sources.list file contents for known distros."""
+default_lists = {  # keyed by distro codename; looked up by SourcesList
+ 'nabia': [f'{type} http://archive.trisquel.info/trisquel/ nabia{suf} main'
+ for type in ('deb', 'deb-src')
+ for suf in ('', '-updates', '-security')]
+}
+
class GpgError(Exception):
    """Raised when something goes wrong while calling GPG."""
+
class AptError(SubprocessError):
    """
    Raised when something goes wrong while calling apt-* and dpkg-* commands.
    """
+
def run(command, **kwargs):
    """
    A wrapper around subprocess.run() that sets some default options.

    By default the command's output is captured as text and the command runs
    in a minimal environment with 'LANG' set to 'en_US'. The caller's 'PATH'
    is kept in that environment, because without it executables are only
    looked up in the interpreter's built-in default search path. Anything
    passed in 'kwargs' overrides the corresponding default.

    Return the subprocess.CompletedProcess object.
    """
    import os

    env = {'LANG': 'en_US'}
    if 'PATH' in os.environ:
        env['PATH'] = os.environ['PATH']

    # Merge instead of passing both sets of keywords so that a caller
    # supplying e.g. 'env' does not trigger a duplicate-keyword TypeError.
    options = {'env': env, 'capture_output': True, 'text': True}
    options.update(kwargs)

    return subprocess.run(command, **options)
+
class Apt:
    """
    Handle to one APT instance, identified by the path of its configuration
    file. It can be used to call apt-get commands against that instance.
    """
    def __init__(self, apt_conf: str) -> None:
        """Remember the path of the APT configuration file to use."""
        self.apt_conf = apt_conf

    def get(self, *args: str, **kwargs) -> CP:
        """
        Call apt-get with this instance's configuration file and 'args'.
        'kwargs' are forwarded to the process-running helper.

        Return the completed process. Raise AptError when apt-get cannot be
        executed or exits with a non-zero status.
        """
        argv = ['apt-get', '-c', self.apt_conf]
        argv.extend(args)

        try:
            completed = run(argv, **kwargs)
        except FileNotFoundError:
            raise AptError(
                _('couldnt_execute_{}_is_it_installed').format('apt-get')
            )

        if completed.returncode == 0:
            return completed

        raise AptError(_('command_{}_failed').format(' '.join(argv)),
                       completed)
+
def cache_dir() -> Path:
    """
    Make sure the directory used for caching data (APT configurations,
    keyrings) exists and return its path.
    """
    directory = default_apt_cache_dir
    directory.mkdir(parents=True, exist_ok=True)

    return directory
+
class SourcesList:
    """
    Representation of apt's sources.list contents.

    Attributes:
    'codename' - distro codename the default entries were taken for, or None
    'list' - the sources.list lines (explicit entries first)
    'has_extra_entries' - whether any explicit entries were supplied
    """
    def __init__(self, list: Iterable[str]=(),
                 codename: Optional[str]=None) -> None:
        """
        Initialize this SourcesList.

        'list' holds explicit sources.list lines; it is copied, never
        modified. When 'codename' is given, the default lines known for that
        distro are appended. Raise DistroError for an unknown codename.
        """
        self.codename = None
        self.list = [*list]
        self.has_extra_entries = bool(self.list)

        if codename is not None:
            if codename not in default_lists:
                raise DistroError(_('distro_{}_unknown').format(codename))

            self.codename = codename
            self.list.extend(default_lists[codename])

    def identity(self) -> str:
        """
        Produce a string that uniquely identifies this sources.list contents.

        It is the codename when only a distro's default entries are used and
        the SHA-256 hex digest of the sorted entries otherwise.
        """
        if self.codename and not self.has_extra_entries:
            return self.codename

        return sha256('\n'.join(sorted(self.list)).encode()).hexdigest()
+
def apt_conf(directory: Path) -> str:
    """
    Build the text of an APT configuration file that makes APT operate
    entirely inside the local APT 'directory'.

    'directory' must not contain any special characters including quotes and
    spaces, because it is pasted into the configuration without escaping.
    """
    settings = (
        ('Architecture',           'amd64'),
        ('Dir',                    f'{directory}'),
        ('Dir::State',             f'{directory}/var/lib/apt'),
        ('Dir::State::status',     f'{directory}/var/lib/dpkg/status'),
        ('Dir::Etc::SourceList',   f'{directory}/etc/apt.sources.list'),
        ('Dir::Etc::SourceParts',  ''),
        ('Dir::Cache',             f'{directory}/var/cache/apt'),
        ('pkgCacheGen::Essential', 'none'),
        ('Dir::Etc::Trusted',      f'{directory}/etc/trusted.gpg'),
    )

    lines = [f'{option} "{value}";' for option, value in settings]

    # The text starts with an empty line and ends with a newline.
    return '\n' + '\n'.join(lines) + '\n'
+
def apt_keyring(keys: list[str]) -> bytes:
    """
    Make sure all of 'keys' are present in the cached master keyring,
    fetching the missing ones from the default keyserver, and export them as
    a keyring suitable for passing to APT.

    Return the keyring as a bytes value that should be written to a file.
    Raise GpgError when the 'gnupg' module is unavailable or a key cannot be
    received.
    """
    try:
        from gnupg import GPG # type: ignore
    except ModuleNotFoundError:
        raise GpgError(_('couldnt_import_{}_is_it_installed').format('gnupg'))

    keyring_path = cache_dir() / 'master_keyring.gpg'
    gpg = GPG(keyring=str(keyring_path))

    for fingerprint in keys:
        already_present = gpg.list_keys(keys=[fingerprint]) != []
        if already_present:
            continue

        received = gpg.recv_keys(default_keyserver, fingerprint)
        if received.imported == 0:
            raise GpgError(_('gpg_couldnt_recv_key_{}').format(fingerprint))

    return gpg.export_keys(keys, armor=False, minimal=True)
+
def cache_apt_root(apt_root: Path, destination_zip: Path) -> None:
    """
    Zip an APT root directory for later use and move the zipfile to the
    requested destination.

    The archive is first written to a temporary file inside the cache
    directory. 'etc/apt.conf' and 'etc/trusted.gpg' are left out of it. The
    temporary file is removed if anything fails before the move.
    """
    to_skip = {Path('etc') / 'apt.conf', Path('etc') / 'trusted.gpg'}

    temporary_zip_path = None
    try:
        # delete=False keeps the file around after it is closed; closing it
        # before the move releases the handle and guarantees all data has
        # reached the file.
        with NamedTemporaryFile(suffix='.zip', prefix='tmp_',
                                dir=cache_dir(), delete=False) as tmpfile:
            temporary_zip_path = Path(tmpfile.name)

            with zipfile.ZipFile(tmpfile, 'w') as zf:
                for member in apt_root.rglob('*'):
                    relative = member.relative_to(apt_root)
                    if relative not in to_skip:
                        # This call will also properly add empty folders to
                        # zip file
                        zf.write(member, relative, zipfile.ZIP_DEFLATED)

        shutil.move(str(temporary_zip_path), str(destination_zip))
    finally:
        if temporary_zip_path is not None and temporary_zip_path.exists():
            temporary_zip_path.unlink()
+
def setup_local_apt(directory: Path, list: SourcesList, keys: list[str]) -> Apt:
    """
    Prepare, inside 'directory', everything APT needs to run without root
    rights: directory skeleton, configuration, keyring, dpkg status file and
    sources.list. A previously cached APT root matching 'list' is unpacked
    first if one exists. Afterwards 'apt-get update' is run and the resulting
    APT root is cached.

    'directory' must not contain any special characters including quotes and
    spaces and must be empty.

    Return an Apt object that can be used to call apt-get commands.
    """
    apt_root = directory / 'apt_root'
    etc_dir = apt_root / 'etc'
    var_dir = apt_root / 'var'

    conf_text = apt_conf(apt_root)
    keyring_bytes = apt_keyring(keys)

    apt_zipfile = cache_dir() / f'apt_{list.identity()}.zip'
    if apt_zipfile.exists():
        with zipfile.ZipFile(apt_zipfile) as cached:
            cached.extractall(apt_root)

    required_dirs = [
        var_dir / 'lib' / 'apt' / 'partial',
        var_dir / 'lib' / 'apt' / 'lists',
        var_dir / 'cache' / 'apt' / 'archives' / 'partial',
        etc_dir / 'apt' / 'preferences.d',
        var_dir / 'lib' / 'dpkg',
        var_dir / 'log' / 'apt',
    ]
    for required_dir in required_dirs:
        required_dir.mkdir(parents=True, exist_ok=True)

    conf_path = etc_dir / 'apt.conf'
    conf_path.write_text(conf_text)
    (etc_dir / 'trusted.gpg').write_bytes(keyring_bytes)
    (var_dir / 'lib' / 'dpkg' / 'status').touch()
    (etc_dir / 'apt.sources.list').write_text('\n'.join(list.list))

    apt = Apt(str(conf_path))
    apt.get('update')

    cache_apt_root(apt_root, apt_zipfile)

    return apt
+
@contextmanager
def local_apt(list: SourcesList, keys: list[str]) -> Iterator[Apt]:
    """
    Context manager that sets up a local APT in a fresh temporary directory
    and yields an Apt object for issuing apt-get commands.

    The temporary directory is removed when the context manager is closed.
    """
    with TemporaryDirectory() as tmpdir_name:
        yield setup_local_apt(Path(tmpdir_name), list, keys)
+
def download_apt_packages(list: SourcesList, keys: list[str],
                          packages: list[str], destination_dir: Path,
                          with_deps: bool) -> list[str]:
    """
    Set up a local APT using the given sources.list configuration and keys
    and use it to download 'packages' into 'destination_dir'.

    Both the .deb files and the files of the corresponding source packages
    are fetched. With 'with_deps' set, the list of packages is first expanded
    to everything a simulated 'apt-get install' reports it would install.

    Return value is a list of names of all downloaded files.
    """
    install_line_regex = re.compile(r'^Inst (?P<name>\S+) \((?P<version>\S+) ')

    deb_name_regex = re.compile(
        r'''
        ^
        (?P<name>[^_]+)
        _
        (?P<ver>[^_]+)
        _
        .+              # architecture (or 'all')
        \.deb
        $
        ''',
        re.VERBOSE)

    with local_apt(list, keys) as apt:
        if with_deps:
            cp = apt.get('install', '--yes', '--just-print', *packages)

            packages = []
            for line in cp.stdout.split('\n'):
                inst = install_line_regex.match(line)
                if inst:
                    packages.append(
                        f'{inst.group("name")}={inst.group("version")}'
                    )

            if not packages:
                raise AptError(_('apt_install_output_not_understood'), cp)

        # Download .debs indirectly to destination_dir by first placing them
        # in a temporary subdirectory.
        with TemporaryDirectory(dir=destination_dir) as td_str:
            td = Path(td_str)
            cp = apt.get('download', *packages, cwd=td)

            source_specs = []
            downloaded = []
            for deb_file in td.iterdir():
                parsed = deb_name_regex.match(deb_file.name)
                if parsed is None:
                    msg = _('apt_download_gave_bad_filename_{}')\
                        .format(deb_file.name)
                    raise AptError(msg, cp)

                pkg_name = unquote(parsed.group('name'))
                pkg_ver = unquote(parsed.group('ver'))
                source_specs.append(f'{pkg_name}={pkg_ver}')
                downloaded.append(deb_file.name)

            apt.get('source', '--download-only', *source_specs, cwd=td)

            # Whatever appeared next to the .debs is a source package file.
            deb_names = set(downloaded)
            downloaded.extend(entry.name for entry in td.iterdir()
                              if entry.name not in deb_names)

            for filename in downloaded:
                shutil.move(td / filename, destination_dir / filename)

    return downloaded
+
@contextmanager
def piggybacked_system(piggyback_def: dict, foreign_packages: Optional[Path]) \
        -> Iterator[Piggybacked]:
    """
    Resolve resources from APT. Optionally, use package files (.deb's, etc.)
    from a specified directory instead of resolving and downloading them.

    Package files are looked for in (and, when none are there yet, downloaded
    to) the 'apt' subdirectory of 'foreign_packages'. With 'foreign_packages'
    set to None, a temporary directory is used instead. All .deb files found
    are unpacked and a Piggybacked object describing the result is yielded.

    The directories and files created for the yielded Piggybacked object shall
    be deleted when this context manager gets closed.

    Raise AptError when 'dpkg-deb' cannot be executed or fails.
    """
    assert piggyback_def['system'] == 'apt'

    with TemporaryDirectory() as td_str:
        td = Path(td_str)
        root = td / 'root'
        root.mkdir()

        if foreign_packages is None:
            archives = td / 'archives'
            archives.mkdir()
        else:
            archives = foreign_packages / 'apt'
            archives.mkdir(exist_ok=True)

        # Only download when no package files have been provided.
        if not any(archives.glob('*.deb')):
            sources_list = SourcesList(piggyback_def.get('sources_list', []),
                                       piggyback_def.get('distribution'))
            packages = piggyback_def['packages']
            with_deps = piggyback_def['dependencies']
            pgp_keys = [
                *default_keys,
                *piggyback_def.get('trusted_keys', [])
            ]

            download_apt_packages(
                list=sources_list,
                keys=pgp_keys,
                packages=packages,
                destination_dir=archives,
                with_deps=with_deps
            )

        for deb in archives.glob('*.deb'):
            command = ['dpkg-deb', '-x', str(deb), str(root)]
            try:
                cp = run(command)
            except FileNotFoundError:
                # The message template has to be translated first and only
                # then formatted; otherwise the lookup key never matches.
                msg = _('couldnt_execute_{}_is_it_installed')\
                    .format('dpkg-deb')
                raise AptError(msg)

            if cp.returncode != 0:
                msg = _('command_{}_failed').format(' '.join(command))
                raise AptError(msg, cp)

        # Every unpacked package is expected to ship its license information
        # as usr/share/doc/<package>/copyright.
        docs_dir = root / 'usr' / 'share' / 'doc'
        copyright_paths = [p / 'copyright' for p in docs_dir.iterdir()] \
            if docs_dir.exists() else []
        copyright_pure_paths = [PurePosixPath('.apt-root') / p.relative_to(root)
                                for p in copyright_paths if p.exists()]

        standard_depends = piggyback_def.get('depend_on_base_packages', True)
        must_depend = [{'identifier': 'apt-common-licenses'}] \
            if standard_depends else []

        yield Piggybacked(
            archives={'apt': archives},
            roots={'.apt-root': root},
            package_license_files=copyright_pure_paths,
            resource_must_depend=must_depend
        )
diff --git a/src/hydrilla/builder/piggybacking.py b/src/hydrilla/builder/piggybacking.py
new file mode 100644
index 0000000..5813509
--- /dev/null
+++ b/src/hydrilla/builder/piggybacking.py
@@ -0,0 +1,117 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Handling of software packaged for other distribution systems.
+#
+# This file is part of Hydrilla
+#
+# Copyright (C) 2022 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+"""
+This module contains definitions that may be reused by multiple piggybacked
+software system backends.
+"""
+
+# Enable using with Python 3.7.
+from __future__ import annotations
+
+from pathlib import Path, PurePosixPath
+from typing import Optional, Iterable
+
+from ..translations import smart_gettext as _
+from .common_errors import *
+
+here = Path(__file__).resolve().parent
+
class Piggybacked:
    """
    Store information about foreign resources in use.

    Public attributes:
        'resource_must_depend' (read-only)
        'package_license_files' (read-only)
    """
    def __init__(self, archives: Optional[dict[str, Path]]=None,
                 roots: Optional[dict[str, Path]]=None,
                 package_license_files: Optional[list[PurePosixPath]]=None,
                 resource_must_depend: Optional[list[dict]]=None):
        """
        Initialize this Piggybacked object.

        'archives' maps piggybacked system names to directories that contain
        package(s)' archive files. An 'archives' object may look like
        {'apt': PosixPath('/path/to/dir/with/debs/and/tarballs')}.

        'roots' associates directory names to be virtually inserted under
        Hydrilla source package directory with paths to real filesystem
        directories that hold their desired contents, i.e. unpacked foreign
        packages.

        'package_license_files' lists paths to license files that should be
        included with the Haketilo package that will be produced. The paths are
        to be resolved using 'roots' dictionary.

        'resource_must_depend' lists names of Haketilo packages that the
        produced resources will additionally depend on. This is meant to help
        distribute common licenses with a separate Haketilo package.

        Every argument that is omitted (or passed as None) is replaced with a
        fresh empty container, so that instances never share mutable state.
        """
        self.archives = {} if archives is None else archives
        self.roots = {} if roots is None else roots
        self.package_license_files = \
            [] if package_license_files is None else package_license_files
        self.resource_must_depend = \
            [] if resource_must_depend is None else resource_must_depend

    def resolve_file(self, file_ref_name: PurePosixPath) -> Optional[Path]:
        """
        'file_ref_name' is a path as may appear in an index.json file. Check if
        the file belongs to one of the roots we have and return either a path
        to the relevant file under this root or None.

        It is not being checked whether the file actually exists in the
        filesystem.

        FileReferenceError is raised when the path names one of our roots but
        resolves to a location outside of it.
        """
        parts = file_ref_name.parts
        if not parts:
            return None

        root_path = self.roots.get(parts[0])
        if root_path is None:
            return None

        # Resolve the root as well. Otherwise a root that is reached through a
        # symlink would never be a prefix of the resolved file path.
        root_path = root_path.resolve()
        path = root_path.joinpath(*parts[1:]).resolve()

        # relative_to() is used instead of is_relative_to() because the latter
        # only exists in Python 3.9 and newer.
        try:
            path.relative_to(root_path)
        except ValueError:
            raise FileReferenceError(_('loading_{}_outside_piggybacked_dir')
                                     .format(file_ref_name))

        return path

    def archive_files(self) -> Iterable[tuple[PurePosixPath, Path]]:
        """
        Yield all archive files in use. Each yielded tuple holds file's desired
        path relative to the piggybacked archives directory to be created and
        its current real path.
        """
        for system, real_dir in self.archives.items():
            for path in real_dir.rglob('*'):
                yield PurePosixPath(system) / path.relative_to(real_dir), path
diff --git a/src/hydrilla/exceptions.py b/src/hydrilla/exceptions.py
new file mode 100644
index 0000000..112d98c
--- /dev/null
+++ b/src/hydrilla/exceptions.py
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Custom exceptions and logging.
+#
+# This file is part of Hydrilla&Haketilo.
+#
+# Copyright (C) 2021, 2022 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+"""
+This module contains custom exception types used by Hydrilla and Haketilo code.
+"""
+
+# Enable using with Python 3.7.
+from __future__ import annotations
+
class HaketiloException(Exception):
    """
    Type used for exceptions generated by Haketilo code. Instances of this type
    are expected to carry localized error messages.
    """
diff --git a/src/hydrilla/item_infos.py b/src/hydrilla/item_infos.py
new file mode 100644
index 0000000..c366ab5
--- /dev/null
+++ b/src/hydrilla/item_infos.py
@@ -0,0 +1,344 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Reading resources, mappings and other JSON documents from the filesystem.
+#
+# This file is part of Hydrilla&Haketilo
+#
+# Copyright (C) 2021, 2022 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+"""
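+This module defines data classes that describe Haketilo items (resources and
+mappings) together with helpers for building them from JSON objects.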
+"""
+
+# Enable using with Python 3.7.
+from __future__ import annotations
+
+import typing as t
+import dataclasses as dc
+
+from pathlib import Path, PurePath
+
+from immutables import Map, MapMutation
+
+from . import versions, json_instances
+from .url_patterns import parse_pattern, ParsedUrl
+from .exceptions import HaketiloException
+from .translations import smart_gettext as _
+
+VerTuple = t.Tuple[int, ...]
+
@dc.dataclass(frozen=True, unsafe_hash=True)
class ItemRef:
    """Immutable, hashable reference to an item, made through its identifier."""
    identifier: str
+
+RefObjs = t.Sequence[t.Mapping[str, t.Any]]
+
def make_item_refs_seq(ref_objs: RefObjs) -> tuple[ItemRef, ...]:
    """
    Build a tuple of ItemRef objects from a sequence of mappings, reading the
    'identifier' key of each of them.
    """
    identifiers = (ref_obj['identifier'] for ref_obj in ref_objs)

    return tuple(map(ItemRef, identifiers))
+
def make_required_mappings(refs_objs: t.Any, schema_compat: int) \
    -> tuple[ItemRef, ...]:
    """
    Return item references made from 'refs_objs' or an empty tuple when
    'schema_compat' is lower than 2.
    """
    if schema_compat >= 2:
        return make_item_refs_seq(refs_objs)

    return ()
+
@dc.dataclass(frozen=True, unsafe_hash=True)
class FileRef:
    """
    Immutable, hashable reference to a file, made of the file's name and the
    'sha256' value recorded for it.
    """
    name:   str
    sha256: str
+
def make_file_refs_seq(ref_objs: RefObjs) -> tuple[FileRef, ...]:
    """
    Build a tuple of FileRef objects from a sequence of mappings, reading the
    'file' and 'sha256' keys of each of them.
    """
    file_refs = []

    for ref_obj in ref_objs:
        file_refs.append(FileRef(ref_obj['file'], ref_obj['sha256']))

    return tuple(file_refs)
+
@dc.dataclass(frozen=True, unsafe_hash=True)
class GeneratedBy:
    """
    Name and optional version, as read from an item's 'generated_by' object.
    """
    name:    str
    version: t.Optional[str]

    @staticmethod
    def make(generated_obj: t.Optional[t.Mapping[str, t.Any]]) -> \
        t.Optional['GeneratedBy']:
        """
        Build a GeneratedBy from a mapping that holds a 'name' key and,
        optionally, a 'version' key. Return None if None was passed.
        """
        if generated_obj is not None:
            return GeneratedBy(
                generated_obj['name'],
                generated_obj.get('version')
            )

        return None
+
@dc.dataclass(frozen=True, unsafe_hash=True)
class ItemInfoBase:
    """
    Data common to item descriptions of all types.

    Only 'repository', 'version' and 'identifier' take part in the generated
    __hash__(); every other field is declared with hash=False.
    """
    repository:        str                     # takes part in __hash__()
    source_name:       str                     = dc.field(hash=False)
    source_copyright:  tuple[FileRef, ...]     = dc.field(hash=False)
    version:           VerTuple                # takes part in __hash__()
    identifier:        str                     # takes part in __hash__()
    uuid:              t.Optional[str]         = dc.field(hash=False)
    long_name:         str                     = dc.field(hash=False)
    required_mappings: tuple[ItemRef, ...]     = dc.field(hash=False)
    generated_by:      t.Optional[GeneratedBy] = dc.field(hash=False)

    def path_relative_to_type(self) -> str:
        """
        Get a relative path to this item's JSON definition with respect to
        directory containing items of this type.
        """
        version_str = versions.version_string(self.version)

        return f'{self.identifier}/{version_str}'

    def path(self) -> str:
        """
        Get a relative path to this item's JSON definition with respect to
        malcontent directory containing loadable items.
        """
        relative_to_type = self.path_relative_to_type()

        return f'{self.type_name}/{relative_to_type}'

    @property
    def versioned_identifier(self):
        """Item's identifier and version string joined with a hyphen."""
        version_str = versions.version_string(self.version)

        return f'{self.identifier}-{version_str}'

    @staticmethod
    def _get_base_init_kwargs(
            item_obj:      t.Mapping[str, t.Any],
            schema_compat: int,
            repository:    str
    ) -> t.Mapping[str, t.Any]:
        """
        Collect, from the item's JSON object, values for all fields declared by
        this class. The result is meant to be passed as keyword arguments to a
        subclass constructor.
        """
        copyright_refs = make_file_refs_seq(item_obj['source_copyright'])

        normalized_ver = versions.normalize_version(item_obj['version'])

        mapping_refs = make_required_mappings(
            item_obj.get('required_mappings', []),
            schema_compat
        )

        generator = GeneratedBy.make(item_obj.get('generated_by'))

        return Map(
            repository        = repository,
            source_name       = item_obj['source_name'],
            source_copyright  = copyright_refs,
            version           = normalized_ver,
            identifier        = item_obj['identifier'],
            uuid              = item_obj.get('uuid'),
            long_name         = item_obj['long_name'],
            required_mappings = mapping_refs,
            generated_by      = generator
        )

    # Class property (not a dataclass field, as it carries no annotation).
    type_name = '!INVALID!'
+
+InstanceOrPath = t.Union[Path, str, dict[str, t.Any]]
+
@dc.dataclass(frozen=True, unsafe_hash=True)
class ResourceInfo(ItemInfoBase):
    """
    Description of a single resource: the base item data extended with a
    revision number, dependencies and scripts.
    """
    revision:     int                 = dc.field(hash=False)
    dependencies: tuple[ItemRef, ...] = dc.field(hash=False)
    scripts:      tuple[FileRef, ...] = dc.field(hash=False)

    @property
    def versioned_identifier(self):
        """Base class' versioned identifier with '-<revision>' appended."""
        # 'versioned_identifier' is a property in the base class, so it has to
        # be read as an attribute. Calling it would attempt to call a string
        # and raise TypeError.
        return f'{super().versioned_identifier}-{self.revision}'

    @staticmethod
    def make(
            item_obj:      t.Mapping[str, t.Any],
            schema_compat: int,
            repository:    str
    ) -> 'ResourceInfo':
        """
        Build a ResourceInfo from the resource's JSON object. The 'revision'
        key is required; 'dependencies' and 'scripts' default to empty.
        """
        base_init_kwargs = ItemInfoBase._get_base_init_kwargs(
            item_obj,
            schema_compat,
            repository
        )

        return ResourceInfo(
            **base_init_kwargs,

            revision     = item_obj['revision'],
            dependencies = make_item_refs_seq(item_obj.get('dependencies', [])),
            scripts      = make_file_refs_seq(item_obj.get('scripts', [])),
        )

    @staticmethod
    def load(instance_or_path: 'InstanceOrPath', repository: str) \
        -> 'ResourceInfo':
        """
        Make a ResourceInfo out of an already-parsed JSON object or a path to a
        JSON file, using _load_item_info().
        """
        return _load_item_info(ResourceInfo, instance_or_path, repository)

    # Class property.
    type_name = 'resource'
+
def make_payloads(payloads_obj: t.Mapping[str, t.Any]) \
    -> t.Mapping[ParsedUrl, ItemRef]:
    """
    Build an immutable map from parsed URL patterns to item references.

    Keys of 'payloads_obj' are URL patterns; each value is a mapping with an
    'identifier' key. Every ParsedUrl produced by parse_pattern() for a given
    pattern gets associated with the same ItemRef.
    """
    def pairs() -> t.Iterator[tuple[ParsedUrl, ItemRef]]:
        for pattern, ref_obj in payloads_obj.items():
            ref = ItemRef(ref_obj['identifier'])
            for parsed in parse_pattern(pattern):
                yield parsed, ref

    return Map(list(pairs()))
+
@dc.dataclass(frozen=True, unsafe_hash=True)
class MappingInfo(ItemInfoBase):
    """
    Description of a single mapping: the base item data extended with a map of
    payloads keyed by parsed URL patterns.
    """
    payloads: t.Mapping[ParsedUrl, ItemRef] = dc.field(hash=False)

    @staticmethod
    def make(
            item_obj:      t.Mapping[str, t.Any],
            schema_compat: int,
            repository:    str
    ) -> 'MappingInfo':
        """
        Build a MappingInfo from the mapping's JSON object. A missing
        'payloads' key is treated as an empty object.
        """
        common_kwargs = ItemInfoBase._get_base_init_kwargs(
            item_obj,
            schema_compat,
            repository
        )
        payloads = make_payloads(item_obj.get('payloads', {}))

        return MappingInfo(**common_kwargs, payloads=payloads)

    @staticmethod
    def load(instance_or_path: 'InstanceOrPath', repository: str) \
        -> 'MappingInfo':
        """
        Make a MappingInfo out of an already-parsed JSON object or a path to a
        JSON file, using _load_item_info().
        """
        return _load_item_info(MappingInfo, instance_or_path, repository)

    # Class property.
    type_name = 'mapping'
+
+
+LoadedType = t.TypeVar('LoadedType', ResourceInfo, MappingInfo)
+
def _load_item_info(
        info_type:        t.Type[LoadedType],
        instance_or_path: InstanceOrPath,
        repository:       str
) -> LoadedType:
    """
    Read and validate a mapping/resource description, then build an object of
    'info_type' from it.

    The schema to validate against is selected using 'info_type.type_name'.
    """
    loaded = json_instances.read_instance(instance_or_path)

    compat = json_instances.validate_instance(
        loaded,
        f'api_{info_type.type_name}_description-{{}}.schema.json'
    )

    # Successful validation tells us the loaded instance is a dict.
    item_obj = t.cast('dict[str, t.Any]', loaded)

    return info_type.make(item_obj, compat, repository)
+
+
+VersionedType = t.TypeVar('VersionedType', ResourceInfo, MappingInfo)
+
@dc.dataclass(frozen=True)
class VersionedItemInfo(t.Generic[VersionedType]):
    """
    Stores data of multiple versions of given resource/mapping.

    Objects of this type are immutable; register() and unregister() return
    modified copies.
    """
    uuid:         t.Optional[str]              = None
    identifier:   str                          = '<dummy>'
    _by_version:  Map[VerTuple, VersionedType] = Map()
    _initialized: bool                         = False

    def register(self, item_info: VersionedType) -> 'VersionedInfoSelfType':
        """
        Make item info queryable by version. Perform sanity checks for uuid.

        HaketiloException is raised when both this object and 'item_info' have
        a uuid and the two differ.
        """
        new_identifier = item_info.identifier
        assert not self._initialized or new_identifier == self.identifier

        known_uuid, new_uuid = self.uuid, item_info.uuid

        if known_uuid is not None and new_uuid is not None \
           and known_uuid != new_uuid:
            raise HaketiloException(_('uuid_mismatch_{identifier}')
                                    .format(identifier=new_identifier))

        # A uuid that is already known takes precedence.
        uuid: t.Optional[str] = new_uuid if known_uuid is None else known_uuid

        return VersionedItemInfo(
            identifier   = new_identifier,
            uuid         = uuid,
            _by_version  = self._by_version.set(item_info.version, item_info),
            _initialized = True
        )

    def unregister(self, version: VerTuple) -> 'VersionedInfoSelfType':
        """
        Return a copy that no longer holds info for 'version'. A version that
        is not registered is silently ignored.
        """
        try:
            remaining = self._by_version.delete(version)
        except KeyError:
            remaining = self._by_version

        return dc.replace(self, _by_version=remaining)

    def is_empty(self) -> bool:
        """Tell whether no version is registered."""
        return len(self._by_version) == 0

    def newest_version(self) -> VerTuple:
        """Return the greatest registered version. Must not be empty."""
        assert not self.is_empty()

        return max(self._by_version.keys())

    def get_newest(self) -> VersionedType:
        """Find and return info of the newest version of item."""
        newest_info = self._by_version[self.newest_version()]
        assert newest_info is not None

        return newest_info

    def get_by_ver(self, ver: t.Iterable[int]) -> t.Optional[VersionedType]:
        """
        Find and return info of the specified version of the item (or None if
        absent).
        """
        wanted = tuple(ver)

        return self._by_version.get(wanted)

    def get_all(self) -> t.Iterator[VersionedType]:
        """Generate item info for all its versions, from oldest to newest."""
        yield from (
            self._by_version[version]
            for version in sorted(self._by_version.keys())
        )
+
+# Below we define 1 type used by recursively-typed VersionedItemInfo.
+VersionedInfoSelfType = VersionedItemInfo[VersionedType]
diff --git a/src/hydrilla/json_instances.py b/src/hydrilla/json_instances.py
new file mode 100644
index 0000000..40b213b
--- /dev/null
+++ b/src/hydrilla/json_instances.py
@@ -0,0 +1,207 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Handling JSON objects.
+#
+# This file is part of Hydrilla&Haketilo.
+#
+# Copyright (C) 2021, 2022 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+"""
+This module contains utilities for reading and validation of JSON instances.
+"""
+
+# Enable using with Python 3.7.
+from __future__ import annotations
+
+import re
+import json
+import os
+import typing as t
+
+from pathlib import Path, PurePath
+
+from jsonschema import RefResolver, Draft7Validator # type: ignore
+
+from .translations import smart_gettext as _
+from .exceptions import HaketiloException
+from .versions import parse_version
+
+here = Path(__file__).resolve().parent
+
_strip_comment_re = re.compile(r'''
^ # match from the beginning of each line
( # catch the part before '//' comment
  (?: # this group matches either a string or a single out-of-string character
    [^"/] |
    "
    (?: # this group matches any in-a-string character
      [^"\\] |          # match any normal character
      \\[^u] |          # match any escaped character like '\f' or '\n'
      \\u[a-fA-F0-9]{4} # match an escape
    )*
    "
  )*
)
# expect either end-of-line or a comment:
# * unterminated strings will cause matching to fail
# * bad comment (with '/' instead of '//') will be indicated by second group
#   having length 1 instead of 2 or 0
(//?|$)
''', re.VERBOSE)

def strip_json_comments(text: str) -> str:
    """
    Accept JSON text with optional C++-style ('//') comments and return the text
    with comments removed. Consecutive slashes inside strings are handled
    properly. A spurious single slash ('/') shall generate an error. Errors in
    JSON itself shall be ignored.
    """
    kept_lines = []

    for number, raw_line in enumerate(text.split('\n'), start=1):
        found = _strip_comment_re.match(raw_line)

        if found is None:
            # Unterminated string. Keep the line as it is and let the json
            # module report the problem.
            kept_lines.append(raw_line)
            continue

        code_part, comment_start = found[1], found[2]

        if len(comment_start) == 1:
            # A lone '/' was found where '//' or end of line was expected.
            msg_fmt = _('bad_json_comment_line_{line_num}_char_{char_num}')

            raise HaketiloException(msg_fmt.format(
                line_num = number,
                char_num = len(code_part) + 1
            ))

        kept_lines.append(code_part)

    return '\n'.join(kept_lines)
+
_schema_name_re = re.compile(r'''
(?P<name_base>[^/]*)
-
(?P<ver>
  (?P<major>[1-9][0-9]*)
  (?: # this repeated group matches the remaining version numbers
    \.
    (?:[1-9][0-9]*|0)
  )*
)
\.schema\.json
$
''', re.VERBOSE)

# Map every name a schema can be requested under to the schema's file. A file
# named 'base-1.2.3.schema.json' gets registered under versions '1', '1.2' and
# '1.2.3'.
schema_paths: dict[str, Path] = {}
for path in (here / 'schemas').rglob('*.schema.json'):
    match = _schema_name_re.match(path.name)
    assert match is not None

    schema_name_base = match['name_base']
    schema_ver_list = match['ver'].split('.')

    for i in range(len(schema_ver_list)):
        schema_ver = '.'.join(schema_ver_list[:i+1])
        schema_paths[f'{schema_name_base}-{schema_ver}.schema.json'] = path

# Make each schema reachable through its full URI as well. The dictionary with
# additions is fully built before update() starts modifying 'schema_paths'.
schema_paths.update({
    f'https://hydrilla.koszko.org/schemas/{name}': path
    for name, path in schema_paths.items()
})
+
# Cache of already-loaded schemas, keyed by schema file path.
schemas: dict[Path, dict[str, t.Any]] = {}

def _get_schema(schema_name: str) -> dict[str, t.Any]:
    """
    Return loaded JSON of the requested schema. Cache results.

    'schema_name' has to be one of the keys of 'schema_paths'. For any other
    name HaketiloException is raised.
    """
    path = schema_paths.get(schema_name)
    if path is None:
        raise HaketiloException(_('unknown_schema_{}').format(schema_name))

    if path not in schemas:
        # Decode explicitly as UTF-8 instead of relying on the locale's
        # preferred encoding, which need not be suitable for JSON files.
        schemas[path] = json.loads(path.read_text(encoding='utf-8'))

    return schemas[path]
+
def validator_for(schema: t.Union[str, dict[str, t.Any]]) -> Draft7Validator:
    """
    Prepare a validator for the provided schema.

    'schema' is either an already-loaded schema object or a schema name to be
    looked up with _get_schema(). References made from the schema through
    'https' URIs are resolved with _get_schema() as well.
    """
    schema_obj = _get_schema(schema) if isinstance(schema, str) else schema

    return Draft7Validator(
        schema_obj,
        resolver = RefResolver(
            base_uri = schema_obj['$id'],
            referrer = schema_obj,
            handlers = {'https': _get_schema}
        )
    )
+
def parse_instance(text: str) -> object:
    """
    Parse 'text' as JSON in which '//' comments are additionally allowed and
    return the resulting object.
    """
    uncommented = strip_json_comments(text)

    return json.loads(uncommented)
+
InstanceOrPath = t.Union[Path, str, dict[str, t.Any]]

def read_instance(instance_or_path: InstanceOrPath) -> object:
    """
    Return the JSON instance designated by the argument.

    A dict is assumed to be an already-parsed instance and gets returned
    unchanged. Anything else is treated as a path to a file with JSON text
    (in which '//' comments are allowed) that gets read and parsed.

    HaketiloException is raised if the text could not be parsed. The original
    error is attached to it as its cause.
    """
    if isinstance(instance_or_path, dict):
        return instance_or_path

    # Decode explicitly as UTF-8 instead of relying on the locale's preferred
    # encoding.
    with open(instance_or_path, 'rt', encoding='utf-8') as handle:
        text = handle.read()

    try:
        return parse_instance(text)
    except Exception as ex:
        # Catching Exception (and not everything) lets KeyboardInterrupt and
        # SystemExit propagate untouched.
        raise HaketiloException(_('text_in_{}_not_valid_json')
                                .format(instance_or_path)) from ex
+
def get_schema_version(instance: object) -> tuple[int, ...]:
    """
    Parse passed object's "$schema" property and return the schema version tuple.

    HaketiloException is raised when 'instance' is not a dict, when its
    "$schema" is not a string or when no version could be found in it.
    """
    schema_id = instance.get('$schema') if isinstance(instance, dict) else None

    found = None
    if type(schema_id) is str:
        found = _schema_name_re.search(schema_id)

    if found is None:
        raise HaketiloException(_('no_schema_number_in_instance'))

    return parse_version(found.group('ver'))
+
def get_schema_major_number(instance: object) -> int:
    """
    Parse passed object's "$schema" property and return the first (major)
    number of the schema version.
    """
    version = get_schema_version(instance)

    return version[0]
+
def validate_instance(instance: object, schema_name_fmt: str) -> int:
    """
    Validate 'instance' against a schema and return the major number of the
    schema version the instance declares.

    The name of the schema to use is obtained by formatting 'schema_name_fmt'
    with that major number.
    """
    major = get_schema_major_number(instance)

    validator_for(schema_name_fmt.format(major)).validate(instance)

    return major
diff --git a/src/hydrilla/locales/en_US/LC_MESSAGES/messages.po b/src/hydrilla/locales/en_US/LC_MESSAGES/messages.po
new file mode 100644
index 0000000..12abee5
--- /dev/null
+++ b/src/hydrilla/locales/en_US/LC_MESSAGES/messages.po
@@ -0,0 +1,252 @@
+# SPDX-License-Identifier: CC0-1.0
+#
+# English (United States) translations for hydrilla.
+# Copyright (C) 2021, 2022 Wojtek Kosior <koszko@koszko.org>
+# Available under the terms of Creative Commons Zero v1.0 Universal.
+msgid ""
+msgstr ""
+"Project-Id-Version: hydrilla 2.0\n"
+"Report-Msgid-Bugs-To: koszko@koszko.org\n"
+"POT-Creation-Date: 2022-06-07 10:23+0200\n"
+"PO-Revision-Date: 2022-02-12 00:00+0000\n"
+"Last-Translator: Wojtek Kosior <koszko@koszko.org>\n"
+"Language: en_US\n"
+"Language-Team: en_US <koszko@koszko.org>\n"
+"Plural-Forms: nplurals=2; plural=(n != 1)\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=utf-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: Babel 2.8.0\n"
+
+#: src/hydrilla/builder/build.py:83 src/hydrilla/builder/local_apt.py:116
+#: src/hydrilla/builder/local_apt.py:408
+msgid "couldnt_execute_{}_is_it_installed"
+msgstr "Could not execute '{}'. Is the tool installed and reachable via PATH?"
+
+#: src/hydrilla/builder/build.py:87 src/hydrilla/builder/local_apt.py:120
+#: src/hydrilla/builder/local_apt.py:412
+msgid "command_{}_failed"
+msgstr "The following command finished execution with a non-zero exit status: {}"
+
+#: src/hydrilla/builder/build.py:160
+msgid "unknown_schema_package_source_{}"
+msgstr ""
+"The provided JSON at '{}' does not use any of the known package source "
+"JSON schemas."
+
+#: src/hydrilla/builder/build.py:196
+msgid "path_contains_double_dot_{}"
+msgstr ""
+"Attempt to load '{}' which includes a forbidden parent reference ('..') "
+"in the path."
+
+#: src/hydrilla/builder/build.py:203
+msgid "loading_{}_outside_package_dir"
+msgstr "Attempt to load '{}' which lies outside package source directory."
+
+#: src/hydrilla/builder/build.py:207
+msgid "loading_reserved_index_json"
+msgstr "Attempt to load 'index.json' which is a reserved filename."
+
+#: src/hydrilla/builder/build.py:214
+msgid "referenced_file_{}_missing"
+msgstr "Referenced file '{}' is missing."
+
+#: src/hydrilla/builder/build.py:396
+msgid "report_spdx_not_in_copyright_list"
+msgstr ""
+"Told to generate 'report.spdx' but 'report.spdx' is not listed among "
+"copyright files. Refusing to proceed."
+
+#: src/hydrilla/builder/build.py:473
+msgid "build_package_from_srcdir_to_dstdir"
+msgstr ""
+"Build Hydrilla package from `srcdir` and write the resulting files under "
+"`dstdir`."
+
+#: src/hydrilla/builder/build.py:475
+msgid "source_directory_to_build_from"
+msgstr "Source directory to build from."
+
+#: src/hydrilla/builder/build.py:477
+msgid "path_instead_of_index_json"
+msgstr ""
+"Path to file to be processed instead of index.json (if not absolute, "
+"resolved relative to srcdir)."
+
+#: src/hydrilla/builder/build.py:479
+msgid "path_instead_for_piggyback_files"
+msgstr ""
+"Path to a non-standard directory with foreign packages' archive files to "
+"use."
+
+#: src/hydrilla/builder/build.py:481
+msgid "built_package_files_destination"
+msgstr "Destination directory to write built package files to."
+
+#: src/hydrilla/builder/build.py:483 src/hydrilla/server/serve.py:582
+#: src/hydrilla/server/serve.py:604 src/hydrilla/server/serve.py:647
+#, python-format
+msgid "%(prog)s_%(version)s_license"
+msgstr ""
+"%(prog)s %(version)s\n"
+"Copyright (C) 2021,2022 Wojtek Kosior and contributors.\n"
+"License GPLv3+: GNU AGPL version 3 or later "
+"<https://gnu.org/licenses/gpl.html>\n"
+"This is free software: you are free to change and redistribute it.\n"
+"There is NO WARRANTY, to the extent permitted by law."
+
+#: src/hydrilla/builder/build.py:484 src/hydrilla/server/serve.py:605
+#: src/hydrilla/server/serve.py:648
+msgid "version_printing"
+msgstr "Print version information and exit."
+
+#: src/hydrilla/builder/common_errors.py:58
+msgid "STDOUT_OUTPUT_heading"
+msgstr "## Command's standard output ##"
+
+#: src/hydrilla/builder/common_errors.py:61
+msgid "STDERR_OUTPUT_heading"
+msgstr "## Command's standard error output ##"
+
+#: src/hydrilla/builder/local_apt.py:145
+msgid "distro_{}_unknown"
+msgstr "Attempt to use an unknown software distribution '{}'."
+
+#: src/hydrilla/builder/local_apt.py:189
+msgid "couldnt_import_{}_is_it_installed"
+msgstr ""
+"Could not import '{}'. Is the module installed and visible to this Python"
+" instance?"
+
+#: src/hydrilla/builder/local_apt.py:197
+msgid "gpg_couldnt_recv_key_{}"
+msgstr "Could not import PGP key '{}'."
+
+#: src/hydrilla/builder/local_apt.py:311
+msgid "apt_install_output_not_understood"
+msgstr "The output of an 'apt-get install' command was not understood."
+
+#: src/hydrilla/builder/local_apt.py:337
+msgid "apt_download_gave_bad_filename_{}"
+msgstr "The 'apt-get download' command produced a file with unexpected name '{}'."
+
+#: src/hydrilla/builder/piggybacking.py:100
+msgid "loading_{}_outside_piggybacked_dir"
+msgstr ""
+"Attempt to load '{}' which lies outside piggybacked packages files root "
+"directory."
+
+#: src/hydrilla/server/serve.py:126
+#, python-brace-format
+msgid "uuid_mismatch_{identifier}"
+msgstr "Two different uuids were specified for item '{identifier}'."
+
+#: src/hydrilla/server/serve.py:133
+#, python-brace-format
+msgid "version_clash_{identifier}_{version}"
+msgstr "Version '{version}' specified more than once for item '{identifier}'."
+
+#: src/hydrilla/server/serve.py:249 src/hydrilla/server/serve.py:261
+msgid "invalid_URL_{}"
+msgstr "Invalid URL/pattern: '{}'."
+
+#: src/hydrilla/server/serve.py:253
+msgid "disallowed_protocol_{}"
+msgstr "Disallowed protocol: '{}'."
+
+#: src/hydrilla/server/serve.py:306
+msgid "malcontent_dir_path_not_dir_{}"
+msgstr "Provided 'malcontent_dir' path does not name a directory: {}"
+
+#: src/hydrilla/server/serve.py:325
+msgid "couldnt_load_item_from_{}"
+msgstr "Couldn't load item from {}."
+
+#: src/hydrilla/server/serve.py:350
+msgid "item_{item}_in_file_{file}"
+msgstr "Item {item} incorrectly present under {file}."
+
+#: src/hydrilla/server/serve.py:356
+msgid "item_version_{ver}_in_file_{file}"
+msgstr "Item version {ver} incorrectly present under {file}."
+
+#: src/hydrilla/server/serve.py:379
+msgid "no_dep_{resource}_{ver}_{dep}"
+msgstr "Unknown dependency '{dep}' of resource '{resource}', version '{ver}'."
+
+#: src/hydrilla/server/serve.py:390
+msgid "no_payload_{mapping}_{ver}_{payload}"
+msgstr "Unknown payload '{payload}' of mapping '{mapping}', version '{ver}'."
+
+#: src/hydrilla/server/serve.py:402
+msgid "no_mapping_{required_by}_{ver}_{required}"
+msgstr "Unknown mapping '{required}' required by '{required_by}', version '{ver}'."
+
+#: src/hydrilla/server/serve.py:429
+msgid "couldnt_register_{mapping}_{ver}_{pattern}"
+msgstr ""
+"Couldn't register mapping '{mapping}', version '{ver}' (pattern "
+"'{pattern}')."
+
+#: src/hydrilla/server/serve.py:590
+msgid "serve_hydrilla_packages_explain_wsgi_considerations"
+msgstr ""
+"Serve Hydrilla packages.\n"
+"\n"
+"This command is meant to be a quick way to run a local or development "
+"Hydrilla instance. For better performance, consider deployment using "
+"WSGI."
+
+#: src/hydrilla/server/serve.py:593
+msgid "directory_to_serve_from_overrides_config"
+msgstr ""
+"Directory to serve files from. Overrides value from the config file (if "
+"any)."
+
+#: src/hydrilla/server/serve.py:595
+msgid "project_url_to_display_overrides_config"
+msgstr ""
+"Project url to display on generated HTML pages. Overrides value from the "
+"config file (if any)."
+
+#: src/hydrilla/server/serve.py:597
+msgid "tcp_port_to_listen_on_overrides_config"
+msgstr ""
+"TCP port number to listen on (0-65535). Overrides value from the config "
+"file (if any)."
+
+#: src/hydrilla/server/serve.py:600
+msgid "path_to_config_file_explain_default"
+msgstr ""
+"Path to Hydrilla server configuration file (optional, by default Hydrilla"
+" loads its own config file, which in turn tries to load "
+"/etc/hydrilla/config.json)."
+
+#: src/hydrilla/server/serve.py:637
+msgid "config_option_{}_not_supplied"
+msgstr "Missing configuration option '{}'."
+
+#: src/hydrilla/server/serve.py:641
+msgid "serve_hydrilla_packages_wsgi_help"
+msgstr ""
+"Serve Hydrilla packages.\n"
+"\n"
+"This program is a WSGI script that runs Hydrilla repository behind an "
+"HTTP server like Apache2 or Nginx. You can configure Hydrilla through the"
+" /etc/hydrilla/config.json file."
+
+#. 'hydrilla' as a title
+#: src/hydrilla/server/templates/base.html:99
+#: src/hydrilla/server/templates/base.html:105
+msgid "hydrilla"
+msgstr "Hydrilla"
+
+#: src/hydrilla/server/templates/index.html:29
+msgid "hydrilla_welcome"
+msgstr "Welcome to Hydrilla!"
+
+#: src/hydrilla/util/_util.py:86
+msgid "bad_comment"
+msgstr "bad comment"
+
diff --git a/src/hydrilla/locales/pl_PL/LC_MESSAGES/messages.po b/src/hydrilla/locales/pl_PL/LC_MESSAGES/messages.po
new file mode 100644
index 0000000..57cca4d
--- /dev/null
+++ b/src/hydrilla/locales/pl_PL/LC_MESSAGES/messages.po
@@ -0,0 +1,258 @@
+# SPDX-License-Identifier: CC0-1.0
+#
+# Polish (Poland) translations for hydrilla.
+# Copyright (C) 2021, 2022 Wojtek Kosior <koszko@koszko.org>
+# Available under the terms of Creative Commons Zero v1.0 Universal.
+msgid ""
+msgstr ""
+"Project-Id-Version: hydrilla 2.0\n"
+"Report-Msgid-Bugs-To: koszko@koszko.org\n"
+"POT-Creation-Date: 2022-06-07 10:23+0200\n"
+"PO-Revision-Date: 2022-02-12 00:00+0000\n"
+"Last-Translator: Wojtek Kosior <koszko@koszko.org>\n"
+"Language: en_US\n"
+"Language-Team: en_US <koszko@koszko.org>\n"
+"Plural-Forms: nplurals=2; plural=(n != 1)\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=utf-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: Babel 2.8.0\n"
+
+#: src/hydrilla/builder/build.py:83 src/hydrilla/builder/local_apt.py:116
+#: src/hydrilla/builder/local_apt.py:408
+msgid "couldnt_execute_{}_is_it_installed"
+msgstr "Could not execute '{}'. Is the tool installed and reachable via PATH?"
+
+#: src/hydrilla/builder/build.py:87 src/hydrilla/builder/local_apt.py:120
+#: src/hydrilla/builder/local_apt.py:412
+msgid "command_{}_failed"
+msgstr "The following command finished execution with a non-zero exit status: {}"
+
+#: src/hydrilla/builder/build.py:160
+msgid "unknown_schema_package_source_{}"
+msgstr ""
+"The provided JSON at '{}' does not use any of the known package source "
+"JSON schemas."
+
+#: src/hydrilla/builder/build.py:196
+msgid "path_contains_double_dot_{}"
+msgstr ""
+"Attempt to load '{}' which includes a forbidden parent reference ('..') "
+"in the path."
+
+#: src/hydrilla/builder/build.py:203
+msgid "loading_{}_outside_package_dir"
+msgstr "Attempt to load '{}' which lies outside package source directory."
+
+#: src/hydrilla/builder/build.py:207
+msgid "loading_reserved_index_json"
+msgstr "Attempt to load 'index.json' which is a reserved filename."
+
+#: src/hydrilla/builder/build.py:214
+msgid "referenced_file_{}_missing"
+msgstr "Referenced file '{}' is missing."
+
+#: src/hydrilla/builder/build.py:396
+msgid "report_spdx_not_in_copyright_list"
+msgstr ""
+"Told to generate 'report.spdx' but 'report.spdx' is not listed among "
+"copyright files. Refusing to proceed."
+
+#: src/hydrilla/builder/build.py:473
+msgid "build_package_from_srcdir_to_dstdir"
+msgstr ""
+"Build Hydrilla package from `srcdir` and write the resulting files under "
+"`dstdir`."
+
+#: src/hydrilla/builder/build.py:475
+msgid "source_directory_to_build_from"
+msgstr "Source directory to build from."
+
+#: src/hydrilla/builder/build.py:477
+msgid "path_instead_of_index_json"
+msgstr ""
+"Path to file to be processed instead of index.json (if not absolute, "
+"resolved relative to srcdir)."
+
+#: src/hydrilla/builder/build.py:479
+msgid "path_instead_for_piggyback_files"
+msgstr ""
+"Path to a non-standard directory with foreign packages' archive files to "
+"use."
+
+#: src/hydrilla/builder/build.py:481
+msgid "built_package_files_destination"
+msgstr "Destination directory to write built package files to."
+
+#: src/hydrilla/builder/build.py:483 src/hydrilla/server/serve.py:582
+#: src/hydrilla/server/serve.py:604 src/hydrilla/server/serve.py:647
+#, python-format
+msgid "%(prog)s_%(version)s_license"
+msgstr ""
+"%(prog)s %(version)s\n"
+"Copyright (C) 2021,2022 Wojtek Kosior and contributors.\n"
+"License GPLv3+: GNU AGPL version 3 or later "
+"<https://gnu.org/licenses/gpl.html>\n"
+"This is free software: you are free to change and redistribute it.\n"
+"There is NO WARRANTY, to the extent permitted by law."
+
+#: src/hydrilla/builder/build.py:484 src/hydrilla/server/serve.py:605
+#: src/hydrilla/server/serve.py:648
+msgid "version_printing"
+msgstr "Print version information and exit."
+
+#: src/hydrilla/builder/common_errors.py:58
+msgid "STDOUT_OUTPUT_heading"
+msgstr "## Command's standard output ##"
+
+#: src/hydrilla/builder/common_errors.py:61
+msgid "STDERR_OUTPUT_heading"
+msgstr "## Command's standard error output ##"
+
+#: src/hydrilla/builder/local_apt.py:145
+msgid "distro_{}_unknown"
+msgstr "Attempt to use an unknown software distribution '{}'."
+
+#: src/hydrilla/builder/local_apt.py:189
+msgid "couldnt_import_{}_is_it_installed"
+msgstr ""
+"Could not import '{}'. Is the module installed and visible to this Python"
+" instance?"
+
+#: src/hydrilla/builder/local_apt.py:197
+msgid "gpg_couldnt_recv_key_{}"
+msgstr "Could not import PGP key '{}'."
+
+#: src/hydrilla/builder/local_apt.py:311
+msgid "apt_install_output_not_understood"
+msgstr "The output of an 'apt-get install' command was not understood."
+
+#: src/hydrilla/builder/local_apt.py:337
+msgid "apt_download_gave_bad_filename_{}"
+msgstr "The 'apt-get download' command produced a file with unexpected name '{}'."
+
+#: src/hydrilla/builder/piggybacking.py:100
+msgid "loading_{}_outside_piggybacked_dir"
+msgstr ""
+"Attempt to load '{}' which lies outside piggybacked packages files root "
+"directory."
+
+#: src/hydrilla/server/serve.py:126
+#, python-brace-format
+msgid "uuid_mismatch_{identifier}"
+msgstr "Two different uuids were specified for item '{identifier}'."
+
+#: src/hydrilla/server/serve.py:133
+#, python-brace-format
+msgid "version_clash_{identifier}_{version}"
+msgstr "Version '{version}' specified more than once for item '{identifier}'."
+
+#: src/hydrilla/server/serve.py:249 src/hydrilla/server/serve.py:261
+msgid "invalid_URL_{}"
+msgstr "Invalid URL/pattern: '{}'."
+
+#: src/hydrilla/server/serve.py:253
+msgid "disallowed_protocol_{}"
+msgstr "Disallowed protocol: '{}'."
+
+#: src/hydrilla/server/serve.py:306
+msgid "malcontent_dir_path_not_dir_{}"
+msgstr "Provided 'malcontent_dir' path does not name a directory: {}"
+
+#: src/hydrilla/server/serve.py:325
+msgid "couldnt_load_item_from_{}"
+msgstr "Couldn't load item from {}."
+
+#: src/hydrilla/server/serve.py:350
+msgid "item_{item}_in_file_{file}"
+msgstr "Item {item} incorrectly present under {file}."
+
+#: src/hydrilla/server/serve.py:356
+msgid "item_version_{ver}_in_file_{file}"
+msgstr "Item version {ver} incorrectly present under {file}."
+
+#: src/hydrilla/server/serve.py:379
+msgid "no_dep_{resource}_{ver}_{dep}"
+msgstr "Unknown dependency '{dep}' of resource '{resource}', version '{ver}'."
+
+#: src/hydrilla/server/serve.py:390
+msgid "no_payload_{mapping}_{ver}_{payload}"
+msgstr "Unknown payload '{payload}' of mapping '{mapping}', version '{ver}'."
+
+#: src/hydrilla/server/serve.py:402
+msgid "no_mapping_{required_by}_{ver}_{required}"
+msgstr "Unknown mapping '{required}' required by '{required_by}', version '{ver}'."
+
+#: src/hydrilla/server/serve.py:429
+msgid "couldnt_register_{mapping}_{ver}_{pattern}"
+msgstr ""
+"Couldn't register mapping '{mapping}', version '{ver}' (pattern "
+"'{pattern}')."
+
+#: src/hydrilla/server/serve.py:590
+msgid "serve_hydrilla_packages_explain_wsgi_considerations"
+msgstr ""
+"Serve Hydrilla packages.\n"
+"\n"
+"This command is meant to be a quick way to run a local or development "
+"Hydrilla instance. For better performance, consider deployment using "
+"WSGI."
+
+#: src/hydrilla/server/serve.py:593
+msgid "directory_to_serve_from_overrides_config"
+msgstr ""
+"Directory to serve files from. Overrides value from the config file (if "
+"any)."
+
+#: src/hydrilla/server/serve.py:595
+msgid "project_url_to_display_overrides_config"
+msgstr ""
+"Project url to display on generated HTML pages. Overrides value from the "
+"config file (if any)."
+
+#: src/hydrilla/server/serve.py:597
+msgid "tcp_port_to_listen_on_overrides_config"
+msgstr ""
+"TCP port number to listen on (0-65535). Overrides value from the config "
+"file (if any)."
+
+#: src/hydrilla/server/serve.py:600
+msgid "path_to_config_file_explain_default"
+msgstr ""
+"Path to Hydrilla server configuration file (optional, by default Hydrilla"
+" loads its own config file, which in turn tries to load "
+"/etc/hydrilla/config.json)."
+
+#: src/hydrilla/server/serve.py:602
+msgid "language_to_use_overrides_config"
+msgstr ""
+"Language to use (also affects served HTML files). Overrides value from "
+"the config file (if any)."
+
+#: src/hydrilla/server/serve.py:637
+msgid "config_option_{}_not_supplied"
+msgstr "Missing configuration option '{}'."
+
+#: src/hydrilla/server/serve.py:641
+msgid "serve_hydrilla_packages_wsgi_help"
+msgstr ""
+"Serve Hydrilla packages.\n"
+"\n"
+"This program is a WSGI script that runs Hydrilla repository behind an "
+"HTTP server like Apache2 or Nginx. You can configure Hydrilla through the"
+" /etc/hydrilla/config.json file."
+
+#. 'hydrilla' as a title
+#: src/hydrilla/server/templates/base.html:99
+#: src/hydrilla/server/templates/base.html:105
+msgid "hydrilla"
+msgstr "Hydrilla po polsku"
+
+#: src/hydrilla/server/templates/index.html:29
+msgid "hydrilla_welcome"
+msgstr "Welcome to Hydrilla!"
+
+#: src/hydrilla/util/_util.py:86
+msgid "bad_comment"
+msgstr "bad comment"
+
diff --git a/src/hydrilla/mitmproxy_launcher/__main__.py b/src/hydrilla/mitmproxy_launcher/__main__.py
new file mode 100644
index 0000000..f2ec78a
--- /dev/null
+++ b/src/hydrilla/mitmproxy_launcher/__main__.py
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: CC0-1.0
+
+# Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org>
+#
+# Available under the terms of Creative Commons Zero v1.0 Universal.
+
+import sys
+
+from . import launch
+
+# Call the `launch` callable from the sibling `launch` module imported above.
+launch.launch()
diff --git a/src/hydrilla/mitmproxy_launcher/launch.py b/src/hydrilla/mitmproxy_launcher/launch.py
new file mode 100644
index 0000000..c826598
--- /dev/null
+++ b/src/hydrilla/mitmproxy_launcher/launch.py
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Code for starting mitmproxy
+#
+# This file is part of Hydrilla
+#
+# Copyright (C) 2021, 2022 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use of this code
+# in a proprietary program, I am not going to enforce this in court.
+
+
+# We want to run mitmproxy with our script as an addon. A simple way would be to
+# find something like a 'main' function in mitmproxy, import it and call here.
+# Unfortunately, there is currently no guarantee that such function can be
+# considered mitmproxy's stable programming API. For this reason we instead
+# spawn a new process.
+
+import sys
+import os
+import subprocess as sp
+
+from pathlib import Path
+
+import click
+
+from .. import _version
+from ..translations import smart_gettext as _
+
+@click.command(help=_('cli_help.haketilo'))
+@click.option('-p', '--port', default=8080, type=click.IntRange(0, 65535),
+              help=_('cli_opt.haketilo.port'))
+@click.option('-d', '--directory', default='~/.haketilo/',
+              type=click.Path(file_okay=False),
+              help=_('cli_opt.haketilo.dir'))
+@click.version_option(version=_version.version, prog_name='Haketilo proxy',
+                      message=_('%(prog)s_%(version)s_license'),
+                      help=_('cli_opt.haketilo.version'))
+def launch(port: int, directory: str):
+    """
+    Prepare the Haketilo data directory and run mitmdump with the addon.
+
+    `directory` gets its leading '~' expanded and is created if missing. An
+    'addon.py' script that instantiates the Haketilo addon is (re)written
+    there and passed to a 'mitmdump' child process told to listen on `port`.
+    This function does not return; it exits with mitmdump's exit status.
+    """
+    directory_path = Path(os.path.expanduser(directory)).resolve()
+
+    directory_path.mkdir(parents=True, exist_ok=True)
+
+    script_path = directory_path / 'addon.py'
+
+    # The addon class is defined as `HaketiloAddon` in hydrilla/proxy/addon.py.
+    script_path.write_text('''
+from hydrilla.proxy.addon import HaketiloAddon
+
+addons = [HaketiloAddon()]
+''')
+
+    # Every element must be followed by a comma. Adjacent string literals get
+    # silently concatenated, which would glue an option's value together with
+    # the name of the option that follows it.
+    code = sp.call(['mitmdump',
+                    '-p', str(port),
+                    '--set', f'confdir={directory_path / "mitmproxy"}',
+                    '--set', 'upstream_cert=false',
+                    '--set', f'haketilo_dir={directory_path}',
+                    '--scripts', str(script_path)])
+
+    sys.exit(code)
diff --git a/src/hydrilla/pattern_tree.py b/src/hydrilla/pattern_tree.py
new file mode 100644
index 0000000..1128a06
--- /dev/null
+++ b/src/hydrilla/pattern_tree.py
@@ -0,0 +1,339 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Data structure for querying URL patterns.
+#
+# This file is part of Hydrilla&Haketilo.
+#
+# Copyright (C) 2021, 2022 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use of this code
+# in a proprietary program, I am not going to enforce this in court.
+
+"""
+This module defines data structures for querying data using URL patterns.
+"""
+
+# Enable using with Python 3.7.
+from __future__ import annotations
+
+import sys
+import typing as t
+import dataclasses as dc
+
+from immutables import Map
+
+from .url_patterns import ParsedUrl, parse_url
+from .translations import smart_gettext as _
+
+
+# Type of the items wrapped by StoredTreeItem; it is bound to t.Hashable.
+WrapperStoredType = t.TypeVar('WrapperStoredType', bound=t.Hashable)
+
+@dc.dataclass(frozen=True, unsafe_hash=True)
+class StoredTreeItem(t.Generic[WrapperStoredType]):
+ """
+ In the Pattern Tree, each item is stored together with the pattern used to
+ register it.
+ """
+ # Parsed URL pattern that was used to register the item.
+ pattern: ParsedUrl
+ # The registered item itself.
+ item: WrapperStoredType
+
+# if sys.version_info >= (3, 8):
+# CopyableType = t.TypeVar('CopyableType', bound='Copyable')
+
+# class Copyable(t.Protocol):
+# """Certain classes in Pattern Tree depend on this interface."""
+# def copy(self: CopyableType) -> CopyableType:
+# """Make a distinct instance with the same properties as this one."""
+# ...
+# else:
+# Copyable = t.Any
+
+NodeStoredType = t.TypeVar('NodeStoredType')
+
+@dc.dataclass(frozen=True)
+class PatternTreeNode(t.Generic[NodeStoredType]):
+    """
+    Immutable node of a pattern tree branch.
+
+    A node maps segment strings to child nodes and can additionally hold one
+    value (`literal_match`). The "modifying" methods leave this node untouched
+    and return a new node instead.
+    """
+    children: 'NodeChildrenType' = Map()
+    literal_match: t.Optional[NodeStoredType] = None
+
+    def is_empty(self) -> bool:
+        """Tell whether the node has neither children nor a stored value."""
+        return len(self.children) == 0 and self.literal_match is None
+
+    def update_literal_match(
+            self,
+            new_match_item: t.Optional[NodeStoredType]
+    ) -> 'NodeSelfType':
+        """Return a copy of this node that stores `new_match_item`."""
+        return dc.replace(self, literal_match=new_match_item)
+
+    def get_child(self, child_key: str) -> t.Optional['NodeSelfType']:
+        """Return the child stored under `child_key` or None if missing."""
+        return self.children.get(child_key)
+
+    def remove_child(self, child_key: str) -> 'NodeSelfType':
+        """
+        Return a copy of this node without the child stored under `child_key`.
+        A missing child is not an error; the children are then left as they
+        are.
+        """
+        try:
+            children = self.children.delete(child_key)
+        except KeyError:
+            # Only a missing key is expected here. Other exceptions (including
+            # KeyboardInterrupt) must not be swallowed.
+            children = self.children
+
+        return dc.replace(self, children=children)
+
+    def set_child(self, child_key: str, child: 'NodeSelfType') \
+        -> 'NodeSelfType':
+        """Return a copy of this node with `child` stored under `child_key`."""
+        return dc.replace(self, children=self.children.set(child_key, child))
+
+# Below we define 2 types used by recursively-typed PatternTreeNode.
+NodeSelfType = PatternTreeNode[NodeStoredType]
+NodeChildrenType = Map[str, NodeSelfType]
+
+
+# Type of the values stored in the nodes of a PatternTreeBranch.
+BranchStoredType = t.TypeVar('BranchStoredType')
+
+# Callable accepted by PatternTreeBranch.update(). It receives the value
+# currently stored in a node (or None) and returns the value to store (or None).
+ItemUpdater = t.Callable[
+ [t.Optional[BranchStoredType]],
+ t.Optional[BranchStoredType]
+]
+
+@dc.dataclass(frozen=True)
+class PatternTreeBranch(t.Generic[BranchStoredType]):
+ """
+ Immutable tree of PatternTreeNode objects addressed by sequences of string
+ segments. Updating it produces a new branch object.
+ """
+ root_node: PatternTreeNode[BranchStoredType] = PatternTreeNode()
+
+ def is_empty(self) -> bool:
+ """Tell whether the root node has no children and no stored value."""
+ return self.root_node.is_empty()
+
+ # def copy(self) -> 'BranchSelfType':
+ # """...."""
+ # return dc.replace(self)
+
+ def update(self, segments: t.Iterable[str], item_updater: ItemUpdater) \
+ -> 'BranchSelfType':
+ """
+ Return a branch in which the value stored under the given segments has
+ been replaced with the result of calling `item_updater` on the current
+ value (None if there is none). Nodes on the path that end up empty get
+ removed from their parents.
+ """
+ node = self.root_node
+ # (parent node, segment) pairs of the path walked so far.
+ nodes_segments = []
+
+ for segment in segments:
+ next_node = node.get_child(segment)
+
+ nodes_segments.append((node, segment))
+
+ # A missing child is substituted with a fresh, empty node.
+ node = PatternTreeNode() if next_node is None else next_node
+
+ node = node.update_literal_match(item_updater(node.literal_match))
+
+ # Rebuild the path from the deepest node up to the root.
+ while nodes_segments:
+ prev_node, segment = nodes_segments.pop()
+
+ if node.is_empty():
+ node = prev_node.remove_child(segment)
+ else:
+ node = prev_node.set_child(segment, node)
+
+ return dc.replace(self, root_node=node)
+
+ def search(self, segments: t.Sequence[str]) -> t.Iterable[BranchStoredType]:
+ """
+ Yields all matches of this segments sequence against the tree. Results
+ are produced in order from greatest to lowest pattern specificity.
+ """
+ # Nodes on the literal path, starting with the root; nodes[i] is reached
+ # through the first i segments.
+ nodes = [self.root_node]
+
+ for segment in segments:
+ next_node = nodes[-1].get_child(segment)
+ if next_node is None:
+ break
+
+ nodes.append(next_node)
+
+ # The conditions below read len(nodes) at the time they are called, i.e.
+ # after the examined node has been popped. It then equals the number of
+ # leading segments consumed to reach that node.
+ nsegments = len(segments)
+ # The node itself matches only if all segments were consumed.
+ cond_literal = lambda: len(nodes) == nsegments
+ cond_wildcard = [
+ # '*': exactly one segment is left and it is not a literal '*'.
+ lambda: len(nodes) + 1 == nsegments and segments[-1] != '*',
+ # '**': at least two segments are left.
+ lambda: len(nodes) + 1 < nsegments,
+ # '***': any number of segments is left, unless the only one left is
+ # a literal '***'.
+ lambda: len(nodes) + 1 != nsegments or segments[-1] != '***'
+ ]
+
+ # Examine the nodes from the deepest one to the root.
+ while nodes:
+ node = nodes.pop()
+
+ wildcard_matches = [node.get_child(wc) for wc in ('*', '**', '***')]
+
+ for match_node, condition in [
+ (node, cond_literal),
+ *zip(wildcard_matches, cond_wildcard)
+ ]:
+ if match_node is not None:
+ if match_node.literal_match is not None:
+ if condition():
+ yield match_node.literal_match
+
+# Below we define 1 type used by recursively-typed PatternTreeBranch.
+BranchSelfType = PatternTreeBranch[BranchStoredType]
+
+
+# Types used in the signature of filter_by_trailing_slash(): the hashable item
+# type and that item wrapped together with its pattern.
+FilterStoredType = t.TypeVar('FilterStoredType', bound=t.Hashable)
+FilterWrappedType = StoredTreeItem[FilterStoredType]
+
+def filter_by_trailing_slash(
+        items: t.Iterable[FilterWrappedType],
+        with_slash: bool
+) -> frozenset[FilterWrappedType]:
+    """
+    Select those wrapped items whose pattern's `has_trailing_slash` attribute
+    equals `with_slash` and return them as a frozenset.
+    """
+    def slash_matches(wrapped: FilterWrappedType) -> bool:
+        """Tell whether the pattern of `wrapped` has the requested slash."""
+        return with_slash == wrapped.pattern.has_trailing_slash
+
+    return frozenset(filter(slash_matches, items))
+
+TreeStoredType = t.TypeVar('TreeStoredType', bound=t.Hashable)
+
+# These aliases are evaluated when the module gets imported (unlike
+# annotations, they are not deferred by `from __future__ import annotations`).
+# They use the `typing` generics because subscripting the builtin `frozenset`
+# and `tuple` only works on Python 3.9 and newer.
+StoredSet = t.FrozenSet[StoredTreeItem[TreeStoredType]]
+PathBranch = PatternTreeBranch[StoredSet]
+DomainBranch = PatternTreeBranch[PathBranch]
+TreeRoot = Map[t.Tuple[str, int], DomainBranch]
+
+@dc.dataclass(frozen=True)
+class PatternTree(t.Generic[TreeStoredType]):
+ """
+ "Pattern Tree" is how we refer to the data structure used for querying
+ Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal
+ is to make it possible to quickly retrieve all known patterns that match
+ a given URL.
+ """
+ # Maps (scheme, port) pairs to branches keyed by domain labels. The values
+ # stored in those are branches keyed by path segments, which in turn store
+ # frozensets of StoredTreeItem objects.
+ _by_scheme_and_port: TreeRoot = Map()
+
+ def _register(
+ self,
+ parsed_pattern: ParsedUrl,
+ item: TreeStoredType,
+ register: bool = True
+ ) -> 'TreeSelfType':
+ """
+ Make an item wrapped in StoredTreeItem object queryable through the
+ Pattern Tree by the given parsed URL pattern.
+
+ With `register` set to False the wrapped item is removed instead. In
+ both cases a new tree is returned and this one is left unmodified.
+ """
+ wrapped_item = StoredTreeItem(parsed_pattern, item)
+
+ def item_updater(item_set: t.Optional[StoredSet]) \
+ -> t.Optional[StoredSet]:
+ """
+ Add `wrapped_item` to the set (or remove it from there); return None
+ if the resulting set is empty.
+ """
+ if item_set is None:
+ item_set = frozenset()
+
+ if register:
+ item_set = item_set.union((wrapped_item,))
+ else:
+ item_set = item_set.difference((wrapped_item,))
+
+ return None if len(item_set) == 0 else item_set
+
+ def path_branch_updater(path_branch: t.Optional[PathBranch]) \
+ -> t.Optional[PathBranch]:
+ """
+ Apply `item_updater` under the pattern's path segments; return None
+ if the resulting path branch is empty.
+ """
+ if path_branch is None:
+ path_branch = PatternTreeBranch()
+
+ path_branch = path_branch.update(
+ parsed_pattern.path_segments,
+ item_updater
+ )
+
+ return None if path_branch.is_empty() else path_branch
+
+ key = (parsed_pattern.scheme, parsed_pattern.port)
+ domain_tree = self._by_scheme_and_port.get(key, PatternTreeBranch())
+
+ new_domain_tree = domain_tree.update(
+ parsed_pattern.domain_labels,
+ path_branch_updater
+ )
+
+ # Do not keep empty domain branches in the root mapping.
+ if new_domain_tree.is_empty():
+ try:
+ new_root = self._by_scheme_and_port.delete(key)
+ except KeyError:
+ new_root = self._by_scheme_and_port
+ else:
+ new_root = self._by_scheme_and_port.set(key, new_domain_tree)
+
+ return dc.replace(self, _by_scheme_and_port=new_root)
+
+ # def _register(
+ # self,
+ # url_pattern: str,
+ # item: TreeStoredType,
+ # register: bool = True
+ # ) -> 'TreeSelfType':
+ # """
+ # ....
+ # """
+ # tree = self
+
+ # for parsed_pat in parse_pattern(url_pattern):
+ # wrapped_item = StoredTreeItem(parsed_pat, item)
+ # tree = tree._register_with_parsed_pattern(
+ # parsed_pat,
+ # wrapped_item,
+ # register
+ # )
+
+ # return tree
+
+ def register(self, parsed_pattern: ParsedUrl, item: TreeStoredType) \
+ -> 'TreeSelfType':
+ """
+ Make item queryable through the Pattern Tree by the given URL pattern.
+ """
+ return self._register(parsed_pattern, item)
+
+ def deregister(self, parsed_pattern: ParsedUrl, item: TreeStoredType) \
+ -> 'TreeSelfType':
+ """
+ Make item no longer queryable through the Pattern Tree by the given URL
+ pattern.
+ """
+ return self._register(parsed_pattern, item, register=False)
+
+ def search(self, url: t.Union[ParsedUrl, str]) -> t.Iterable[StoredSet]:
+ """
+ Yield non-empty frozensets of stored items whose patterns match `url`
+ (either a ParsedUrl or a string, which gets parsed with parse_url()).
+ Nothing is yielded if no branch exists for the URL's scheme and port.
+ """
+ parsed_url = parse_url(url) if isinstance(url, str) else url
+
+ key = (parsed_url.scheme, parsed_url.port)
+ domain_tree = self._by_scheme_and_port.get(key)
+ if domain_tree is None:
+ return
+
+ # For a URL with a trailing slash, items of patterns with a trailing
+ # slash are yielded first, followed by items of patterns without it. For
+ # a URL without it, only the latter are considered.
+ if parsed_url.has_trailing_slash:
+ slash_options = [True, False]
+ else:
+ slash_options = [False]
+
+ for path_tree in domain_tree.search(parsed_url.domain_labels):
+ for item_set in path_tree.search(parsed_url.path_segments):
+ for with_slash in slash_options:
+ items = filter_by_trailing_slash(item_set, with_slash)
+ if len(items) > 0:
+ yield items
+
+# Below we define 1 type used by recursively-typed PatternTree.
+TreeSelfType = PatternTree[TreeStoredType]
diff --git a/src/hydrilla/proxy/__init__.py b/src/hydrilla/proxy/__init__.py
new file mode 100644
index 0000000..d382ead
--- /dev/null
+++ b/src/hydrilla/proxy/__init__.py
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: CC0-1.0
+
+# Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org>
+#
+# Available under the terms of Creative Commons Zero v1.0 Universal.
diff --git a/src/hydrilla/proxy/addon.py b/src/hydrilla/proxy/addon.py
new file mode 100644
index 0000000..7d6487b
--- /dev/null
+++ b/src/hydrilla/proxy/addon.py
@@ -0,0 +1,177 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Haketilo addon for Mitmproxy.
+#
+# This file is part of Hydrilla&Haketilo.
+#
+# Copyright (C) 2022 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use of this code
+# in a proprietary program, I am not going to enforce this in court.
+
+"""
+This module contains the definition of a mitmproxy addon that gets instantiated
+from addon script.
+"""
+
+# Enable using with Python 3.7.
+from __future__ import annotations
+
+import os.path
+import typing as t
+import dataclasses as dc
+
+from threading import Lock
+from pathlib import Path
+from contextlib import contextmanager
+
+from mitmproxy import http, addonmanager, ctx
+from mitmproxy.script import concurrent
+
+from .flow_handlers import make_flow_handler, FlowHandler
+from .state import HaketiloState
+from ..translations import smart_gettext as _
+
+# Handlers of flows in progress, keyed by id() of the flow object. The alias is
+# evaluated at import time, so `t.Dict` is used; subscripting the builtin `dict`
+# only works on Python 3.9 and newer.
+FlowHandlers = t.Dict[int, FlowHandler]
+
+# Callback that gets called with the addon's state object.
+StateUpdater = t.Callable[[HaketiloState], None]
+
+# Signature of the addon methods accepted by the http_event_handler() decorator.
+HTTPHandlerFun = t.Callable[
+    ['HaketiloAddon', http.HTTPFlow],
+    t.Optional[StateUpdater]
+]
+
+def http_event_handler(handler_fun: HTTPHandlerFun):
+    """
+    Decorator for HaketiloAddon methods that handle mitmproxy's HTTP events.
+
+    The wrapped method asserts that the addon has been configured and holds a
+    state object. It then calls `handler_fun` and, if a state updater (rather
+    than None) got returned, calls that updater with the addon's state.
+    """
+    # Imported locally so that this function does not depend on an import
+    # being added at the top of the module.
+    import functools
+
+    # Keep the name of `handler_fun`. mitmproxy's `concurrent` decorator gets
+    # stacked on top of this one and refuses functions whose `__name__` is not
+    # the name of an event it supports.
+    @functools.wraps(handler_fun)
+    def wrapped_handler(self: 'HaketiloAddon', flow: http.HTTPFlow):
+        """Run the handler and apply the state updater it returns (if any)."""
+        with self.configured_lock:
+            assert self.configured
+
+        assert self.state is not None
+
+        state_updater = handler_fun(self, flow)
+
+        if state_updater is not None:
+            state_updater(self.state)
+
+    return wrapped_handler
+
+@dc.dataclass
+class HaketiloAddon:
+    """
+    Mitmproxy addon that delegates HTTP flow events to per-flow handlers.
+
+    A FlowHandler is created for a flow upon its 'requestheaders' event, based
+    on the policy the state object selects for the request's URL. The handler
+    is remembered until the flow's 'response' or 'error' event.
+    """
+    # Whether configure() has already created the state object.
+    configured: bool = False
+    configured_lock: Lock = dc.field(default_factory=Lock)
+
+    state: t.Optional[HaketiloState] = None
+
+    # Handlers of flows in progress, keyed by id() of the flow object.
+    flow_handlers: FlowHandlers = dc.field(default_factory=dict)
+    handlers_lock: Lock = dc.field(default_factory=Lock)
+
+    def load(self, loader: addonmanager.Loader) -> None:
+        """Register the 'haketilo_dir' option with mitmproxy."""
+        loader.add_option(
+            name     = 'haketilo_dir',
+            typespec = str,
+            default  = '~/.haketilo/',
+            help     = "Point to a Haketilo data directory to use",
+        )
+
+    def configure(self, updated: set[str]) -> None:
+        """
+        Create the state object once the 'haketilo_dir' option gets set.
+
+        Updates of other options are ignored. A repeated update of
+        'haketilo_dir' only causes a warning to be logged.
+        """
+        if 'haketilo_dir' not in updated:
+            return
+
+        with self.configured_lock:
+            if self.configured:
+                ctx.log.warn(_('haketilo_dir_already_configured'))
+                return
+
+            # The option's default value starts with '~', so expand it.
+            haketilo_dir = Path(ctx.options.haketilo_dir).expanduser()
+            self.state = HaketiloState(haketilo_dir / 'store')
+
+            # The event handlers assert on this flag. Without setting it,
+            # handling of every event would fail.
+            self.configured = True
+
+    def assign_handler(self, flow: http.HTTPFlow, flow_handler: FlowHandler) \
+        -> None:
+        """Remember `flow_handler` as the handler of `flow`."""
+        with self.handlers_lock:
+            self.flow_handlers[id(flow)] = flow_handler
+
+    def lookup_handler(self, flow: http.HTTPFlow) -> FlowHandler:
+        """
+        Return the handler remembered for `flow`. KeyError is raised if there
+        is none.
+        """
+        with self.handlers_lock:
+            return self.flow_handlers[id(flow)]
+
+    def forget_handler(self, flow: http.HTTPFlow) -> None:
+        """Drop the handler remembered for `flow` (if any)."""
+        with self.handlers_lock:
+            self.flow_handlers.pop(id(flow), None)
+
+    @concurrent
+    @http_event_handler
+    def requestheaders(self, flow: http.HTTPFlow) -> t.Optional[StateUpdater]:
+        """
+        Select a policy for the request's URL, create and remember a handler
+        for the flow and let it process the 'requestheaders' event.
+        """
+        assert self.state is not None
+
+        policy = self.state.select_policy(flow.request.url)
+
+        flow_handler = make_flow_handler(flow, policy)
+
+        self.assign_handler(flow, flow_handler)
+
+        return flow_handler.on_requestheaders()
+
+    @concurrent
+    @http_event_handler
+    def request(self, flow: http.HTTPFlow) -> t.Optional[StateUpdater]:
+        """Let the flow's handler process the 'request' event."""
+        return self.lookup_handler(flow).on_request()
+
+    @concurrent
+    @http_event_handler
+    def responseheaders(self, flow: http.HTTPFlow) -> t.Optional[StateUpdater]:
+        """Let the flow's handler process the 'responseheaders' event."""
+        return self.lookup_handler(flow).on_responseheaders()
+
+    @concurrent
+    @http_event_handler
+    def response(self, flow: http.HTTPFlow) -> t.Optional[StateUpdater]:
+        """
+        Let the flow's handler process the 'response' event and forget the
+        handler afterwards.
+        """
+        updater = self.lookup_handler(flow).on_response()
+
+        self.forget_handler(flow)
+
+        return updater
+
+    @http_event_handler
+    def error(self, flow: http.HTTPFlow) -> None:
+        """Forget the handler of a flow that ended with an error."""
+        self.forget_handler(flow)
+
+# Module-level list with a single addon instance. Presumably this is the name
+# mitmproxy looks for when this file gets loaded as a script - TODO confirm.
+addons = [
+ HaketiloAddon()
+]
diff --git a/src/hydrilla/proxy/flow_handlers.py b/src/hydrilla/proxy/flow_handlers.py
new file mode 100644
index 0000000..605c7f9
--- /dev/null
+++ b/src/hydrilla/proxy/flow_handlers.py
@@ -0,0 +1,383 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Logic for modifying mitmproxy's HTTP flows.
+#
+# This file is part of Hydrilla&Haketilo.
+#
+# Copyright (C) 2022 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+"""
+This module contains the logic for modifying mitmproxy's HTTP flows according
+to the policy that was selected for each of them.
+"""
+
+# Enable using with Python 3.7.
+from __future__ import annotations
+
+import re
+import typing as t
+import dataclasses as dc
+
+import bs4 # type: ignore
+
+from mitmproxy import http
+from mitmproxy.net.http import Headers
+from mitmproxy.script import concurrent
+
+from .state import HaketiloState
+from . import policies
+
+StateUpdater = t.Callable[[HaketiloState], None]
+
+@dc.dataclass(frozen=True)
+class FlowHandler:
+    """
+    Base class of objects that process the events of a single HTTP flow
+    according to a policy. The base implementation only switches streaming on
+    (when requested through `stream_request`/`stream_response`) and produces
+    no state updates.
+    """
+    flow:   http.HTTPFlow
+    policy: policies.Policy
+
+    stream_request:  bool = False
+    stream_response: bool = False
+
+    def on_requestheaders(self) -> t.Optional[StateUpdater]:
+        """Mark the request as streamed if `stream_request` is set."""
+        request = self.flow.request
+
+        if self.stream_request:
+            request.stream = True
+
+        return None
+
+    def on_request(self) -> t.Optional[StateUpdater]:
+        """Do nothing; subclasses may override this."""
+        return None
+
+    def on_responseheaders(self) -> t.Optional[StateUpdater]:
+        """Mark the response as streamed if `stream_response` is set."""
+        response = self.flow.response
+
+        assert response is not None
+
+        if self.stream_response:
+            response.stream = True
+
+        return None
+
+    def on_response(self) -> t.Optional[StateUpdater]:
+        """Do nothing; subclasses may override this."""
+        return None
+
+@dc.dataclass(frozen=True)
+class FlowHandlerAllowScripts(FlowHandler):
+    """
+    Handler used for flows with an AllowPolicy. It does not override any event
+    method of FlowHandler; it only makes both the request and the response
+    streamed by default.
+    """
+    policy: policies.AllowPolicy
+
+    stream_request: bool = True
+    stream_response: bool = True
+
+# Names of HTTP headers that can carry a Content Security Policy, each paired
+# with the disposition assigned to policies read from that header.
+# NOTE(review): 'x-content-security-policy' and 'x-webkit-csp' are each listed
+# with both dispositions, so code that iterates over this tuple visits these
+# header names twice - confirm this is intended.
+csp_header_names_and_dispositions = (
+    ('content-security-policy', 'enforce'),
+    ('content-security-policy-report-only', 'report'),
+    ('x-content-security-policy', 'enforce'),
+    ('x-content-security-policy', 'report'),
+    ('x-webkit-csp', 'enforce'),
+    ('x-webkit-csp', 'report')
+)
+
+# The header names from the table above that have the 'enforce' disposition.
+csp_enforce_header_names_set = {
+    name for name, disposition in csp_header_names_and_dispositions
+    if disposition == 'enforce'
+}
+
+@dc.dataclass
+class ContentSecurityPolicy:
+    """
+    One parsed Content Security Policy: a mapping from lowercased directive
+    names to lists of their values, plus the name of the header the policy
+    was read from and the policy's disposition.
+    """
+    directives:  dict[str, list[str]]
+    header_name: str
+    disposition: str
+
+    @staticmethod
+    def deserialize(
+            serialized:  str,
+            header_name: str,
+            disposition: str = 'enforce'
+    ) -> 'ContentSecurityPolicy':
+        """
+        Parse a single serialized policy (directives separated with ';').
+
+        Directives that contain non-ASCII characters and empty directives are
+        skipped. When a directive name occurs more than once, only its first
+        occurrence is kept.
+        """
+        # For more info, see:
+        # https://www.w3.org/TR/CSP3/#parse-serialized-policy
+        parsed: dict[str, list[str]] = {}
+
+        for chunk in serialized.split(';'):
+            if not chunk.isascii():
+                continue
+
+            words = chunk.split()
+            if not words:
+                continue
+
+            name, *values = words
+
+            # Specs mention giving warnings for duplicate directive names but
+            # from our proxy's perspective this is not important right now.
+            parsed.setdefault(name.lower(), values)
+
+        return ContentSecurityPolicy(parsed, header_name, disposition)
+
+    def serialize(self) -> str:
+        """
+        Turn the policy back into a header value, with directives joined
+        using ';' and each directive's values joined using single spaces.
+        """
+        return ';'.join(
+            f'{name} {" ".join(values)}'
+            for name, values in self.directives.items()
+        )
+
+def extract_csp(headers: Headers) -> tuple[ContentSecurityPolicy, ...]:
+    """
+    Collect all Content Security Policies present in `headers`.
+
+    Every header named in `csp_header_names_and_dispositions` is looked up.
+    Each of its values is split on ',' (a single header value may hold a list
+    of policies) and every item is parsed separately. Policies that end up
+    with no directives are left out.
+    """
+    csp_policies = []
+
+    for header_name, disposition in csp_header_names_and_dispositions:
+        # get_all() yields one string per header line. Iterating over the
+        # result of get() would instead walk over the single characters of
+        # the header value.
+        for serialized_list in headers.get_all(header_name):
+            for serialized in serialized_list.split(','):
+                policy = ContentSecurityPolicy.deserialize(
+                    serialized,
+                    header_name,
+                    disposition
+                )
+
+                if policy.directives != {}:
+                    csp_policies.append(policy)
+
+    return tuple(csp_policies)
+
+# Names of the CSP directives that control script execution. These are the
+# directives FlowHandlerBlockScripts sets to 'none'.
+csp_script_directive_names = (
+    'script-src',
+    'script-src-elem',
+    'script-src-attr'
+)
+
+@dc.dataclass(frozen=True)
+class FlowHandlerBlockScripts(FlowHandler):
+    """
+    Handler used for flows with a BlockPolicy. It rewrites the response's
+    Content Security Policy headers so that scripts are not allowed to run.
+    """
+    policy: policies.BlockPolicy
+
+    stream_request:  bool = True
+    stream_response: bool = True
+
+    def on_responseheaders(self) -> t.Optional[StateUpdater]:
+        """
+        Replace the CSP headers of the response.
+
+        All known CSP headers are removed. Policies that had the 'enforce'
+        disposition are added back without their reporting directives and an
+        extra policy that forbids all scripts is added.
+        """
+        super().on_responseheaders()
+
+        assert self.flow.response is not None
+
+        headers = self.flow.response.headers
+
+        csp_policies = extract_csp(headers)
+
+        # A header may be absent and some names occur twice in the table.
+        # Deleting a header that is not there raises KeyError, hence the
+        # membership check.
+        for header_name, _ in csp_header_names_and_dispositions:
+            if header_name in headers:
+                del headers[header_name]
+
+        for policy in csp_policies:
+            if policy.disposition != 'enforce':
+                continue
+
+            # Most policies lack at least one of these directives, so a
+            # default is needed to keep pop() from raising KeyError.
+            policy.directives.pop('report-to', None)
+            policy.directives.pop('report-uri', None)
+
+            # Do not emit an empty header for a policy that consisted of
+            # reporting directives only.
+            if not policy.directives:
+                continue
+
+            headers.add(policy.header_name, policy.serialize())
+
+        extra_csp = ';'.join((
+            "script-src 'none'",
+            "script-src-elem 'none'",
+            "script-src-attr 'none'"
+        ))
+
+        headers.add('Content-Security-Policy', extra_csp)
+
+        return None
+
+# For details of 'Content-Type' header's structure, see:
+# https://datatracker.ietf.org/doc/html/rfc7231#section-3.1.1.1
+content_type_reg = re.compile(r'''
+^
+(?P<mime>[\w.+-]+/[\w.+-]+)
+(?:                        # any number of ';'-separated parameters
+    \s*;\s*
+    (?:
+        charset=           # no whitespace allowed in parameter as per RFC
+        (?P<encoding>
+            [\w-]+
+            |
+            "[\w-]+"       # quotes are optional per RFC
+        )
+        |
+        [^;]*              # match possible parameter other than "charset"
+    )
+)*
+\s*
+$
+''', re.VERBOSE | re.IGNORECASE)
+
+def deduce_content_type(headers: Headers) \
+    -> tuple[t.Optional[str], t.Optional[str]]:
+    """
+    Read the MIME type and the character encoding from the 'Content-Type'
+    header.
+
+    Returns a (mime, encoding) tuple. Both items are lowercased and the
+    encoding has its optional quotes removed. The encoding is None when the
+    header carries no "charset" parameter. Both items are None when the header
+    is missing or could not be parsed.
+    """
+    content_type = headers.get('content-type')
+    if content_type is None:
+        return (None, None)
+
+    match = content_type_reg.match(content_type)
+    if match is None:
+        return (None, None)
+
+    # Media types are case-insensitive; normalize so that callers can compare
+    # against lowercase names.
+    mime = match.group('mime').lower()
+
+    encoding = match.group('encoding')
+    if encoding is not None:
+        encoding = encoding.strip('"').lower()
+
+    return mime, encoding
+
+# Byte Order Marks, each paired with the name of the encoding it indicates.
+UTF8_BOM = b'\xEF\xBB\xBF'
+BOMs = (
+    (UTF8_BOM, 'utf-8'),
+    (b'\xFE\xFF', 'utf-16be'),
+    (b'\xFF\xFE', 'utf-16le')
+)
+
+def block_attr(element: bs4.PageElement, atrr_name: str) -> None:
+    """
+    Placeholder that currently does nothing. It gets called with <meta>
+    elements that carry a Content Security Policy, once for the 'http-equiv'
+    attribute and once for the 'content' attribute.
+    """
+    # TODO: implement
+    pass
+
+@dc.dataclass(frozen=True)
+class FlowHandlerInjectPayload(FlowHandler):
+    """
+    Handler used for flows with a PayloadPolicy. It replaces the response's
+    Content Security Policy and injects the policy's scripts into HTML
+    documents.
+    """
+    policy: policies.PayloadPolicy
+
+    stream_request: bool = True
+
+    def __post_init__(self) -> None:
+        """Precompute the CSP header value that is sent with the response."""
+        script_src = f"script-src {self.policy.assets_base_url()}"
+        if self.policy.is_eval_allowed():
+            script_src = f"{script_src} 'unsafe-eval'"
+
+        # NOTE(review): "script-src-elem 'none'" looks like it could also
+        # forbid the injected <script> elements - confirm.
+        new_csp = '; '.join((
+            script_src,
+            "script-src-elem 'none'",
+            "script-src-attr 'none'"
+        ))
+
+        # The dataclass is frozen, so a plain `self.new_csp = ...` assignment
+        # would raise dataclasses.FrozenInstanceError.
+        object.__setattr__(self, 'new_csp', new_csp)
+
+    def on_responseheaders(self) -> t.Optional[StateUpdater]:
+        """
+        Remove all known CSP headers from the response and add the one
+        computed in __post_init__().
+        """
+        super().on_responseheaders()
+
+        assert self.flow.response is not None
+
+        headers = self.flow.response.headers
+
+        # A header may be absent and some names occur twice in the table.
+        # Deleting a header that is not there raises KeyError, hence the
+        # membership check.
+        for header_name, _ in csp_header_names_and_dispositions:
+            if header_name in headers:
+                del headers[header_name]
+
+        headers.add('Content-Security-Policy', self.new_csp)
+
+        return None
+
+    def on_response(self) -> t.Optional[StateUpdater]:
+        """
+        Inject the policy's scripts into the response body if it is an HTML
+        document. Responses without content, without a usable 'Content-Type'
+        or with a non-HTML MIME type are left untouched.
+        """
+        super().on_response()
+
+        assert self.flow.response is not None
+
+        if self.flow.response.content is None:
+            return None
+
+        mime, encoding = deduce_content_type(self.flow.response.headers)
+        if mime is None or 'html' not in mime:
+            return None
+
+        # A UTF BOM overrides encoding specified by the header.
+        for bom, encoding_name in BOMs:
+            if self.flow.response.content.startswith(bom):
+                encoding = encoding_name
+                break
+
+        soup = bs4.BeautifulSoup(
+            markup        = self.flow.response.content,
+            from_encoding = encoding,
+            features      = 'html5lib'
+        )
+
+        # Inject scripts.
+        script_parent = soup.find('body') or soup.find('html')
+        if script_parent is None:
+            return None
+
+        for url in self.policy.script_urls():
+            # new_tag() creates a tag that belongs to this document.
+            script_parent.append(soup.new_tag('script', src=url))
+
+        # Remove Content Security Policy that could possibly block injected
+        # scripts.
+        for meta in soup.select('head meta[http-equiv]'):
+            header_name = meta.attrs.get('http-equiv', '').lower().strip()
+            if header_name in csp_enforce_header_names_set:
+                block_attr(meta, 'http-equiv')
+                block_attr(meta, 'content')
+
+        # Prepending a three-byte Byte Order Mark (BOM) will force the browser
+        # to decode this as UTF-8 regardless of the 'Content-Type' header. See:
+        # https://www.w3.org/International/tests/repository/html5/the-input-byte-stream/results-basics#precedence
+        self.flow.response.content = UTF8_BOM + soup.encode()
+
+        return None
+
+@dc.dataclass(frozen=True)
+class FlowHandlerMetaResource(FlowHandler):
+    """
+    Handler used for flows with a MetaResourcePolicy. Not implemented yet: at
+    present it behaves the same as the base FlowHandler.
+    """
+    policy: policies.MetaResourcePolicy
+
+    def on_request(self) -> t.Optional[StateUpdater]:
+        """
+        Placeholder. The commented-out line below suggests the response is
+        meant to be generated here, but this is not done yet.
+        """
+        super().on_request()
+        # TODO: implement
+        #self.flow.response = ....
+
+        return None
+
+def make_flow_handler(flow: http.HTTPFlow, policy: policies.Policy) \
+    -> FlowHandler:
+    """
+    Create the flow handler that corresponds to the class of `policy`. A
+    policy that is none of BlockPolicy, AllowPolicy and PayloadPolicy is
+    expected to be a MetaResourcePolicy.
+    """
+    # Checked in this order; the first matching policy class wins.
+    handler_classes = (
+        (policies.BlockPolicy,   FlowHandlerBlockScripts),
+        (policies.AllowPolicy,   FlowHandlerAllowScripts),
+        (policies.PayloadPolicy, FlowHandlerInjectPayload)
+    )
+
+    for policy_class, handler_class in handler_classes:
+        if isinstance(policy, policy_class):
+            return handler_class(flow, policy)
+
+    assert isinstance(policy, policies.MetaResourcePolicy)
+    # def response_creator(request: http.HTTPRequest) -> http.HTTPResponse:
+    #     """...."""
+    #     replacement_details = make_replacement_resource(
+    #         policy.replacement,
+    #         request.path
+    #     )
+
+    #     return http.HTTPResponse.make(
+    #         replacement_details.status_code,
+    #         replacement_details.content,
+    #         replacement_details.content_type
+    #     )
+    return FlowHandlerMetaResource(flow, policy)
diff --git a/src/hydrilla/proxy/policies.py b/src/hydrilla/proxy/policies.py
new file mode 100644
index 0000000..5e9451b
--- /dev/null
+++ b/src/hydrilla/proxy/policies.py
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Various policies for altering HTTP requests.
+#
+# This file is part of Hydrilla&Haketilo.
+#
+# Copyright (C) 2022 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+import dataclasses as dc
+import typing as t
+
+from abc import ABC
+
+class Policy(ABC):
+    """Base class of all policies that can be selected for a URL."""
+    pass
+
+class PayloadPolicy(Policy):
+    """
+    Policy of pages that get scripts injected. All values returned by its
+    methods are hard-coded placeholders for now.
+    """
+    def assets_base_url(self) -> str:
+        """Return the URL that gets allowed as script source in the CSP."""
+        return 'https://example.com/static/'
+
+    def script_urls(self) -> t.Sequence[str]:
+        """Return URLs of the scripts to inject into the page."""
+        # TODO: implement
+        return ('https://example.com/static/somescript.js',)
+
+    def is_eval_allowed(self) -> bool:
+        """Tell whether 'unsafe-eval' should be included in the CSP."""
+        # TODO: implement
+        return True
+
+class MetaResourcePolicy(Policy):
+    """Policy of flows that are handled by FlowHandlerMetaResource."""
+    pass
+
+class AllowPolicy(Policy):
+    """Base class of policies under which page's scripts are not blocked."""
+    pass
+
+@dc.dataclass
+class RuleAllowPolicy(AllowPolicy):
+    """Allow policy that carries a pattern."""
+    # presumably the URL pattern of the rule this policy comes from - TODO
+    # confirm
+    pattern: str
+
+class FallbackAllowPolicy(AllowPolicy):
+    """
+    Allow policy without a pattern.
+    NOTE(review): by its name, meant for URLs that no rule matched - confirm.
+    """
+    pass
+
+class BlockPolicy(Policy):
+    """Base class of policies under which page's scripts get blocked."""
+    pass
+
+@dc.dataclass
+class RuleBlockPolicy(BlockPolicy):
+    """Block policy that carries a pattern."""
+    # presumably the URL pattern of the rule this policy comes from - TODO
+    # confirm
+    pattern: str
+
+class FallbackBlockPolicy(BlockPolicy):
+    """
+    Block policy without a pattern. It is what HaketiloState.select_policy()
+    currently returns when no error occurs.
+    """
+    pass
+
+@dc.dataclass
+class ErrorBlockPolicy(BlockPolicy):
+    """
+    Block policy returned by HaketiloState.select_policy() when selecting a
+    policy failed with an exception.
+    """
+    # the exception that was caught during policy selection
+    error: Exception
diff --git a/src/hydrilla/proxy/state.py b/src/hydrilla/proxy/state.py
new file mode 100644
index 0000000..fc01536
--- /dev/null
+++ b/src/hydrilla/proxy/state.py
@@ -0,0 +1,73 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Haketilo proxy data and configuration.
+#
+# This file is part of Hydrilla&Haketilo.
+#
+# Copyright (C) 2022 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+"""
+This module contains logic for keeping track of all settings, rules, mappings
+and resources.
+"""
+
+# Enable using with Python 3.7.
+from __future__ import annotations
+
+import typing as t
+import dataclasses as dc
+
+from threading import Lock
+from pathlib import Path
+
+from ..pattern_tree import PatternTree
+from .store import HaketiloStore
+from . import policies
+
+def make_pattern_tree_with_builtin_policies() -> PatternTree[policies.Policy]:
+    """
+    Create the initial pattern tree of HaketiloState. Built-in policies are
+    not implemented yet, so the returned tree is an empty one.
+    """
+    # TODO: implement
+    return PatternTree()
+
+tree_field = dc.field(default_factory=make_pattern_tree_with_builtin_policies)
+
+@dc.dataclass
+class HaketiloState(HaketiloStore):
+    """
+    Haketilo proxy state: the on-disk store it inherits from, extended with a
+    pattern tree of policies and a lock that guards access to it.
+    """
+    pattern_tree: PatternTree[policies.Policy] = tree_field
+    # not consulted by select_policy() below yet
+    default_allow: bool = False
+
+    state_lock: Lock = dc.field(default_factory=Lock)
+
+    def select_policy(self, url: str, allow_disabled=False) -> policies.Policy:
+        """
+        Select the policy to apply to `url`.
+
+        Unfinished: results of the pattern tree search are iterated over but
+        ignored, so FallbackBlockPolicy is always returned unless the search
+        raises. In that case the exception is returned wrapped in an
+        ErrorBlockPolicy. The `allow_disabled` argument is not used yet.
+        """
+        # Only the reference to the tree is read under the lock; the search
+        # itself runs with the lock released.
+        with self.state_lock:
+            pattern_tree = self.pattern_tree
+
+        try:
+            for policy_set in pattern_tree.search(url):
+                # if policy.enabled or allow_disabled:
+                #     return policy
+                pass
+
+            return policies.FallbackBlockPolicy()
+        except Exception as e:
+            # Any failure results in a blocking policy that carries the error.
+            return policies.ErrorBlockPolicy(e)
diff --git a/src/hydrilla/proxy/store.py b/src/hydrilla/proxy/store.py
new file mode 100644
index 0000000..72852d8
--- /dev/null
+++ b/src/hydrilla/proxy/store.py
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Haketilo proxy on-disk data storage.
+#
+# This file is part of Hydrilla&Haketilo.
+#
+# Copyright (C) 2022 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+"""This module facilitates storing and modifying Haketilo proxy data on-disk."""
+
+# Enable using with Python 3.7.
+from __future__ import annotations
+
+import dataclasses as dc
+
+from pathlib import Path
+
+@dc.dataclass
+class HaketiloStore:
+    """
+    On-disk storage of Haketilo proxy data. Only the path of the store
+    directory is kept for now; no storage logic is implemented yet.
+    """
+    # directory that holds the stored data
+    store_dir: Path
+    # TODO: implement
diff --git a/src/hydrilla/py.typed b/src/hydrilla/py.typed
new file mode 100644
index 0000000..f41d511
--- /dev/null
+++ b/src/hydrilla/py.typed
@@ -0,0 +1,5 @@
+SPDX-License-Identifier: CC0-1.0
+
+Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org>
+
+Available under the terms of Creative Commons Zero v1.0 Universal.
diff --git a/src/hydrilla/schemas/1.x b/src/hydrilla/schemas/1.x
new file mode 160000
+Subproject 09634f3446866f712a022327683b1149d8f46bf
diff --git a/src/hydrilla/schemas/2.x b/src/hydrilla/schemas/2.x
new file mode 160000
+Subproject 7206db45f277c10c34d1b7ed9bd35343ac742d3
diff --git a/src/hydrilla/server/config.json b/src/hydrilla/server/config.json
index bde341c..e307548 100644
--- a/src/hydrilla/server/config.json
+++ b/src/hydrilla/server/config.json
@@ -28,9 +28,6 @@
// What port to listen on (if not being run through WSGI).
"port": 10112,
- // What localization to use for console messages and served HTML files.
- "language": "en_US",
-
// Whether to exit upon emitting a warning.
"werror": false
}
diff --git a/src/hydrilla/server/config.py b/src/hydrilla/server/config.py
index 1edd070..c7c5657 100644
--- a/src/hydrilla/server/config.py
+++ b/src/hydrilla/server/config.py
@@ -31,9 +31,9 @@ import json
from pathlib import Path
-import jsonschema
+import jsonschema # type: ignore
-from .. import util
+from .. import json_instances
config_schema = {
'$schema': 'http://json-schema.org/draft-07/schema#',
@@ -92,7 +92,7 @@ def load(config_paths: list[Path]=[here / 'config.json'],
continue
raise e from None
- new_config = json.loads(util.strip_json_comments(json_text))
+ new_config = json_instances.strip_json_comments(json_text)
jsonschema.validate(new_config, config_schema)
config.update(new_config)
diff --git a/src/hydrilla/server/locales/en_US/LC_MESSAGES/hydrilla-messages.po b/src/hydrilla/server/locales/en_US/LC_MESSAGES/hydrilla-messages.po
deleted file mode 100644
index 1998f89..0000000
--- a/src/hydrilla/server/locales/en_US/LC_MESSAGES/hydrilla-messages.po
+++ /dev/null
@@ -1,151 +0,0 @@
-# SPDX-License-Identifier: CC0-1.0
-#
-# English (United States) translations for hydrilla.
-# Copyright (C) 2021, 2022 Wojtek Kosior <koszko@koszko.org>
-# Available under the terms of Creative Commons Zero v1.0 Universal.
-msgid ""
-msgstr ""
-"Project-Id-Version: hydrilla.builder 0.1\n"
-"Report-Msgid-Bugs-To: koszko@koszko.org\n"
-"POT-Creation-Date: 2022-05-31 18:21+0200\n"
-"PO-Revision-Date: 2022-02-12 00:00+0000\n"
-"Last-Translator: Wojtek Kosior <koszko@koszko.org>\n"
-"Language: en_US\n"
-"Language-Team: en_US <koszko@koszko.org>\n"
-"Plural-Forms: nplurals=2; plural=(n != 1)\n"
-"MIME-Version: 1.0\n"
-"Content-Type: text/plain; charset=utf-8\n"
-"Content-Transfer-Encoding: 8bit\n"
-"Generated-By: Babel 2.8.0\n"
-
-#: src/hydrilla/server/serve.py:127
-#, python-brace-format
-msgid "uuid_mismatch_{identifier}"
-msgstr "Two different uuids were specified for item '{identifier}'."
-
-#: src/hydrilla/server/serve.py:134
-#, python-brace-format
-msgid "version_clash_{identifier}_{version}"
-msgstr "Version '{version}' specified more than once for item '{identifier}'."
-
-#: src/hydrilla/server/serve.py:250 src/hydrilla/server/serve.py:262
-msgid "invalid_URL_{}"
-msgstr "Invalid URL/pattern: '{}'."
-
-#: src/hydrilla/server/serve.py:254
-msgid "disallowed_protocol_{}"
-msgstr "Disallowed protocol: '{}'."
-
-#: src/hydrilla/server/serve.py:307
-msgid "malcontent_dir_path_not_dir_{}"
-msgstr "Provided 'malcontent_dir' path does not name a directory: {}"
-
-#: src/hydrilla/server/serve.py:326
-msgid "couldnt_load_item_from_{}"
-msgstr "Couldn't load item from {}."
-
-#: src/hydrilla/server/serve.py:351
-msgid "item_{item}_in_file_{file}"
-msgstr "Item {item} incorrectly present under {file}."
-
-#: src/hydrilla/server/serve.py:357
-msgid "item_version_{ver}_in_file_{file}"
-msgstr "Item version {ver} incorrectly present under {file}."
-
-#: src/hydrilla/server/serve.py:380
-msgid "no_dep_{resource}_{ver}_{dep}"
-msgstr "Unknown dependency '{dep}' of resource '{resource}', version '{ver}'."
-
-#: src/hydrilla/server/serve.py:391
-msgid "no_payload_{mapping}_{ver}_{payload}"
-msgstr "Unknown payload '{payload}' of mapping '{mapping}', version '{ver}'."
-
-#: src/hydrilla/server/serve.py:403
-msgid "no_mapping_{required_by}_{ver}_{required}"
-msgstr "Unknown mapping '{required}' required by '{required_by}', version '{ver}'."
-
-#: src/hydrilla/server/serve.py:430
-msgid "couldnt_register_{mapping}_{ver}_{pattern}"
-msgstr ""
-"Couldn't register mapping '{mapping}', version '{ver}' (pattern "
-"'{pattern}')."
-
-#: src/hydrilla/server/serve.py:583 src/hydrilla/server/serve.py:606
-#: src/hydrilla/server/serve.py:650
-#, python-format
-msgid "%(prog)s_%(version)s_license"
-msgstr ""
-"%(prog)s %(version)s\n"
-"Copyright (C) 2021,2022 Wojtek Kosior and contributors.\n"
-"License GPLv3+: GNU AGPL version 3 or later "
-"<https://gnu.org/licenses/gpl.html>\n"
-"This is free software: you are free to change and redistribute it.\n"
-"There is NO WARRANTY, to the extent permitted by law."
-
-#: src/hydrilla/server/serve.py:592
-msgid "serve_hydrilla_packages_explain_wsgi_considerations"
-msgstr ""
-"Serve Hydrilla packages.\n"
-"\n"
-"This command is meant to be a quick way to run a local or development "
-"Hydrilla instance. For better performance, consider deployment using "
-"WSGI."
-
-#: src/hydrilla/server/serve.py:595
-msgid "directory_to_serve_from_overrides_config"
-msgstr ""
-"Directory to serve files from. Overrides value from the config file (if "
-"any)."
-
-#: src/hydrilla/server/serve.py:597
-msgid "project_url_to_display_overrides_config"
-msgstr ""
-"Project url to display on generated HTML pages. Overrides value from the "
-"config file (if any)."
-
-#: src/hydrilla/server/serve.py:599
-msgid "tcp_port_to_listen_on_overrides_config"
-msgstr ""
-"TCP port number to listen on (0-65535). Overrides value from the config "
-"file (if any)."
-
-#: src/hydrilla/server/serve.py:602
-msgid "path_to_config_file_explain_default"
-msgstr ""
-"Path to Hydrilla server configuration file (optional, by default Hydrilla"
-" loads its own config file, which in turn tries to load "
-"/etc/hydrilla/config.json)."
-
-#: src/hydrilla/server/serve.py:604
-msgid "language_to_use_overrides_config"
-msgstr ""
-"Language to use (also affects served HTML files). Overrides value from "
-"the config file (if any)."
-
-#: src/hydrilla/server/serve.py:607 src/hydrilla/server/serve.py:651
-msgid "version_printing"
-msgstr "Print version information and exit."
-
-#: src/hydrilla/server/serve.py:640
-msgid "config_option_{}_not_supplied"
-msgstr "Missing configuration option '{}'."
-
-#: src/hydrilla/server/serve.py:644
-msgid "serve_hydrilla_packages_wsgi_help"
-msgstr ""
-"Serve Hydrilla packages.\n"
-"\n"
-"This program is a WSGI script that runs Hydrilla repository behind an "
-"HTTP server like Apache2 or Nginx. You can configure Hydrilla through the"
-" /etc/hydrilla/config.json file."
-
-#. 'hydrilla' as a title
-#: src/hydrilla/server/templates/base.html:99
-#: src/hydrilla/server/templates/base.html:105
-msgid "hydrilla"
-msgstr "Hydrilla"
-
-#: src/hydrilla/server/templates/index.html:29
-msgid "hydrilla_welcome"
-msgstr "Welcome to Hydrilla!"
-
diff --git a/src/hydrilla/server/serve.py b/src/hydrilla/server/serve.py
index 779f3d2..8f0d557 100644
--- a/src/hydrilla/server/serve.py
+++ b/src/hydrilla/server/serve.py
@@ -36,16 +36,18 @@ import logging
from pathlib import Path
from hashlib import sha256
from abc import ABC, abstractmethod
-from typing import Optional, Union, Iterable
+from typing import Optional, Union, Iterable, TypeVar, Generic
import click
import flask
from werkzeug import Response
-from .. import util
+from .. import _version, versions, json_instances
+from ..item_infos import ResourceInfo, MappingInfo, VersionedItemInfo
+from ..translations import smart_gettext as _, translation as make_translation
+#from ..url_patterns import PatternTree
from . import config
-from . import _version
here = Path(__file__).resolve().parent
@@ -54,243 +56,20 @@ generated_by = {
'version': _version.version
}
-class ItemInfo(ABC):
- """Shortened data of a resource/mapping."""
- def __init__(self, item_obj: dict, major_schema_version: int):
- """Initialize ItemInfo using item definition read from JSON."""
- self.version = util.normalize_version(item_obj['version'])
- self.identifier = item_obj['identifier']
- self.uuid = item_obj.get('uuid')
- self.long_name = item_obj['long_name']
-
- self.required_mappings = []
- if major_schema_version >= 2:
- self.required_mappings = [map_ref['identifier'] for map_ref in
- item_obj.get('required_mappings', [])]
-
- def path(self) -> str:
- """
- Get a relative path to this item's JSON definition with respect to
- directory containing items of this type.
- """
- return f'{self.identifier}/{util.version_string(self.version)}'
-
-class ResourceInfo(ItemInfo):
- """Shortened data of a resource."""
- def __init__(self, resource_obj: dict, major_schema_version: int):
- """Initialize ResourceInfo using resource definition read from JSON."""
- super().__init__(resource_obj, major_schema_version)
-
- dependencies = resource_obj.get('dependencies', [])
- self.dependencies = [res_ref['identifier'] for res_ref in dependencies]
-
-class MappingInfo(ItemInfo):
- """Shortened data of a mapping."""
- def __init__(self, mapping_obj: dict, major_schema_version: int):
- """Initialize MappingInfo using mapping definition read from JSON."""
- super().__init__(mapping_obj, major_schema_version)
-
- self.payloads = {}
- for pattern, res_ref in mapping_obj.get('payloads', {}).items():
- self.payloads[pattern] = res_ref['identifier']
-
- def as_query_result(self) -> str:
- """
- Produce a json.dump()-able object describing this mapping as one of a
- collection of query results.
- """
- return {
- 'version': self.version,
- 'identifier': self.identifier,
- 'long_name': self.long_name
- }
-
-class VersionedItemInfo:
- """Stores data of multiple versions of given resource/mapping."""
- def __init__(self):
- self.uuid = None
- self.identifier = None
- self.by_version = {}
- self.known_versions = []
-
- def register(self, item_info: ItemInfo) -> None:
- """
- Make item info queryable by version. Perform sanity checks for uuid.
- """
- if self.identifier is None:
- self.identifier = item_info.identifier
-
- if self.uuid is None:
- self.uuid = item_info.uuid
-
- if self.uuid is not None and self.uuid != item_info.uuid:
- raise ValueError(f_('uuid_mismatch_{identifier}')
- .format(identifier=self.identifier))
-
- ver = item_info.version
- ver_str = util.version_string(ver)
-
- if ver_str in self.by_version:
- raise ValueError(f_('version_clash_{identifier}_{version}')
- .format(identifier=self.identifier,
- version=ver_str))
-
- self.by_version[ver_str] = item_info
- self.known_versions.append(ver)
-
- def get_by_ver(self, ver: Optional[list[int]]=None) -> Optional[ItemInfo]:
- """
- Find and return info of the newest version of item.
-
- If ver is specified, instead find and return info of that version of the
- item (or None if absent).
- """
- ver = util.version_string(ver or self.known_versions[-1])
-
- return self.by_version.get(ver)
-
- def get_all(self) -> list[ItemInfo]:
- """
- Return a list of item info for all its versions, from oldest ot newest.
- """
- return [self.by_version[util.version_string(ver)]
- for ver in self.known_versions]
-
-class PatternTreeNode:
- """
- "Pattern Tree" is how we refer to the data structure used for querying
- Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal
- is to make it possible for given URL to quickly retrieve all known patterns
- that match it.
- """
- def __init__(self):
- self.wildcard_matches = [None, None, None]
- self.literal_match = None
- self.children = {}
-
- def search(self, segments):
- """
- Yields all matches of this segments sequence against the tree that
- starts at this node. Results are produces in order from greatest to
- lowest pattern specificity.
- """
- nodes = [self]
-
- for segment in segments:
- next_node = nodes[-1].children.get(segment)
- if next_node is None:
- break
-
- nodes.append(next_node)
-
- nsegments = len(segments)
- cond_literal = lambda: len(nodes) == nsegments
- cond_wildcard = [
- lambda: len(nodes) + 1 == nsegments and segments[-1] != '*',
- lambda: len(nodes) + 1 < nsegments,
- lambda: len(nodes) + 1 != nsegments or segments[-1] != '***'
- ]
-
- while nodes:
- node = nodes.pop()
-
- for item, condition in [(node.literal_match, cond_literal),
- *zip(node.wildcard_matches, cond_wildcard)]:
- if item is not None and condition():
- yield item
-
- def add(self, segments, item_instantiator):
- """
- Make item queryable through (this branch of) the Pattern Tree. If there
- was not yet any item associated with the tree path designated by
- segments, create a new one using item_instantiator() function. Return
- all items matching this path (both the ones that existed and the ones
- just created).
- """
- node = self
- segment = None
-
- for segment in segments:
- wildcards = node.wildcard_matches
-
- child = node.children.get(segment) or PatternTreeNode()
- node.children[segment] = child
- node = child
-
- if node.literal_match is None:
- node.literal_match = item_instantiator()
-
- if segment not in ('*', '**', '***'):
- return [node.literal_match]
-
- if wildcards[len(segment) - 1] is None:
- wildcards[len(segment) - 1] = item_instantiator()
-
- return [node.literal_match, wildcards[len(segment) - 1]]
-
-proto_regex = re.compile(r'^(?P<proto>\w+)://(?P<rest>.*)$')
-user_re = r'[^/?#@]+@' # r'(?P<user>[^/?#@]+)@' # discarded for now
-query_re = r'\??[^#]*' # r'\??(?P<query>[^#]*)' # discarded for now
-domain_re = r'(?P<domain>[^/?#]+)'
-path_re = r'(?P<path>[^?#]*)'
-http_regex = re.compile(f'{domain_re}{path_re}{query_re}.*')
-ftp_regex = re.compile(f'(?:{user_re})?{domain_re}{path_re}.*')
-
-class UrlError(ValueError):
- """Used to report a URL or URL pattern that is invalid or unsupported."""
- pass
-
-class DeconstructedUrl:
- """Represents a deconstructed URL or URL pattern"""
- def __init__(self, url):
- self.url = url
-
- match = proto_regex.match(url)
- if not match:
- raise UrlError(f_('invalid_URL_{}').format(url))
-
- self.proto = match.group('proto')
- if self.proto not in ('http', 'https', 'ftp'):
- raise UrlError(f_('disallowed_protocol_{}').format(proto))
-
- if self.proto == 'ftp':
- match = ftp_regex.match(match.group('rest'))
- elif self.proto in ('http', 'https'):
- match = http_regex.match(match.group('rest'))
-
- if not match:
- raise UrlError(f_('invalid_URL_{}').format(url))
-
- self.domain = match.group('domain').split('.')
- self.domain.reverse()
- self.path = [*filter(None, match.group('path').split('/'))]
-
-class PatternMapping:
- """
- A mapping info, together with one of its patterns, as stored in Pattern
- Tree.
- """
- def __init__(self, pattern: str, mapping_info: MappingInfo):
- self.pattern = pattern
- self.mapping_info = mapping_info
-
- def register(self, pattern_tree: dict):
- """
- Make self queryable through the Pattern Tree passed in the argument.
- """
- deco = DeconstructedUrl(self.pattern)
-
- domain_tree = pattern_tree.get(deco.proto) or PatternTreeNode()
- pattern_tree[deco.proto] = domain_tree
-
- for path_tree in domain_tree.add(deco.domain, PatternTreeNode):
- for match_list in path_tree.add(deco.path, list):
- match_list.append(self)
+ # def as_query_result(self) -> dict[str, Union[str, list[int]]]:
+ # """
+ # Produce a json.dump()-able object describing this mapping as one of a
+ # collection of query results.
+ # """
+ # return {
+ # 'version': self.version,
+ # 'identifier': self.identifier,
+ # 'long_name': self.long_name
+ # }
class Malcontent:
"""
- Instance of this class represents a directory with files that can be loaded
- and served by Hydrilla.
+ Represent a directory with files that can be loaded and served by Hydrilla.
"""
def __init__(self, malcontent_dir_path: Path):
"""
@@ -298,13 +77,15 @@ class Malcontent:
malcontent_dir_path for serveable site-modifying packages and loads
them into its data structures.
"""
- self.infos = {'resource': {}, 'mapping': {}}
- self.pattern_tree = {}
+ self.resource_infos: dict[str, VersionedItemInfo[ResourceInfo]] = {}
+ self.mapping_infos: dict[str, VersionedItemInfo[MappingInfo]] = {}
+
+ self.pattern_tree: PatternTree[MappingInfo] = PatternTree()
self.malcontent_dir_path = malcontent_dir_path
if not self.malcontent_dir_path.is_dir():
- raise ValueError(f_('malcontent_dir_path_not_dir_{}')
+ raise ValueError(_('malcontent_dir_path_not_dir_{}')
.format(malcontent_dir_path))
for item_type in ('mapping', 'resource'):
@@ -323,18 +104,27 @@ class Malcontent:
if flask.current_app._hydrilla_werror:
raise e from None
- msg = f_('couldnt_load_item_from_{}').format(ver_file)
+ msg = _('couldnt_load_item_from_{}').format(ver_file)
logging.error(msg, exc_info=True)
self._report_missing()
self._finalize()
+ @staticmethod
+ def _register_info(infos: dict[str, VersionedItemInfo[VersionedType]],
+ identifier: str, item_info: VersionedType) -> None:
+ """
+ Register item_info in infos under identifier, creating the entry if absent.
+ """
+ infos.setdefault(identifier, VersionedItemInfo())\
+ .register(item_info)
+
def _load_item(self, item_type: str, ver_file: Path) -> None:
"""
Reads, validates and autocompletes serveable mapping/resource
definition, then registers information from it in data structures.
"""
- version = util.parse_version(ver_file.name)
+ version = versions.parse_version(ver_file.name)
identifier = ver_file.parent.name
item_json, major = util.load_instance_from_file(ver_file)
@@ -342,32 +132,35 @@ class Malcontent:
util.validator_for(f'api_{item_type}_description-{major}.schema.json')\
.validate(item_json)
- if item_type == 'resource':
- item_info = ResourceInfo(item_json, major)
- else:
- item_info = MappingInfo(item_json, major)
+ # Assertion needed for mypy. If validation passed, this should not fail.
+ assert major is not None
+
+ item_info: ItemInfo = ResourceInfo(item_json, major) \
+ if item_type == 'resource' else MappingInfo(item_json, major)
if item_info.identifier != identifier:
- msg = f_('item_{item}_in_file_{file}')\
+ msg = _('item_{item}_in_file_{file}')\
.format({'item': item_info.identifier, 'file': ver_file})
raise ValueError(msg)
if item_info.version != version:
ver_str = util.version_string(item_info.version)
- msg = f_('item_version_{ver}_in_file_{file}')\
+ msg = _('item_version_{ver}_in_file_{file}')\
.format({'ver': ver_str, 'file': ver_file})
raise ValueError(msg)
- versioned_info = self.infos[item_type].get(identifier)
- if versioned_info is None:
- versioned_info = VersionedItemInfo()
- self.infos[item_type][identifier] = versioned_info
+ if isinstance(item_info, ResourceInfo):
+ self._register_info(self.resource_infos, identifier, item_info)
+ elif isinstance(item_info, MappingInfo):
+ self._register_info(self.mapping_infos, identifier, item_info)
- versioned_info.register(item_info)
-
- def _all_of_type(self, item_type: str) -> Iterable[ItemInfo]:
- """Iterator over all registered versions of all mappings/resources."""
- for versioned_info in self.infos[item_type].values():
+ @staticmethod
+ def _all_infos(infos: dict[str, VersionedItemInfo[VersionedType]]) \
+ -> Iterable[VersionedType]:
+ """
+ Iterate over all registered versions of all items in infos.
+ """
+ for versioned_info in infos.values():
for item_info in versioned_info.by_version.values():
yield item_info
@@ -377,38 +170,38 @@ class Malcontent:
were not loaded.
"""
def report_missing_dependency(info: ResourceInfo, dep: str) -> None:
- msg = f_('no_dep_{resource}_{ver}_{dep}')\
+ msg = _('no_dep_{resource}_{ver}_{dep}')\
.format(dep=dep, resource=info.identifier,
ver=util.version_string(info.version))
logging.error(msg)
- for resource_info in self._all_of_type('resource'):
+ for resource_info in self._all_infos(self.resource_infos):
for dep in resource_info.dependencies:
- if dep not in self.infos['resource']:
+ if dep not in self.resource_infos:
report_missing_dependency(resource_info, dep)
def report_missing_payload(info: MappingInfo, payload: str) -> None:
- msg = f_('no_payload_{mapping}_{ver}_{payload}')\
+ msg = _('no_payload_{mapping}_{ver}_{payload}')\
.format(mapping=info.identifier, payload=payload,
ver=util.version_string(info.version))
logging.error(msg)
- for mapping_info in self._all_of_type('mapping'):
+ for mapping_info in self._all_infos(self.mapping_infos):
for payload in mapping_info.payloads.values():
- if payload not in self.infos['resource']:
+ if payload not in self.resource_infos:
report_missing_payload(mapping_info, payload)
- def report_missing_mapping(info: Union[MappingInfo, ResourceInfo],
+ def report_missing_mapping(info: ItemInfo,
required_mapping: str) -> None:
msg = _('no_mapping_{required_by}_{ver}_{required}')\
.format(required_by=info.identifier, required=required_mapping,
ver=util.version_string(info.version))
logging.error(msg)
- for item_info in (*self._all_of_type('mapping'),
- *self._all_of_type('resource')):
+ for item_info in (*self._all_infos(self.mapping_infos),
+ *self._all_infos(self.resource_infos)):
for required in item_info.required_mappings:
- if required not in self.infos['mapping']:
+ if required not in self.mapping_infos:
report_missing_mapping(item_info, required)
def _finalize(self):
@@ -416,18 +209,19 @@ class Malcontent:
Initialize structures needed to serve queries. Called once after all
data gets loaded.
"""
- for infos_dict in self.infos.values():
- for versioned_info in infos_dict.values():
+ for versioned_info in (*self.mapping_infos.values(),
+ *self.resource_infos.values()):
versioned_info.known_versions.sort()
- for info in self._all_of_type('mapping'):
+ for info in self._all_infos(self.mapping_infos):
for pattern in info.payloads:
try:
- PatternMapping(pattern, info).register(self.pattern_tree)
+ self.pattern_tree = \
+ self.pattern_tree.register(pattern, info)
except Exception as e:
if flask.current_app._hydrilla_werror:
raise e from None
- msg = f_('couldnt_register_{mapping}_{ver}_{pattern}')\
+ msg = _('couldnt_register_{mapping}_{ver}_{pattern}')\
.format(mapping=info.identifier, pattern=pattern,
ver=util.version_string(info.version))
logging.error(msg)
@@ -439,27 +233,16 @@ class Malcontent:
If multiple versions of a mapping are applicable, only the most recent
is included in the result.
"""
- deco = DeconstructedUrl(url)
-
- collected = {}
-
- domain_tree = self.pattern_tree.get(deco.proto) or PatternTreeNode()
-
- def process_mapping(pattern_mapping: PatternMapping) -> None:
- if url[-1] != '/' and pattern_mapping.pattern[-1] == '/':
- return
-
- info = pattern_mapping.mapping_info
+ collected: dict[str, MappingInfo] = {}
+ for result_set in self.pattern_tree.search(url):
+ for wrapped_mapping_info in result_set:
+ info = wrapped_mapping_info.item
+ previous = collected.get(info.identifier)
+ if previous and previous.version > info.version:
+ continue
- if info.identifier not in collected or \
- info.version > collected[info.identifier].version:
collected[info.identifier] = info
- for path_tree in domain_tree.search(deco.domain):
- for matches_list in path_tree.search(deco.path):
- for pattern_mapping in matches_list:
- process_mapping(pattern_mapping)
-
return list(collected.values())
bp = flask.Blueprint('bp', __package__)
@@ -484,8 +267,6 @@ class HydrillaApp(flask.Flask):
]
}
- self._hydrilla_translation = \
- util.translation(here / 'locales', hydrilla_config['language'])
self._hydrilla_project_url = hydrilla_config['hydrilla_project_url']
self._hydrilla_port = hydrilla_config['port']
self._hydrilla_werror = hydrilla_config.get('werror', False)
@@ -506,8 +287,8 @@ class HydrillaApp(flask.Flask):
'hydrilla_project_url' global variable and to install proper
translations.
"""
- env = super().create_jinja_environment(*args, **kwargs)
- env.install_gettext_translations(self._hydrilla_translation)
+ env = super().create_jinja_environment(*args, **kwargs) # type: ignore
+ env.install_gettext_translations(make_translation())
env.globals['hydrilla_project_url'] = self._hydrilla_project_url
return env
@@ -519,9 +300,6 @@ class HydrillaApp(flask.Flask):
"""
return super().run(*args, port=self._hydrilla_port, **kwargs)
-def f_(text_key):
- return flask.current_app._hydrilla_translation.gettext(text_key)
-
def malcontent():
return flask.current_app._hydrilla_malcontent
@@ -542,7 +320,12 @@ def get_resource_or_mapping(item_type: str, identifier: str) -> Response:
identifier = match.group(1)
- versioned_info = malcontent().infos[item_type].get(identifier)
+ if item_type == 'resource':
+ infos = malcontent().resource_infos
+ else:
+ infos = malcontent().mapping_infos
+
+ versioned_info = infos.get(identifier)
info = versioned_info and versioned_info.get_by_ver()
if info is None:
@@ -586,9 +369,6 @@ default_config_path = Path('/etc/hydrilla/config.json')
default_malcontent_dir = '/var/lib/hydrilla/malcontent'
default_project_url = 'https://hydrillabugs.koszko.org/projects/hydrilla/wiki'
-console_gettext = util.translation(here / 'locales').gettext
-_ = console_gettext
-
@click.command(help=_('serve_hydrilla_packages_explain_wsgi_considerations'))
@click.option('-m', '--malcontent-dir',
type=click.Path(exists=True, file_okay=False),
@@ -600,24 +380,21 @@ _ = console_gettext
@click.option('-c', '--config', 'config_path',
type=click.Path(exists=True, dir_okay=False, resolve_path=True),
help=_('path_to_config_file_explain_default'))
-@click.option('-l', '--language', type=click.STRING,
- help=_('language_to_use_overrides_config'))
@click.version_option(version=_version.version, prog_name='Hydrilla',
message=_('%(prog)s_%(version)s_license'),
help=_('version_printing'))
def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str],
- port: Optional[int], config_path: Optional[str],
- language: Optional[str]) -> None:
+ port: Optional[int], config_path: Optional[str]) -> None:
"""
Run a development Hydrilla server.
This command is meant to be the entry point of hydrilla command exported by
this package.
"""
- config_load_opts = {} if config_path is None \
- else {'config_path': [Path(config_path)]}
-
- hydrilla_config = config.load(**config_load_opts)
+ if config_path is None:
+ hydrilla_config = config.load()
+ else:
+ hydrilla_config = config.load(config_paths=[Path(config_path)])
if malcontent_dir is not None:
hydrilla_config['malcontent_dir'] = str(Path(malcontent_dir).resolve())
@@ -628,14 +405,7 @@ def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str],
if port is not None:
hydrilla_config['port'] = port
- if language is not None:
- hydrilla_config['language'] = language
-
- lang = hydrilla_config.get('language')
- _ = console_gettext if lang is None else \
- util.translation(here / 'locales', lang).gettext
-
- for opt in ('malcontent_dir', 'hydrilla_project_url', 'port', 'language'):
+ for opt in ('malcontent_dir', 'hydrilla_project_url', 'port'):
if opt not in hydrilla_config:
raise ValueError(_('config_option_{}_not_supplied').format(opt))
@@ -649,7 +419,7 @@ def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str],
@click.version_option(version=_version.version, prog_name='Hydrilla',
message=_('%(prog)s_%(version)s_license'),
help=_('version_printing'))
-def start_wsgi() -> None:
+def start_wsgi() -> flask.Flask:
"""
Create application object for use in WSGI deployment.
diff --git a/src/hydrilla/translations.py b/src/hydrilla/translations.py
new file mode 100644
index 0000000..a963e82
--- /dev/null
+++ b/src/hydrilla/translations.py
@@ -0,0 +1,104 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Handling of gettext for Hydrilla.
+#
+# This file is part of Hydrilla
+#
+# Copyright (C) 2021, 2022 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+# Enable using with Python 3.7.
+from __future__ import annotations
+
+import locale as lcl
+import gettext
+
+from pathlib import Path
+from typing import Optional
+
+here = Path(__file__).resolve().parent
+
+localedir = here / 'locales'
+
+supported_locales = [f.name for f in localedir.iterdir() if f.is_dir()]
+
+default_locale = 'en_US'
+
+def select_best_locale() -> str:
+ """
+ Within a flask request, return the best supported match for Accept-Language.
+
+ Otherwise, try to determine system's default language and use that.
+ """
+ # TODO: Stop referencing flask here. Instead, allow other code to register
+ # custom locale resolvers and register flask-aware resolver during
+ # runtime from within the flask-related part(s) of the application.
+ try:
+ import flask
+ use_flask = flask.has_request_context()
+ except ModuleNotFoundError:
+ use_flask = False
+
+ if use_flask:
+ return flask.request.accept_languages.best_match(
+ supported_locales,
+ default=default_locale
+ )
+
+ # https://stackoverflow.com/questions/3425294/how-to-detect-the-os-default-language-in-python
+ # I am not going to surrender to Microbugs' nonfree, crappy OS to test it,
+ # so the lines inside try: block may actually fail.
+ locale: Optional[str] = lcl.getdefaultlocale()[0]
+ try:
+ from ctypes.windll import kernel32 as windll # type: ignore
+ locale = lcl.windows_locale[windll.GetUserDefaultUILanguage()]
+ except:
+ pass
+
+ return locale if locale in supported_locales else default_locale
+
+translations: dict[str, gettext.NullTranslations] = {}
+
+def translation(locale: Optional[str] = None) -> gettext.NullTranslations:
+ """
+ Configure translations for domain 'messages' and return the object that
+ represents them. If the requested locale is not available, fall back to
+ 'en_US'.
+ """
+ if locale is None:
+ locale = select_best_locale()
+
+ if not (localedir / locale).is_dir():
+ locale = 'en_US'
+
+ if locale not in translations:
+ translations[locale] = gettext.translation(
+ 'messages',
+ localedir=localedir,
+ languages=[locale]
+ )
+
+ return translations[locale]
+
+def smart_gettext(msg: str, locale: Optional[str] = None) -> str:
+ """Translate msg; if no locale is given, one is selected automatically."""
+ return translation(locale).gettext(msg)
+
+_ = smart_gettext
diff --git a/src/hydrilla/url_patterns.py b/src/hydrilla/url_patterns.py
new file mode 100644
index 0000000..8e80379
--- /dev/null
+++ b/src/hydrilla/url_patterns.py
@@ -0,0 +1,181 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Data structure for querying URL patterns.
+#
+# This file is part of Hydrilla&Haketilo.
+#
+# Copyright (C) 2021, 2022 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+"""
+This module contains functions for deconstruction and construction of URLs and
+Haketilo URL patterns.
+
+Data structures for querying data using URL patterns are also defined there.
+"""
+
+# Enable using with Python 3.7.
+from __future__ import annotations
+
+import re
+import urllib.parse as up
+import typing as t
+import dataclasses as dc
+
+from immutables import Map
+
+from hydrilla.translations import smart_gettext as _
+from hydrilla.exceptions import HaketiloException
+
+default_ports: t.Mapping[str, int] = Map(http=80, https=443, ftp=21)
+
+@dc.dataclass(frozen=True, unsafe_hash=True)
+class ParsedUrl:
+ """Components of a parsed URL or URL pattern."""
+ orig_url: str # orig_url used in __hash__()
+ scheme: str = dc.field(hash=False)
+ domain_labels: tuple[str, ...] = dc.field(hash=False)
+ path_segments: tuple[str, ...] = dc.field(hash=False)
+ has_trailing_slash: bool = dc.field(hash=False)
+ port: int = dc.field(hash=False)
+
+ # def reconstruct_url(self) -> str:
+ # """...."""
+ # scheme = self.orig_scheme
+
+ # netloc = '.'.join(reversed(self.domain_labels))
+ # if scheme == self.scheme and \
+ # self.port is not None and \
+ # default_ports[scheme] != self.port:
+ # netloc += f':{self.port}'
+
+ # path = '/'.join(('', *self.path_segments))
+ # if self.has_trailing_slash:
+ # path += '/'
+
+ # return f'{scheme}://{netloc}{path}'
+
+# URLs with those schemes will be recognized but not all of them have to be
+# actually supported by Hydrilla server and Haketilo proxy.
+supported_schemes = 'http', 'https', 'ftp', 'file'
+
+def _parse_pattern_or_url(url: str, orig_url: str, is_pattern: bool = False) \
+ -> ParsedUrl:
+ """Validate and parse a URL (or URL pattern) into a ParsedUrl."""
+ if not is_pattern:
+ assert orig_url == url
+
+ parse_result = up.urlparse(url)
+
+ # Verify the parsed URL is valid
+ has_hostname = parse_result.hostname is not None
+ if not parse_result.scheme or \
+ (parse_result.scheme == 'file' and parse_result.port is not None) or \
+ (parse_result.scheme == 'file' and has_hostname) or \
+ (parse_result.scheme != 'file' and not has_hostname):
+ if is_pattern:
+ msg = _('err.url_pattern_{}.bad').format(orig_url)
+ raise HaketiloException(msg)
+ else:
+ raise HaketiloException(_('err.url_{}.bad') .format(url))
+
+ # Verify the URL uses a known scheme and extract it.
+ scheme = parse_result.scheme
+
+ if parse_result.scheme not in supported_schemes:
+ if is_pattern:
+ msg = _('err.url_pattern_{}.bad_scheme').format(orig_url)
+ raise HaketiloException(msg)
+ else:
+ raise HaketiloException(_('err.url_{}.bad_scheme').format(url))
+
+ # Patterns that use the special 'http*:' scheme must not specify a port.
+ if is_pattern and orig_url.startswith('http*:'):
+ if parse_result.port:
+ fmt = _('err.url_pattern_{}.special_scheme_port')
+ raise HaketiloException(fmt.format(orig_url))
+
+ # Extract URL's explicit port or deduce the port based on URL's protocol.
+ try:
+ explicit_port = parse_result.port
+ port_out_of_range = explicit_port == 0
+ except ValueError:
+ port_out_of_range = True
+
+ if port_out_of_range:
+ if is_pattern:
+ msg = _('err.url_pattern_{}.bad_port').format(orig_url)
+ raise HaketiloException(msg)
+ else:
+ raise HaketiloException(_('err.url_{}.bad_port').format(url))
+
+ port = t.cast(int, explicit_port or default_ports.get(parse_result.scheme))
+
+ # Make URL's hostname into a tuple of labels in reverse order. E.g.
+ # 'https://a.bc..de.fg.com/h/i/' -> ('com', 'fg', 'de', 'bc', 'a')
+ hostname = parse_result.hostname or ''
+ domain_labels_with_empty = reversed(hostname.split('.'))
+ domain_labels = tuple(lbl for lbl in domain_labels_with_empty if lbl)
+
+ # Make URL's path into a tuple of segments. E.g.
+ # 'https://ab.cd/e//f/g/' -> ('e', 'f', 'g')
+ path_segments_with_empty = parse_result.path.split('/')
+ path_segments = tuple(sgmt for sgmt in path_segments_with_empty if sgmt)
+
+ # Record whether a trailing '/' is present in the URL.
+ has_trailing_slash = parse_result.path.endswith('/')
+
+ # Perform some additional sanity checks and return the result.
+ if is_pattern:
+ if parse_result.query:
+ msg = _('err.url_pattern_{}.has_query').format(orig_url)
+ raise HaketiloException(msg)
+
+ if parse_result.fragment:
+ msg = _('err.url_pattern_{}.has_frag').format(orig_url)
+ raise HaketiloException(msg)
+
+ return ParsedUrl(
+ orig_url = orig_url,
+ scheme = scheme,
+ port = port,
+ domain_labels = domain_labels,
+ path_segments = path_segments,
+ has_trailing_slash = has_trailing_slash
+ )
+
+replace_scheme_regex = re.compile(r'^[^:]*')
+
+def parse_pattern(url_pattern: str) -> t.Sequence[ParsedUrl]:
+ """Parse a URL pattern; 'http*:' expands to both http and https variants."""
+ if url_pattern.startswith('http*:'):
+ patterns = [
+ replace_scheme_regex.sub('http', url_pattern),
+ replace_scheme_regex.sub('https', url_pattern)
+ ]
+ else:
+ patterns = [url_pattern]
+
+ return tuple(_parse_pattern_or_url(pat, url_pattern, True)
+ for pat in patterns)
+
+def parse_url(url: str) -> ParsedUrl:
+ """Parse a regular (non-pattern) URL into a ParsedUrl."""
+ return _parse_pattern_or_url(url, url)
diff --git a/src/hydrilla/versions.py b/src/hydrilla/versions.py
new file mode 100644
index 0000000..a7a9f29
--- /dev/null
+++ b/src/hydrilla/versions.py
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Functions to operate on version numbers.
+#
+# This file is part of Hydrilla&Haketilo.
+#
+# Copyright (C) 2021, 2022 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+"""
+This module contains functions for deconstruction and construction of version
+strings and version tuples.
+"""
+
+# Enable using with Python 3.7.
+from __future__ import annotations
+
+import typing as t
+
+def normalize_version(ver: t.Sequence[int]) -> tuple[int, ...]:
+ """Return 'ver' as a tuple without trailing zeroes. 'ver' is not modified."""
+ new_len = 0
+ for i, num in enumerate(ver):
+ if num != 0:
+ new_len = i + 1
+
+ return tuple(ver[:new_len])
+
+def parse_version(ver_str: str) -> tuple[int, ...]:
+ """
+ Convert 'ver_str' into a tuple representation, e.g. for ver_str="4.6.13.0"
+ return (4, 6, 13, 0).
+ """
+ return tuple(int(num) for num in ver_str.split('.'))
+
+def version_string(ver: t.Sequence[int], rev: t.Optional[int] = None) -> str:
+ """
+ Produce version's string representation (optionally with revision), like:
+ 1.2.3-5
+ No version normalization is performed.
+ """
+ return '.'.join(str(n) for n in ver) + ('' if rev is None else f'-{rev}')