From 52d12a4fa124daa1595529e3e7008276a7986d95 Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Mon, 13 Jun 2022 11:06:49 +0200 Subject: unfinished partial work --- .gitignore | 6 +- .gitmodules | 10 +- MANIFEST.in | 4 +- conftest.py | 58 ++ doc/man/man1/hydrilla-builder.1 | 94 +++ doc/uml/classes.uxf | 759 +++++++++++++++++++ mypy.ini | 7 + pyproject.toml | 9 +- pytest.ini | 16 - setup.cfg | 44 +- src/hydrilla/__init__.py | 10 +- src/hydrilla/builder/__init__.py | 7 + src/hydrilla/builder/__main__.py | 9 + src/hydrilla/builder/_version.py | 5 + src/hydrilla/builder/build.py | 485 ++++++++++++ src/hydrilla/builder/common_errors.py | 65 ++ src/hydrilla/builder/local_apt.py | 432 +++++++++++ src/hydrilla/builder/piggybacking.py | 117 +++ src/hydrilla/exceptions.py | 40 + src/hydrilla/item_infos.py | 344 +++++++++ src/hydrilla/json_instances.py | 207 ++++++ src/hydrilla/locales/en_US/LC_MESSAGES/messages.po | 252 +++++++ src/hydrilla/locales/pl_PL/LC_MESSAGES/messages.po | 258 +++++++ src/hydrilla/mitmproxy_launcher/__main__.py | 11 + src/hydrilla/mitmproxy_launcher/launch.py | 77 ++ src/hydrilla/pattern_tree.py | 339 +++++++++ src/hydrilla/proxy/__init__.py | 5 + src/hydrilla/proxy/addon.py | 177 +++++ src/hydrilla/proxy/flow_handlers.py | 383 ++++++++++ src/hydrilla/proxy/policies.py | 76 ++ src/hydrilla/proxy/state.py | 73 ++ src/hydrilla/proxy/store.py | 40 + src/hydrilla/py.typed | 5 + src/hydrilla/schemas/1.x | 1 + src/hydrilla/schemas/2.x | 1 + src/hydrilla/server/config.json | 3 - src/hydrilla/server/config.py | 6 +- .../locales/en_US/LC_MESSAGES/hydrilla-messages.po | 151 ---- src/hydrilla/server/serve.py | 406 +++------- src/hydrilla/translations.py | 104 +++ src/hydrilla/url_patterns.py | 181 +++++ src/hydrilla/versions.py | 59 ++ tests/helpers.py | 51 ++ tests/test_build.py | 818 +++++++++++++++++++++ tests/test_item_infos.py | 527 +++++++++++++ tests/test_json_instances.py | 194 +++++ tests/test_local_apt.py | 754 +++++++++++++++++++ 
tests/test_pattern_tree.py | 454 ++++++++++++ tests/test_server.py | 30 +- tests/test_url_patterns.py | 188 +++++ tests/test_versions.py | 41 ++ tests/url_patterns_common.py | 23 + 52 files changed, 7877 insertions(+), 539 deletions(-) create mode 100644 doc/man/man1/hydrilla-builder.1 create mode 100644 doc/uml/classes.uxf create mode 100644 mypy.ini delete mode 100644 pytest.ini create mode 100644 src/hydrilla/builder/__init__.py create mode 100644 src/hydrilla/builder/__main__.py create mode 100644 src/hydrilla/builder/_version.py create mode 100644 src/hydrilla/builder/build.py create mode 100644 src/hydrilla/builder/common_errors.py create mode 100644 src/hydrilla/builder/local_apt.py create mode 100644 src/hydrilla/builder/piggybacking.py create mode 100644 src/hydrilla/exceptions.py create mode 100644 src/hydrilla/item_infos.py create mode 100644 src/hydrilla/json_instances.py create mode 100644 src/hydrilla/locales/en_US/LC_MESSAGES/messages.po create mode 100644 src/hydrilla/locales/pl_PL/LC_MESSAGES/messages.po create mode 100644 src/hydrilla/mitmproxy_launcher/__main__.py create mode 100644 src/hydrilla/mitmproxy_launcher/launch.py create mode 100644 src/hydrilla/pattern_tree.py create mode 100644 src/hydrilla/proxy/__init__.py create mode 100644 src/hydrilla/proxy/addon.py create mode 100644 src/hydrilla/proxy/flow_handlers.py create mode 100644 src/hydrilla/proxy/policies.py create mode 100644 src/hydrilla/proxy/state.py create mode 100644 src/hydrilla/proxy/store.py create mode 100644 src/hydrilla/py.typed create mode 160000 src/hydrilla/schemas/1.x create mode 160000 src/hydrilla/schemas/2.x delete mode 100644 src/hydrilla/server/locales/en_US/LC_MESSAGES/hydrilla-messages.po create mode 100644 src/hydrilla/translations.py create mode 100644 src/hydrilla/url_patterns.py create mode 100644 src/hydrilla/versions.py create mode 100644 tests/helpers.py create mode 100644 tests/test_build.py create mode 100644 tests/test_item_infos.py create mode 100644 
tests/test_json_instances.py create mode 100644 tests/test_local_apt.py create mode 100644 tests/test_pattern_tree.py create mode 100644 tests/test_url_patterns.py create mode 100644 tests/test_versions.py create mode 100644 tests/url_patterns_common.py diff --git a/.gitignore b/.gitignore index ee528db..e2b391c 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,6 @@ dist *.egg-info *.pyc setuptools -src/hydrilla/server/_version.py -src/hydrilla/server/locales/hydrilla-messages.pot -hydrilla-messages.mo +src/hydrilla/_version.py +src/hydrilla/locales/messages.pot +messages.mo diff --git a/.gitmodules b/.gitmodules index 271d652..5e269b4 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,6 +4,14 @@ # # Available under the terms of Creative Commons Zero v1.0 Universal. -[submodule "src/test/source-package-example"] +[submodule "hydrilla-json-schemas-1.x"] + path = src/hydrilla/schemas/1.x + url = ../hydrilla-json-schemas + branch = series-1.x +[submodule "hydrilla-json-schemas-2.x"] + path = src/hydrilla/schemas/2.x + url = ../hydrilla-json-schemas/ + branch = koszko +[submodule "hydrilla-source-package-example"] path = tests/source-package-example url = ../hydrilla-source-package-example/ diff --git a/MANIFEST.in b/MANIFEST.in index 558e461..26efb4d 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -4,7 +4,9 @@ # # Available under the terms of Creative Commons Zero v1.0 Universal. 
@pytest.fixture
def mock_subprocess_run(monkeypatch, request):
    """
    Temporarily replace subprocess.run() with a function supplied through pytest
    marker 'subprocess_run'.

    The marker expects 2 arguments:
    * the module inside which the subprocess attribute should be mocked and
    * a run() function to use.

    A ValueError is raised when the requesting test carries no
    'subprocess_run' marker.
    """
    marker = request.node.get_closest_marker('subprocess_run')
    if marker is None:
        # get_closest_marker() returns None for unmarked tests; fail with a
        # message that names the actual problem instead of an AttributeError
        # on None.
        raise ValueError("fixture 'mock_subprocess_run' requires the test to "
                         "be marked with 'subprocess_run'")

    where, mocked_run = marker.args

    class MockedSubprocess:
        """Minimal mocked version of the subprocess module."""
        run = mocked_run

    # Only the 'subprocess' attribute of the chosen module is swapped;
    # monkeypatch restores it after the test.
    monkeypatch.setattr(where, 'subprocess', MockedSubprocess)
@pytest.fixture
def tmpdir() -> Iterable[Path]:
    """
    Hand the test case a freshly created temporary directory (as a Path) and
    remove that directory once the test is done with it.
    """
    holder = TemporaryDirectory()
    try:
        yield Path(holder.name)
    finally:
        # Same cleanup the context-manager form performs on exit.
        holder.cleanup()
+ +In addition, +.B hydrilla\-builder +can generate an SPDX report from source package if the +\*(lqreuse_generate_spdx_report\*(rq property is set to true in index.json. + +.SH OPTIONS +.TP +.B \-\^\-help +Output a usage message and exit. + +.TP +.BI \-s " SOURCE" "\fR,\fP \-\^\-srcdir=" SOURCE +Use +.I SOURCE +as source package directory to build from. +If not specified, current directory is used. + +.TP +.BI \-i " INDEX_PATH" "\fR,\fP \-\^\-index\-json=" INDEX_PATH +Process the JSON file under +.I INDEX_PATH +instead of index.json inside source directory. +.I INDEX_PATH +may be either absolute or relative. +In the latter case it is resolved with respect to the source directory. + +File provided as +.I INDEX_PATH +will also be included in the generated source archive as +\*(lqindex.json\*(rq, substituting any file with such name that could be +present in the source directory. + +.TP +.BI \-d " DESTINATION" "\fR,\fP \-\^\-dstdir=" DESTINATION +Write generated files under +.IR DESTINATION . +Files are written in such way that +.I DESTINATION +is valid for being passed to Hydrilla to serve packages from. + +.TP +.B \-\^\-version +Show version information for this instance of +.I hydrilla-builder +on the standard output and exit successfully. + +.SH "EXIT STATUS" +The exit status is 0 if build was performed successfully or if the +.B \-\^\-help +option was passed. It is a number different from 0 in all other cases. + +.SH "SEE ALSO" +.SS "Manual Pages" +.BR hydrilla (1). + +.SS "Full Documentation" +.UR https://hydrillabugs.koszko.org/projects/hydrilla/wiki +Online documentation +.UE +is available on Hydrilla issue tracker. diff --git a/doc/uml/classes.uxf b/doc/uml/classes.uxf new file mode 100644 index 0000000..1dcbb56 --- /dev/null +++ b/doc/uml/classes.uxf @@ -0,0 +1,759 @@ + + + 8 + + UMLClass + + 248 + 80 + 128 + 32 + + hydrilla.proxy.addon. +HaketiloAddon + + + + + UMLClass + + 384 + 320 + 184 + 32 + + hydrilla.pattern_tree. 
+PatternTree[Policy] + + + + + UMLClass + + 288 + 200 + 176 + 32 + + hydrilla.proxy.state. +HaketiloState + + + + Relation + + 336 + 104 + 24 + 112 + + lt=<<<<- +m1=1 +m2=1 + 10.0;10.0;10.0;120.0 + + + Relation + + 424 + 224 + 24 + 112 + + lt=<<<<- +m1=1 +m2=1 + + 10.0;10.0;10.0;120.0 + + + UMLClass + + 560 + 384 + 136 + 32 + + /hydrilla.proxy.policies./ +/Policy/ + + + + + Relation + + 536 + 320 + 112 + 80 + + lt=[URL pattern]-> +m2=1..* + + + 40.0;20.0;100.0;20.0;100.0;80.0 + + + UMLNote + + 400 + 32 + 152 + 32 + + Receives&processes events from mitmproxy. + + + + Relation + + 368 + 56 + 48 + 40 + + lt=. + 10.0;30.0;40.0;10.0 + + + UMLNote + + 624 + 88 + 136 + 48 + + Keeps track of all rules, mappings and resources. + + + + Relation + + 456 + 120 + 184 + 96 + + lt=. + 10.0;100.0;210.0;10.0 + + + UMLClass + + 416 + 768 + 136 + 32 + + hydrilla.proxy.policies. +PayloadPolicy + + + + UMLClass + + 416 + 528 + 136 + 32 + + /hydrilla.proxy.policies./ +/BlockPolicy/ + + + + UMLClass + + 416 + 672 + 136 + 32 + + /hydrilla.proxy.policies./ +/AllowPolicy/ + + + + UMLClass + + 416 + 720 + 136 + 32 + + hydrilla.proxy.policies. +MetaResourcePolicy + + + + Relation + + 544 + 408 + 120 + 392 + + lt=<<. + 130.0;10.0;130.0;470.0;10.0;470.0 + + + Relation + + 544 + 408 + 104 + 344 + + lt=<<. + 110.0;10.0;110.0;410.0;10.0;410.0 + + + Relation + + 544 + 408 + 88 + 296 + + lt=<<. + 90.0;10.0;90.0;350.0;10.0;350.0 + + + Relation + + 544 + 408 + 72 + 152 + + lt=<<. + 70.0;10.0;70.0;170.0;10.0;170.0 + + + Relation + + 328 + 728 + 104 + 24 + + lt=<<<<- + 10.0;10.0;110.0;10.0 + + + UMLClass + + 168 + 720 + 168 + 32 + + hydrilla.proxy.flow_handlers. +FlowHandlerMetaResource + + + + Relation + + 488 + 352 + 256 + 504 + + lt=<<<<- +m1=1 +m2=1 + 10.0;560.0;10.0;610.0;300.0;610.0;300.0;10.0 + + + UMLClass + + 648 + 328 + 192 + 32 + + hydrilla.item_infos. +MappingInfo + + + + UMLClass + + 576 + 176 + 232 + 32 + + hydrilla.item_infos. 
+VersionedItemInfo[MappingInfo] + + + + Relation + + 432 + 208 + 160 + 56 + + lt=[repository]-> +m2=1..* + + + 40.0;10.0;100.0;10.0;100.0;40.0;180.0;40.0 + + + UMLClass + + 576 + 224 + 232 + 32 + + hydrilla.item_infos. +VersionedItemInfo[ResourceInfo] + + + + Relation + + 504 + 184 + 88 + 48 + + lt=-> +m2=1..* + + + 10.0;40.0;10.0;10.0;90.0;10.0 + + + UMLClass + + 648 + 280 + 192 + 32 + + hydrilla.item_infos. +ResourceInfo + + + + Relation + + 784 + 184 + 160 + 184 + + lt=[version]-> +m2=1..* + 30.0;10.0;180.0;10.0;180.0;200.0;70.0;200.0 + + + Relation + + 784 + 232 + 128 + 88 + + lt=[version]-> +m2=1..* + + + 30.0;10.0;140.0;10.0;140.0;80.0;70.0;80.0 + + + Relation + + 336 + 536 + 96 + 24 + + lt=<<<<- + 10.0;10.0;100.0;10.0 + + + UMLClass + + 176 + 528 + 168 + 32 + + hydrilla.proxy.flow_handlers. +FlowHandlerBlockScripts + + + + Relation + + 328 + 680 + 104 + 24 + + lt=<<<<- + 10.0;10.0;110.0;10.0 + + + UMLClass + + 168 + 672 + 168 + 32 + + hydrilla.proxy.flow_handlers. +FlowHandlerAllowScripts + + + + Relation + + 328 + 776 + 104 + 24 + + lt=<<<<- + 10.0;10.0;110.0;10.0 + + + UMLClass + + 168 + 768 + 168 + 32 + + hydrilla.proxy.flow_handlers. +FlowHandlerInjectPayload + + + + UMLClass + + 8 + 304 + 168 + 32 + + /hydrilla.proxy.flow_handlers./ +/FlowHandler/ + + + + Relation + + 104 + 328 + 88 + 232 + + lt=<<. + 10.0;10.0;10.0;270.0;90.0;270.0 + + + Relation + + 88 + 328 + 96 + 376 + + lt=<<. + 10.0;10.0;10.0;450.0;100.0;450.0 + + + Relation + + 72 + 328 + 112 + 424 + + lt=<<. + 10.0;10.0;10.0;510.0;120.0;510.0 + + + Relation + + 56 + 328 + 128 + 472 + + lt=<<. + 10.0;10.0;10.0;570.0;140.0;570.0 + + + Relation + + 48 + 80 + 216 + 240 + + lt=<. +<<instantiates>> + + + + + + + + + + + + + + + + + 10.0;280.0;10.0;10.0;250.0;10.0 + + + UMLClass + + 296 + 432 + 136 + 32 + + hydrilla.proxy.policies. +FallbackBlockPolicy + + + + Relation + + 480 + 456 + 24 + 88 + + lt=<<. + 10.0;90.0;10.0;10.0 + + + UMLClass + + 152 + 432 + 136 + 32 + + hydrilla.proxy.policies. 
+ErrorBlockPolicy + + + + Relation + + 360 + 456 + 144 + 56 + + lt=. + 160.0;50.0;10.0;50.0;10.0;10.0 + + + UMLClass + + 440 + 432 + 136 + 32 + + hydrilla.proxy.policies. +RuleBlockPolicy + + + + Relation + + 216 + 456 + 168 + 56 + + lt=. + 190.0;50.0;10.0;50.0;10.0;10.0 + + + UMLClass + + 296 + 576 + 136 + 32 + + hydrilla.proxy.policies. +FallbackAllowPolicy + + + + Relation + + 360 + 600 + 144 + 56 + + lt=. + 160.0;50.0;10.0;50.0;10.0;10.0 + + + UMLClass + + 440 + 576 + 136 + 32 + + hydrilla.proxy.policies. +RuleAllowPolicy + + + + Relation + + 480 + 600 + 24 + 88 + + lt=<<. + 10.0;90.0;10.0;10.0 + + + UMLNote + + 616 + 40 + 144 + 32 + + Facilitates storing and modifying data on-disk. + + + + Relation + + 584 + 48 + 48 + 48 + + lt=. + 10.0;40.0;40.0;10.0 + + + UMLClass + + 432 + 80 + 160 + 32 + + hydrilla.proxy.store. +HaketiloStore + + + + Relation + + 440 + 104 + 24 + 112 + + lt=<<- + 10.0;10.0;10.0;120.0 + + + UMLClass + + 248 + 272 + 88 + 24 + + lt=. +StateUpdater + + + + Relation + + 136 + 264 + 128 + 56 + + lt=<. +<<produces>> + 140.0;20.0;10.0;20.0;10.0;50.0 + + + Relation + + 312 + 224 + 64 + 64 + + lt=<. +<<uses>> + 10.0;10.0;10.0;60.0 + + + Relation + + 256 + 104 + 64 + 184 + + lt=<. +<<uses>> + + + + + + + 10.0;210.0;10.0;10.0 + + + UMLNote + + 136 + 176 + 96 + 24 + + Function type + + + + Relation + + 184 + 192 + 80 + 96 + + lt=. 
+ 80.0;100.0;10.0;10.0 + + diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..a9b026f --- /dev/null +++ b/mypy.ini @@ -0,0 +1,7 @@ +[mypy] + +[mypy-ruamel] +ignore_missing_imports = True + +[mypy-mitmproxy.contrib.kaitaistruct] +ignore_missing_imports = True diff --git a/pyproject.toml b/pyproject.toml index 623201c..a582eff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,11 +9,16 @@ build-backend = "setuptools.build_meta" requires = ["setuptools>=44", "wheel", "setuptools_scm>=5.0", "babel"] [tool.setuptools_scm] -write_to = "src/hydrilla/server/_version.py" +write_to = "src/hydrilla/_version.py" [tool.pytest.ini_options] minversion = "6.0" -addopts = "-ra -q" +addopts = "-ra" testpaths = [ "tests" ] +markers = [ + "mod_before_build: define a callback to use to modify test packages before their build", + "mod_after_build: define a callback to use to modify test packages after their build", + "subprocess_run: define how mocked subprocess.run should behave" +] diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index b4ea538..0000000 --- a/pytest.ini +++ /dev/null @@ -1,16 +0,0 @@ -# SPDX-License-Identifier: CC0-1.0 - -# Disable deprecation warnings from third-party libraries -# -# Copyright (C) 2021 Wojtek Kosior -# -# Available under the terms of Creative Commons Zero v1.0 Universal. 
- -[pytest] -filterwarnings = - ignore::DeprecationWarning:werkzeug.*: - ignore::DeprecationWarning:jinja2.*: - -markers = - mod_before_build: define a callback to use to modify test packages before their build - mod_after_build: define a callback to use to modify test packages after their build diff --git a/setup.cfg b/setup.cfg index e18c8ae..16dc04e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -8,7 +8,7 @@ name = hydrilla author = Wojtek Kosior author_email = koszko@koszko.org -description = Hydrilla repository server +description = Hydrilla&Haketilo custom website resources tools long_description = file: README.md long_description_content_type = text/markdown url = https://git.koszko.org/pydrilla @@ -19,9 +19,11 @@ license = AGPL-3.0-or-later classifiers = Development Status :: 4 - Beta Intended Audience :: Developers + Intended Audience :: End Users/Desktop Environment :: Web Environment Environment :: Console Topic :: Internet :: WWW/HTTP :: WSGI + Topic :: Internet :: Proxy Servers License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+) Natural Language :: English Operating System :: OS Independent @@ -35,25 +37,35 @@ packages = find: include_package_data=True python_requires = >= 3.7 install_requires = - hydrilla.builder==1.1b1 - flask jsonschema>=3.0 + click + immutables [options.package_data] -hydrilla.server = locales/*/LC_MESSAGES/hydrilla-messages.mo +hydrilla = + locales/*/LC_MESSAGES/hydrilla-messages.mo + py.typed [options.extras_require] -test = pytest +test = pytest; flask setup = setuptools_scm; babel +builder = +server = flask +haketilo = mitmproxy; beautifulsoup4[html5lib] + +SPDX = reuse +all = reuse; flask; mitmproxy; beautifulsoup4 + [options.packages.find] where = src -exclude = - test [options.entry_points] console_scripts = hydrilla = hydrilla.server:start + hydrilla-server = hydrilla.server:start + hydrilla-builder = hydrilla.builder.build:perform + haketilo = hydrilla.proxy_launcher:start [extract_messages] 
mapping_file = babel.cfg @@ -61,22 +73,22 @@ keywords = _ f_ add_comments = TRANSLATORS: width = 80 input_dirs = src/hydrilla -output_file = src/hydrilla/server/locales/hydrilla-messages.pot +output_file = src/hydrilla/locales/messages.pot msgid_bugs_address = koszko@koszko.org sort_by_file = True copyright_holder = Wojtek Kosior [init_catalog] -input_file = src/hydrilla/server/locales/hydrilla-messages.pot -output_dir = src/hydrilla/server/locales/ -domain = hydrilla-messages +input_file = src/hydrilla/locales/messages.pot +output_dir = src/hydrilla/locales/ +domain = messages [update_catalog] -input_file = src/hydrilla/server/locales/hydrilla-messages.pot -output_dir = src/hydrilla/server/locales/ -domain = hydrilla-messages +input_file = src/hydrilla/locales/messages.pot +output_dir = src/hydrilla/locales/ +domain = messages [compile_catalog] -directory = src/hydrilla/server/locales +directory = src/hydrilla/locales use_fuzzy = True -domain = hydrilla-messages +domain = messages diff --git a/src/hydrilla/__init__.py b/src/hydrilla/__init__.py index 6aeb276..d382ead 100644 --- a/src/hydrilla/__init__.py +++ b/src/hydrilla/__init__.py @@ -1,7 +1,5 @@ -# SPDX-License-Identifier: 0BSD +# SPDX-License-Identifier: CC0-1.0 -# Copyright (C) 2013-2020, PyPA - -# https://packaging.python.org/en/latest/guides/packaging-namespace-packages/#pkgutil-style-namespace-packages - -__path__ = __import__('pkgutil').extend_path(__path__, __name__) +# Copyright (C) 2022 Wojtek Kosior +# +# Available under the terms of Creative Commons Zero v1.0 Universal. diff --git a/src/hydrilla/builder/__init__.py b/src/hydrilla/builder/__init__.py new file mode 100644 index 0000000..73dc579 --- /dev/null +++ b/src/hydrilla/builder/__init__.py @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: CC0-1.0 + +# Copyright (C) 2022 Wojtek Kosior +# +# Available under the terms of Creative Commons Zero v1.0 Universal. 
+ +from .build import Build diff --git a/src/hydrilla/builder/__main__.py b/src/hydrilla/builder/__main__.py new file mode 100644 index 0000000..87dc9e2 --- /dev/null +++ b/src/hydrilla/builder/__main__.py @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: CC0-1.0 + +# Copyright (C) 2022 Wojtek Kosior +# +# Available under the terms of Creative Commons Zero v1.0 Universal. + +from . import build + +build.perform() diff --git a/src/hydrilla/builder/_version.py b/src/hydrilla/builder/_version.py new file mode 100644 index 0000000..2feb153 --- /dev/null +++ b/src/hydrilla/builder/_version.py @@ -0,0 +1,5 @@ +# coding: utf-8 +# file generated by setuptools_scm +# don't change, don't track in version control +version = '1.1b1' +version_tuple = (1, '1b1') diff --git a/src/hydrilla/builder/build.py b/src/hydrilla/builder/build.py new file mode 100644 index 0000000..acc6576 --- /dev/null +++ b/src/hydrilla/builder/build.py @@ -0,0 +1,485 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Building Hydrilla packages. +# +# This file is part of Hydrilla +# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +# Enable using with Python 3.7. 
+from __future__ import annotations + +import json +import re +import zipfile +import subprocess +from pathlib import Path, PurePosixPath +from hashlib import sha256 +from sys import stderr +from contextlib import contextmanager +from tempfile import TemporaryDirectory, TemporaryFile +from typing import Optional, Iterable, Iterator, Union + +import jsonschema # type: ignore +import click + +from .. import _version, json_instances, versions +from ..translations import smart_gettext as _ +from . import local_apt +from .piggybacking import Piggybacked +from .common_errors import * + +here = Path(__file__).resolve().parent + +schemas_root = 'https://hydrilla.koszko.org/schemas' + +generated_by = { + 'name': 'hydrilla.builder', + 'version': _version.version +} + +class ReuseError(SubprocessError): + """ + Exception used to report various problems when calling the REUSE tool. + """ + +def generate_spdx_report(root: Path) -> bytes: + """ + Use REUSE tool to generate an SPDX report for sources under 'root' and + return the report's contents as 'bytes'. + + In case the directory tree under 'root' does not constitute a + REUSE-compliant package, as exception is raised with linting report + included in it. + + In case the reuse tool is not installed, an exception is also raised. 
class FileRef:
    """
    Represent reference to a file in the package.

    Instances start out marked as included in the source archive and excluded
    from the distribution; callers flip the two flags as needed.
    """
    def __init__(self, path: PurePosixPath, contents: bytes) -> None:
        """
        Initialize FileRef.

        'path' is the path the file is referenced by and 'contents' are the
        file's raw bytes. The SHA256 sum of 'contents' is computed right away
        and stored as a hex string in 'contents_hash'.
        """
        self.include_in_distribution = False
        self.include_in_source_archive = True
        self.path = path
        self.contents = contents

        self.contents_hash = sha256(contents).hexdigest()

    def make_ref_dict(self) -> dict[str, str]:
        """
        Represent the file reference through a dict that can be included in JSON
        definitions.

        The dict has 2 keys: 'file' (the path as a string) and 'sha256' (hex
        digest of the contents).
        """
        return {
            'file': str(self.path),
            'sha256': self.contents_hash
        }
def _process_file(self, filename: Union[str, PurePosixPath],
                  piggybacked: Piggybacked,
                  include_in_distribution: bool=True) -> dict[str, str]:
    """
    Resolve 'filename' relative to srcdir, load it to memory (if not loaded
    before), compute its hash and store its information in
    'self.files_by_path'.

    'filename' shall represent a relative path within package directory.

    if 'include_in_distribution' is True it shall cause the file to not only
    be included in the source package's zipfile, but also written as one of
    built package's files.

    For each file an attempt is made to resolve it using 'piggybacked'
    object. If a file is found and pulled from foreign software packaging
    system this way, it gets automatically excluded from inclusion in
    Hydrilla source package's zipfile.

    Return file's reference object that can be included in JSON definitions
    of various kinds.

    FileReferenceError is raised when the path contains '..', points outside
    of the source directory, names the reserved 'index.json' or does not
    refer to an existing regular file.
    """
    include_in_source_archive = True

    desired_path = PurePosixPath(filename)
    if '..' in desired_path.parts:
        msg = _('path_contains_double_dot_{}').format(filename)
        raise FileReferenceError(msg)

    path = piggybacked.resolve_file(desired_path)
    if path is None:
        path = (self.srcdir / desired_path).resolve()

        # Path.is_relative_to() was only added in Python 3.9 while this
        # module is meant to work with Python 3.7; relative_to() raising
        # ValueError carries the same information on all versions.
        try:
            relative_path = path.relative_to(self.srcdir)
        except ValueError:
            raise FileReferenceError(_('loading_{}_outside_package_dir')
                                     .format(filename)) from None

        if str(relative_path) == 'index.json':
            raise FileReferenceError(_('loading_reserved_index_json'))
    else:
        # File comes from the foreign packaging system, not from srcdir.
        include_in_source_archive = False

    file_ref = self.files_by_path.get(desired_path)
    if file_ref is None:
        if not path.is_file():
            msg = _('referenced_file_{}_missing').format(desired_path)
            raise FileReferenceError(msg)

        file_ref = FileRef(desired_path, path.read_bytes())
        self.files_by_path[desired_path] = file_ref

    # Flags are only ever switched one way here, so a file referenced
    # several times keeps the strongest setting requested for it.
    if include_in_distribution:
        file_ref.include_in_distribution = True

    if not include_in_source_archive:
        file_ref.include_in_source_archive = False

    return file_ref.make_ref_dict()
def _process_item(self, as_what: str, item_def: dict,
                  piggybacked: Piggybacked):
    """
    Process 'item_def' as definition of a resource or mapping (determined by
    'as_what' param) and store in memory its processed form and files used
    by it.

    The processed definition gets appended to 'self.resource_list' when
    'as_what' is 'resource' and to 'self.mapping_list' otherwise.

    Return a minimal item reference suitable for using in source
    description.
    """
    resulting_schema_version = [1]

    # Optional properties are only copied when the definition has them.
    copy_props = ['identifier', 'long_name', 'description',
                  *(p for p in ('comment', 'uuid') if p in item_def)]

    new_item_obj: dict = {}

    if as_what == 'resource':
        item_list = self.resource_list

        copy_props.append('revision')

        script_file_refs = [self._process_file(f['file'], piggybacked)
                            for f in item_def.get('scripts', [])]

        deps = [{'identifier': res_ref['identifier']}
                for res_ref in item_def.get('dependencies', [])]

        new_item_obj['dependencies'] = \
            [*piggybacked.resource_must_depend, *deps]
        new_item_obj['scripts'] = script_file_refs
    else:
        item_list = self.mapping_list

        new_item_obj['payloads'] = {
            pat: {'identifier': res_ref['identifier']}
            for pat, res_ref in item_def.get('payloads', {}).items()
        }

    new_item_obj['version'] = \
        versions.normalize_version(item_def['version'])

    # A mapping generated from a combined definition gets the revision
    # appended to its version.
    if as_what == 'mapping' and item_def['type'] == "mapping_and_resource":
        new_item_obj['version'].append(item_def['revision'])

    if self.source_schema_ver >= [2]:
        # handle 'required_mappings' field
        required = [{'identifier': map_ref['identifier']}
                    for map_ref in item_def.get('required_mappings', [])]
        if required:
            resulting_schema_version = max(resulting_schema_version, [2])
            new_item_obj['required_mappings'] = required

        # handle 'permissions' field
        permissions = item_def.get('permissions', {})
        processed_permissions = {
            name: True
            for name in ('cors_bypass', 'eval')
            if permissions.get(name)
        }

        if processed_permissions:
            new_item_obj['permissions'] = processed_permissions
            resulting_schema_version = max(resulting_schema_version, [2])

        # handle '{min,max}_haketilo_version' fields
        for minmax, default in ('min', [1]), ('max', [65536]):
            constraint = item_def.get(f'{minmax}_haketilo_version')
            if constraint in (None, default):
                continue

            copy_props.append(f'{minmax}_haketilo_version')
            resulting_schema_version = max(resulting_schema_version, [2])

    new_item_obj.update((p, item_def[p]) for p in copy_props)

    new_item_obj['$schema'] = ''.join([
        schemas_root,
        f'/api_{as_what}_description',
        '-',
        versions.version_string(resulting_schema_version),
        '.schema.json'
    ])
    new_item_obj['type'] = as_what
    new_item_obj['source_copyright'] = self.copyright_file_refs
    new_item_obj['source_name'] = self.source_name
    new_item_obj['generated_by'] = generated_by

    item_list.append(new_item_obj)

    props_in_ref = ('type', 'identifier', 'version', 'long_name')
    return {prop: new_item_obj[prop] for prop in props_in_ref}
+ """ + self.source_schema_ver = \ + versions.normalize_version(get_schema_version(index_obj)) + + out_schema = f'{schemas_root}/api_source_description-1.schema.json' + + self.source_name = index_obj['source_name'] + + generate_spdx = index_obj.get('reuse_generate_spdx_report', False) + if generate_spdx: + contents = generate_spdx_report(self.srcdir) + spdx_path = PurePosixPath('report.spdx') + spdx_ref = FileRef(spdx_path, contents) + + spdx_ref.include_in_source_archive = False + self.files_by_path[spdx_path] = spdx_ref + + piggyback_def = None + if self.source_schema_ver >= [2] and 'piggyback_on' in index_obj: + piggyback_def = index_obj['piggyback_on'] + + with piggybacked_system(piggyback_def, self.piggyback_files) \ + as piggybacked: + copyright_to_process = [ + *(file_ref['file'] for file_ref in index_obj['copyright']), + *piggybacked.package_license_files + ] + self.copyright_file_refs = [self._process_file(f, piggybacked) + for f in copyright_to_process] + + if generate_spdx and not spdx_ref.include_in_distribution: + raise FileReferenceError(_('report_spdx_not_in_copyright_list')) + + item_refs = [] + for item_def in index_obj['definitions']: + if 'mapping' in item_def['type']: + ref = self._process_item('mapping', item_def, piggybacked) + item_refs.append(ref) + if 'resource' in item_def['type']: + ref = self._process_item('resource', item_def, piggybacked) + item_refs.append(ref) + + for file_ref in index_obj.get('additional_files', []): + self._process_file(file_ref['file'], piggybacked, + include_in_distribution=False) + + zipfile_sha256 = self._prepare_source_package_zip\ + (self.source_name, piggybacked) + + source_archives_obj = {'zip' : {'sha256': zipfile_sha256}} + + self.source_description = { + '$schema': out_schema, + 'source_name': self.source_name, + 'source_copyright': self.copyright_file_refs, + 'upstream_url': index_obj['upstream_url'], + 'definitions': item_refs, + 'source_archives': source_archives_obj, + 'generated_by': generated_by + 
def write_package_files(self, dstpath: Path):
    """
    Write package files under 'dstpath' for distribution.

    The following layout gets created (directories are made as needed):
      file/sha256/<hash>         - contents of every file marked for
                                   inclusion in the distribution,
      source/<source_name>.json  - the source description,
      source/<source_name>.zip   - the source archive,
      resource/<id>/<version>    - JSON definition of each resource,
      mapping/<id>/<version>     - JSON definition of each mapping.
    """
    hashed_files_dir = (dstpath / 'file' / 'sha256').resolve()
    hashed_files_dir.mkdir(parents=True, exist_ok=True)

    distributed = (ref for ref in self.files_by_path.values()
                   if ref.include_in_distribution)
    for ref in distributed:
        # Files are stored under the name of their contents' hash.
        (hashed_files_dir / ref.contents_hash).write_bytes(ref.contents)

    sources_dir = (dstpath / 'source').resolve()
    sources_dir.mkdir(parents=True, exist_ok=True)
    name = self.source_description["source_name"]

    with open(sources_dir / f'{name}.json', 'wt') as json_out:
        json.dump(self.source_description, json_out)

    with open(sources_dir / f'{name}.zip', 'wb') as zip_out:
        zip_out.write(self.source_zip_contents)

    items_by_type = (
        ('resource', self.resource_list),
        ('mapping', self.mapping_list),
    )
    for type_name, definitions in items_by_type:
        type_dir = (dstpath / type_name).resolve()

        for definition in definitions:
            item_dir = type_dir / definition['identifier']
            item_dir.mkdir(parents=True, exist_ok=True)

            # The definition file is named after the dotted version.
            version_name = '.'.join(map(str, definition['version']))
            with open(item_dir / version_name, 'wt') as json_out:
                json.dump(definition, json_out)
'--piggyback-files', type=click.Path(), + help=_('path_instead_for_piggyback_files')) +@click.option('-d', '--dstdir', type=dir_type, required=True, + help=_('built_package_files_destination')) +@click.version_option(version=_version.version, prog_name='Hydrilla builder', + message=_('%(prog)s_%(version)s_license'), + help=_('version_printing')) +def perform(srcdir, index_json, piggyback_files, dstdir): + """ + Execute Hydrilla builder to turn source package into a distributable one. + + This command is meant to be the entry point of hydrilla-builder command + exported by this package. + """ + build = Build(Path(srcdir), Path(index_json), + piggyback_files and Path(piggyback_files)) + build.write_package_files(Path(dstdir)) diff --git a/src/hydrilla/builder/common_errors.py b/src/hydrilla/builder/common_errors.py new file mode 100644 index 0000000..ed4d0d2 --- /dev/null +++ b/src/hydrilla/builder/common_errors.py @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Error classes. +# +# This file is part of Hydrilla +# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. 
class SubprocessError(Exception):
    """
    Exception used to report problems related to execution of external
    processes, including various problems when calling apt-* and dpkg-*
    commands.
    """
    def __init__(self, msg: str, cp: Optional[CP]=None) -> None:
        """
        Initialize this SubprocessError.

        'msg' is the description of the problem. When 'cp' (a completed
        process) is given, its non-empty standard output and standard error
        get appended to the message, each under its own translated heading.
        Output captured as bytes is decoded first so that building the
        message never fails on its own.
        """
        if cp and cp.stdout:
            msg = '\n\n'.join([msg, _('STDOUT_OUTPUT_heading'),
                               self._as_text(cp.stdout)])

        if cp and cp.stderr:
            msg = '\n\n'.join([msg, _('STDERR_OUTPUT_heading'),
                               self._as_text(cp.stderr)])

        super().__init__(msg)

    @staticmethod
    def _as_text(output) -> str:
        """Return captured process output as str, decoding bytes leniently."""
        if isinstance(output, (bytes, bytearray)):
            # Undecodable bytes must not mask the error being reported.
            return bytes(output).decode(errors='replace')

        return output
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +# Enable using with Python 3.7. +from __future__ import annotations + +import zipfile +import shutil +import re +import subprocess +CP = subprocess.CompletedProcess +from pathlib import Path, PurePosixPath +from tempfile import TemporaryDirectory, NamedTemporaryFile +from hashlib import sha256 +from urllib.parse import unquote +from contextlib import contextmanager +from typing import Optional, Iterable, Iterator + +from ..translations import smart_gettext as _ +from .piggybacking import Piggybacked +from .common_errors import * + +here = Path(__file__).resolve().parent + +""" +Default cache directory to save APT configurations and downloaded GPG keys in. +""" +default_apt_cache_dir = Path.home() / '.cache' / 'hydrilla' / 'builder' / 'apt' + +""" +Default keyserver to use. +""" +default_keyserver = 'hkps://keyserver.ubuntu.com:443' + +""" +Default keys to download when using a local APT. +""" +default_keys = [ + # Trisquel + 'E6C27099CA21965B734AEA31B4EFB9F38D8AEBF1', + '60364C9869F92450421F0C22B138CA450C05112F', + # Ubuntu + '630239CC130E1A7FD81A27B140976EAF437D05B5', + '790BC7277767219C42C86F933B4FE6ACC0B21F32', + 'F6ECB3762474EDA9D21B7022871920D1991BC93C', + # Debian + '6D33866EDD8FFA41C0143AEDDCC9EFBF77E11517', + '80D15823B7FD1561F9F7BCDDDC30D7C23CBBABEE', + 'AC530D520F2F3269F5E98313A48449044AAD5C5D' +] + +"""sources.list file contents for known distros.""" +default_lists = { + 'nabia': [f'{type} http://archive.trisquel.info/trisquel/ nabia{suf} main' + for type in ('deb', 'deb-src') + for suf in ('', '-updates', '-security')] +} + +class GpgError(Exception): + """ + Exception used to report various problems when calling GPG. 
def run(command, **kwargs):
    """
    A wrapper around subprocess.run that sets some default options.

    By default the command is run with an environment consisting solely of
    LANG=en_US, with its output captured and returned as text. Any of these
    defaults can be overridden through 'kwargs'; all other keyword arguments
    are passed to subprocess.run unchanged.

    Return the subprocess.CompletedProcess object.
    """
    options = {
        'env':            {'LANG': 'en_US'},
        'capture_output': True,
        'text':           True
    }
    # Explicit arguments take precedence over the defaults instead of
    # causing a "multiple values for keyword argument" TypeError.
    options.update(kwargs)

    return subprocess.run(command, **options)
def cache_apt_root(apt_root: Path, destination_zip: Path) -> None:
    """
    Zip an APT root directory for later use and move the zipfile to the
    requested destination.

    The archive is first written to a temporary file inside the cache
    directory and only moved to 'destination_zip' once complete. The files
    'etc/apt.conf' and 'etc/trusted.gpg' are left out of the archive. The
    temporary file is removed if anything fails before the move.
    """
    to_skip = {Path('etc') / 'apt.conf', Path('etc') / 'trusted.gpg'}

    temporary_zip_path = None
    try:
        # The context manager closes the file handle; delete=False keeps the
        # file itself around so that it can be moved afterwards.
        with NamedTemporaryFile(suffix='.zip', prefix='tmp_',
                                dir=cache_dir(), delete=False) as tmpfile:
            temporary_zip_path = Path(tmpfile.name)

            with zipfile.ZipFile(tmpfile, 'w') as zf:
                for member in apt_root.rglob('*'):
                    relative = member.relative_to(apt_root)
                    if relative not in to_skip:
                        # This call will also properly add empty folders to
                        # zip file
                        zf.write(member, relative, zipfile.ZIP_DEFLATED)

        shutil.move(temporary_zip_path, destination_zip)
    finally:
        # After a successful move the temporary path no longer exists.
        if temporary_zip_path is not None and temporary_zip_path.exists():
            temporary_zip_path.unlink()
def download_apt_packages(list: SourcesList, keys: list[str],
                          packages: list[str], destination_dir: Path,
                          with_deps: bool) -> list[str]:
    """
    Set up a local APT, update it using the specified sources.list configuration
    and use it to download the specified packages.

    This function downloads .deb files of packages matching the amd64
    architecture (which includes packages with architecture 'all') as well as
    all their corresponding source package files and (if requested) the debs
    and source files of all their declared dependencies.

    Raise AptError when the output of 'apt-get install --just-print' contains
    no recognizable package or when a downloaded file has an unexpected name.

    Return value is a list of names of all downloaded files.
    """
    # Matches lines like "Inst <name> (<version> ...".
    install_line_regex = re.compile(r'^Inst (?P<name>\S+) \((?P<version>\S+) ')

    with local_apt(list, keys) as apt:
        if with_deps:
            # Simulate installation to learn the exact set of packages (with
            # versions) that APT would pull in.
            cp = apt.get('install', '--yes', '--just-print', *packages)

            lines = cp.stdout.split('\n')
            matches = [install_line_regex.match(l) for l in lines]
            packages = [f'{m.group("name")}={m.group("version")}'
                        for m in matches if m]

            if not packages:
                raise AptError(_('apt_install_output_not_understood'), cp)

        # Download .debs indirectly to destination_dir by first placing them
        # in a temporary subdirectory.
        with TemporaryDirectory(dir=destination_dir) as td_str:
            td = Path(td_str)
            cp = apt.get('download', *packages, cwd=td)

            deb_name_regex = re.compile(
                r'''
                ^
                (?P<name>[^_]+)
                _
                (?P<ver>[^_]+)
                _
                .+              # architecture (or 'all')
                \.deb
                $
                ''',
                re.VERBOSE)

            names_vers = []
            downloaded = []
            for deb_file in td.iterdir():
                match = deb_name_regex.match(deb_file.name)
                if match is None:
                    msg = _('apt_download_gave_bad_filename_{}')\
                        .format(deb_file.name)
                    raise AptError(msg, cp)

                # Name and version are percent-encoded in .deb file names.
                names_vers.append((
                    unquote(match.group('name')),
                    unquote(match.group('ver'))
                ))
                downloaded.append(deb_file.name)

            apt.get('source', '--download-only',
                    *[f'{n}={v}' for n, v in names_vers], cwd=td)

            # Everything in the directory that is not one of the .debs is a
            # source package file.
            for source_file in td.iterdir():
                if source_file.name in downloaded:
                    continue

                downloaded.append(source_file.name)

            for filename in downloaded:
                shutil.move(td / filename, destination_dir / filename)

    return downloaded
@contextmanager
def piggybacked_system(piggyback_def: dict, foreign_packages: Optional[Path]) \
    -> Iterator[Piggybacked]:
    """
    Resolve resources from APT. Optionally, use package files (.deb's, etc.)
    from a specified directory instead of resolving and downloading them.

    'piggyback_def' must have 'system' equal to 'apt'. Its 'packages' and
    'dependencies' entries are required when packages need to be downloaded;
    'sources_list', 'distribution', 'trusted_keys' and
    'depend_on_base_packages' are optional.

    When 'foreign_packages' is given, its 'apt' subdirectory is used for
    package archives and downloading only happens if that subdirectory holds
    no .deb file yet.

    Raise AptError when dpkg-deb cannot be executed or fails.

    The directories and files created for the yielded Piggybacked object shall
    be deleted when this context manager gets closed.
    """
    assert piggyback_def['system'] == 'apt'

    with TemporaryDirectory() as td_str:
        td = Path(td_str)
        root = td / 'root'
        root.mkdir()

        if foreign_packages is None:
            archives = td / 'archives'
            archives.mkdir()
        else:
            archives = foreign_packages / 'apt'
            archives.mkdir(exist_ok=True)

        if [*archives.glob('*.deb')] == []:
            sources_list = SourcesList(piggyback_def.get('sources_list', []),
                                       piggyback_def.get('distribution'))
            packages = piggyback_def['packages']
            with_deps = piggyback_def['dependencies']
            pgp_keys = [
                *default_keys,
                *piggyback_def.get('trusted_keys', [])
            ]

            download_apt_packages(
                list=sources_list,
                keys=pgp_keys,
                packages=packages,
                destination_dir=archives,
                with_deps=with_deps
            )

        # Unpack every .deb into the common root directory.
        for deb in archives.glob('*.deb'):
            command = ['dpkg-deb', '-x', str(deb), str(root)]
            try:
                cp = run(command)
            except FileNotFoundError:
                # The message has to be translated first and formatted
                # afterwards; formatting the msgid would break the lookup.
                msg = _('couldnt_execute_{}_is_it_installed').format('dpkg-deb')
                raise AptError(msg)

            if cp.returncode != 0:
                msg = _('command_{}_failed').format(' '.join(command))
                raise AptError(msg, cp)

        # License files of unpacked packages, expressed as paths under the
        # virtual '.apt-root' directory.
        docs_dir = root / 'usr' / 'share' / 'doc'
        copyright_paths = [p / 'copyright' for p in docs_dir.iterdir()] \
            if docs_dir.exists() else []
        copyright_pure_paths = [PurePosixPath('.apt-root') / p.relative_to(root)
                                for p in copyright_paths if p.exists()]

        standard_depends = piggyback_def.get('depend_on_base_packages', True)
        must_depend = [{'identifier': 'apt-common-licenses'}] \
            if standard_depends else []

        yield Piggybacked(
            archives={'apt': archives},
            roots={'.apt-root': root},
            package_license_files=copyright_pure_paths,
            resource_must_depend=must_depend
        )
class Piggybacked:
    """
    Store information about foreign resources in use.

    Public attributes:
        'resource_must_depend' (read-only)
        'package_license_files' (read-only)
    """
    def __init__(self, archives: Optional[dict[str, Path]]=None,
                 roots: Optional[dict[str, Path]]=None,
                 package_license_files: Optional[list[PurePosixPath]]=None,
                 resource_must_depend: Optional[list[dict]]=None):
        """
        Initialize this Piggybacked object. Every argument that is omitted
        (or None) defaults to a fresh empty container.

        'archives' maps piggybacked system names to directories that contain
        package(s)' archive files. An 'archives' object may look like
        {'apt': PosixPath('/path/to/dir/with/debs/and/tarballs')}.

        'roots' associates directory names to be virtually inserted under
        Hydrilla source package directory with paths to real filesystem
        directories that hold their desired contents, i.e. unpacked foreign
        packages.

        'package_license_files' lists paths to license files that should be
        included with the Haketilo package that will be produced. The paths are
        to be resolved using 'roots' dictionary.

        'resource_must_depend' lists names of Haketilo packages that the
        produced resources will additionally depend on. This is meant to help
        distribute common licenses with a separate Haketilo package.
        """
        # Fresh containers are created per instance so that objects built
        # with defaults never share (and accidentally mutate) the same data.
        self.archives = {} if archives is None else archives
        self.roots = {} if roots is None else roots
        self.package_license_files = \
            [] if package_license_files is None else package_license_files
        self.resource_must_depend = \
            [] if resource_must_depend is None else resource_must_depend

    def resolve_file(self, file_ref_name: PurePosixPath) -> Optional[Path]:
        """
        'file_ref_name' is a path as may appear in an index.json file. Check if
        the file belongs to one of the roots we have and return either a path
        to the relevant file under this root or None.

        Raise FileReferenceError if the path, once resolved, points outside of
        its root directory.

        It is not being checked whether the file actually exists in the
        filesystem.
        """
        parts = file_ref_name.parts
        if not parts:
            return None

        root_path = self.roots.get(parts[0])
        if root_path is None:
            return None

        # Both paths get resolved so that a root reached through a symlink
        # is compared on equal terms with the resolved file path.
        root_path = root_path.resolve()
        path = root_path.joinpath(*parts[1:]).resolve()

        # relative_to() is used instead of is_relative_to() because the
        # latter is only available since Python 3.9.
        try:
            path.relative_to(root_path)
        except ValueError:
            raise FileReferenceError(_('loading_{}_outside_piggybacked_dir')
                                     .format(file_ref_name)) from None

        return path

    def archive_files(self) -> Iterable[tuple[PurePosixPath, Path]]:
        """
        Yield all archive files in use. Each yielded tuple holds file's desired
        path relative to the piggybacked archives directory to be created and
        its current real path.
        """
        for system, real_dir in self.archives.items():
            for path in real_dir.rglob('*'):
                yield PurePosixPath(system) / path.relative_to(real_dir), path
+# +# This file is part of Hydrilla&Haketilo +# +# Copyright (C) 2021, 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +""" +..... +""" + +# Enable using with Python 3.7. +from __future__ import annotations + +import typing as t +import dataclasses as dc + +from pathlib import Path, PurePath + +from immutables import Map, MapMutation + +from . import versions, json_instances +from .url_patterns import parse_pattern, ParsedUrl +from .exceptions import HaketiloException +from .translations import smart_gettext as _ + +VerTuple = t.Tuple[int, ...] 
@dc.dataclass(frozen=True, unsafe_hash=True)
class GeneratedBy:
    """Immutable record of the tool (name and optional version) that
    generated an item definition."""
    name:    str
    version: t.Optional[str]

    @staticmethod
    def make(generated_obj: t.Optional[t.Mapping[str, t.Any]]) -> \
        t.Optional['GeneratedBy']:
        """
        Build a GeneratedBy from a 'generated_by' JSON object.

        Return None when 'generated_obj' is None. The 'name' key is required
        while a missing 'version' key results in version being None.
        """
        if generated_obj is not None:
            tool_version = generated_obj.get('version')
            return GeneratedBy(name=generated_obj['name'], version=tool_version)

        return None
@dc.dataclass(frozen=True, unsafe_hash=True)
class ResourceInfo(ItemInfoBase):
    """
    Immutable description of a single resource version: the common item data
    plus the resource's revision, dependencies and scripts.
    """
    revision:     int                 = dc.field(hash=False)
    dependencies: tuple[ItemRef, ...] = dc.field(hash=False)
    scripts:      tuple[FileRef, ...] = dc.field(hash=False)

    @property
    def versioned_identifier(self):
        """
        Identifier with version as given by the base class, followed by a
        dash and this resource's revision number.
        """
        # The base class' 'versioned_identifier' is a property and therefore
        # must be read as an attribute rather than called.
        return f'{super().versioned_identifier}-{self.revision}'

    @staticmethod
    def make(
            item_obj:      t.Mapping[str, t.Any],
            schema_compat: int,
            repository:    str
    ) -> 'ResourceInfo':
        """
        Create a ResourceInfo from resource's JSON object 'item_obj'.

        'revision' is required in 'item_obj'; missing 'dependencies' and
        'scripts' are treated as empty. 'schema_compat' and 'repository' are
        passed on to the computation of the common item fields.
        """
        base_init_kwargs = ItemInfoBase._get_base_init_kwargs(
            item_obj,
            schema_compat,
            repository
        )

        return ResourceInfo(
            **base_init_kwargs,

            revision     = item_obj['revision'],
            dependencies = make_item_refs_seq(item_obj.get('dependencies', [])),
            scripts      = make_file_refs_seq(item_obj.get('scripts', [])),
        )

    @staticmethod
    def load(instance_or_path: 'InstanceOrPath', repository: str) \
        -> 'ResourceInfo':
        """
        Load a resource description from 'instance_or_path' by delegating to
        _load_item_info() and return the resulting ResourceInfo.
        """
        return _load_item_info(ResourceInfo, instance_or_path, repository)

    # class property
    type_name = 'resource'
_load_item_info( + info_type: t.Type[LoadedType], + instance_or_path: InstanceOrPath, + repository: str +) -> LoadedType: + """Read, validate and autocomplete a mapping/resource description.""" + instance = json_instances.read_instance(instance_or_path) + + schema_fmt = f'api_{info_type.type_name}_description-{{}}.schema.json' + + schema_compat = json_instances.validate_instance(instance, schema_fmt) + + # We know from successful validation that instance is a dict. + return info_type.make( + t.cast('dict[str, t.Any]', instance), + schema_compat, + repository + ) + + +VersionedType = t.TypeVar('VersionedType', ResourceInfo, MappingInfo) + +@dc.dataclass(frozen=True) +class VersionedItemInfo(t.Generic[VersionedType]): + """Stores data of multiple versions of given resource/mapping.""" + uuid: t.Optional[str] = None + identifier: str = '' + _by_version: Map[VerTuple, VersionedType] = Map() + _initialized: bool = False + + def register(self, item_info: VersionedType) -> 'VersionedInfoSelfType': + """ + Make item info queryable by version. Perform sanity checks for uuid. 
+ """ + identifier = item_info.identifier + if self._initialized: + assert identifier == self.identifier + + if self.uuid is not None: + uuid: t.Optional[str] = self.uuid + if item_info.uuid is not None and self.uuid != item_info.uuid: + raise HaketiloException(_('uuid_mismatch_{identifier}') + .format(identifier=identifier)) + else: + uuid = item_info.uuid + + by_version = self._by_version.set(item_info.version, item_info) + + return VersionedItemInfo( + identifier = identifier, + uuid = uuid, + _by_version = by_version, + _initialized = True + ) + + def unregister(self, version: VerTuple) -> 'VersionedInfoSelfType': + """Return a copy without the given version; absent versions are ignored.""" + try: + by_version = self._by_version.delete(version) + except KeyError: + by_version = self._by_version + + return dc.replace(self, _by_version=by_version) + + def is_empty(self) -> bool: + """Tell whether no version of the item is currently registered.""" + return len(self._by_version) == 0 + + def newest_version(self) -> VerTuple: + """Return the highest registered version; the item must not be empty.""" + assert not self.is_empty() + + return max(self._by_version.keys()) + + def get_newest(self) -> VersionedType: + """Find and return info of the newest version of item.""" + newest = self._by_version[self.newest_version()] + assert newest is not None + return newest + + def get_by_ver(self, ver: t.Iterable[int]) -> t.Optional[VersionedType]: + """ + Find and return info of the specified version of the item (or None if + absent). + """ + return self._by_version.get(tuple(ver)) + + def get_all(self) -> t.Iterator[VersionedType]: + """Generate item info for all its versions, from oldest to newest.""" + for version in sorted(self._by_version.keys()): + yield self._by_version[version] + +# Below we define 1 type used by recursively-typed VersionedItemInfo. 
+VersionedInfoSelfType = VersionedItemInfo[VersionedType] diff --git a/src/hydrilla/json_instances.py b/src/hydrilla/json_instances.py new file mode 100644 index 0000000..40b213b --- /dev/null +++ b/src/hydrilla/json_instances.py @@ -0,0 +1,207 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Handling JSON objects. +# +# This file is part of Hydrilla&Haketilo. +# +# Copyright (C) 2021, 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +""" +This module contains utilities for reading and validation of JSON instances. +""" + +# Enable using with Python 3.7. 
+from __future__ import annotations + +import re +import json +import os +import typing as t + +from pathlib import Path, PurePath + +from jsonschema import RefResolver, Draft7Validator # type: ignore + +from .translations import smart_gettext as _ +from .exceptions import HaketiloException +from .versions import parse_version + +here = Path(__file__).resolve().parent + +_strip_comment_re = re.compile(r''' +^ # match from the beginning of each line +( # catch the part before '//' comment + (?: # this group matches either a string or a single out-of-string character + [^"/] | + " + (?: # this group matches any in-a-string character + [^"\\] | # match any normal character + \\[^u] | # match any escaped character like '\f' or '\n' + \\u[a-fA-F0-9]{4} # match an escape + )* + " + )* +) +# expect either end-of-line or a comment: +# * unterminated strings will cause matching to fail +# * bad comment (with '/' instead of '//') will be indicated by second group +# having length 1 instead of 2 or 0 +(//?|$) +''', re.VERBOSE) + +def strip_json_comments(text: str) -> str: + """ + Accept JSON text with optional C++-style ('//') comments and return the text + with comments removed. Consecutive slashes inside strings are handled + properly. A spurious single slash ('/') shall generate an error. Errors in + JSON itself shall be ignored. 
+ """ + stripped_text = [] + for line_num, line in enumerate(text.split('\n'), start=1): + match = _strip_comment_re.match(line) + + if match is None: # unterminated string + # ignore this error, let the json module report it + stripped = line + elif len(match[2]) == 1: + msg_fmt = _('bad_json_comment_line_{line_num}_char_{char_num}') + + raise HaketiloException(msg_fmt.format( + line_num = line_num, + char_num = len(match[1]) + 1 + )) + else: + stripped = match[1] + + stripped_text.append(stripped) + + return '\n'.join(stripped_text) + +_schema_name_re = re.compile(r''' +(?P<name_base>[^/]*) +- +(?P<ver> + (?P<major>[1-9][0-9]*) + (?: # this repeated group matches the remaining version numbers + \. + (?:[1-9][0-9]*|0) + )* +) +\.schema\.json +$ +''', re.VERBOSE) + +schema_paths: dict[str, Path] = {} +for path in (here / 'schemas').rglob('*.schema.json'): + match = _schema_name_re.match(path.name) + assert match is not None + + schema_name_base = match.group('name_base') + schema_ver_list = match.group('ver').split('.') + + for i in range(len(schema_ver_list)): + schema_ver = '.'.join(schema_ver_list[:i+1]) + schema_paths[f'{schema_name_base}-{schema_ver}.schema.json'] = path + +schema_paths.update([(f'https://hydrilla.koszko.org/schemas/{name}', path) + for name, path in schema_paths.items()]) + +schemas: dict[Path, dict[str, t.Any]] = {} + +def _get_schema(schema_name: str) -> dict[str, t.Any]: + """Return loaded JSON of the requested schema. Cache results.""" + path = schema_paths.get(schema_name) + if path is None: + raise HaketiloException(_('unknown_schema_{}').format(schema_name)) + + if path not in schemas: + schemas[path] = json.loads(path.read_text()) + + return schemas[path] + +def validator_for(schema: t.Union[str, dict[str, t.Any]]) -> Draft7Validator: + """ + Prepare a validator for the provided schema. + + Other schemas under '../schemas' can be referenced. 
+ """ + if isinstance(schema, str): + schema = _get_schema(schema) + + resolver = RefResolver( + base_uri=schema['$id'], + referrer=schema, + handlers={'https': _get_schema} + ) + + return Draft7Validator(schema, resolver=resolver) + +def parse_instance(text: str) -> object: + """Parse 'text' as JSON with additional '//' comments support.""" + return json.loads(strip_json_comments(text)) + +InstanceOrPath = t.Union[Path, str, t.Dict[str, t.Any]] + +def read_instance(instance_or_path: InstanceOrPath) -> object: + """Return the passed dict as-is or parse commented JSON from the file.""" + if isinstance(instance_or_path, dict): + return instance_or_path + + with open(instance_or_path, 'rt') as handle: + text = handle.read() + + try: + return parse_instance(text) + except Exception: + raise HaketiloException(_('text_in_{}_not_valid_json')\ + .format(instance_or_path)) + +def get_schema_version(instance: object) -> tuple[int, ...]: + """ + Parse passed object's "$schema" property and return the schema version tuple. + """ + ver_str: t.Optional[str] = None + + if isinstance(instance, dict) and type(instance.get('$schema')) is str: + match = _schema_name_re.search(instance['$schema']) + ver_str = match.group('ver') if match else None + + if ver_str is not None: + return parse_version(ver_str) + else: + raise HaketiloException(_('no_schema_number_in_instance')) + +def get_schema_major_number(instance: object) -> int: + """ + Parse passed object's "$schema" property and return the major number of + schema version. 
+ """ + return get_schema_version(instance)[0] + +def validate_instance(instance: object, schema_name_fmt: str) -> int: + """....""" + major = get_schema_major_number(instance) + schema_name = schema_name_fmt.format(major) + validator = validator_for(schema_name) + + validator.validate(instance) + + return major diff --git a/src/hydrilla/locales/en_US/LC_MESSAGES/messages.po b/src/hydrilla/locales/en_US/LC_MESSAGES/messages.po new file mode 100644 index 0000000..12abee5 --- /dev/null +++ b/src/hydrilla/locales/en_US/LC_MESSAGES/messages.po @@ -0,0 +1,252 @@ +# SPDX-License-Identifier: CC0-1.0 +# +# English (United States) translations for hydrilla. +# Copyright (C) 2021, 2022 Wojtek Kosior +# Available under the terms of Creative Commons Zero v1.0 Universal. +msgid "" +msgstr "" +"Project-Id-Version: hydrilla 2.0\n" +"Report-Msgid-Bugs-To: koszko@koszko.org\n" +"POT-Creation-Date: 2022-06-07 10:23+0200\n" +"PO-Revision-Date: 2022-02-12 00:00+0000\n" +"Last-Translator: Wojtek Kosior \n" +"Language: en_US\n" +"Language-Team: en_US \n" +"Plural-Forms: nplurals=2; plural=(n != 1)\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.8.0\n" + +#: src/hydrilla/builder/build.py:83 src/hydrilla/builder/local_apt.py:116 +#: src/hydrilla/builder/local_apt.py:408 +msgid "couldnt_execute_{}_is_it_installed" +msgstr "Could not execute '{}'. Is the tool installed and reachable via PATH?" + +#: src/hydrilla/builder/build.py:87 src/hydrilla/builder/local_apt.py:120 +#: src/hydrilla/builder/local_apt.py:412 +msgid "command_{}_failed" +msgstr "The following command finished execution with a non-zero exit status: {}" + +#: src/hydrilla/builder/build.py:160 +msgid "unknown_schema_package_source_{}" +msgstr "" +"The provided JSON at '{}' does not use any of the known package source " +"JSON schemas." 
+ +#: src/hydrilla/builder/build.py:196 +msgid "path_contains_double_dot_{}" +msgstr "" +"Attempt to load '{}' which includes a forbidden parent reference ('..') " +"in the path." + +#: src/hydrilla/builder/build.py:203 +msgid "loading_{}_outside_package_dir" +msgstr "Attempt to load '{}' which lies outside package source directory." + +#: src/hydrilla/builder/build.py:207 +msgid "loading_reserved_index_json" +msgstr "Attempt to load 'index.json' which is a reserved filename." + +#: src/hydrilla/builder/build.py:214 +msgid "referenced_file_{}_missing" +msgstr "Referenced file '{}' is missing." + +#: src/hydrilla/builder/build.py:396 +msgid "report_spdx_not_in_copyright_list" +msgstr "" +"Told to generate 'report.spdx' but 'report.spdx' is not listed among " +"copyright files. Refusing to proceed." + +#: src/hydrilla/builder/build.py:473 +msgid "build_package_from_srcdir_to_dstdir" +msgstr "" +"Build Hydrilla package from `scrdir` and write the resulting files under " +"`dstdir`." + +#: src/hydrilla/builder/build.py:475 +msgid "source_directory_to_build_from" +msgstr "Source directory to build from." + +#: src/hydrilla/builder/build.py:477 +msgid "path_instead_of_index_json" +msgstr "" +"Path to file to be processed instead of index.json (if not absolute, " +"resolved relative to srcdir)." + +#: src/hydrilla/builder/build.py:479 +msgid "path_instead_for_piggyback_files" +msgstr "" +"Path to a non-standard directory with foreign packages' archive files to " +"use." + +#: src/hydrilla/builder/build.py:481 +msgid "built_package_files_destination" +msgstr "Destination directory to write built package files to." 
+ +#: src/hydrilla/builder/build.py:483 src/hydrilla/server/serve.py:582 +#: src/hydrilla/server/serve.py:604 src/hydrilla/server/serve.py:647 +#, python-format +msgid "%(prog)s_%(version)s_license" +msgstr "" +"%(prog)s %(version)s\n" +"Copyright (C) 2021,2022 Wojtek Kosior and contributors.\n" +"License GPLv3+: GNU AGPL version 3 or later " +"\n" +"This is free software: you are free to change and redistribute it.\n" +"There is NO WARRANTY, to the extent permitted by law." + +#: src/hydrilla/builder/build.py:484 src/hydrilla/server/serve.py:605 +#: src/hydrilla/server/serve.py:648 +msgid "version_printing" +msgstr "Print version information and exit." + +#: src/hydrilla/builder/common_errors.py:58 +msgid "STDOUT_OUTPUT_heading" +msgstr "## Command's standard output ##" + +#: src/hydrilla/builder/common_errors.py:61 +msgid "STDERR_OUTPUT_heading" +msgstr "## Command's standard error output ##" + +#: src/hydrilla/builder/local_apt.py:145 +msgid "distro_{}_unknown" +msgstr "Attempt to use an unknown software distribution '{}'." + +#: src/hydrilla/builder/local_apt.py:189 +msgid "couldnt_import_{}_is_it_installed" +msgstr "" +"Could not import '{}'. Is the module installed and visible to this Python" +" instance?" + +#: src/hydrilla/builder/local_apt.py:197 +msgid "gpg_couldnt_recv_key_{}" +msgstr "Could not import PGP key '{}'." + +#: src/hydrilla/builder/local_apt.py:311 +msgid "apt_install_output_not_understood" +msgstr "The output of an 'apt-get install' command was not understood." + +#: src/hydrilla/builder/local_apt.py:337 +msgid "apt_download_gave_bad_filename_{}" +msgstr "The 'apt-get download' command produced a file with unexpected name '{}'." + +#: src/hydrilla/builder/piggybacking.py:100 +msgid "loading_{}_outside_piggybacked_dir" +msgstr "" +"Attempt to load '{}' which lies outside piggybacked packages files root " +"directory." 
+ +#: src/hydrilla/server/serve.py:126 +#, python-brace-format +msgid "uuid_mismatch_{identifier}" +msgstr "Two different uuids were specified for item '{identifier}'." + +#: src/hydrilla/server/serve.py:133 +#, python-brace-format +msgid "version_clash_{identifier}_{version}" +msgstr "Version '{version}' specified more than once for item '{identifier}'." + +#: src/hydrilla/server/serve.py:249 src/hydrilla/server/serve.py:261 +msgid "invalid_URL_{}" +msgstr "Invalid URL/pattern: '{}'." + +#: src/hydrilla/server/serve.py:253 +msgid "disallowed_protocol_{}" +msgstr "Disallowed protocol: '{}'." + +#: src/hydrilla/server/serve.py:306 +msgid "malcontent_dir_path_not_dir_{}" +msgstr "Provided 'malcontent_dir' path does not name a directory: {}" + +#: src/hydrilla/server/serve.py:325 +msgid "couldnt_load_item_from_{}" +msgstr "Couldn't load item from {}." + +#: src/hydrilla/server/serve.py:350 +msgid "item_{item}_in_file_{file}" +msgstr "Item {item} incorrectly present under {file}." + +#: src/hydrilla/server/serve.py:356 +msgid "item_version_{ver}_in_file_{file}" +msgstr "Item version {ver} incorrectly present under {file}." + +#: src/hydrilla/server/serve.py:379 +msgid "no_dep_{resource}_{ver}_{dep}" +msgstr "Unknown dependency '{dep}' of resource '{resource}', version '{ver}'." + +#: src/hydrilla/server/serve.py:390 +msgid "no_payload_{mapping}_{ver}_{payload}" +msgstr "Unknown payload '{payload}' of mapping '{mapping}', version '{ver}'." + +#: src/hydrilla/server/serve.py:402 +msgid "no_mapping_{required_by}_{ver}_{required}" +msgstr "Unknown mapping '{required}' required by '{required_by}', version '{ver}'." + +#: src/hydrilla/server/serve.py:429 +msgid "couldnt_register_{mapping}_{ver}_{pattern}" +msgstr "" +"Couldn't register mapping '{mapping}', version '{ver}' (pattern " +"'{pattern}')." 
+ +#: src/hydrilla/server/serve.py:590 +msgid "serve_hydrilla_packages_explain_wsgi_considerations" +msgstr "" +"Serve Hydrilla packages.\n" +"\n" +"This command is meant to be a quick way to run a local or development " +"Hydrilla instance. For better performance, consider deployment using " +"WSGI." + +#: src/hydrilla/server/serve.py:593 +msgid "directory_to_serve_from_overrides_config" +msgstr "" +"Directory to serve files from. Overrides value from the config file (if " +"any)." + +#: src/hydrilla/server/serve.py:595 +msgid "project_url_to_display_overrides_config" +msgstr "" +"Project url to display on generated HTML pages. Overrides value from the " +"config file (if any)." + +#: src/hydrilla/server/serve.py:597 +msgid "tcp_port_to_listen_on_overrides_config" +msgstr "" +"TCP port number to listen on (0-65535). Overrides value from the config " +"file (if any)." + +#: src/hydrilla/server/serve.py:600 +msgid "path_to_config_file_explain_default" +msgstr "" +"Path to Hydrilla server configuration file (optional, by default Hydrilla" +" loads its own config file, which in turn tries to load " +"/etc/hydrilla/config.json)." + +#: src/hydrilla/server/serve.py:637 +msgid "config_option_{}_not_supplied" +msgstr "Missing configuration option '{}'." + +#: src/hydrilla/server/serve.py:641 +msgid "serve_hydrilla_packages_wsgi_help" +msgstr "" +"Serve Hydrilla packages.\n" +"\n" +"This program is a WSGI script that runs Hydrilla repository behind an " +"HTTP server like Apache2 or Nginx. You can configure Hydrilla through the" +" /etc/hydrilla/config.json file." + +#. 'hydrilla' as a title +#: src/hydrilla/server/templates/base.html:99 +#: src/hydrilla/server/templates/base.html:105 +msgid "hydrilla" +msgstr "Hydrilla" + +#: src/hydrilla/server/templates/index.html:29 +msgid "hydrilla_welcome" +msgstr "Welcome to Hydrilla!" 
+ +#: src/hydrilla/util/_util.py:86 +msgid "bad_comment" +msgstr "bad comment" + diff --git a/src/hydrilla/locales/pl_PL/LC_MESSAGES/messages.po b/src/hydrilla/locales/pl_PL/LC_MESSAGES/messages.po new file mode 100644 index 0000000..57cca4d --- /dev/null +++ b/src/hydrilla/locales/pl_PL/LC_MESSAGES/messages.po @@ -0,0 +1,258 @@ +# SPDX-License-Identifier: CC0-1.0 +# +# English (United States) translations for hydrilla. +# Copyright (C) 2021, 2022 Wojtek Kosior +# Available under the terms of Creative Commons Zero v1.0 Universal. +msgid "" +msgstr "" +"Project-Id-Version: hydrilla 2.0\n" +"Report-Msgid-Bugs-To: koszko@koszko.org\n" +"POT-Creation-Date: 2022-06-07 10:23+0200\n" +"PO-Revision-Date: 2022-02-12 00:00+0000\n" +"Last-Translator: Wojtek Kosior \n" +"Language: en_US\n" +"Language-Team: en_US \n" +"Plural-Forms: nplurals=2; plural=(n != 1)\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.8.0\n" + +#: src/hydrilla/builder/build.py:83 src/hydrilla/builder/local_apt.py:116 +#: src/hydrilla/builder/local_apt.py:408 +msgid "couldnt_execute_{}_is_it_installed" +msgstr "Could not execute '{}'. Is the tool installed and reachable via PATH?" + +#: src/hydrilla/builder/build.py:87 src/hydrilla/builder/local_apt.py:120 +#: src/hydrilla/builder/local_apt.py:412 +msgid "command_{}_failed" +msgstr "The following command finished execution with a non-zero exit status: {}" + +#: src/hydrilla/builder/build.py:160 +msgid "unknown_schema_package_source_{}" +msgstr "" +"The provided JSON at '{}' does not use any of the known package source " +"JSON schemas." + +#: src/hydrilla/builder/build.py:196 +msgid "path_contains_double_dot_{}" +msgstr "" +"Attempt to load '{}' which includes a forbidden parent reference ('..') " +"in the path." + +#: src/hydrilla/builder/build.py:203 +msgid "loading_{}_outside_package_dir" +msgstr "Attempt to load '{}' which lies outside package source directory." 
+ +#: src/hydrilla/builder/build.py:207 +msgid "loading_reserved_index_json" +msgstr "Attempt to load 'index.json' which is a reserved filename." + +#: src/hydrilla/builder/build.py:214 +msgid "referenced_file_{}_missing" +msgstr "Referenced file '{}' is missing." + +#: src/hydrilla/builder/build.py:396 +msgid "report_spdx_not_in_copyright_list" +msgstr "" +"Told to generate 'report.spdx' but 'report.spdx' is not listed among " +"copyright files. Refusing to proceed." + +#: src/hydrilla/builder/build.py:473 +msgid "build_package_from_srcdir_to_dstdir" +msgstr "" +"Build Hydrilla package from `scrdir` and write the resulting files under " +"`dstdir`." + +#: src/hydrilla/builder/build.py:475 +msgid "source_directory_to_build_from" +msgstr "Source directory to build from." + +#: src/hydrilla/builder/build.py:477 +msgid "path_instead_of_index_json" +msgstr "" +"Path to file to be processed instead of index.json (if not absolute, " +"resolved relative to srcdir)." + +#: src/hydrilla/builder/build.py:479 +msgid "path_instead_for_piggyback_files" +msgstr "" +"Path to a non-standard directory with foreign packages' archive files to " +"use." + +#: src/hydrilla/builder/build.py:481 +msgid "built_package_files_destination" +msgstr "Destination directory to write built package files to." + +#: src/hydrilla/builder/build.py:483 src/hydrilla/server/serve.py:582 +#: src/hydrilla/server/serve.py:604 src/hydrilla/server/serve.py:647 +#, python-format +msgid "%(prog)s_%(version)s_license" +msgstr "" +"%(prog)s %(version)s\n" +"Copyright (C) 2021,2022 Wojtek Kosior and contributors.\n" +"License GPLv3+: GNU AGPL version 3 or later " +"\n" +"This is free software: you are free to change and redistribute it.\n" +"There is NO WARRANTY, to the extent permitted by law." + +#: src/hydrilla/builder/build.py:484 src/hydrilla/server/serve.py:605 +#: src/hydrilla/server/serve.py:648 +msgid "version_printing" +msgstr "Print version information and exit." 
+ +#: src/hydrilla/builder/common_errors.py:58 +msgid "STDOUT_OUTPUT_heading" +msgstr "## Command's standard output ##" + +#: src/hydrilla/builder/common_errors.py:61 +msgid "STDERR_OUTPUT_heading" +msgstr "## Command's standard error output ##" + +#: src/hydrilla/builder/local_apt.py:145 +msgid "distro_{}_unknown" +msgstr "Attempt to use an unknown software distribution '{}'." + +#: src/hydrilla/builder/local_apt.py:189 +msgid "couldnt_import_{}_is_it_installed" +msgstr "" +"Could not import '{}'. Is the module installed and visible to this Python" +" instance?" + +#: src/hydrilla/builder/local_apt.py:197 +msgid "gpg_couldnt_recv_key_{}" +msgstr "Could not import PGP key '{}'." + +#: src/hydrilla/builder/local_apt.py:311 +msgid "apt_install_output_not_understood" +msgstr "The output of an 'apt-get install' command was not understood." + +#: src/hydrilla/builder/local_apt.py:337 +msgid "apt_download_gave_bad_filename_{}" +msgstr "The 'apt-get download' command produced a file with unexpected name '{}'." + +#: src/hydrilla/builder/piggybacking.py:100 +msgid "loading_{}_outside_piggybacked_dir" +msgstr "" +"Attempt to load '{}' which lies outside piggybacked packages files root " +"directory." + +#: src/hydrilla/server/serve.py:126 +#, python-brace-format +msgid "uuid_mismatch_{identifier}" +msgstr "Two different uuids were specified for item '{identifier}'." + +#: src/hydrilla/server/serve.py:133 +#, python-brace-format +msgid "version_clash_{identifier}_{version}" +msgstr "Version '{version}' specified more than once for item '{identifier}'." + +#: src/hydrilla/server/serve.py:249 src/hydrilla/server/serve.py:261 +msgid "invalid_URL_{}" +msgstr "Invalid URL/pattern: '{}'." + +#: src/hydrilla/server/serve.py:253 +msgid "disallowed_protocol_{}" +msgstr "Disallowed protocol: '{}'." 
+ +#: src/hydrilla/server/serve.py:306 +msgid "malcontent_dir_path_not_dir_{}" +msgstr "Provided 'malcontent_dir' path does not name a directory: {}" + +#: src/hydrilla/server/serve.py:325 +msgid "couldnt_load_item_from_{}" +msgstr "Couldn't load item from {}." + +#: src/hydrilla/server/serve.py:350 +msgid "item_{item}_in_file_{file}" +msgstr "Item {item} incorrectly present under {file}." + +#: src/hydrilla/server/serve.py:356 +msgid "item_version_{ver}_in_file_{file}" +msgstr "Item version {ver} incorrectly present under {file}." + +#: src/hydrilla/server/serve.py:379 +msgid "no_dep_{resource}_{ver}_{dep}" +msgstr "Unknown dependency '{dep}' of resource '{resource}', version '{ver}'." + +#: src/hydrilla/server/serve.py:390 +msgid "no_payload_{mapping}_{ver}_{payload}" +msgstr "Unknown payload '{payload}' of mapping '{mapping}', version '{ver}'." + +#: src/hydrilla/server/serve.py:402 +msgid "no_mapping_{required_by}_{ver}_{required}" +msgstr "Unknown mapping '{required}' required by '{required_by}', version '{ver}'." + +#: src/hydrilla/server/serve.py:429 +msgid "couldnt_register_{mapping}_{ver}_{pattern}" +msgstr "" +"Couldn't register mapping '{mapping}', version '{ver}' (pattern " +"'{pattern}')." + +#: src/hydrilla/server/serve.py:590 +msgid "serve_hydrilla_packages_explain_wsgi_considerations" +msgstr "" +"Serve Hydrilla packages.\n" +"\n" +"This command is meant to be a quick way to run a local or development " +"Hydrilla instance. For better performance, consider deployment using " +"WSGI." + +#: src/hydrilla/server/serve.py:593 +msgid "directory_to_serve_from_overrides_config" +msgstr "" +"Directory to serve files from. Overrides value from the config file (if " +"any)." + +#: src/hydrilla/server/serve.py:595 +msgid "project_url_to_display_overrides_config" +msgstr "" +"Project url to display on generated HTML pages. Overrides value from the " +"config file (if any)." 
+ +#: src/hydrilla/server/serve.py:597 +msgid "tcp_port_to_listen_on_overrides_config" +msgstr "" +"TCP port number to listen on (0-65535). Overrides value from the config " +"file (if any)." + +#: src/hydrilla/server/serve.py:600 +msgid "path_to_config_file_explain_default" +msgstr "" +"Path to Hydrilla server configuration file (optional, by default Hydrilla" +" loads its own config file, which in turn tries to load " +"/etc/hydrilla/config.json)." + +#: src/hydrilla/server/serve.py:602 +msgid "language_to_use_overrides_config" +msgstr "" +"Language to use (also affects served HTML files). Overrides value from " +"the config file (if any)." + +#: src/hydrilla/server/serve.py:637 +msgid "config_option_{}_not_supplied" +msgstr "Missing configuration option '{}'." + +#: src/hydrilla/server/serve.py:641 +msgid "serve_hydrilla_packages_wsgi_help" +msgstr "" +"Serve Hydrilla packages.\n" +"\n" +"This program is a WSGI script that runs Hydrilla repository behind an " +"HTTP server like Apache2 or Nginx. You can configure Hydrilla through the" +" /etc/hydrilla/config.json file." + +#. 'hydrilla' as a title +#: src/hydrilla/server/templates/base.html:99 +#: src/hydrilla/server/templates/base.html:105 +msgid "hydrilla" +msgstr "Hydrilla po polsku" + +#: src/hydrilla/server/templates/index.html:29 +msgid "hydrilla_welcome" +msgstr "Welcome to Hydrilla!" + +#: src/hydrilla/util/_util.py:86 +msgid "bad_comment" +msgstr "bad comment" + diff --git a/src/hydrilla/mitmproxy_launcher/__main__.py b/src/hydrilla/mitmproxy_launcher/__main__.py new file mode 100644 index 0000000..f2ec78a --- /dev/null +++ b/src/hydrilla/mitmproxy_launcher/__main__.py @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: CC0-1.0 + +# Copyright (C) 2022 Wojtek Kosior +# +# Available under the terms of Creative Commons Zero v1.0 Universal. + +import sys + +from . 
import launch + +launch.launch() diff --git a/src/hydrilla/mitmproxy_launcher/launch.py b/src/hydrilla/mitmproxy_launcher/launch.py new file mode 100644 index 0000000..c826598 --- /dev/null +++ b/src/hydrilla/mitmproxy_launcher/launch.py @@ -0,0 +1,77 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Code for starting mitmproxy +# +# This file is part of Hydrilla +# +# Copyright (C) 2021, 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + + +# We want to run mitmproxy with our script as an addon. A simple way would be to +# find something like a 'main' function in mitmproxy, import it and call here. +# Unfortunately, there is currently no guarantee that such function can be +# considered mitmproxy's stable programming API. For this reason we instead +# spawn a new process. + +import sys +import os +import subprocess as sp + +from pathlib import Path + +import click + +from .. 
@click.command(help=_('cli_help.haketilo'))
@click.option('-p', '--port', default=8080, type=click.IntRange(0, 65535),
              help=_('cli_opt.haketilo.port'))
@click.option('-d', '--directory', default='~/.haketilo/',
              type=click.Path(file_okay=False),
              help=_('cli_opt.haketilo.dir'))
@click.version_option(version=_version.version, prog_name='Haketilo proxy',
                      message=_('%(prog)s_%(version)s_license'),
                      help=_('cli_opt.haketilo.version'))
def launch(port: int, directory: str):
    """
    Prepare Haketilo's data directory and run mitmdump with the addon loaded.

    The directory is tilde-expanded, resolved and created when missing. A
    small addon script is (re)written into it and handed to a freshly spawned
    `mitmdump` process. This function does not return: it exits the
    interpreter with mitmdump's exit code.

    Parameters:
        port:      TCP port mitmdump is told to listen on.
        directory: Haketilo data directory; also the parent of mitmproxy's
                   own configuration directory.
    """
    directory_path = Path(os.path.expanduser(directory)).resolve()

    directory_path.mkdir(parents=True, exist_ok=True)

    script_path = directory_path / 'addon.py'

    # NOTE(review): the addon in this source tree appears to live in
    # hydrilla.proxy.addon as HaketiloAddon - confirm that the import below
    # matches the installed package layout.
    script_path.write_text('''
from hydrilla.mitmproxy_addon.addon import Haketilo

addons = [Haketilo()]
''')

    # Every list item must end with a comma. Adjacent string literals get
    # silently concatenated by Python, which would glue an option's value to
    # the name of the option that follows it.
    code = sp.call(['mitmdump',
                    '-p', str(port),
                    '--set', f'confdir={directory_path / "mitmproxy"}',
                    '--set', 'upstream_cert=false',
                    '--set', f'haketilo_dir={directory_path}',
                    '--scripts', str(script_path)])

    sys.exit(code)
WrapperStoredType = t.TypeVar('WrapperStoredType', bound=t.Hashable)

# A frozen dataclass with the default eq=True already receives a generated
# field-based __hash__(), so no explicit unsafe_hash flag is needed to keep
# instances usable as set members.
@dc.dataclass(frozen=True)
class StoredTreeItem(t.Generic[WrapperStoredType]):
    """
    Pairing of an item with the parsed URL pattern it was registered under.

    Instances are immutable and compare (and hash) by both fields.
    """
    pattern: ParsedUrl
    item:    WrapperStoredType
NodeStoredType = t.TypeVar('NodeStoredType')

@dc.dataclass(frozen=True)
class PatternTreeNode(t.Generic[NodeStoredType]):
    """
    Immutable node of a pattern tree branch.

    A node maps segment strings to child nodes and may hold one item
    (`literal_match`) registered for exactly the segment sequence leading to
    it. Every "modifying" method returns a new node; this one is left as is.
    """
    children:      'NodeChildrenType'         = Map()
    literal_match: t.Optional[NodeStoredType] = None

    def is_empty(self) -> bool:
        """Tell whether the node has neither children nor a stored item."""
        return len(self.children) == 0 and self.literal_match is None

    def update_literal_match(
            self,
            new_match_item: t.Optional[NodeStoredType]
    ) -> 'NodeSelfType':
        """Return a copy of this node holding `new_match_item`."""
        return dc.replace(self, literal_match=new_match_item)

    def get_child(self, child_key: str) -> t.Optional['NodeSelfType']:
        """Return the child stored under `child_key` or None if absent."""
        return self.children.get(child_key)

    def remove_child(self, child_key: str) -> 'NodeSelfType':
        """
        Return a copy of this node without the child under `child_key`.

        A missing child is not an error; the copy then has the same children.
        """
        try:
            children = self.children.delete(child_key)
        except KeyError:
            # Only "no such key" is expected here. Anything else is a real
            # error and must not be silenced.
            children = self.children

        return dc.replace(self, children=children)

    def set_child(self, child_key: str, child: 'NodeSelfType') \
        -> 'NodeSelfType':
        """Return a copy of this node with `child` stored under `child_key`."""
        return dc.replace(self, children=self.children.set(child_key, child))

# Below we define 2 types used by recursively-typed PatternTreeNode.
NodeSelfType     = PatternTreeNode[NodeStoredType]
NodeChildrenType = Map[str, NodeSelfType]


BranchStoredType = t.TypeVar('BranchStoredType')

# Receives the item currently stored at a node (or None) and returns the item
# that should be stored there instead (or None to store nothing).
ItemUpdater = t.Callable[
    [t.Optional[BranchStoredType]],
    t.Optional[BranchStoredType]
]

@dc.dataclass(frozen=True)
class PatternTreeBranch(t.Generic[BranchStoredType]):
    """
    Immutable tree of PatternTreeNode objects addressed by segment sequences.

    Segments '*', '**' and '***' stored in the tree act as wildcards during
    search().
    """
    root_node: PatternTreeNode[BranchStoredType] = PatternTreeNode()

    def is_empty(self) -> bool:
        """Tell whether the branch stores nothing at all."""
        return self.root_node.is_empty()

    def update(self, segments: t.Iterable[str], item_updater: ItemUpdater) \
        -> 'BranchSelfType':
        """
        Return a new branch in which the item stored under `segments` has been
        replaced by the result of calling `item_updater` on the old item.

        Nodes missing along the path are created. Nodes that end up empty
        after the update are pruned from their parents.
        """
        node = self.root_node
        nodes_segments = []

        # Walk down and remember each (parent, segment) pair so the path can
        # be rebuilt bottom-up afterwards.
        for segment in segments:
            next_node = node.get_child(segment)

            nodes_segments.append((node, segment))

            node = PatternTreeNode() if next_node is None else next_node

        node = node.update_literal_match(item_updater(node.literal_match))

        # Walk back up, attaching the rebuilt child to a copy of each parent
        # or dropping the child if it became empty.
        while nodes_segments:
            prev_node, segment = nodes_segments.pop()

            if node.is_empty():
                node = prev_node.remove_child(segment)
            else:
                node = prev_node.set_child(segment, node)

        return dc.replace(self, root_node=node)

    def search(self, segments: t.Sequence[str]) -> t.Iterable[BranchStoredType]:
        """
        Yields all matches of this segments sequence against the tree. Results
        are produced in order from greatest to lowest pattern specificity.
        """
        nodes = [self.root_node]

        for segment in segments:
            next_node = nodes[-1].get_child(segment)
            if next_node is None:
                break

            nodes.append(next_node)

        nsegments = len(segments)
        # The conditions are evaluated lazily, after `nodes.pop()` below, so
        # inside them `len(nodes)` equals the number of segments consumed to
        # reach the node currently being examined.
        cond_literal = lambda: len(nodes) == nsegments
        cond_wildcard = [
            lambda: len(nodes) + 1 == nsegments and segments[-1] != '*',
            lambda: len(nodes) + 1 <  nsegments,
            lambda: len(nodes) + 1 != nsegments or  segments[-1] != '***'
        ]

        while nodes:
            node = nodes.pop()

            wildcard_matches = [node.get_child(wc) for wc in ('*', '**', '***')]

            for match_node, condition in [
                    (node, cond_literal),
                    *zip(wildcard_matches, cond_wildcard)
            ]:
                if match_node is not None:
                    if match_node.literal_match is not None:
                        if condition():
                            yield match_node.literal_match
BranchSelfType = PatternTreeBranch[BranchStoredType]


FilterStoredType  = t.TypeVar('FilterStoredType', bound=t.Hashable)
FilterWrappedType = StoredTreeItem[FilterStoredType]

def filter_by_trailing_slash(
        items:      t.Iterable[FilterWrappedType],
        with_slash: bool
) -> t.FrozenSet[FilterWrappedType]:
    """
    Return those of `items` whose pattern's `has_trailing_slash` flag equals
    `with_slash`.
    """
    return frozenset(wrapped for wrapped in items
                     if with_slash == wrapped.pattern.has_trailing_slash)

TreeStoredType = t.TypeVar('TreeStoredType', bound=t.Hashable)

# These aliases are evaluated when the module is imported, so they must not
# subscript the `frozenset` and `tuple` builtins: that only works on Python
# 3.9+, while this module is meant to stay usable with Python 3.7.
StoredSet    = t.FrozenSet[StoredTreeItem[TreeStoredType]]
PathBranch   = PatternTreeBranch[StoredSet]
DomainBranch = PatternTreeBranch[PathBranch]
TreeRoot     = Map[t.Tuple[str, int], DomainBranch]

@dc.dataclass(frozen=True)
class PatternTree(t.Generic[TreeStoredType]):
    """
    "Pattern Tree" is how we refer to the data structure used for querying
    Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal
    is to make it possible to quickly retrieve all known patterns that match
    a given URL.

    The tree is immutable: register() and deregister() return a new tree.
    Items are first grouped by (scheme, port), then by domain labels and
    finally by path segments.
    """
    _by_scheme_and_port: TreeRoot = Map()

    def _register(
            self,
            parsed_pattern: ParsedUrl,
            item:           TreeStoredType,
            register:       bool = True
    ) -> 'TreeSelfType':
        """
        Make an item wrapped in StoredTreeItem object queryable through the
        Pattern Tree by the given parsed URL pattern. With `register` set to
        False the item is removed instead. Returns the resulting tree.
        """
        wrapped_item = StoredTreeItem(parsed_pattern, item)

        def item_updater(item_set: t.Optional[StoredSet]) \
            -> t.Optional[StoredSet]:
            """Add/remove the wrapped item; None stands for an empty set."""
            if item_set is None:
                item_set = frozenset()

            if register:
                item_set = item_set.union((wrapped_item,))
            else:
                item_set = item_set.difference((wrapped_item,))

            return None if len(item_set) == 0 else item_set

        def path_branch_updater(path_branch: t.Optional[PathBranch]) \
            -> t.Optional[PathBranch]:
            """Update the path branch; None stands for an empty branch."""
            if path_branch is None:
                path_branch = PatternTreeBranch()

            path_branch = path_branch.update(
                parsed_pattern.path_segments,
                item_updater
            )

            return None if path_branch.is_empty() else path_branch

        key         = (parsed_pattern.scheme, parsed_pattern.port)
        domain_tree = self._by_scheme_and_port.get(key, PatternTreeBranch())

        new_domain_tree = domain_tree.update(
            parsed_pattern.domain_labels,
            path_branch_updater
        )

        if new_domain_tree.is_empty():
            # Do not keep empty branches around. The key may legitimately be
            # absent when deregistering something that was never registered.
            try:
                new_root = self._by_scheme_and_port.delete(key)
            except KeyError:
                new_root = self._by_scheme_and_port
        else:
            new_root = self._by_scheme_and_port.set(key, new_domain_tree)

        return dc.replace(self, _by_scheme_and_port=new_root)

    def register(self, parsed_pattern: ParsedUrl, item: TreeStoredType) \
        -> 'TreeSelfType':
        """
        Make item queryable through the Pattern Tree by the given URL pattern.
        """
        return self._register(parsed_pattern, item)

    def deregister(self, parsed_pattern: ParsedUrl, item: TreeStoredType) \
        -> 'TreeSelfType':
        """
        Make item no longer queryable through the Pattern Tree by the given URL
        pattern.
        """
        return self._register(parsed_pattern, item, register=False)

    def search(self, url: t.Union[ParsedUrl, str]) -> t.Iterable[StoredSet]:
        """
        Yield non-empty sets of stored items whose patterns match `url`.

        A string `url` gets parsed with parse_url() first. If the URL has a
        trailing slash, items registered under patterns with a trailing slash
        are yielded before those registered without one. Otherwise only the
        latter are considered.
        """
        parsed_url = parse_url(url) if isinstance(url, str) else url

        key         = (parsed_url.scheme, parsed_url.port)
        domain_tree = self._by_scheme_and_port.get(key)
        if domain_tree is None:
            return

        if parsed_url.has_trailing_slash:
            slash_options = [True, False]
        else:
            slash_options = [False]

        for path_tree in domain_tree.search(parsed_url.domain_labels):
            for item_set in path_tree.search(parsed_url.path_segments):
                for with_slash in slash_options:
                    items = filter_by_trailing_slash(item_set, with_slash)
                    if len(items) > 0:
                        yield items

# Below we define 1 type used by recursively-typed PatternTree.
TreeSelfType = PatternTree[TreeStoredType]
+# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +""" +This module contains the definition of a mitmproxy addon that gets instantiated +from addon script. +""" + +# Enable using with Python 3.7. 
# Evaluated at import time, hence typing's generic rather than `dict[...]`,
# which is only subscriptable on Python 3.9+.
FlowHandlers = t.Dict[int, FlowHandler]

StateUpdater = t.Callable[[HaketiloState], None]

HTTPHandlerFun = t.Callable[
    ['HaketiloAddon', http.HTTPFlow],
    t.Optional[StateUpdater]
]

def http_event_handler(handler_fun: HTTPHandlerFun):
    """
    Decorator for HaketiloAddon's HTTP event hooks.

    The wrapped hook runs with the addon's `configured_lock` held and only
    once the addon has been configured. If the hook returns a state updater,
    it is then called with the addon's state.
    """
    import functools

    # Keep the hook's own __name__ etc. The `concurrent` decorator that gets
    # stacked on top checks the function name against known event names.
    @functools.wraps(handler_fun)
    def wrapped_handler(self: 'HaketiloAddon', flow: http.HTTPFlow):
        """Run the hook and apply the state updater it returned, if any."""
        with self.configured_lock:
            assert self.configured

            assert self.state is not None

            state_updater = handler_fun(self, flow)

            if state_updater is not None:
                state_updater(self.state)

    return wrapped_handler

@dc.dataclass
class HaketiloAddon:
    """
    Mitmproxy addon object. It creates a flow handler for every HTTP flow
    based on the policy selected for the request URL and forwards subsequent
    flow events to that handler.
    """
    configured:      bool = False
    configured_lock: Lock = dc.field(default_factory=Lock)

    state: t.Optional[HaketiloState] = None

    # Handlers are keyed by id() of their flow objects.
    flow_handlers: FlowHandlers = dc.field(default_factory=dict)
    handlers_lock: Lock         = dc.field(default_factory=Lock)

    def load(self, loader: addonmanager.Loader) -> None:
        """Register the 'haketilo_dir' option with mitmproxy."""
        loader.add_option(
            name     = 'haketilo_dir',
            typespec = str,
            default  = '~/.haketilo/',
            help     = "Point to a Haketilo data directory to use",
        )

    def configure(self, updated: set[str]) -> None:
        """
        Create the addon's state once 'haketilo_dir' becomes known. Later
        changes of the option are ignored with a warning.
        """
        if 'haketilo_dir' not in updated:
            return

        with self.configured_lock:
            if self.configured:
                ctx.log.warn(_('haketilo_dir_already_configured'))
                return

            # The option's default starts with '~', so expand it.
            haketilo_dir = Path(ctx.options.haketilo_dir).expanduser()
            self.state = HaketiloState(haketilo_dir / 'store')

            # Event hooks assert on this flag; without it none could run.
            self.configured = True

    def assign_handler(self, flow: http.HTTPFlow, flow_handler: FlowHandler) \
        -> None:
        """Remember `flow_handler` as the handler of `flow`."""
        with self.handlers_lock:
            self.flow_handlers[id(flow)] = flow_handler

    def lookup_handler(self, flow: http.HTTPFlow) -> FlowHandler:
        """
        Return the handler assigned to `flow`. Raises KeyError if there is
        none.
        """
        with self.handlers_lock:
            return self.flow_handlers[id(flow)]

    def forget_handler(self, flow: http.HTTPFlow) -> None:
        """Drop the handler assigned to `flow`, if any."""
        with self.handlers_lock:
            self.flow_handlers.pop(id(flow), None)

    @concurrent
    @http_event_handler
    def requestheaders(self, flow: http.HTTPFlow) -> t.Optional[StateUpdater]:
        """
        Select a policy for the request URL, create and remember the matching
        flow handler and let it process the request headers.
        """
        assert self.state is not None

        policy = self.state.select_policy(flow.request.url)

        flow_handler = make_flow_handler(flow, policy)

        self.assign_handler(flow, flow_handler)

        return flow_handler.on_requestheaders()

    @concurrent
    @http_event_handler
    def request(self, flow: http.HTTPFlow) -> t.Optional[StateUpdater]:
        """Let the flow's handler process the complete request."""
        return self.lookup_handler(flow).on_request()

    @concurrent
    @http_event_handler
    def responseheaders(self, flow: http.HTTPFlow) -> t.Optional[StateUpdater]:
        """Let the flow's handler process the response headers."""
        return self.lookup_handler(flow).on_responseheaders()

    @concurrent
    @http_event_handler
    def response(self, flow: http.HTTPFlow) -> t.Optional[StateUpdater]:
        """
        Let the flow's handler process the complete response, then forget the
        handler.
        """
        updater = self.lookup_handler(flow).on_response()

        self.forget_handler(flow)

        return updater

    @http_event_handler
    def error(self, flow: http.HTTPFlow) -> None:
        """Forget the handler of a flow that ended with an error."""
        self.forget_handler(flow)

addons = [
    HaketiloAddon()
]
StateUpdater = t.Callable[['HaketiloState'], None]

@dc.dataclass(frozen=True)
class FlowHandler:
    """
    Base class of objects that process the events of a single HTTP flow
    according to a policy. Each on_*() method may return a callable that is
    to be applied to the proxy's state, or None.
    """
    flow:   http.HTTPFlow
    policy: policies.Policy

    # Whether to pass request/response bodies through without buffering.
    stream_request:  bool = False
    stream_response: bool = False

    def on_requestheaders(self) -> t.Optional[StateUpdater]:
        """Enable request streaming if this handler asks for it."""
        if self.stream_request:
            self.flow.request.stream = True

        return None

    def on_request(self) -> t.Optional[StateUpdater]:
        """Do nothing by default."""
        return None

    def on_responseheaders(self) -> t.Optional[StateUpdater]:
        """Enable response streaming if this handler asks for it."""
        assert self.flow.response is not None

        if self.stream_response:
            self.flow.response.stream = True

        return None

    def on_response(self) -> t.Optional[StateUpdater]:
        """Do nothing by default."""
        return None

@dc.dataclass(frozen=True)
class FlowHandlerAllowScripts(FlowHandler):
    """Handler that leaves the flow untouched and streams both bodies."""
    policy: policies.AllowPolicy

    stream_request:  bool = True
    stream_response: bool = True

# Lowercase names of headers that can carry a Content Security Policy.
# NOTE(review): the '-report-only' variants of the two legacy headers replace
# what looked like duplicated enforce-mode names - confirm this was intended.
csp_header_names_and_dispositions = (
    ('content-security-policy',               'enforce'),
    ('content-security-policy-report-only',   'report'),
    ('x-content-security-policy',             'enforce'),
    ('x-content-security-policy-report-only', 'report'),
    ('x-webkit-csp',                          'enforce'),
    ('x-webkit-csp-report-only',              'report')
)

csp_enforce_header_names_set = {
    name for name, disposition in csp_header_names_and_dispositions
    if disposition == 'enforce'
}

@dc.dataclass
class ContentSecurityPolicy:
    """
    One parsed Content Security Policy.

    `directives` maps lowercase directive names to lists of value tokens.
    `header_name` is the header the policy came from and `disposition` is
    either 'enforce' or 'report'.
    """
    directives:  dict[str, list[str]]
    header_name: str
    disposition: str

    @staticmethod
    def deserialize(
            serialized:  str,
            header_name: str,
            disposition: str = 'enforce'
    ) -> 'ContentSecurityPolicy':
        """
        Parse one serialized policy (a ';'-separated list of directives).

        Non-ASCII and empty directives are skipped. When a directive name
        occurs more than once, only its first occurrence is kept.
        """
        # For more info, see:
        # https://www.w3.org/TR/CSP3/#parse-serialized-policy
        directives = {}

        for serialized_directive in serialized.split(';'):
            if not serialized_directive.isascii():
                continue

            tokens = serialized_directive.split()
            if not tokens:
                continue

            directive_name, *directive_value = tokens
            directive_name = directive_name.lower()

            # Specs mention giving warnings for duplicate directive names but
            # from our proxy's perspective this is not important right now.
            directives.setdefault(directive_name, directive_value)

        return ContentSecurityPolicy(directives, header_name, disposition)

    def serialize(self) -> str:
        """Return the policy as a string suitable for a header value."""
        return ';'.join(
            ' '.join((name, *value_list))
            for name, value_list in self.directives.items()
        )

def extract_csp(headers: Headers) -> tuple[ContentSecurityPolicy, ...]:
    """
    Collect all non-empty policies from the CSP headers present in `headers`.

    Every occurrence of every known CSP header is looked at and each header
    value may hold several comma-separated policies.
    """
    csp_policies = []

    for header_name, disposition in csp_header_names_and_dispositions:
        # get_all() gives one string per header occurrence. Iterating over
        # the result of get() would walk over single characters instead.
        for serialized_list in headers.get_all(header_name):
            for serialized in serialized_list.split(','):
                policy = ContentSecurityPolicy.deserialize(
                    serialized,
                    header_name,
                    disposition
                )

                if policy.directives != {}:
                    csp_policies.append(policy)

    return tuple(csp_policies)

csp_script_directive_names = (
    'script-src',
    'script-src-elem',
    'script-src-attr'
)

@dc.dataclass(frozen=True)
class FlowHandlerBlockScripts(FlowHandler):
    """
    Handler that forbids script execution by adding a restrictive Content
    Security Policy to the response.
    """
    policy: policies.BlockPolicy

    stream_request:  bool = True
    stream_response: bool = True

    def on_responseheaders(self) -> t.Optional[StateUpdater]:
        """
        Re-add the site's enforced policies stripped of reporting directives,
        drop report-only policies and add a policy that blocks all scripts.
        """
        super().on_responseheaders()

        assert self.flow.response is not None

        csp_policies = extract_csp(self.flow.response.headers)

        for header_name, _ in csp_header_names_and_dispositions:
            # Most responses lack most of these headers; a missing one must
            # not raise.
            self.flow.response.headers.pop(header_name, None)

        for policy in csp_policies:
            if policy.disposition != 'enforce':
                continue

            policy.directives.pop('report-to', None)
            policy.directives.pop('report-uri', None)

            # Nothing left to enforce - do not emit an empty header.
            if not policy.directives:
                continue

            self.flow.response.headers.add(
                policy.header_name,
                policy.serialize()
            )

        extra_csp = ';'.join((
            "script-src 'none'",
            "script-src-elem 'none'",
            "script-src-attr 'none'"
        ))

        self.flow.response.headers.add('Content-Security-Policy', extra_csp)

        return None

# For details of 'Content-Type' header's structure, see:
# https://datatracker.ietf.org/doc/html/rfc7231#section-3.1.1.1
content_type_reg = re.compile(r'''
^
(?P<mime>[\w.+-]+/[\w.+-]+)
\s*
(?:
    ;
    (?:[^;]*;)*    # skip parameters that precede "charset"
    \s*
    charset=       # no whitespace allowed in parameter as per RFC
    (?P<encoding>
        [\w-]+
        |
        "[\w-]+"   # quotes are optional per RFC
    )
)?                 # the header may carry no "charset" at all
(?:;[^;]*)*        # tolerate any remaining parameters
$
''', re.VERBOSE | re.IGNORECASE)

def deduce_content_type(headers: Headers) \
    -> tuple[t.Optional[str], t.Optional[str]]:
    """
    Return a (mime type, charset) pair read from the 'Content-Type' header.

    Both are lowercase. The charset is None when the header does not specify
    it. Both are None when the header is missing or cannot be parsed.
    """
    content_type = headers.get('content-type')
    if content_type is None:
        return (None, None)

    match = content_type_reg.match(content_type)
    if match is None:
        return (None, None)

    mime, encoding = match.group('mime'), match.group('encoding')

    if encoding is not None:
        # The value may have been given as a quoted string.
        encoding = encoding.strip('"').lower()

    return mime.lower(), encoding

UTF8_BOM = b'\xEF\xBB\xBF'
BOMs = (
    (UTF8_BOM,    'utf-8'),
    (b'\xFE\xFF', 'utf-16be'),
    (b'\xFF\xFE', 'utf-16le')
)

def block_attr(element: bs4.PageElement, atrr_name: str) -> None:
    """Neutralize an attribute of an HTML element. Not implemented yet."""
    # TODO: implement
    pass

@dc.dataclass(frozen=True)
class FlowHandlerInjectPayload(FlowHandler):
    """
    Handler that replaces the page's Content Security Policy with its own and
    injects the payload's scripts into HTML responses.
    """
    policy: policies.PayloadPolicy

    stream_request: bool = True

    @property
    def new_csp(self) -> str:
        """
        Content Security Policy to serve instead of the page's own one.

        This is computed on access because instances are frozen and hence
        cannot have attributes assigned after creation.
        """
        script_src = f"script-src {self.policy.assets_base_url()}"
        if self.policy.is_eval_allowed():
            script_src = f"{script_src} 'unsafe-eval'"

        # NOTE(review): "script-src-elem 'none'" presumably also blocks the
        # <script> elements injected in on_response() - confirm.
        return '; '.join((
            script_src,
            "script-src-elem 'none'",
            "script-src-attr 'none'"
        ))

    def on_responseheaders(self) -> t.Optional[StateUpdater]:
        """Replace all CSP headers of the response with our own policy."""
        super().on_responseheaders()

        assert self.flow.response is not None

        for header_name, _ in csp_header_names_and_dispositions:
            # A missing header must not raise.
            self.flow.response.headers.pop(header_name, None)

        self.flow.response.headers.add('Content-Security-Policy', self.new_csp)

        return None

    def on_response(self) -> t.Optional[StateUpdater]:
        """
        Inject the payload's <script> tags into an HTML response body and
        re-encode the document as UTF-8. Other responses are left alone.
        """
        super().on_response()

        assert self.flow.response is not None

        if self.flow.response.content is None:
            return None

        mime, encoding = deduce_content_type(self.flow.response.headers)
        if mime is None or 'html' not in mime:
            return None

        # A UTF BOM overrides encoding specified by the header.
        for bom, encoding_name in BOMs:
            if self.flow.response.content.startswith(bom):
                encoding = encoding_name

        soup = bs4.BeautifulSoup(
            markup        = self.flow.response.content,
            from_encoding = encoding,
            features      = 'html5lib'
        )

        # Inject scripts.
        script_parent = soup.find('body') or soup.find('html')
        if script_parent is None:
            return None

        for url in self.policy.script_urls():
            script_parent.append(bs4.Tag(name='script', attrs={'src': url}))

        # Remove Content Security Policy that could possibly block injected
        # scripts.
        for meta in soup.select('head meta[http-equiv]'):
            header_name = meta.attrs.get('http-equiv', '').lower().strip()
            if header_name in csp_enforce_header_names_set:
                block_attr(meta, 'http-equiv')
                block_attr(meta, 'content')

        # Prepending a three-byte Byte Order Mark (BOM) will force the browser
        # to decode this as UTF-8 regardless of the 'Content-Type' header. See:
        # https://www.w3.org/International/tests/repository/html5/the-input-byte-stream/results-basics#precedence
        self.flow.response.content = UTF8_BOM + soup.encode()

        return None

@dc.dataclass(frozen=True)
class FlowHandlerMetaResource(FlowHandler):
    """Handler meant to answer requests itself. Not implemented yet."""
    policy: policies.MetaResourcePolicy

    def on_request(self) -> t.Optional[StateUpdater]:
        """Currently does nothing beyond the base class' behavior."""
        super().on_request()
        # TODO: implement
        #self.flow.response = ....

        return None

def make_flow_handler(flow: http.HTTPFlow, policy: policies.Policy) \
    -> FlowHandler:
    """Create the flow handler that corresponds to the type of `policy`."""
    if isinstance(policy, policies.BlockPolicy):
        return FlowHandlerBlockScripts(flow, policy)

    if isinstance(policy, policies.AllowPolicy):
        return FlowHandlerAllowScripts(flow, policy)

    if isinstance(policy, policies.PayloadPolicy):
        return FlowHandlerInjectPayload(flow, policy)

    assert isinstance(policy, policies.MetaResourcePolicy)
    # TODO: build the replacement response for meta resources here once
    # FlowHandlerMetaResource is implemented.
    return FlowHandlerMetaResource(flow, policy)
class Policy(ABC):
    """Common base of everything that decides how a request gets treated."""

class PayloadPolicy(Policy):
    """Policy under which a payload's scripts get injected into the page."""
    def assets_base_url(self) -> str:
        """Give the base URL the payload's assets are served from."""
        return 'https://example.com/static/'

    def script_urls(self) -> t.Sequence[str]:
        """Give URLs of the scripts to inject."""
        # TODO: implement
        return ('https://example.com/static/somescript.js',)

    def is_eval_allowed(self) -> bool:
        """Tell whether the payload may use eval()."""
        # TODO: implement
        return True

class MetaResourcePolicy(Policy):
    """Marker type; it carries no data or behavior of its own yet."""

class AllowPolicy(Policy):
    """Base of the policies under which a page's own scripts are allowed."""

@dc.dataclass
class RuleAllowPolicy(AllowPolicy):
    """Allow policy that remembers the pattern it stems from."""
    pattern: str

class FallbackAllowPolicy(AllowPolicy):
    """Allow policy not tied to any pattern."""

class BlockPolicy(Policy):
    """Base of the policies under which a page's own scripts are blocked."""

@dc.dataclass
class RuleBlockPolicy(BlockPolicy):
    """Block policy that remembers the pattern it stems from."""
    pattern: str

class FallbackBlockPolicy(BlockPolicy):
    """Block policy not tied to any pattern."""

@dc.dataclass
class ErrorBlockPolicy(BlockPolicy):
    """Block policy that carries the exception it was created for."""
    error: Exception
+# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +""" +This module contains logic for keeping track of all settings, rules, mappings +and resources. +""" + +# Enable using with Python 3.7. +from __future__ import annotations + +import typing as t +import dataclasses as dc + +from threading import Lock +from pathlib import Path + +from ..pattern_tree import PatternTree +from .store import HaketiloStore +from . 
def make_pattern_tree_with_builtin_policies() -> PatternTree[policies.Policy]:
    """Create the initial pattern tree. For now it is simply empty."""
    # TODO: implement
    return PatternTree()

tree_field = dc.field(default_factory=make_pattern_tree_with_builtin_policies)

@dc.dataclass
class HaketiloState(HaketiloStore):
    """
    In-memory state of the proxy: the pattern tree of policies and the default
    treatment of URLs no policy gets selected for.
    """
    pattern_tree:  PatternTree[policies.Policy] = tree_field
    default_allow: bool                         = False

    state_lock: Lock = dc.field(default_factory=Lock)

    def select_policy(self, url: str, allow_disabled=False) -> policies.Policy:
        """
        Return the policy to apply to `url`.

        Selection among registered policies is not implemented yet, so a
        fallback policy is returned: an allowing one if `default_allow` is set
        and a blocking one otherwise. If searching the tree fails (e.g. because
        the URL cannot be parsed), an ErrorBlockPolicy carrying the exception
        is returned instead.

        `allow_disabled` is currently unused.
        """
        # Take a consistent snapshot. The tree itself is immutable, so it can
        # be searched without holding the lock.
        with self.state_lock:
            pattern_tree  = self.pattern_tree
            default_allow = self.default_allow

        try:
            for policy_set in pattern_tree.search(url):
                # TODO: pick a policy out of the set, e.g.
                # if policy.enabled or allow_disabled:
                #     return policy
                pass
        except Exception as e:
            # Deliberately broad: whatever goes wrong, fail closed.
            return policies.ErrorBlockPolicy(e)

        if default_allow:
            return policies.FallbackAllowPolicy()

        return policies.FallbackBlockPolicy()
@dc.dataclass
class HaketiloStore:
    """
    Base for objects backed by an on-disk store. So far it only remembers
    where the store is located.
    """
    # Directory the proxy's data is kept in.
    store_dir: Path
    # TODO: implement
import json_instances config_schema = { '$schema': 'http://json-schema.org/draft-07/schema#', @@ -92,7 +92,7 @@ def load(config_paths: list[Path]=[here / 'config.json'], continue raise e from None - new_config = json.loads(util.strip_json_comments(json_text)) + new_config = json_instances.strip_json_comments(json_text) jsonschema.validate(new_config, config_schema) config.update(new_config) diff --git a/src/hydrilla/server/locales/en_US/LC_MESSAGES/hydrilla-messages.po b/src/hydrilla/server/locales/en_US/LC_MESSAGES/hydrilla-messages.po deleted file mode 100644 index 1998f89..0000000 --- a/src/hydrilla/server/locales/en_US/LC_MESSAGES/hydrilla-messages.po +++ /dev/null @@ -1,151 +0,0 @@ -# SPDX-License-Identifier: CC0-1.0 -# -# English (United States) translations for hydrilla. -# Copyright (C) 2021, 2022 Wojtek Kosior -# Available under the terms of Creative Commons Zero v1.0 Universal. -msgid "" -msgstr "" -"Project-Id-Version: hydrilla.builder 0.1\n" -"Report-Msgid-Bugs-To: koszko@koszko.org\n" -"POT-Creation-Date: 2022-05-31 18:21+0200\n" -"PO-Revision-Date: 2022-02-12 00:00+0000\n" -"Last-Translator: Wojtek Kosior \n" -"Language: en_US\n" -"Language-Team: en_US \n" -"Plural-Forms: nplurals=2; plural=(n != 1)\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel 2.8.0\n" - -#: src/hydrilla/server/serve.py:127 -#, python-brace-format -msgid "uuid_mismatch_{identifier}" -msgstr "Two different uuids were specified for item '{identifier}'." - -#: src/hydrilla/server/serve.py:134 -#, python-brace-format -msgid "version_clash_{identifier}_{version}" -msgstr "Version '{version}' specified more than once for item '{identifier}'." - -#: src/hydrilla/server/serve.py:250 src/hydrilla/server/serve.py:262 -msgid "invalid_URL_{}" -msgstr "Invalid URL/pattern: '{}'." - -#: src/hydrilla/server/serve.py:254 -msgid "disallowed_protocol_{}" -msgstr "Disallowed protocol: '{}'." 
- -#: src/hydrilla/server/serve.py:307 -msgid "malcontent_dir_path_not_dir_{}" -msgstr "Provided 'malcontent_dir' path does not name a directory: {}" - -#: src/hydrilla/server/serve.py:326 -msgid "couldnt_load_item_from_{}" -msgstr "Couldn't load item from {}." - -#: src/hydrilla/server/serve.py:351 -msgid "item_{item}_in_file_{file}" -msgstr "Item {item} incorrectly present under {file}." - -#: src/hydrilla/server/serve.py:357 -msgid "item_version_{ver}_in_file_{file}" -msgstr "Item version {ver} incorrectly present under {file}." - -#: src/hydrilla/server/serve.py:380 -msgid "no_dep_{resource}_{ver}_{dep}" -msgstr "Unknown dependency '{dep}' of resource '{resource}', version '{ver}'." - -#: src/hydrilla/server/serve.py:391 -msgid "no_payload_{mapping}_{ver}_{payload}" -msgstr "Unknown payload '{payload}' of mapping '{mapping}', version '{ver}'." - -#: src/hydrilla/server/serve.py:403 -msgid "no_mapping_{required_by}_{ver}_{required}" -msgstr "Unknown mapping '{required}' required by '{required_by}', version '{ver}'." - -#: src/hydrilla/server/serve.py:430 -msgid "couldnt_register_{mapping}_{ver}_{pattern}" -msgstr "" -"Couldn't register mapping '{mapping}', version '{ver}' (pattern " -"'{pattern}')." - -#: src/hydrilla/server/serve.py:583 src/hydrilla/server/serve.py:606 -#: src/hydrilla/server/serve.py:650 -#, python-format -msgid "%(prog)s_%(version)s_license" -msgstr "" -"%(prog)s %(version)s\n" -"Copyright (C) 2021,2022 Wojtek Kosior and contributors.\n" -"License GPLv3+: GNU AGPL version 3 or later " -"\n" -"This is free software: you are free to change and redistribute it.\n" -"There is NO WARRANTY, to the extent permitted by law." - -#: src/hydrilla/server/serve.py:592 -msgid "serve_hydrilla_packages_explain_wsgi_considerations" -msgstr "" -"Serve Hydrilla packages.\n" -"\n" -"This command is meant to be a quick way to run a local or development " -"Hydrilla instance. For better performance, consider deployment using " -"WSGI." 
- -#: src/hydrilla/server/serve.py:595 -msgid "directory_to_serve_from_overrides_config" -msgstr "" -"Directory to serve files from. Overrides value from the config file (if " -"any)." - -#: src/hydrilla/server/serve.py:597 -msgid "project_url_to_display_overrides_config" -msgstr "" -"Project url to display on generated HTML pages. Overrides value from the " -"config file (if any)." - -#: src/hydrilla/server/serve.py:599 -msgid "tcp_port_to_listen_on_overrides_config" -msgstr "" -"TCP port number to listen on (0-65535). Overrides value from the config " -"file (if any)." - -#: src/hydrilla/server/serve.py:602 -msgid "path_to_config_file_explain_default" -msgstr "" -"Path to Hydrilla server configuration file (optional, by default Hydrilla" -" loads its own config file, which in turn tries to load " -"/etc/hydrilla/config.json)." - -#: src/hydrilla/server/serve.py:604 -msgid "language_to_use_overrides_config" -msgstr "" -"Language to use (also affects served HTML files). Overrides value from " -"the config file (if any)." - -#: src/hydrilla/server/serve.py:607 src/hydrilla/server/serve.py:651 -msgid "version_printing" -msgstr "Print version information and exit." - -#: src/hydrilla/server/serve.py:640 -msgid "config_option_{}_not_supplied" -msgstr "Missing configuration option '{}'." - -#: src/hydrilla/server/serve.py:644 -msgid "serve_hydrilla_packages_wsgi_help" -msgstr "" -"Serve Hydrilla packages.\n" -"\n" -"This program is a WSGI script that runs Hydrilla repository behind an " -"HTTP server like Apache2 or Nginx. You can configure Hydrilla through the" -" /etc/hydrilla/config.json file." - -#. 'hydrilla' as a title -#: src/hydrilla/server/templates/base.html:99 -#: src/hydrilla/server/templates/base.html:105 -msgid "hydrilla" -msgstr "Hydrilla" - -#: src/hydrilla/server/templates/index.html:29 -msgid "hydrilla_welcome" -msgstr "Welcome to Hydrilla!" 
- diff --git a/src/hydrilla/server/serve.py b/src/hydrilla/server/serve.py index 779f3d2..8f0d557 100644 --- a/src/hydrilla/server/serve.py +++ b/src/hydrilla/server/serve.py @@ -36,16 +36,18 @@ import logging from pathlib import Path from hashlib import sha256 from abc import ABC, abstractmethod -from typing import Optional, Union, Iterable +from typing import Optional, Union, Iterable, TypeVar, Generic import click import flask from werkzeug import Response -from .. import util +from .. import _version, versions, json_instances +from ..item_infos import ResourceInfo, MappingInfo, VersionedItemInfo +from ..translations import smart_gettext as _, translation as make_translation +#from ..url_patterns import PatternTree from . import config -from . import _version here = Path(__file__).resolve().parent @@ -54,243 +56,20 @@ generated_by = { 'version': _version.version } -class ItemInfo(ABC): - """Shortened data of a resource/mapping.""" - def __init__(self, item_obj: dict, major_schema_version: int): - """Initialize ItemInfo using item definition read from JSON.""" - self.version = util.normalize_version(item_obj['version']) - self.identifier = item_obj['identifier'] - self.uuid = item_obj.get('uuid') - self.long_name = item_obj['long_name'] - - self.required_mappings = [] - if major_schema_version >= 2: - self.required_mappings = [map_ref['identifier'] for map_ref in - item_obj.get('required_mappings', [])] - - def path(self) -> str: - """ - Get a relative path to this item's JSON definition with respect to - directory containing items of this type. 
- """ - return f'{self.identifier}/{util.version_string(self.version)}' - -class ResourceInfo(ItemInfo): - """Shortened data of a resource.""" - def __init__(self, resource_obj: dict, major_schema_version: int): - """Initialize ResourceInfo using resource definition read from JSON.""" - super().__init__(resource_obj, major_schema_version) - - dependencies = resource_obj.get('dependencies', []) - self.dependencies = [res_ref['identifier'] for res_ref in dependencies] - -class MappingInfo(ItemInfo): - """Shortened data of a mapping.""" - def __init__(self, mapping_obj: dict, major_schema_version: int): - """Initialize MappingInfo using mapping definition read from JSON.""" - super().__init__(mapping_obj, major_schema_version) - - self.payloads = {} - for pattern, res_ref in mapping_obj.get('payloads', {}).items(): - self.payloads[pattern] = res_ref['identifier'] - - def as_query_result(self) -> str: - """ - Produce a json.dump()-able object describing this mapping as one of a - collection of query results. - """ - return { - 'version': self.version, - 'identifier': self.identifier, - 'long_name': self.long_name - } - -class VersionedItemInfo: - """Stores data of multiple versions of given resource/mapping.""" - def __init__(self): - self.uuid = None - self.identifier = None - self.by_version = {} - self.known_versions = [] - - def register(self, item_info: ItemInfo) -> None: - """ - Make item info queryable by version. Perform sanity checks for uuid. 
- """ - if self.identifier is None: - self.identifier = item_info.identifier - - if self.uuid is None: - self.uuid = item_info.uuid - - if self.uuid is not None and self.uuid != item_info.uuid: - raise ValueError(f_('uuid_mismatch_{identifier}') - .format(identifier=self.identifier)) - - ver = item_info.version - ver_str = util.version_string(ver) - - if ver_str in self.by_version: - raise ValueError(f_('version_clash_{identifier}_{version}') - .format(identifier=self.identifier, - version=ver_str)) - - self.by_version[ver_str] = item_info - self.known_versions.append(ver) - - def get_by_ver(self, ver: Optional[list[int]]=None) -> Optional[ItemInfo]: - """ - Find and return info of the newest version of item. - - If ver is specified, instead find and return info of that version of the - item (or None if absent). - """ - ver = util.version_string(ver or self.known_versions[-1]) - - return self.by_version.get(ver) - - def get_all(self) -> list[ItemInfo]: - """ - Return a list of item info for all its versions, from oldest ot newest. - """ - return [self.by_version[util.version_string(ver)] - for ver in self.known_versions] - -class PatternTreeNode: - """ - "Pattern Tree" is how we refer to the data structure used for querying - Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal - is to make it possible for given URL to quickly retrieve all known patterns - that match it. - """ - def __init__(self): - self.wildcard_matches = [None, None, None] - self.literal_match = None - self.children = {} - - def search(self, segments): - """ - Yields all matches of this segments sequence against the tree that - starts at this node. Results are produces in order from greatest to - lowest pattern specificity. 
- """ - nodes = [self] - - for segment in segments: - next_node = nodes[-1].children.get(segment) - if next_node is None: - break - - nodes.append(next_node) - - nsegments = len(segments) - cond_literal = lambda: len(nodes) == nsegments - cond_wildcard = [ - lambda: len(nodes) + 1 == nsegments and segments[-1] != '*', - lambda: len(nodes) + 1 < nsegments, - lambda: len(nodes) + 1 != nsegments or segments[-1] != '***' - ] - - while nodes: - node = nodes.pop() - - for item, condition in [(node.literal_match, cond_literal), - *zip(node.wildcard_matches, cond_wildcard)]: - if item is not None and condition(): - yield item - - def add(self, segments, item_instantiator): - """ - Make item queryable through (this branch of) the Pattern Tree. If there - was not yet any item associated with the tree path designated by - segments, create a new one using item_instantiator() function. Return - all items matching this path (both the ones that existed and the ones - just created). - """ - node = self - segment = None - - for segment in segments: - wildcards = node.wildcard_matches - - child = node.children.get(segment) or PatternTreeNode() - node.children[segment] = child - node = child - - if node.literal_match is None: - node.literal_match = item_instantiator() - - if segment not in ('*', '**', '***'): - return [node.literal_match] - - if wildcards[len(segment) - 1] is None: - wildcards[len(segment) - 1] = item_instantiator() - - return [node.literal_match, wildcards[len(segment) - 1]] - -proto_regex = re.compile(r'^(?P\w+)://(?P.*)$') -user_re = r'[^/?#@]+@' # r'(?P[^/?#@]+)@' # discarded for now -query_re = r'\??[^#]*' # r'\??(?P[^#]*)' # discarded for now -domain_re = r'(?P[^/?#]+)' -path_re = r'(?P[^?#]*)' -http_regex = re.compile(f'{domain_re}{path_re}{query_re}.*') -ftp_regex = re.compile(f'(?:{user_re})?{domain_re}{path_re}.*') - -class UrlError(ValueError): - """Used to report a URL or URL pattern that is invalid or unsupported.""" - pass - -class DeconstructedUrl: - 
"""Represents a deconstructed URL or URL pattern""" - def __init__(self, url): - self.url = url - - match = proto_regex.match(url) - if not match: - raise UrlError(f_('invalid_URL_{}').format(url)) - - self.proto = match.group('proto') - if self.proto not in ('http', 'https', 'ftp'): - raise UrlError(f_('disallowed_protocol_{}').format(proto)) - - if self.proto == 'ftp': - match = ftp_regex.match(match.group('rest')) - elif self.proto in ('http', 'https'): - match = http_regex.match(match.group('rest')) - - if not match: - raise UrlError(f_('invalid_URL_{}').format(url)) - - self.domain = match.group('domain').split('.') - self.domain.reverse() - self.path = [*filter(None, match.group('path').split('/'))] - -class PatternMapping: - """ - A mapping info, together with one of its patterns, as stored in Pattern - Tree. - """ - def __init__(self, pattern: str, mapping_info: MappingInfo): - self.pattern = pattern - self.mapping_info = mapping_info - - def register(self, pattern_tree: dict): - """ - Make self queryable through the Pattern Tree passed in the argument. - """ - deco = DeconstructedUrl(self.pattern) - - domain_tree = pattern_tree.get(deco.proto) or PatternTreeNode() - pattern_tree[deco.proto] = domain_tree - - for path_tree in domain_tree.add(deco.domain, PatternTreeNode): - for match_list in path_tree.add(deco.path, list): - match_list.append(self) + # def as_query_result(self) -> dict[str, Union[str, list[int]]]: + # """ + # Produce a json.dump()-able object describing this mapping as one of a + # collection of query results. + # """ + # return { + # 'version': self.version, + # 'identifier': self.identifier, + # 'long_name': self.long_name + # } class Malcontent: """ - Instance of this class represents a directory with files that can be loaded - and served by Hydrilla. + Represent a directory with files that can be loaded and served by Hydrilla. 
""" def __init__(self, malcontent_dir_path: Path): """ @@ -298,13 +77,15 @@ class Malcontent: malcontent_dir_path for serveable site-modifying packages and loads them into its data structures. """ - self.infos = {'resource': {}, 'mapping': {}} - self.pattern_tree = {} + self.resource_infos: dict[str, VersionedItemInfo[ResourceInfo]] = {} + self.mapping_infos: dict[str, VersionedItemInfo[MappingInfo]] = {} + + self.pattern_tree: PatternTree[MappingInfo] = PatternTree() self.malcontent_dir_path = malcontent_dir_path if not self.malcontent_dir_path.is_dir(): - raise ValueError(f_('malcontent_dir_path_not_dir_{}') + raise ValueError(_('malcontent_dir_path_not_dir_{}') .format(malcontent_dir_path)) for item_type in ('mapping', 'resource'): @@ -323,18 +104,27 @@ class Malcontent: if flask.current_app._hydrilla_werror: raise e from None - msg = f_('couldnt_load_item_from_{}').format(ver_file) + msg = _('couldnt_load_item_from_{}').format(ver_file) logging.error(msg, exc_info=True) self._report_missing() self._finalize() + @staticmethod + def _register_info(infos: dict[str, VersionedItemInfo[VersionedType]], + identifier: str, item_info: VersionedType) -> None: + """ + ........... + """ + infos.setdefault(identifier, VersionedItemInfo())\ + .register(item_info) + def _load_item(self, item_type: str, ver_file: Path) -> None: """ Reads, validates and autocompletes serveable mapping/resource definition, then registers information from it in data structures. """ - version = util.parse_version(ver_file.name) + version = versions.parse_version(ver_file.name) identifier = ver_file.parent.name item_json, major = util.load_instance_from_file(ver_file) @@ -342,32 +132,35 @@ class Malcontent: util.validator_for(f'api_{item_type}_description-{major}.schema.json')\ .validate(item_json) - if item_type == 'resource': - item_info = ResourceInfo(item_json, major) - else: - item_info = MappingInfo(item_json, major) + # Assertion needed for mypy. If validation passed, this should not fail. 
+ assert major is not None + + item_info: ItemInfo = ResourceInfo(item_json, major) \ + if item_type == 'resource' else MappingInfo(item_json, major) if item_info.identifier != identifier: - msg = f_('item_{item}_in_file_{file}')\ + msg = _('item_{item}_in_file_{file}')\ .format({'item': item_info.identifier, 'file': ver_file}) raise ValueError(msg) if item_info.version != version: ver_str = util.version_string(item_info.version) - msg = f_('item_version_{ver}_in_file_{file}')\ + msg = _('item_version_{ver}_in_file_{file}')\ .format({'ver': ver_str, 'file': ver_file}) raise ValueError(msg) - versioned_info = self.infos[item_type].get(identifier) - if versioned_info is None: - versioned_info = VersionedItemInfo() - self.infos[item_type][identifier] = versioned_info + if isinstance(item_info, ResourceInfo): + self._register_info(self.resource_infos, identifier, item_info) + elif isinstance(item_info, MappingInfo): + self._register_info(self.mapping_infos, identifier, item_info) - versioned_info.register(item_info) - - def _all_of_type(self, item_type: str) -> Iterable[ItemInfo]: - """Iterator over all registered versions of all mappings/resources.""" - for versioned_info in self.infos[item_type].values(): + @staticmethod + def _all_infos(infos: dict[str, VersionedItemInfo[VersionedType]]) \ + -> Iterable[VersionedType]: + """ + ........... + """ + for versioned_info in infos.values(): for item_info in versioned_info.by_version.values(): yield item_info @@ -377,38 +170,38 @@ class Malcontent: were not loaded. 
""" def report_missing_dependency(info: ResourceInfo, dep: str) -> None: - msg = f_('no_dep_{resource}_{ver}_{dep}')\ + msg = _('no_dep_{resource}_{ver}_{dep}')\ .format(dep=dep, resource=info.identifier, ver=util.version_string(info.version)) logging.error(msg) - for resource_info in self._all_of_type('resource'): + for resource_info in self._all_infos(self.resource_infos): for dep in resource_info.dependencies: - if dep not in self.infos['resource']: + if dep not in self.resource_infos: report_missing_dependency(resource_info, dep) def report_missing_payload(info: MappingInfo, payload: str) -> None: - msg = f_('no_payload_{mapping}_{ver}_{payload}')\ + msg = _('no_payload_{mapping}_{ver}_{payload}')\ .format(mapping=info.identifier, payload=payload, ver=util.version_string(info.version)) logging.error(msg) - for mapping_info in self._all_of_type('mapping'): + for mapping_info in self._all_infos(self.mapping_infos): for payload in mapping_info.payloads.values(): - if payload not in self.infos['resource']: + if payload not in self.resource_infos: report_missing_payload(mapping_info, payload) - def report_missing_mapping(info: Union[MappingInfo, ResourceInfo], + def report_missing_mapping(info: ItemInfo, required_mapping: str) -> None: msg = _('no_mapping_{required_by}_{ver}_{required}')\ .format(required_by=info.identifier, required=required_mapping, ver=util.version_string(info.version)) logging.error(msg) - for item_info in (*self._all_of_type('mapping'), - *self._all_of_type('resource')): + for item_info in (*self._all_infos(self.mapping_infos), + *self._all_infos(self.resource_infos)): for required in item_info.required_mappings: - if required not in self.infos['mapping']: + if required not in self.mapping_infos: report_missing_mapping(item_info, required) def _finalize(self): @@ -416,18 +209,19 @@ class Malcontent: Initialize structures needed to serve queries. Called once after all data gets loaded. 
""" - for infos_dict in self.infos.values(): - for versioned_info in infos_dict.values(): + for versioned_info in (*self.mapping_infos.values(), + *self.resource_infos.values()): versioned_info.known_versions.sort() - for info in self._all_of_type('mapping'): + for info in self._all_infos(self.mapping_infos): for pattern in info.payloads: try: - PatternMapping(pattern, info).register(self.pattern_tree) + self.pattern_tree = \ + self.pattern_tree.register(pattern, info) except Exception as e: if flask.current_app._hydrilla_werror: raise e from None - msg = f_('couldnt_register_{mapping}_{ver}_{pattern}')\ + msg = _('couldnt_register_{mapping}_{ver}_{pattern}')\ .format(mapping=info.identifier, pattern=pattern, ver=util.version_string(info.version)) logging.error(msg) @@ -439,27 +233,16 @@ class Malcontent: If multiple versions of a mapping are applicable, only the most recent is included in the result. """ - deco = DeconstructedUrl(url) - - collected = {} - - domain_tree = self.pattern_tree.get(deco.proto) or PatternTreeNode() - - def process_mapping(pattern_mapping: PatternMapping) -> None: - if url[-1] != '/' and pattern_mapping.pattern[-1] == '/': - return - - info = pattern_mapping.mapping_info + collected: dict[str, MappingInfo] = {} + for result_set in self.pattern_tree.search(url): + for wrapped_mapping_info in result_set: + info = wrapped_mapping_info.item + previous = collected.get(info.identifier) + if previous and previous.version > info.version: + continue - if info.identifier not in collected or \ - info.version > collected[info.identifier].version: collected[info.identifier] = info - for path_tree in domain_tree.search(deco.domain): - for matches_list in path_tree.search(deco.path): - for pattern_mapping in matches_list: - process_mapping(pattern_mapping) - return list(collected.values()) bp = flask.Blueprint('bp', __package__) @@ -484,8 +267,6 @@ class HydrillaApp(flask.Flask): ] } - self._hydrilla_translation = \ - util.translation(here / 'locales', 
hydrilla_config['language']) self._hydrilla_project_url = hydrilla_config['hydrilla_project_url'] self._hydrilla_port = hydrilla_config['port'] self._hydrilla_werror = hydrilla_config.get('werror', False) @@ -506,8 +287,8 @@ class HydrillaApp(flask.Flask): 'hydrilla_project_url' global variable and to install proper translations. """ - env = super().create_jinja_environment(*args, **kwargs) - env.install_gettext_translations(self._hydrilla_translation) + env = super().create_jinja_environment(*args, **kwargs) # type: ignore + env.install_gettext_translations(make_translation()) env.globals['hydrilla_project_url'] = self._hydrilla_project_url return env @@ -519,9 +300,6 @@ class HydrillaApp(flask.Flask): """ return super().run(*args, port=self._hydrilla_port, **kwargs) -def f_(text_key): - return flask.current_app._hydrilla_translation.gettext(text_key) - def malcontent(): return flask.current_app._hydrilla_malcontent @@ -542,7 +320,12 @@ def get_resource_or_mapping(item_type: str, identifier: str) -> Response: identifier = match.group(1) - versioned_info = malcontent().infos[item_type].get(identifier) + if item_type == 'resource': + infos = malcontent().resource_infos + else: + infos = malcontent().mapping_infos + + versioned_info = infos.get(identifier) info = versioned_info and versioned_info.get_by_ver() if info is None: @@ -586,9 +369,6 @@ default_config_path = Path('/etc/hydrilla/config.json') default_malcontent_dir = '/var/lib/hydrilla/malcontent' default_project_url = 'https://hydrillabugs.koszko.org/projects/hydrilla/wiki' -console_gettext = util.translation(here / 'locales').gettext -_ = console_gettext - @click.command(help=_('serve_hydrilla_packages_explain_wsgi_considerations')) @click.option('-m', '--malcontent-dir', type=click.Path(exists=True, file_okay=False), @@ -600,24 +380,21 @@ _ = console_gettext @click.option('-c', '--config', 'config_path', type=click.Path(exists=True, dir_okay=False, resolve_path=True), 
help=_('path_to_config_file_explain_default')) -@click.option('-l', '--language', type=click.STRING, - help=_('language_to_use_overrides_config')) @click.version_option(version=_version.version, prog_name='Hydrilla', message=_('%(prog)s_%(version)s_license'), help=_('version_printing')) def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str], - port: Optional[int], config_path: Optional[str], - language: Optional[str]) -> None: + port: Optional[int], config_path: Optional[str]) -> None: """ Run a development Hydrilla server. This command is meant to be the entry point of hydrilla command exported by this package. """ - config_load_opts = {} if config_path is None \ - else {'config_path': [Path(config_path)]} - - hydrilla_config = config.load(**config_load_opts) + if config_path is None: + hydrilla_config = config.load() + else: + hydrilla_config = config.load(config_paths=[Path(config_path)]) if malcontent_dir is not None: hydrilla_config['malcontent_dir'] = str(Path(malcontent_dir).resolve()) @@ -628,14 +405,7 @@ def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str], if port is not None: hydrilla_config['port'] = port - if language is not None: - hydrilla_config['language'] = language - - lang = hydrilla_config.get('language') - _ = console_gettext if lang is None else \ - util.translation(here / 'locales', lang).gettext - - for opt in ('malcontent_dir', 'hydrilla_project_url', 'port', 'language'): + for opt in ('malcontent_dir', 'hydrilla_project_url', 'port'): if opt not in hydrilla_config: raise ValueError(_('config_option_{}_not_supplied').format(opt)) @@ -649,7 +419,7 @@ def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str], @click.version_option(version=_version.version, prog_name='Hydrilla', message=_('%(prog)s_%(version)s_license'), help=_('version_printing')) -def start_wsgi() -> None: +def start_wsgi() -> flask.Flask: """ Create application object for use in WSGI deployment. 
diff --git a/src/hydrilla/translations.py b/src/hydrilla/translations.py new file mode 100644 index 0000000..a963e82 --- /dev/null +++ b/src/hydrilla/translations.py @@ -0,0 +1,104 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Handling of gettext for Hydrilla. +# +# This file is part of Hydrilla +# +# Copyright (C) 2021, 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +# Enable using with Python 3.7. +from __future__ import annotations + +import locale as lcl +import gettext + +from pathlib import Path +from typing import Optional + +here = Path(__file__).resolve().parent + +localedir = here / 'locales' + +supported_locales = [f.name for f in localedir.iterdir() if f.is_dir()] + +default_locale = 'en_US' + +def select_best_locale() -> str: + """ + .... + + Otherwise, try to determine system's default language and use that. + """ + # TODO: Stop referenceing flask here. Instead, allow other code to register + # custom locale resolvers and register flask-aware resolver during + # runtime from within the flask-related part(s) of the application. 
+ try: + import flask + use_flask = flask.has_request_context() + except ModuleNotFoundError: + use_flask = False + + if use_flask: + return flask.request.accept_languages.best_match( + supported_locales, + default=default_locale + ) + + # https://stackoverflow.com/questions/3425294/how-to-detect-the-os-default-language-in-python + # I am not going to surrender to Microbugs' nonfree, crappy OS to test it, + # so the lines inside try: block may actually fail. + locale: Optional[str] = lcl.getdefaultlocale()[0] + try: + from ctypes.windll import kernel32 as windll # type: ignore + locale = lcl.windows_locale[windll.GetUserDefaultUILanguage()] + except: + pass + + return locale if locale in supported_locales else default_locale + +translations: dict[str, gettext.NullTranslations] = {} + +def translation(locale: Optional[str] = None) -> gettext.NullTranslations: + """ + Configure translations for domain 'messages' and return the object that + represents them. If the requested locale is not available, fall back to + 'en_US'. + """ + if locale is None: + locale = select_best_locale() + + if not (localedir / locale).is_dir(): + locale = 'en_US' + + if locale not in translations: + translations[locale] = gettext.translation( + 'messages', + localedir=localedir, + languages=[locale] + ) + + return translations[locale] + +def smart_gettext(msg: str, locale: Optional[str] = None) -> str: + """....""" + return translation(locale).gettext(msg) + +_ = smart_gettext diff --git a/src/hydrilla/url_patterns.py b/src/hydrilla/url_patterns.py new file mode 100644 index 0000000..8e80379 --- /dev/null +++ b/src/hydrilla/url_patterns.py @@ -0,0 +1,181 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Data structure for querying URL patterns. +# +# This file is part of Hydrilla&Haketilo. 
#
# Copyright (C) 2021, 2022 Wojtek Kosior
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
#
# I, Wojtek Kosior, thereby promise not to sue for violation of this
# file's license. Although I request that you do not make use this code
# in a proprietary program, I am not going to enforce this in court.

"""
This module contains functions for deconstruction and construction of URLs and
Haketilo URL patterns.

Data structures for querying data using URL patterns are also defined there.
"""

# Enable using with Python 3.7.
from __future__ import annotations

import re
import urllib.parse as up
import typing as t
import dataclasses as dc

from immutables import Map

from hydrilla.translations import smart_gettext as _
from hydrilla.exceptions import HaketiloException

default_ports: t.Mapping[str, int] = Map(http=80, https=443, ftp=21)

@dc.dataclass(frozen=True, unsafe_hash=True)
class ParsedUrl:
    """
    Immutable result of decomposing a URL or a URL pattern.

    Only 'orig_url' takes part in hashing (every other field is declared with
    hash=False); equality still compares all fields, as is the dataclass
    default.
    """
    orig_url:           str                  # orig_url used in __hash__()
    scheme:             str             = dc.field(hash=False)
    # Hostname labels in reverse order, empty labels dropped.
    domain_labels:      tuple[str, ...] = dc.field(hash=False)
    # Path segments in order, empty segments dropped.
    path_segments:      tuple[str, ...] = dc.field(hash=False)
    has_trailing_slash: bool            = dc.field(hash=False)
    port:               int             = dc.field(hash=False)

    # def reconstruct_url(self) -> str:
    #     """...."""
    #     scheme = self.orig_scheme

    #     netloc = '.'.join(reversed(self.domain_labels))
    #     if scheme == self.scheme and \
    #        self.port is not None and \
    #        default_ports[scheme] != self.port:
    #         netloc += f':{self.port}'

    #     path = '/'.join(('', *self.path_segments))
    #     if self.has_trailing_slash:
    #         path += '/'

    #     return f'{scheme}://{netloc}{path}'

# URLs with those schemes will be recognized but not all of them have to be
# actually supported by Hydrilla server and Haketilo proxy.
supported_schemes = 'http', 'https', 'ftp', 'file'

def _parse_pattern_or_url(url: str, orig_url: str, is_pattern: bool = False) \
    -> ParsedUrl:
    """
    Validate 'url' and split it into the parts stored in ParsedUrl.

    'url' is the string that actually gets parsed. 'orig_url' is the string
    stored in the result and quoted in pattern error messages; for plain URLs
    (is_pattern false) both must be equal.

    Raise HaketiloException if the URL/pattern has no scheme, uses a scheme
    outside 'supported_schemes', has a hostname/port where none is allowed (or
    lacks a required hostname), has an invalid port or - for patterns only -
    carries a query or a fragment.
    """
    if not is_pattern:
        assert orig_url == url

    parse_result = up.urlparse(url)

    # Reading '.port' raises ValueError when the port is malformed or out of
    # range. Read it exactly once, here, so that none of the checks below can
    # leak a bare ValueError instead of HaketiloException. Port 0 is treated as
    # invalid as well.
    try:
        explicit_port = parse_result.port
        port_invalid  = explicit_port == 0
    except ValueError:
        explicit_port = None
        port_invalid  = True

    has_port_spec = port_invalid or explicit_port is not None

    # Verify the parsed URL is valid
    has_hostname = parse_result.hostname is not None
    if not parse_result.scheme or \
       (parse_result.scheme == 'file' and has_port_spec) or \
       (parse_result.scheme == 'file' and has_hostname) or \
       (parse_result.scheme != 'file' and not has_hostname):
        # Message ids are kept as literals inside _() calls so that message
        # extraction tools can find them.
        if is_pattern:
            msg = _('err.url_pattern_{}.bad').format(orig_url)
            raise HaketiloException(msg)
        else:
            raise HaketiloException(_('err.url_{}.bad') .format(url))

    # Verify the URL uses a known scheme and extract it.
    scheme = parse_result.scheme

    if parse_result.scheme not in supported_schemes:
        if is_pattern:
            msg = _('err.url_pattern_{}.bad_scheme').format(orig_url)
            raise HaketiloException(msg)
        else:
            raise HaketiloException(_('err.url_{}.bad_scheme').format(url))

    # A pattern with the special 'http*' scheme stands for both http and https,
    # so it must not name an explicit port.
    if is_pattern and orig_url.startswith('http*:'):
        if explicit_port:
            fmt = _('err.url_pattern_{}.special_scheme_port')
            raise HaketiloException(fmt.format(orig_url))

    # Reject invalid explicit ports.
    if port_invalid:
        if is_pattern:
            msg = _('err.url_pattern_{}.bad_port').format(orig_url)
            raise HaketiloException(msg)
        else:
            raise HaketiloException(_('err.url_{}.bad_port').format(url))

    # Use URL's explicit port or deduce the port based on URL's protocol.
    # 'default_ports' has no entry for 'file' and explicit ports were rejected
    # for that scheme above, so 'port' ends up None for file URLs despite the
    # cast.
    port = t.cast(int, explicit_port or default_ports.get(parse_result.scheme))

    # Make URL's hostname into a list of labels in reverse order. E.g.
    # 'https://a.bc..de.fg.com/h/i/' -> ['com', 'fg', 'de', 'bc', 'a']
    hostname = parse_result.hostname or ''
    domain_labels_with_empty = reversed(hostname.split('.'))
    domain_labels = tuple(lbl for lbl in domain_labels_with_empty if lbl)

    # Make URL's path into a list of segments. E.g.
    # 'https://ab.cd/e//f/g/' -> ['e', 'f', 'g']
    path_segments_with_empty = parse_result.path.split('/')
    path_segments = tuple(sgmt for sgmt in path_segments_with_empty if sgmt)

    # Record whether a trailing '/' is present in the URL.
    has_trailing_slash = parse_result.path.endswith('/')

    # Perform some additional sanity checks and return the result.
    if is_pattern:
        if parse_result.query:
            msg = _('err.url_pattern_{}.has_query').format(orig_url)
            raise HaketiloException(msg)

        if parse_result.fragment:
            msg = _('err.url_pattern_{}.has_frag').format(orig_url)
            raise HaketiloException(msg)

    return ParsedUrl(
        orig_url           = orig_url,
        scheme             = scheme,
        port               = port,
        domain_labels      = domain_labels,
        path_segments      = path_segments,
        has_trailing_slash = has_trailing_slash
    )

# Matches everything before the first ':' - i.e. the scheme part.
replace_scheme_regex = re.compile(r'^[^:]*')

def parse_pattern(url_pattern: str) -> t.Sequence[ParsedUrl]:
    """
    Parse a Haketilo URL pattern.

    A pattern starting with 'http*:' is expanded to its 'http' and 'https'
    variants and both get parsed; any other pattern yields a single result.
    Every returned ParsedUrl keeps the unmodified 'url_pattern' as its
    'orig_url'. HaketiloException from parsing is propagated.
    """
    if url_pattern.startswith('http*:'):
        patterns = [
            replace_scheme_regex.sub('http',  url_pattern),
            replace_scheme_regex.sub('https', url_pattern)
        ]
    else:
        patterns = [url_pattern]

    return tuple(_parse_pattern_or_url(pat, url_pattern, True)
                 for pat in patterns)

def parse_url(url: str) -> ParsedUrl:
    """
    Parse a plain (non-pattern) URL. Raise HaketiloException if it is invalid.
    """
    return _parse_pattern_or_url(url, url)

# -----------------------------------------------------------------------------
# Patch metadata of the next file in this commit (kept from the original text):
#   diff --git a/src/hydrilla/versions.py b/src/hydrilla/versions.py
#   new file mode 100644
#   index 0000000..a7a9f29
#   --- /dev/null
#   +++ b/src/hydrilla/versions.py
#   @@ -0,0 +1,59 @@
# -----------------------------------------------------------------------------
# SPDX-License-Identifier: GPL-3.0-or-later

# Functions to operate on version numbers.
#
# This file is part of Hydrilla&Haketilo.
#
# Copyright (C) 2021, 2022 Wojtek Kosior
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
#
# I, Wojtek Kosior, thereby promise not to sue for violation of this
# file's license. Although I request that you do not make use this code
# in a proprietary program, I am not going to enforce this in court.

"""
This module contains functions for deconstruction and construction of version
strings and version tuples.
"""

# Enable using with Python 3.7.
from __future__ import annotations

import typing as t

def normalize_version(ver: t.Sequence[int]) -> tuple[int, ...]:
    """
    Strip right-most zeroes from 'ver' and return the result as a tuple. The
    original sequence is not modified. E.g. [4, 6, 13, 0] -> (4, 6, 13) and
    [0, 0] -> ().
    """
    # Scan from the right so that we stop at the last nonzero component
    # instead of walking the whole sequence.
    new_len = len(ver)
    while new_len > 0 and ver[new_len - 1] == 0:
        new_len -= 1

    return tuple(ver[:new_len])

def parse_version(ver_str: str) -> tuple[int, ...]:
    """
    Convert 'ver_str' into a tuple representation, e.g. for ver_str="4.6.13.0"
    return (4, 6, 13, 0).

    Raise ValueError if any dot-separated component is not a non-empty run of
    ASCII digits.
    """
    components = ver_str.split('.')

    # Bare int() would also accept things like '1_0', '-1', '+1' or ' 1'; none
    # of them is a valid version component, so check explicitly.
    for component in components:
        if not (component.isascii() and component.isdigit()):
            raise ValueError(f'invalid version string: {ver_str!r}')

    return tuple(int(component) for component in components)

def version_string(ver: t.Sequence[int], rev: t.Optional[int] = None) -> str:
    """
    Produce version's string representation (optionally with revision), like:
        1.2.3-5
    No version normalization is performed.
    """
    return '.'.join(str(n) for n in ver) + ('' if rev is None else f'-{rev}')

# -----------------------------------------------------------------------------
# Patch metadata of the next file in this commit (kept from the original text):
#   diff --git a/tests/helpers.py b/tests/helpers.py
#   new file mode 100644
#   index 0000000..df474b0
#   --- /dev/null
#   +++ b/tests/helpers.py
#   @@ -0,0 +1,51 @@
# -----------------------------------------------------------------------------
# SPDX-License-Identifier: CC0-1.0

# Copyright (C) 2022 Wojtek Kosior
#
# Available under the terms of Creative Commons Zero v1.0 Universal.
import re

# A word of the form '<name>' in an expected command line marks a variable part.
variable_word_re = re.compile(r'^<(.+)>$')

def process_command(command, expected_command):
    """
    Validate the command line and extract its variable parts (if any).

    'expected_command' is a sequence of words; a word of the form '<name>'
    matches any word of 'command' and causes that word to be returned under the
    key 'name'. All other words have to be equal. AssertionError is raised on
    mismatch (this helper is meant to be used from tests).
    """
    assert len(command) == len(expected_command), \
        f'expected {len(expected_command)} words, got {len(command)}'

    extracted = {}
    for word, expected_word in zip(command, expected_command):
        match = variable_word_re.match(expected_word)
        if match:
            extracted[match.group(1)] = word
        else:
            assert word == expected_word, \
                f'expected word {expected_word!r}, got {word!r}'

    return extracted

def run_missing_executable(command, **kwargs):
    """
    Instead of running a command, raise FileNotFoundError as if its executable
    was missing.
    """
    raise FileNotFoundError('dummy')

class MockedCompletedProcess:
    """
    Object with some fields similar to those of subprocess.CompletedProcess.
    """
    def __init__(self, args, returncode=0,
                 stdout='some output', stderr='some error output',
                 text_output=True):
        """
        Initialize MockedCompletedProcess. Convert strings to bytes if needed,
        i.e. when 'text_output' is false (or None).
        """
        self.args       = args
        self.returncode = returncode

        if isinstance(stdout, str) and not text_output:
            stdout = stdout.encode()
        if isinstance(stderr, str) and not text_output:
            stderr = stderr.encode()

        self.stdout = stdout
        self.stderr = stderr

# -----------------------------------------------------------------------------
# Patch metadata of the next file in this commit (kept from the original text):
#   diff --git a/tests/test_build.py b/tests/test_build.py
#   new file mode 100644
#   index 0000000..868594e
#   --- /dev/null
#   +++ b/tests/test_build.py
#   @@ -0,0 +1,818 @@
# -----------------------------------------------------------------------------
# SPDX-License-Identifier: CC0-1.0

# Copyright (C) 2022 Wojtek Kosior
#
# Available under the terms of Creative Commons Zero v1.0 Universal.

# Enable using with Python 3.7.
from __future__ import annotations

import pytest
import json
import shutil
import functools as ft

from tempfile import TemporaryDirectory
from pathlib import Path, PurePosixPath
from hashlib import sha256
from zipfile import ZipFile
from contextlib import contextmanager

from jsonschema import ValidationError

from hydrilla import _version, json_instances, versions
from hydrilla.json_instances import _schema_name_re
from hydrilla.builder import build, local_apt
from hydrilla.builder.common_errors import *

from .helpers import *

here = Path(__file__).resolve().parent

expected_generated_by = {
    'name': 'hydrilla.builder',
    'version': _version.version
}

orig_srcdir = here / 'source-package-example'

index_obj = json_instances.read_instance(orig_srcdir / 'index.json')

def read_files(*file_list):
    """
    Take names of files under srcdir and return a dict that maps them to their
    contents (as bytes).
    """
    return dict((name, (orig_srcdir / name).read_bytes()) for name in file_list)

dist_files = {
    **read_files('LICENSES/CC0-1.0.txt', 'bye.js', 'hello.js', 'message.js'),
    'report.spdx': b'dummy spdx output'
}
src_files = {
    **dist_files,
    **read_files('README.txt', 'README.txt.license', '.reuse/dep5',
                 'index.json')
}
extra_archive_files = {
}

sha256_hashes = dict((name, sha256(contents).digest().hex())
                     for name, contents in src_files.items())

# report.spdx gets generated during build; it is not part of the sources.
del src_files['report.spdx']

expected_source_copyright = [{
    'file': 'report.spdx',
    'sha256': sha256_hashes['report.spdx']
}, {
    'file': 'LICENSES/CC0-1.0.txt',
    'sha256': sha256_hashes['LICENSES/CC0-1.0.txt']
}]

expected_resources = [{
    '$schema': 'https://hydrilla.koszko.org/schemas/api_resource_description-1.schema.json',
    'source_name': 'hello',
    'source_copyright': expected_source_copyright,
    'type': 'resource',
    'identifier': 'helloapple',
    'long_name': 'Hello Apple',
    'uuid': 'a6754dcb-58d8-4b7a-a245-24fd7ad4cd68',
    'version': [2021, 11, 10],
    'revision': 1,
    'description': 'greets an apple',
    'dependencies': [{'identifier': 'hello-message'}],
    'scripts': [{
        'file': 'hello.js',
        'sha256': sha256_hashes['hello.js']
    }, {
        'file': 'bye.js',
        'sha256': sha256_hashes['bye.js']
    }],
    'generated_by': expected_generated_by
}, {
    '$schema': 'https://hydrilla.koszko.org/schemas/api_resource_description-1.schema.json',
    'source_name': 'hello',
    'source_copyright': expected_source_copyright,
    'type': 'resource',
    'identifier': 'hello-message',
    'long_name': 'Hello Message',
    'uuid': '1ec36229-298c-4b35-8105-c4f2e1b9811e',
    'version': [2021, 11, 10],
    'revision': 2,
    'description': 'define messages for saying hello and bye',
    'dependencies': [],
    'scripts': [{
        'file': 'message.js',
        'sha256': sha256_hashes['message.js']
    }],
    'generated_by': expected_generated_by
}]

expected_mapping = {
    '$schema': 'https://hydrilla.koszko.org/schemas/api_mapping_description-1.schema.json',
    'source_name': 'hello',
    'source_copyright': expected_source_copyright,
    'type': 'mapping',
    'identifier': 'helloapple',
    'long_name': 'Hello Apple',
    'uuid': '54d23bba-472e-42f5-9194-eaa24c0e3ee7',
    'version': [2021, 11, 10],
    'description': 'causes apple to get greeted on Hydrillabugs issue tracker',
    'payloads': {
        'https://hydrillabugs.koszko.org/***': {
            'identifier': 'helloapple'
        },
        'https://hachettebugs.koszko.org/***': {
            'identifier': 'helloapple'
        }
    },
    'generated_by': expected_generated_by
}

expected_source_description = {
    '$schema': 'https://hydrilla.koszko.org/schemas/api_source_description-1.schema.json',
    'source_name': 'hello',
    'source_copyright': expected_source_copyright,
    'source_archives': {
        'zip': {
            'sha256': '!!!!value to fill during test!!!!',
        }
    },
    'upstream_url': 'https://git.koszko.org/hydrilla-source-package-example',
    'definitions': [{
        'type': 'mapping',
        'identifier': 'helloapple',
        'long_name': 'Hello Apple',
        'version': [2021, 11, 10],
    }, {
        'type': 'resource',
        'identifier': 'helloapple',
        'long_name': 'Hello Apple',
        'version': [2021, 11, 10],
    }, {
        'type': 'resource',
        'identifier': 'hello-message',
        'long_name': 'Hello Message',
        'version': [2021, 11, 10],
    }],
    'generated_by': expected_generated_by
}

expected = [expected_mapping, *expected_resources, expected_source_description]
expected_items = expected[:3]

def run_reuse(command, **kwargs):
    """
    Instead of running a 'reuse' command, check if 'mock_reuse_missing' file
    exists under root directory. If yes, raise FileNotFoundError as if 'reuse'
    command was missing. If not, check if 'README.txt.license' file exists
    in the requested directory and return zero if it does.
    """
    # '<root>' is a placeholder word: process_command() accepts any value in
    # its place and returns that value under the 'root' key.
    expected = ['reuse', '--root', '<root>',
                'lint' if 'lint' in command else 'spdx']

    root_path = Path(process_command(command, expected)['root'])

    if (root_path / 'mock_reuse_missing').exists():
        raise FileNotFoundError('dummy')

    is_reuse_compliant = (root_path / 'README.txt.license').exists()

    # bool is an int, so the return code is 0 when compliant and 1 otherwise.
    return MockedCompletedProcess(command, 1 - is_reuse_compliant,
                                  stdout=f'dummy {expected[-1]} output',
                                  text_output=kwargs.get('text'))

mocked_piggybacked_archives = [
    PurePosixPath('apt/something.deb'),
    PurePosixPath('apt/something.orig.tar.gz'),
    PurePosixPath('apt/something.debian.tar.xz'),
    PurePosixPath('othersystem/other-something.tar.gz')
]

@pytest.fixture
def mock_piggybacked_apt_system(monkeypatch):
    """Make local_apt.piggybacked_system() return a mocked result."""
    # We set 'td' to a temporary dir path further below.
    td = None

    class MockedPiggybacked:
        """
        Minimal mock of Piggybacked object. The class itself (not an instance)
        is handed out, hence the functions below take no 'self'.
        """
        package_license_files = [PurePosixPath('.apt-root/.../copyright')]
        resource_must_depend = [{'identifier': 'apt-common-licenses'}]

        def resolve_file(path):
            """
            For each path that starts with '.apt-root' return a valid dummy file
            path.
            """
            if path.parts[0] != '.apt-root':
                return None

            (td / path.name).write_text(f'dummy {path.name}')

            return (td / path.name)

        def archive_files():
            """Yield some valid dummy file path tuples."""
            for desired_path in mocked_piggybacked_archives:
                real_path = td / desired_path.name
                real_path.write_text(f'dummy {desired_path.name}')

                yield desired_path, real_path

    @contextmanager
    def mocked_piggybacked_system(piggyback_def, piggyback_files):
        """Mock the execution of local_apt.piggybacked_system()."""
        assert piggyback_def == {
            'system': 'apt',
            'distribution': 'nabia',
            'packages': ['somelib=1.0'],
            'dependencies': False
        }
        if piggyback_files is not None:
            assert {str(path) for path in mocked_piggybacked_archives} == \
                {path.relative_to(piggyback_files).as_posix()
                 for path in piggyback_files.rglob('*') if path.is_file()}

        yield MockedPiggybacked

    monkeypatch.setattr(local_apt, 'piggybacked_system',
                        mocked_piggybacked_system)

    with TemporaryDirectory() as td:
        td = Path(td)
        yield

@pytest.fixture
def sample_source():
    """Prepare a directory with sample Haketilo source package."""
    with TemporaryDirectory() as td:
        sample_source = Path(td) / 'hello'
        for name, contents in src_files.items():
            path = sample_source / name
            path.parent.mkdir(parents=True, exist_ok=True)
            path.write_bytes(contents)

        yield sample_source

def collect(list):
    """Decorate function by appending it to the specified list."""
    def decorator(function):
        """The actual decorator that will be applied."""
        list.append(function)
        return function

    return decorator

variant_makers = []

@collect(variant_makers)
def sample_source_change_index_json(monkeypatch, sample_source):
    """
    Return a non-standard path for index.json. Ensure parent directories exist.
    """
    # Use a path under sample_source so that it gets auto-deleted after the
    # test. Use a file under .git because .git is ignored by REUSE.
    path = sample_source / '.git' / 'replacement.json'
    path.parent.mkdir()
    return path

@collect(variant_makers)
def sample_source_add_comments(monkeypatch, sample_source):
    """Add index.json comments that should be preserved."""
    for dictionary in index_obj, *index_obj['definitions'], *expected:
        monkeypatch.setitem(dictionary, 'comment', 'index.json comment')

@collect(variant_makers)
def sample_source_remove_spdx(monkeypatch, sample_source):
    """Remove spdx report generation."""
    monkeypatch.delitem(index_obj, 'reuse_generate_spdx_report')

    pred = lambda ref: ref['file'] != 'report.spdx'
    copy_refs_in = list(filter(pred, index_obj['copyright']))
    monkeypatch.setitem(index_obj, 'copyright', copy_refs_in)

    copy_refs_out = list(filter(pred, expected_source_copyright))
    for obj in expected:
        monkeypatch.setitem(obj, 'source_copyright', copy_refs_out)

    monkeypatch.delitem(dist_files, 'report.spdx')

    # To verify that reuse does not get called now, make mocked subprocess.run()
    # raise an error if called.
    (sample_source / 'mock_reuse_missing').touch()

@collect(variant_makers)
def sample_source_remove_additional_files(monkeypatch, sample_source):
    """Use default value ([]) for 'additionall_files' property."""
    monkeypatch.delitem(index_obj, 'additional_files')

    for name in 'README.txt', 'README.txt.license', '.reuse/dep5':
        monkeypatch.delitem(src_files, name)

@collect(variant_makers)
def sample_source_remove_script(monkeypatch, sample_source):
    """Use default value ([]) for 'scripts' property in one of the resources."""
    monkeypatch.delitem(index_obj['definitions'][2], 'scripts')

    monkeypatch.setitem(expected_resources[1], 'scripts', [])

    for files in dist_files, src_files:
        monkeypatch.delitem(files, 'message.js')

@collect(variant_makers)
def sample_source_remove_payloads(monkeypatch, sample_source):
    """Use default value ({}) for 'payloads' property in mapping."""
    monkeypatch.delitem(index_obj['definitions'][0], 'payloads')

    monkeypatch.setitem(expected_mapping, 'payloads', {})

@collect(variant_makers)
def sample_source_remove_uuids(monkeypatch, sample_source):
    """Don't use UUIDs (they are optional)."""
    for definition in index_obj['definitions']:
        monkeypatch.delitem(definition, 'uuid')

    for description in expected:
        if 'uuid' in description:
            monkeypatch.delitem(description, 'uuid')

@collect(variant_makers)
def sample_source_add_extra_props(monkeypatch, sample_source):
    """Add some unrecognized properties that should be stripped."""
    to_process = [index_obj]
    while to_process:
        processed = to_process.pop()

        if type(processed) is list:
            to_process.extend(processed)
        elif type(processed) is dict and 'spurious_property' not in processed:
            to_process.extend(v for k, v in processed.items()
                              if k != 'payloads')
            monkeypatch.setitem(processed, 'spurious_property', 'some_value')

@collect(variant_makers)
def sample_source_make_version_2(monkeypatch, sample_source,
                                 expected_documents_to_modify=None):
    """
    Increase sources' schema version from 1 to 2. Do the same to '$schema' of
    every object in 'expected_documents_to_modify' (none by default).
    """
    for obj in index_obj, *(expected_documents_to_modify or ()):
        monkeypatch.setitem(obj, '$schema', obj['$schema'].replace('1', '2'))

permission_variant_makers = []

@collect(permission_variant_makers)
def sample_source_bool_perm_ignored(permission, monkeypatch, sample_source,
                                    value=True):
    """
    Specify a boolean permissions in sources, but keep sources' schema version
    at 1.
    """
    for definition in index_obj['definitions']:
        monkeypatch.setitem(definition, 'permissions', {permission: value})

@collect(permission_variant_makers)
def sample_source_bool_perm(permission, monkeypatch, sample_source):
    """Specify a boolean permission in sources."""
    sample_source_bool_perm_ignored(permission, monkeypatch, sample_source)
    sample_source_make_version_2(monkeypatch, sample_source, expected_items)

    for obj in expected_items:
        monkeypatch.setitem(obj, 'permissions', {permission: True})

@collect(permission_variant_makers)
def sample_source_bool_perm_defaults(permission, monkeypatch, sample_source):
    """
    Specify a boolean permission in sources but use the default value ("False").
    """
    sample_source_bool_perm_ignored(permission, monkeypatch, sample_source,
                                    value=False)
    sample_source_make_version_2(monkeypatch, sample_source)

# Turn every permission variant maker into ordinary variant makers, one per
# permission name.
for permission in 'cors_bypass', 'eval':
    for variant_maker in permission_variant_makers:
        variant_makers.append(ft.partial(variant_maker, permission))

@collect(variant_makers)
def sample_source_req_mappings_ignored(monkeypatch, sample_source,
                                       value=None):
    """
    Specify dependencies on mappings, but keep sources' schema version at 1.
    When 'value' is None, a single dependency on 'mapping-dep' is used.
    """
    if value is None:
        value = [{'identifier': 'mapping-dep'}]

    for definition in index_obj['definitions']:
        monkeypatch.setitem(definition, 'required_mappings', value)

@collect(variant_makers)
def sample_source_req_mappings(monkeypatch, sample_source):
    """Specify dependencies on mappings."""
    sample_source_req_mappings_ignored(monkeypatch, sample_source)
    sample_source_make_version_2(monkeypatch, sample_source, expected_items)

    for obj in expected_items:
        monkeypatch.setitem(obj, 'required_mappings',
                            [{'identifier': 'mapping-dep'}])

@collect(variant_makers)
def sample_source_req_mappings_defaults(monkeypatch, sample_source):
    """Specify dependencies of a mapping, but use the default value ("[]")."""
    sample_source_req_mappings_ignored(monkeypatch, sample_source, value=[])
    sample_source_make_version_2(monkeypatch, sample_source)

@collect(variant_makers)
def sample_source_combined_def(monkeypatch, sample_source):
    """Define mapping and resource together."""
    sample_source_make_version_2(monkeypatch, sample_source)

    mapping_def   = index_obj['definitions'][0]
    resource_defs = index_obj['definitions'][1:3]

    item_defs_shortened = [mapping_def, resource_defs[1]]
    monkeypatch.setitem(index_obj, 'definitions', item_defs_shortened)

    monkeypatch.setitem(mapping_def, 'type', 'mapping_and_resource')

    new_mapping_ver = [*expected_mapping['version'], 1]
    monkeypatch.setitem(mapping_def, 'revision', 1)
    monkeypatch.setitem(expected_mapping, 'version', new_mapping_ver)

    for prop in 'scripts', 'dependencies':
        monkeypatch.setitem(mapping_def, prop, resource_defs[0][prop])

    monkeypatch.setitem(expected_resources[0], 'uuid', mapping_def['uuid'])
    monkeypatch.setitem(expected_resources[0], 'description',
                        mapping_def['description'])

    monkeypatch.setitem(expected_source_description['definitions'][0],
                        'version', new_mapping_ver)

@collect(variant_makers)
def sample_source_minmax_haketilo_ver_ignored(monkeypatch, sample_source,
                                              min_ver=[1, 2], max_ver=[1, 2]):
    """
    Specify version constraints on Haketilo, but keep sources' schema version at
    1.
    """
    mapping_def = index_obj['definitions'][0]
    # Store copies so that the default lists never end up shared with (and
    # possibly modified through) index_obj.
    monkeypatch.setitem(mapping_def, 'min_haketilo_version', list(min_ver))
    monkeypatch.setitem(mapping_def, 'max_haketilo_version', list(max_ver))

@collect(variant_makers)
def sample_source_minmax_haketilo_ver(monkeypatch, sample_source):
    """Specify version constraints on Haketilo."""
    sample_source_minmax_haketilo_ver_ignored(monkeypatch, sample_source)
    sample_source_make_version_2(monkeypatch, sample_source, [expected_mapping])

    monkeypatch.setitem(expected_mapping, 'min_haketilo_version', [1, 2])
    monkeypatch.setitem(expected_mapping, 'max_haketilo_version', [1, 2])

@collect(variant_makers)
def sample_source_minmax_haketilo_ver_default(monkeypatch, sample_source):
    """Specify version constraints on Haketilo, but use default values."""
    sample_source_minmax_haketilo_ver_ignored(monkeypatch, sample_source,
                                              min_ver=[1], max_ver=[65536])
    sample_source_make_version_2(monkeypatch, sample_source)

piggyback_archive_names = [
    'apt/something.deb',
    'apt/something.orig.tar.gz',
    'apt/something.debian.tar.xz',
    'othersystem/other-something.tar.gz'
]

@collect(variant_makers)
def sample_source_add_piggyback_ignored(monkeypatch, sample_source,
                                        extra_build_args=None):
    """
    Add piggybacked foreign system packages, but keep sources' schema version at
    1. 'extra_build_args' (if given) are keyword arguments that get added to
    every build.Build() call.
    """
    extra_build_args = {} if extra_build_args is None else extra_build_args

    old_build = build.Build
    new_build = lambda *a, **kwa: old_build(*a, **kwa, **extra_build_args)
    monkeypatch.setattr(build, 'Build', new_build)

    monkeypatch.setitem(index_obj, 'piggyback_on', {
        'system': 'apt',
        'distribution': 'nabia',
        'packages': ['somelib=1.0'],
        'dependencies': False
    })

@collect(variant_makers)
def sample_source_add_piggyback(monkeypatch, sample_source,
                                extra_build_args=None):
    """Add piggybacked foreign system packages."""
    sample_source_add_piggyback_ignored\
        (monkeypatch, sample_source, extra_build_args)

    sample_source_make_version_2(monkeypatch, sample_source)

    new_refs = {}
    for name in '.apt-root/.../copyright', '.apt-root/.../script.js':
        contents = f'dummy {PurePosixPath(name).name}'.encode()
        digest = sha256(contents).digest().hex()
        monkeypatch.setitem(dist_files, name, contents)
        monkeypatch.setitem(sha256_hashes, name, digest)
        new_refs[PurePosixPath(name).name] = {'file': name, 'sha256': digest}

    new_list = [*expected_source_copyright, new_refs['copyright']]
    for obj in expected:
        monkeypatch.setitem(obj, 'source_copyright', new_list)

    for obj in expected_resources:
        new_list = [{'identifier': 'apt-common-licenses'}, *obj['dependencies']]
        monkeypatch.setitem(obj, 'dependencies', new_list)

    for obj in index_obj['definitions'][1], expected_resources[0]:
        new_list = [new_refs['script.js'], *obj['scripts']]
        monkeypatch.setitem(obj, 'scripts', new_list)

    for name in piggyback_archive_names:
        path = PurePosixPath('hello.foreign-packages') / name
        monkeypatch.setitem(extra_archive_files, str(path),
                            f'dummy {path.name}'.encode())

def prepare_foreign_packages_dir(path):
    """
    Put some dummy archive in the directory so that it can be passed to
    piggybacked_system().
    """
    for name in piggyback_archive_names:
        archive_path = path / name
        archive_path.parent.mkdir(parents=True, exist_ok=True)
        archive_path.write_text(f'dummy {archive_path.name}')

@collect(variant_makers)
def sample_source_add_piggyback_pass_archives(monkeypatch, sample_source):
    """
    Add piggybacked foreign system packages, use pre-downloaded foreign package
    archives (specify their directory as argument to Build()).
    """
    # Dir next to 'sample_source' will also be gc'd by sample_source() fixture.
    foreign_packages_dir = sample_source.parent / 'arbitrary-name'

    prepare_foreign_packages_dir(foreign_packages_dir)

    sample_source_add_piggyback(monkeypatch, sample_source,
                                {'piggyback_files': foreign_packages_dir})

@collect(variant_makers)
def sample_source_add_piggyback_find_archives(monkeypatch, sample_source):
    """
    Add piggybacked foreign system packages, use pre-downloaded foreign package
    archives (have Build() find them in their default directory).
    """
    # Dir next to 'sample_source' will also be gc'd by sample_source() fixture.
    foreign_packages_dir = sample_source.parent / 'hello.foreign-packages'

    prepare_foreign_packages_dir(foreign_packages_dir)

    sample_source_add_piggyback(monkeypatch, sample_source)

@collect(variant_makers)
def sample_source_add_piggyback_no_download(monkeypatch, sample_source,
                                            pass_directory_to_build=False):
    """
    Add piggybacked foreign system packages, use pre-downloaded foreign package
    archives.
    """
    # NOTE(review): 'pass_directory_to_build' only selects the directory name;
    # the directory is never actually passed to Build() here. This variant
    # looks like a leftover superseded by the two variants above - confirm and
    # remove or finish it.
    # Use a dir next to 'sample_source'; have it gc'd by sample_source fixture.
    if pass_directory_to_build:
        foreign_packages_dir = sample_source.parent / 'arbitrary-name'
    else:
        foreign_packages_dir = sample_source.parent / 'hello.foreign-packages'

    prepare_foreign_packages_dir(foreign_packages_dir)

    sample_source_add_piggyback(monkeypatch, sample_source)

@pytest.fixture(params=[lambda m, s: None, *variant_makers])
def sample_source_make_variants(request, monkeypatch, sample_source,
                                mock_piggybacked_apt_system):
    """
    Prepare a directory with sample Haketilo source package in multiple slightly
    different versions (all correct). Return an index.json path that should be
    used when performing test build.
    """
    index_path = request.param(monkeypatch, sample_source) or Path('index.json')

    index_text = json.dumps(index_obj)

    (sample_source / index_path).write_text(index_text)

    monkeypatch.setitem(src_files, 'index.json', index_text.encode())

    return index_path

def try_validate(as_what, instance):
    """
    Select the right JSON schema. Return without errors only if the instance
    validates against it.
    """
    schema_fmt = f'{as_what}-{{}}.schema.json'
    json_instances.validate_instance(instance, schema_fmt)

@pytest.mark.subprocess_run(build, run_reuse)
@pytest.mark.usefixtures('mock_subprocess_run')
def test_build(sample_source, sample_source_make_variants, tmpdir):
    """Build the sample source package and verify the produced files."""
    index_json_path = sample_source_make_variants

    # First, build the package
    build.Build(sample_source, index_json_path).write_package_files(tmpdir)

    # Verify directories under destination directory
    assert {'file', 'resource', 'mapping', 'source'} == \
        set([path.name for path in tmpdir.iterdir()])

    # Verify files under 'file/'
    file_dir = tmpdir / 'file' / 'sha256'

    for name, contents in dist_files.items():
        dist_file_path = file_dir / sha256_hashes[name]
        assert dist_file_path.is_file()
        assert dist_file_path.read_bytes() == contents

    assert {p.name for p in file_dir.iterdir()} == \
        {sha256_hashes[name] for name in dist_files.keys()}

    # Verify files under 'resource/'
    resource_dir = tmpdir / 'resource'

    assert {rj['identifier'] for rj in expected_resources} == \
        {path.name for path in resource_dir.iterdir()}

    for resource_json in expected_resources:
        subdir = resource_dir / resource_json['identifier']
        ver_str = versions.version_string(resource_json['version'])
        assert [ver_str] == [path.name for path in subdir.iterdir()]

        assert json.loads((subdir / ver_str).read_text()) == resource_json

        try_validate('api_resource_description', resource_json)

    # Verify files under 'mapping/'
    mapping_dir = tmpdir / 'mapping'
    assert ['helloapple'] == [path.name for path in mapping_dir.iterdir()]

    subdir = mapping_dir / 'helloapple'

    ver_str = versions.version_string(expected_mapping['version'])
    assert [ver_str] == [path.name for path in subdir.iterdir()]

    assert json.loads((subdir / ver_str).read_text()) == expected_mapping

    try_validate('api_mapping_description', expected_mapping)

    # Verify files under 'source/'
    source_dir = tmpdir / 'source'
    assert {'hello.json', 'hello.zip'} == \
        {path.name for path in source_dir.iterdir()}

    archive_files = {**dict((f'hello/{name}', contents)
                            for name, contents in src_files.items()),
                     **extra_archive_files}

    with ZipFile(source_dir / 'hello.zip', 'r') as archive:
        print(archive.namelist())
        assert len(archive.namelist()) == len(archive_files)

        for name, contents in archive_files.items():
            assert archive.read(name) == contents

    # The archive's hash is only known after the build, fill it in now.
    zip_ref = expected_source_description['source_archives']['zip']
    zip_contents = (source_dir / 'hello.zip').read_bytes()
    zip_ref['sha256'] = sha256(zip_contents).digest().hex()

    assert json.loads((source_dir / 'hello.json').read_text()) == \
        expected_source_description

    try_validate('api_source_description', expected_source_description)

# Each error maker returns a tuple of: expected exception type, optionally a
# regex the error message should match, optionally replacement index.json text.
error_makers = []

@collect(error_makers)
def sample_source_error_missing_file(monkeypatch, sample_source):
    """
    Modify index.json to expect missing report.spdx file and cause an error.
    """
    monkeypatch.delitem(index_obj, 'reuse_generate_spdx_report')
    return FileReferenceError, '^referenced_file_report.spdx_missing$'

@collect(error_makers)
def sample_source_error_index_schema(monkeypatch, sample_source):
    """Modify index.json to be incompliant with the schema."""
    monkeypatch.delitem(index_obj, 'definitions')
    return ValidationError,

@collect(error_makers)
def sample_source_error_unknown_index_schema(monkeypatch, sample_source):
    """Modify index.json to be use a not-yet-released schema."""
    schema_id = \
        'https://hydrilla.koszko.org/schemas/package_source-65536.schema.json'
    monkeypatch.setitem(index_obj, "$schema", schema_id)
    # NOTE(review): 'hydrilla_util' is not among the explicit imports of this
    # module; unless one of the star imports provides it, this line raises
    # NameError. Point it at the module that now defines UnknownSchemaError.
    return hydrilla_util.UnknownSchemaError, \
        r'^unknown_schema_package_source_.*/hello/index\.json$'

@collect(error_makers)
def sample_source_error_bad_comment(monkeypatch, sample_source):
    """Modify index.json to have an invalid '/' in it."""
    return json.JSONDecodeError, '^bad_comment: .*', \
        json.dumps(index_obj) + '/something\n'

@collect(error_makers)
def sample_source_error_bad_json(monkeypatch, sample_source):
    """Modify index.json to not be valid json even after comment stripping."""
    return json.JSONDecodeError, '', json.dumps(index_obj) + '???\n'

@collect(error_makers)
def sample_source_error_missing_reuse(monkeypatch, sample_source):
    """Cause mocked reuse process invocation to fail with FileNotFoundError."""
    (sample_source / 'mock_reuse_missing').touch()
    return build.ReuseError, '^couldnt_execute_reuse_is_it_installed$'

@collect(error_makers)
def sample_source_error_missing_license(monkeypatch, sample_source):
    """Remove a file to make package REUSE-incompliant."""
    (sample_source / 'README.txt.license').unlink()

    error_regex = """^\
command_reuse --root \\S+ lint_failed

STDOUT_OUTPUT_heading

dummy lint output

STDERR_OUTPUT_heading

some error output\
$\
"""

    return build.ReuseError, error_regex

@collect(error_makers)
def sample_source_error_file_outside(monkeypatch, sample_source):
    """Make index.json illegally reference a file outside srcdir."""
    new_list = [*index_obj['copyright'], {'file': '../abc'}]
    monkeypatch.setitem(index_obj, 'copyright', new_list)
    return FileReferenceError, '^path_contains_double_dot_\\.\\./abc$'

@collect(error_makers)
def sample_source_error_reference_itself(monkeypatch, sample_source):
    """Make index.json illegally reference index.json."""
    new_list = [*index_obj['copyright'], {'file': 'index.json'}]
    monkeypatch.setitem(index_obj, 'copyright', new_list)
    return FileReferenceError, '^loading_reserved_index_json$'

@collect(error_makers)
def sample_source_error_report_excluded(monkeypatch, sample_source):
    """
    Make index.json require generation of report.spdx but don't include it among
    copyright files.
    """
    new_list = [file_ref for file_ref in index_obj['copyright']
                if file_ref['file'] != 'report.spdx']
    monkeypatch.setitem(index_obj, 'copyright', new_list)
    return FileReferenceError, '^report_spdx_not_in_copyright_list$'

@collect(error_makers)
def sample_source_error_combined_unsupported(monkeypatch, sample_source):
    """
    Define mapping and resource together but leave source schema version at 1.x
    where this is unsupported.
    """
    mapping_def = index_obj['definitions'][0]
    monkeypatch.setitem(mapping_def, 'type', 'mapping_and_resource')

    return ValidationError,

@pytest.fixture(params=error_makers)
def sample_source_make_errors(request, monkeypatch, sample_source):
    """
    Prepare a directory with sample Haketilo source package in multiple slightly
    broken versions. Return an error type that should be raised when running
    test build.
    """
    # Error makers return 1 to 3 values; pad the missing ones with ''.
    error_type, error_regex, index_text = \
        [*request.param(monkeypatch, sample_source), '', ''][0:3]

    index_text = index_text or json.dumps(index_obj)

    (sample_source / 'index.json').write_text(index_text)

    monkeypatch.setitem(src_files, 'index.json', index_text.encode())

    return error_type, error_regex

@pytest.mark.subprocess_run(build, run_reuse)
@pytest.mark.usefixtures('mock_subprocess_run')
def test_build_error(tmpdir, sample_source, sample_source_make_errors):
    """Try building the sample source package and verify generated errors."""
    error_type, error_regex = sample_source_make_errors

    dstdir = Path(tmpdir) / 'dstdir'
    dstdir.mkdir(exist_ok=True)

    with pytest.raises(error_type, match=error_regex):
        build.Build(sample_source, Path('index.json'))\
             .write_package_files(dstdir)

# -----------------------------------------------------------------------------
# Patch metadata of the next file in this commit (kept from the original text):
#   diff --git a/tests/test_item_infos.py b/tests/test_item_infos.py
#   new file mode 100644
#   index 0000000..9de3c96
#   --- /dev/null
#   +++ b/tests/test_item_infos.py
#   @@ -0,0 +1,527 @@
# -----------------------------------------------------------------------------
# SPDX-License-Identifier: CC0-1.0

# Copyright (C) 2022 Wojtek Kosior
#
# Available under the terms of Creative Commons Zero v1.0 Universal.
import pytest
import pathlib
import re
import dataclasses as dc

from immutables import Map

from hydrilla import item_infos, versions, json_instances
from hydrilla.exceptions import HaketiloException

def test_make_item_refs_seq_empty():
    """An empty list of reference objects gives an empty tuple."""
    assert item_infos.make_item_refs_seq([]) == ()

def test_get_item_refs_seq_nonempty():
    """
    Reference objects are turned into a tuple of refs whose identifiers keep
    the order of the input list.
    """
    ref_objs = [{'identifier': 'abc'}, {'identifier': 'def'}]

    result = item_infos.make_item_refs_seq(ref_objs)

    assert type(result) is tuple
    assert [ref.identifier for ref in result] == ['abc', 'def']

@pytest.fixture
def mock_make_item_refs_seq(monkeypatch):
    """
    Replace item_infos.make_item_refs_seq() with a checking stub.

    The stub asserts that its argument equals the value of its own 'expected'
    attribute (when that attribute is not set: the 'abc'/'def' reference
    objects) and always returns ItemRef('abc') and ItemRef('def'). The stub is
    returned so that a test can set 'expected' on it.
    """
    def mocked_make_item_refs_seq(ref_objs):
        """Verify the argument and return two fixed item references."""
        assert ref_objs == getattr(
            mocked_make_item_refs_seq,
            'expected',
            [{'identifier': 'abc'}, {'identifier': 'def'}]
        )

        return (item_infos.ItemRef('abc'), item_infos.ItemRef('def'))

    monkeypatch.setattr(item_infos, 'make_item_refs_seq',
                        mocked_make_item_refs_seq)

    return mocked_make_item_refs_seq

def test_make_required_mappings_compat_too_low():
    """With compat level 1 the result is () even for a meaningless input."""
    assert item_infos.make_required_mappings('whatever', 1) == ()

@pytest.mark.usefixtures('mock_make_item_refs_seq')
def test_make_required_mappings_compat_ok():
    """
    With compat level 2 the result equals what the mocked make_item_refs_seq()
    returns for the same reference objects.
    """
    ref_objs = [{'identifier': 'abc'}, {'identifier': 'def'}]

    assert item_infos.make_required_mappings(ref_objs, 2) == \
        (item_infos.ItemRef('abc'), item_infos.ItemRef('def'))

def test_make_file_refs_seq_empty():
    """An empty list of file reference objects gives an empty tuple."""
    assert item_infos.make_file_refs_seq([]) == ()

def test_make_file_refs_seq_nonempty():
    """
    File reference objects are turned into a tuple of refs that expose the
    'file' value as 'name' and keep 'sha256', in input order.
    """
    ref_objs = [{'file': 'abc', 'sha256': 'dummy_hash1'},
                {'file': 'def', 'sha256': 'dummy_hash2'}]

    result = item_infos.make_file_refs_seq(ref_objs)

    assert type(result) is tuple
    assert [ref.name for ref in result] == ['abc', 'def']
    assert [ref.sha256 for ref in result] == ['dummy_hash1', 'dummy_hash2']

def test_generated_by_make_empty():
    """GeneratedBy.make(None) gives None."""
    # Identity check: an object that merely compares equal to None must not
    # satisfy this test.
    assert item_infos.GeneratedBy.make(None) is None

@pytest.mark.parametrize('_in, out_version', [
    ({'name': 'abc'}, None),
    ({'name': 'abc', 'version': '1.1.1'}, '1.1.1')
])
def test_generated_by_make_nonempty(_in, out_version):
    """
    GeneratedBy.make() takes over 'name' and 'version' from its input; a missing
    'version' results in None.
    """
    generated_by = item_infos.GeneratedBy.make(_in)

    assert generated_by.name == 'abc'
    assert generated_by.version == out_version

def test_load_item_info(monkeypatch):
    """
    _load_item_info() is expected to read the instance through
    json_instances.read_instance(), validate it against the schema name format
    'api_{type_name}_description-{}.schema.json' and pass the instance, the
    validation result and the repository to the item type's make().
    """
    def mocked_read_instance(instance_or_path):
        """Verify the path and return a dummy instance."""
        assert instance_or_path == 'dummy_path'
        return 'dummy_instance'

    monkeypatch.setattr(json_instances, 'read_instance', mocked_read_instance)

    def mocked_validate_instance(instance, schema_fmt):
        """Verify the arguments and return a dummy compat level."""
        assert instance == 'dummy_instance'
        assert schema_fmt == 'api_exotictype_description-{}.schema.json'
        return 7

    monkeypatch.setattr(json_instances, 'validate_instance',
                        mocked_validate_instance)

    class MockedLoadedType:
        """Stand-in for an item info type; only make() and type_name exist."""
        # The function takes no 'self'; declaring it static keeps it callable
        # through the class (as before) and through an instance as well.
        @staticmethod
        def make(instance, schema_compat, repository):
            """Verify the arguments and return a dummy item info."""
            assert instance == 'dummy_instance'
            assert schema_compat == 7
            assert repository == 'somerepo'
            return 'dummy_item_info'

        type_name = 'exotictype'

    assert item_infos._load_item_info(
        MockedLoadedType,
        'dummy_path',
        'somerepo'
    ) == 'dummy_item_info'

def test_make_payloads(monkeypatch):
    """
    Every pattern yielded by (the mocked) parse_pattern() for a payload key is
    mapped to an ItemRef carrying that payload's identifier.
    """
    payloads_obj = {'http*://example.com/': {'identifier': 'someresource'}}

    def mocked_parse_pattern(pattern):
        """Verify the pattern and yield two dummy parsed patterns."""
        assert pattern == 'http*://example.com/'

        yield 'dummy_parsed_pattern_1'
        yield 'dummy_parsed_pattern_2'

    monkeypatch.setattr(item_infos, 'parse_pattern', mocked_parse_pattern)

    assert item_infos.make_payloads(payloads_obj) == Map({
        'dummy_parsed_pattern_1': item_infos.ItemRef('someresource'),
        'dummy_parsed_pattern_2': item_infos.ItemRef('someresource')
    })

@pytest.mark.parametrize('info_mod, in_mod', [
    ({}, {}),
    ({'uuid': 'dummy_uuid'}, {}),
    ({}, {'uuid': 'dummy_uuid'}),
    ({'uuid': 'dummy_uuid'}, {'uuid': 'dummy_uuid'}),
    ({}, {'identifier': 'abc', '_initialized': True}),
    ({}, {'_by_version': Map({(1, 2): 'dummy_old_info'})})
])
def test_versioned_item_info_register(info_mod, in_mod):
    """
    register() returns an updated VersionedItemInfo and leaves the object it
    was called on unchanged.

    'info_mod' overrides attributes of the registered dummy info, 'in_mod'
    overrides initial fields of the VersionedItemInfo.
    """
    class DummyInfo:
        """Minimal item info: just the attributes read by this test."""
        uuid = None
        identifier = 'abc'
        version = (1, 2)

    for name, value in info_mod.items():
        setattr(DummyInfo, name, value)

    in_fields = {
        'uuid': None,
        'identifier': '',
        '_by_version': Map(),
        '_initialized': False,
        **in_mod
    }
    out_fields = {
        # The uuid may come from either the registered info or the old object.
        'uuid': DummyInfo.uuid or in_mod.get('uuid'),
        'identifier': DummyInfo.identifier,
        '_by_version': Map({(1, 2): DummyInfo}),
        '_initialized': True
    }

    versioned = item_infos.VersionedItemInfo(**in_fields)
    new_versioned = versioned.register(DummyInfo)

    assert dc.asdict(versioned) == in_fields
    assert dc.asdict(new_versioned) == out_fields

def test_versioned_item_info_register_bad_uuid():
    """
    Registering an info whose uuid differs from the already stored one raises
    HaketiloException with the 'uuid_mismatch_abc' message.
    """
    versioned = item_infos.VersionedItemInfo(
        identifier = 'abc',
        uuid = 'old_uuid'
    )

    class DummyInfo:
        """Minimal item info with a uuid that conflicts with the stored one."""
        uuid = 'new_uuid'
        identifier = 'abc'
        version = (1, 2)

    with pytest.raises(HaketiloException, match='^uuid_mismatch_abc$'):
        versioned.register(DummyInfo)

@pytest.mark.parametrize('previous_registrations', [
    Map(),
    Map({(1, 2): 'dummy_info'})
])
def test_versioned_item_info_unregister(previous_registrations):
    """
    unregister((1, 2)) gives an object without registered versions, whether or
    not version (1, 2) was registered before.
    """
    versioned = item_infos.VersionedItemInfo(
        identifier = 'abc',
        _by_version = previous_registrations
    )

    assert versioned.unregister((1, 2)) == \
        dc.replace(versioned, _by_version=Map())

@pytest.mark.parametrize('registrations, out', [
    (Map(), True),
    (Map({(1, 2): 'dummy_info'}), False)
])
def test_versioned_item_info_is_empty(registrations, out):
    """is_empty() is True exactly when no version is registered."""
    versioned = item_infos.VersionedItemInfo(
        identifier = 'abc',
        _by_version = registrations
    )

    assert versioned.is_empty() == out

@pytest.mark.parametrize('versions, out', [
    ([(1, 2), (1, 2, 1), (0, 9999, 4), (1, 0, 2)], (1, 2, 1)),
    ([(1, 2)], (1, 2))
])
def test_versioned_item_info_newest_version(versions, out):
    """newest_version() returns the greatest registered version tuple."""
    versioned = item_infos.VersionedItemInfo(
        identifier = 'abc',
        _by_version = Map((ver, 'dummy_info') for ver in versions)
    )

    assert versioned.newest_version() == out

def test_versioned_item_info_newest_version_bad(monkeypatch):
    """
    get_newest() returns the info stored under the version reported by
    newest_version() (mocked here to report 'dummy_ver1').
    """
    monkeypatch.setattr(item_infos.VersionedItemInfo, 'newest_version',
                        lambda self: 'dummy_ver1')

    versioned = item_infos.VersionedItemInfo(
        identifier = 'abc',
        _by_version = Map(dummy_ver1='dummy_info1', dummy_ver2='dummy_info2')
    )

    assert versioned.get_newest() == 'dummy_info1'

def test_versioned_item_info_get_by_ver():
    """A version given as range(1, 3) finds the info registered as (1, 2)."""
    versioned = item_infos.VersionedItemInfo(
        identifier = 'abc',
        _by_version = Map({(1, 2): 'dummy_info1', (3, 4, 5): 'dummy_info2'})
    )

    assert versioned.get_by_ver(range(1, 3)) == 'dummy_info1'

@pytest.mark.parametrize('versions, out', [
    ([(1, 2), (0, 999, 4), (1, 0, 2)], ['(0, 999, 4)', '(1, 0, 2)', '(1, 2)']),
    ([], [])
])
def test_versioned_item_get_all(versions, out):
    """get_all() yields the registered infos in ascending version order."""
    versioned = item_infos.VersionedItemInfo(
        identifier = 'abc',
        _by_version = Map((ver, str(ver)) for ver in versions)
    )

    assert [*versioned.get_all()] == out

# JSON-like definition of a sample resource, used as input by the tests of the
# item info constructors.
sample_resource_obj = {
    'source_name': 'somesource',
    'source_copyright': [{'file': 'ABC', 'sha256': 'dummy_sha256'}],
    'version': [1, 2, 3, 0],
    'identifier': 'someid',
    'uuid': None,
    'long_name': 'Some Thing',
    'required_mappings': [{'identifier': 'required1'}],
    'generated_by': {'name': 'sometool', 'version': '1.1.1'},
    'revision': 4,
    'dependencies': [{'identifier': 'abc'}, {'identifier': 'def'}],
    'scripts': [{'file': 'ABC', 'sha256': 'dummy_sha256'}]
}

# The sample mapping shares the resource's fields, gains 'payloads' and (below)
# loses the two resource-only keys.
sample_mapping_obj = {
    **sample_resource_obj,
    'payloads': {
        'https://example.com/': {'identifier': 'someresource'}
    }
}

del sample_mapping_obj['dependencies']
del sample_mapping_obj['scripts']

@pytest.fixture(scope='session')
def sample_resource_info():
    """
    Provide a ResourceInfo built from fixed sample values (repository
    'somerepo', identifier 'someid', version (1, 2, 3), revision 4).
    """
    copyright_refs = (item_infos.FileRef('ABC', 'dummy_sha256'),)
    mapping_refs = (item_infos.ItemRef('required1'),)
    generator = item_infos.GeneratedBy('sometool', '1.1.1')
    dependency_refs = (item_infos.ItemRef('abc'), item_infos.ItemRef('def'))
    script_refs = (item_infos.FileRef('ABC', 'dummy_sha256'),)

    return item_infos.ResourceInfo(
        repository = 'somerepo',
        source_name = 'somesource',
        source_copyright = copyright_refs,
        version = (1, 2, 3),
        identifier = 'someid',
        uuid = None,
        long_name = 'Some Thing',
        required_mappings = mapping_refs,
        generated_by = generator,
        revision = 4,
        dependencies = dependency_refs,
        scripts = script_refs
    )

@pytest.fixture(scope='session')
def sample_mapping_info():
    """
    Provide a MappingInfo built from fixed sample values, with a single payload
    for 'https://example.com/'.
    """
    return item_infos.MappingInfo(
        repository = 'somerepo',
        source_name = 'somesource',
        source_copyright = (item_infos.FileRef('ABC', 'dummy_sha256'),),
        version = (1, 2, 3),
        identifier = 'someid',
        uuid = None,
        long_name = 'Some Thing',
        required_mappings = (item_infos.ItemRef('required1'),),
        generated_by = item_infos.GeneratedBy('sometool', '1.1.1'),
        payloads = Map({
            'https://example.com/': item_infos.ItemRef('someresource')
        })
    )

@pytest.fixture(scope='session')
def sample_info_base_init_kwargs(sample_resource_info):
    """
    Provide a Map holding, for every field annotated on ItemInfoBase, the value
    that the sample resource info has for it.
    """
    base_fields = item_infos.ItemInfoBase.__annotations__

    return Map({
        name: getattr(sample_resource_info, name) for name in base_fields
    })

@pytest.fixture
def mock_version_string(monkeypatch):
    """
    Replace versions.version_string() with a stub that only accepts version
    (1, 2, 3) with revision None or 4.
    """
    def mocked_version_string(version, revision=None):
        """Verify the arguments and return the matching fixed string."""
        assert version == (1, 2, 3)
        assert revision in (None, 4)

        if revision is None:
            return '1.2.3'

        return '1.2.3-4'

    monkeypatch.setattr(versions, 'version_string', mocked_version_string)

@pytest.mark.usefixtures('mock_version_string')
def test_item_info_path(sample_resource_info):
    """
    The sample resource's path is 'someid/1.2.3' relative to its type and
    'resource/someid/1.2.3' in full.
    """
    relative_path = sample_resource_info.path_relative_to_type()
    assert relative_path == 'someid/1.2.3'

    full_path = sample_resource_info.path()
    assert full_path == 'resource/someid/1.2.3'

+@pytest.mark.usefixtures('mock_version_string') +def test_resource_info_versioned_identifier(sample_resource_info, monkeypatch): + """....""" + monkeypatch.setattr(item_infos.ItemInfoBase, 'versioned_identifier', + lambda self: '') + + assert sample_resource_info.versioned_identifier == '-4' + +@pytest.mark.usefixtures('mock_version_string') +def test_mapping_info_versioned_identifier(sample_mapping_info): + """....""" + assert sample_mapping_info.versioned_identifier == 'someid-1.2.3' + +@pytest.fixture +def mock_make_file_refs_seq(monkeypatch): + """....""" + def mocked_make_file_refs_seq(ref_objs): + """....""" + assert ref_objs == getattr( + mocked_make_file_refs_seq, + 'expected', + [{'file': 'ABC', 'sha256': 'dummy_sha256'}] + ) + + return (item_infos.FileRef(name='ABC', sha256='dummy_sha256'),) + + monkeypatch.setattr(item_infos, 'make_file_refs_seq', + mocked_make_file_refs_seq) + + return mocked_make_file_refs_seq + +@pytest.mark.parametrize('missing_prop', [ + 'required_mappings', + 'generated_by', + 'uuid' +]) +@pytest.mark.usefixtures('mock_make_item_refs_seq', 'mock_make_file_refs_seq') +def test_item_info_get_base_init_kwargs( + missing_prop, + monkeypatch, + sample_resource_info, + sample_info_base_init_kwargs, + mock_make_file_refs_seq +): + """....""" + monkeypatch.delitem(sample_resource_obj, missing_prop) + + def mocked_normalize_version(version): + """....""" + assert version == [1, 2, 3, 0] + + return (1, 2, 3) + + monkeypatch.setattr(versions, 'normalize_version', mocked_normalize_version) + + def mocked_make_required_mappings(ref_objs, schema_compat): + """....""" + if missing_prop == 'required_mappings': + assert ref_objs == [] + else: + assert ref_objs == [{'identifier': 'required1'}] + + assert schema_compat == 2 + + return (item_infos.ItemRef('required1'),) + + monkeypatch.setattr(item_infos, 'make_required_mappings', + mocked_make_required_mappings) + + def mocked_generated_by_make(generated_by_obj): + """....""" + if missing_prop == 
'generated_by': + assert generated_by_obj == None + else: + assert generated_by_obj == {'name': 'sometool', 'version': '1.1.1'} + + return item_infos.GeneratedBy(name='sometool', version='1.1.1') + + monkeypatch.setattr(item_infos.GeneratedBy, 'make', + mocked_generated_by_make) + + expected = sample_info_base_init_kwargs + if missing_prop == 'uuid': + expected = expected.set('uuid', None) + + Base = item_infos.ItemInfoBase + assert Base._get_base_init_kwargs(sample_resource_obj, 2, 'somerepo') == \ + expected + +@pytest.fixture +def mock_get_base_init_kwargs(monkeypatch, sample_info_base_init_kwargs): + """....""" + def mocked_get_base_init_kwargs(item_obj, schema_compat, repository): + """....""" + assert schema_compat == 2 + assert item_obj['identifier'] == 'someid' + assert repository == 'somerepo' + + return sample_info_base_init_kwargs + + monkeypatch.setattr(item_infos.ItemInfoBase, '_get_base_init_kwargs', + mocked_get_base_init_kwargs) + +@pytest.mark.parametrize('missing_prop', ['dependencies', 'scripts']) +@pytest.mark.usefixtures('mock_get_base_init_kwargs') +def test_resource_info_make( + missing_prop, + monkeypatch, + sample_resource_info, + mock_make_item_refs_seq, + mock_make_file_refs_seq +): + """....""" + _in = sample_resource_obj + monkeypatch.delitem(_in, missing_prop) + + if missing_prop == 'dependencies': + mock_make_item_refs_seq.expected = [] + elif missing_prop == 'scripts': + mock_make_file_refs_seq.expected = [] + + assert item_infos.ResourceInfo.make(_in, 2, 'somerepo') == \ + sample_resource_info + +@pytest.mark.parametrize('missing_payloads', [True, False]) +@pytest.mark.usefixtures('mock_get_base_init_kwargs', 'mock_make_item_refs_seq') +def test_mapping_info_make(missing_payloads, monkeypatch, sample_mapping_info): + """....""" + _in = sample_mapping_obj + if missing_payloads: + monkeypatch.delitem(_in, 'payloads') + + def mocked_make_payloads(payloads_obj): + """....""" + if missing_payloads: + assert payloads_obj == {} + else: + 
assert payloads_obj == \ + {'https://example.com/': {'identifier': 'someresource'}} + + return Map({'https://example.com/': item_infos.ItemRef('someresource')}) + + monkeypatch.setattr(item_infos, 'make_payloads', mocked_make_payloads) + + assert item_infos.MappingInfo.make(_in, 2, 'somerepo') == \ + sample_mapping_info + +@pytest.mark.parametrize('type_name', ['ResourceInfo', 'MappingInfo']) +def test_make_item_info(type_name, monkeypatch): + """....""" + info_type = getattr(item_infos, type_name) + + def mocked_load_item_info(_info_type, instance_or_path, repository): + """....""" + assert _info_type == info_type + assert instance_or_path == 'dummy_path' + assert repository == 'somerepo' + return 'dummy_info' + + monkeypatch.setattr(item_infos, '_load_item_info', mocked_load_item_info) + + assert info_type.load('dummy_path', 'somerepo') == 'dummy_info' + +def test_resource_info_hash(sample_resource_info): + """....""" + hash(sample_resource_info) + +def test_mapping_info_hash(sample_mapping_info): + """....""" + hash(sample_mapping_info) diff --git a/tests/test_json_instances.py b/tests/test_json_instances.py new file mode 100644 index 0000000..f5bd270 --- /dev/null +++ b/tests/test_json_instances.py @@ -0,0 +1,194 @@ +# SPDX-License-Identifier: CC0-1.0 + +# Copyright (C) 2022 Wojtek Kosior +# +# Available under the terms of Creative Commons Zero v1.0 Universal. 
+ +import pytest +import re + +from hydrilla import json_instances +from hydrilla.exceptions import HaketiloException + +sample_json_no_comments = '{\n"so/me":\n"//json//"\n}\n' + +@pytest.mark.parametrize('_in', [ + '{\n"so/me":\n"//json//"\n}\n', + '{//we\n"so/me"://will\n"//json//"//rock\n}//you\n' +]) +def test_strip_json_comments(_in): + """....""" + assert json_instances.strip_json_comments(_in) == sample_json_no_comments + +@pytest.mark.parametrize('_in, line, char', [ + ('/{\n"so/me":\n"//json//"\n}\n', 1, 1), + ('{\n"so/me":/\n"//json//"\n}/\n', 2, 9), + ('{\n"so/me":/ huehue, I am an invalid comment\n"//json//"\n}\n', 2, 9) +]) +def test_strip_json_comments_bad(_in, line, char): + """....""" + error_regex = f'^bad_json_comment_line_{line}_char_{char}$' + with pytest.raises(HaketiloException, match=error_regex): + json_instances.strip_json_comments(_in) + +@pytest.mark.parametrize('schema_name, full_schema_name', [ + ('package_source-1.0.1.schema.json', 'package_source-1.0.1.schema.json'), + ('package_source-1.0.schema.json', 'package_source-1.0.1.schema.json'), + ('package_source-1.schema.json', 'package_source-1.0.1.schema.json'), + ('package_source-2.schema.json', 'package_source-2.schema.json') +]) +def test_get_schema(schema_name, full_schema_name): + """....""" + url_prefix = 'https://hydrilla.koszko.org/schemas/' + + for prefix in ('', url_prefix): + schema1 = json_instances._get_schema(prefix + schema_name) + assert schema1['$id'] == url_prefix + full_schema_name + + schema2 = json_instances._get_schema(prefix + schema_name) + assert schema2 is schema1 + +@pytest.mark.parametrize('_in', ['dummy_uri', {'$id': 'dummy_uri'}]) +def test_validator_for(_in, monkeypatch): + """....""" + def mocked_get_schema(schema_id): + """....""" + assert schema_id == 'dummy_uri' + return {'$id': 'dummy_uri'} + + monkeypatch.setattr(json_instances, '_get_schema', mocked_get_schema) + + def MockedRefResolver(base_uri, referrer, handlers): + """....""" + assert base_uri 
== referrer['$id'] + assert referrer == {'$id': 'dummy_uri'} + assert handlers == {'https': mocked_get_schema} + return 'dummy_resolver' + + monkeypatch.setattr(json_instances, 'RefResolver', MockedRefResolver) + + def MockedDraft7Validator(schema, resolver): + """....""" + assert schema == {'$id': 'dummy_uri'} + assert resolver == 'dummy_resolver' + return 'dummy_validator' + + monkeypatch.setattr(json_instances, 'Draft7Validator', + MockedDraft7Validator) + + assert json_instances.validator_for(_in) == 'dummy_validator' + +def test_parse_instance(monkeypatch): + """....""" + def mocked_strip_json_comments(text): + """....""" + assert text == 'dummy_commented_json' + return '{"dummy": 1}' + + monkeypatch.setattr(json_instances, 'strip_json_comments', + mocked_strip_json_comments) + + assert json_instances.parse_instance('dummy_commented_json') == {'dummy': 1} + + +def test_read_instance(monkeypatch, tmpdir): + """....""" + def mocked_parse_instance(text): + """....""" + assert text == 'dummy_JSON_text' + return {'dummy': 1} + + monkeypatch.setattr(json_instances, 'parse_instance', mocked_parse_instance) + + somepath = tmpdir / 'somefile' + somepath.write_text('dummy_JSON_text') + + for instance_or_path in (somepath, str(somepath), {'dummy': 1}): + assert json_instances.read_instance(instance_or_path) == {'dummy': 1} + +def test_read_instance_bad(monkeypatch, tmpdir): + """....""" + monkeypatch.setattr(json_instances, 'parse_instance', lambda: 3 / 0) + + somepath = tmpdir / 'somefile' + somepath.write_text('dummy_JSON_text') + + error_regex = f'^text_in_{re.escape(str(somepath))}_not_valid_json$' + with pytest.raises(HaketiloException, match=error_regex): + json_instances.read_instance(somepath) + +@pytest.mark.parametrize('instance, ver_str', [ + ({'$schema': 'a_b_c-1.0.1.0.schema.json'}, '1.0.1.0'), + ({'$schema': '9-9-9-10.5.600.schema.json'}, '10.5.600'), + ({'$schema': 'https://ab.cd-2.schema.json'}, '2') +]) +def test_get_schema_version(instance, ver_str, 
monkeypatch): + """....""" + def mocked_parse_version(_ver_str): + """....""" + assert _ver_str == ver_str + return 'dummy_version' + + monkeypatch.setattr(json_instances, 'parse_version', mocked_parse_version) + + assert json_instances.get_schema_version(instance) == 'dummy_version' + +@pytest.mark.parametrize('instance', [ + {'$schema': 'https://ab.cd-0.schema.json'}, + {'$schema': 'https://ab.cd-02.schema.json'}, + {'$schema': 'https://ab.cd-2.00.schema.json'}, + {'$schema': 'https://ab.cd-2.01.schema.json'}, + {'$schema': 'https://ab.cd-2.schema.json5'}, + {'$schema': 'https://ab.cd-2.schema@json'}, + {'$schema': 'https://ab.cd_2.schema.json'}, + {'$schema': '2.schema.json'}, + {'$schema': 'https://ab.cd-.schema.json'}, + {'$schema': b'https://ab.cd-2.schema.json'}, + {}, + 'not dict' +]) +def test_get_schema_version_bad(instance): + """....""" + error_regex = '^no_schema_number_in_instance$' + with pytest.raises(HaketiloException, match=error_regex): + json_instances.get_schema_version(instance) + +def test_get_schema_major_number(monkeypatch): + """....""" + def mocked_get_schema_version(instance): + """....""" + assert instance == 'dummy_instance' + return (3, 4, 6) + + monkeypatch.setattr(json_instances, 'get_schema_version', + mocked_get_schema_version) + + assert json_instances.get_schema_major_number('dummy_instance') == 3 + +def test_validate_instance(monkeypatch): + """....""" + def mocked_get_schema_major_number(instance): + """....""" + assert instance == 'dummy_instance' + return 4 + + monkeypatch.setattr(json_instances, 'get_schema_major_number', + mocked_get_schema_major_number) + + class mocked_validator_for: + """....""" + def __init__(self, schema_name): + """....""" + assert schema_name == 'https://ab.cd/something-4.schema.json' + + def validate(self, instance): + """....""" + assert instance == 'dummy_instance' + + monkeypatch.setattr(json_instances, 'validator_for', mocked_validator_for) + + schema_name_fmt = 
'https://ab.cd/something-{}.schema.json' + assert json_instances.validate_instance( + 'dummy_instance', + schema_name_fmt + ) == 4 diff --git a/tests/test_local_apt.py b/tests/test_local_apt.py new file mode 100644 index 0000000..9122408 --- /dev/null +++ b/tests/test_local_apt.py @@ -0,0 +1,754 @@ +# SPDX-License-Identifier: CC0-1.0 + +# Copyright (C) 2022 Wojtek Kosior +# +# Available under the terms of Creative Commons Zero v1.0 Universal. + +import pytest +import tempfile +import re +import json +from pathlib import Path, PurePosixPath +from zipfile import ZipFile +from tempfile import TemporaryDirectory + +from hydrilla.builder import local_apt +from hydrilla.builder.common_errors import * + +here = Path(__file__).resolve().parent + +from .helpers import * + +@pytest.fixture +def mock_cache_dir(monkeypatch): + """Make local_apt.py cache files to a temporary directory.""" + with tempfile.TemporaryDirectory() as td: + td_path = Path(td) + monkeypatch.setattr(local_apt, 'default_apt_cache_dir', td_path) + yield td_path + +@pytest.fixture +def mock_gnupg_import(monkeypatch, mock_cache_dir): + """Mock gnupg library when imported dynamically.""" + + gnupg_mock_dir = mock_cache_dir / 'gnupg_mock' + gnupg_mock_dir.mkdir() + (gnupg_mock_dir / 'gnupg.py').write_text('GPG = None\n') + + monkeypatch.syspath_prepend(str(gnupg_mock_dir)) + + import gnupg + + keyring_path = mock_cache_dir / 'master_keyring.gpg' + + class MockedImportResult: + """gnupg.ImportResult replacement""" + def __init__(self): + """Initialize MockedImportResult object.""" + self.imported = 1 + + class MockedGPG: + """GPG replacement that does not really invoke GPG.""" + def __init__(self, keyring): + """Verify the keyring path and initialize MockedGPG.""" + assert keyring == str(keyring_path) + + self.known_keys = {*keyring_path.read_text().split('\n')} \ + if keyring_path.exists() else set() + + def recv_keys(self, keyserver, key): + """Mock key receiving - record requested key as received.""" + 
assert keyserver == local_apt.default_keyserver + assert key not in self.known_keys + + self.known_keys.add(key) + keyring_path.write_text('\n'.join(self.known_keys)) + + return MockedImportResult() + + def list_keys(self, keys=None): + """Mock key listing - return a list with dummy items.""" + if keys is None: + return ['dummy'] * len(self.known_keys) + else: + return ['dummy' for k in keys if k in self.known_keys] + + def export_keys(self, keys, **kwargs): + """ + Mock key export - check that the call has the expected arguments and + return a dummy bytes array. + """ + assert kwargs['armor'] == False + assert kwargs['minimal'] == True + assert {*keys} == self.known_keys + + return b'' + + monkeypatch.setattr(gnupg, 'GPG', MockedGPG) + +def process_run_args(command, kwargs, expected_command): + """ + Perform assertions common to all mocked subprocess.run() invocations and + extract variable parts of the command line (if any). + """ + assert kwargs['env'] == {'LANG': 'en_US'} + assert kwargs['capture_output'] == True + + return process_command(command, expected_command) + +def run_apt_get_update(command, returncode=0, **kwargs): + """ + Instead of running an 'apt-get update' command just touch some file in apt + root to indicate that the call was made. + """ + expected = ['apt-get', '-c', '', 'update'] + conf_path = Path(process_run_args(command, kwargs, expected)['conf_path']) + + (conf_path.parent / 'update_called').touch() + + return MockedCompletedProcess(command, returncode, + text_output=kwargs.get('text')) + +""" +Output of 'apt-get install --yes --just-print libjs-mathjax' on some APT-based +system. +""" +sample_install_stdout = '''\ +NOTE: This is only a simulation! + apt-get needs root privileges for real execution. + Keep also in mind that locking is deactivated, + so don't depend on the relevance to the real current situation! +Reading package lists... +Building dependency tree... +Reading state information... 
+The following additional packages will be installed: + fonts-mathjax +Suggested packages: + fonts-mathjax-extras fonts-stix libjs-mathjax-doc +The following NEW packages will be installed: + fonts-mathjax libjs-mathjax +0 upgraded, 2 newly installed, 0 to remove and 0 not upgraded. +Inst fonts-mathjax (0:2.7.9+dfsg-1 Devuan:4.0/stable, Devuan:1.0.0/unstable [all]) +Inst libjs-mathjax (0:2.7.9+dfsg-1 Devuan:4.0/stable, Devuan:1.0.0/unstable [all]) +Conf fonts-mathjax (0:2.7.9+dfsg-1 Devuan:4.0/stable, Devuan:1.0.0/unstable [all]) +Conf libjs-mathjax (0:2.7.9+dfsg-1 Devuan:4.0/stable, Devuan:1.0.0/unstable [all]) +''' + +def run_apt_get_install(command, returncode=0, **kwargs): + """ + Instead of running an 'apt-get install' command just print a possible + output of one. + """ + expected = ['apt-get', '-c', '', 'install', + '--yes', '--just-print', 'libjs-mathjax'] + + conf_path = Path(process_run_args(command, kwargs, expected)['conf_path']) + + return MockedCompletedProcess(command, returncode, + stdout=sample_install_stdout, + text_output=kwargs.get('text')) + +def run_apt_get_download(command, returncode=0, **kwargs): + """ + Instead of running an 'apt-get download' command just write some dummy + .deb to the appropriate directory. 
+ """ + expected = ['apt-get', '-c', '', 'download'] + if 'libjs-mathjax' in command: + expected.append('libjs-mathjax') + else: + expected.append('fonts-mathjax=0:2.7.9+dfsg-1') + expected.append('libjs-mathjax=0:2.7.9+dfsg-1') + + conf_path = Path(process_run_args(command, kwargs, expected)['conf_path']) + + destination = Path(kwargs.get('cwd') or Path.cwd()) + + package_name_regex = re.compile(r'^[^=]+-mathjax') + + for word in expected: + match = package_name_regex.match(word) + if match: + filename = f'{match.group(0)}_0%3a2.7.9+dfsg-1_all.deb' + deb_path = destination / filename + deb_path.write_text(f'dummy {deb_path.name}') + + return MockedCompletedProcess(command, returncode, + text_output=kwargs.get('text')) + +def run_apt_get_source(command, returncode=0, **kwargs): + """ + Instead of running an 'apt-get source' command just write some dummy + "tarballs" to the appropriate directory. + """ + expected = ['apt-get', '-c', '', 'source', + '--download-only', 'libjs-mathjax=0:2.7.9+dfsg-1'] + if 'fonts-mathjax=0:2.7.9+dfsg-1' in command: + if command[-1] == 'fonts-mathjax=0:2.7.9+dfsg-1': + expected.append('fonts-mathjax=0:2.7.9+dfsg-1') + else: + expected.insert(-1, 'fonts-mathjax=0:2.7.9+dfsg-1') + + destination = Path(kwargs.get('cwd') or Path.cwd()) + for filename in [ + 'mathjax_2.7.9+dfsg-1.debian.tar.xz', + 'mathjax_2.7.9+dfsg-1.dsc', + 'mathjax_2.7.9+dfsg.orig.tar.xz' + ]: + (destination / filename).write_text(f'dummy {filename}') + + return MockedCompletedProcess(command, returncode, + text_output=kwargs.get('text')) + +def make_run_apt_get(**returncodes): + """ + Produce a function that chooses and runs the appropriate one of + subprocess_run_apt_get_*() mock functions. + """ + def mock_run(command, **kwargs): + """ + Chooses and runs the appropriate one of subprocess_run_apt_get_*() mock + functions. 
+ """ + for subcommand, run in [ + ('update', run_apt_get_update), + ('install', run_apt_get_install), + ('download', run_apt_get_download), + ('source', run_apt_get_source) + ]: + if subcommand in command: + returncode = returncodes.get(f'{subcommand}_code', 0) + return run(command, returncode, **kwargs) + + raise Exception('Unknown command: {}'.format(' '.join(command))) + + return mock_run + +@pytest.mark.subprocess_run(local_apt, make_run_apt_get()) +@pytest.mark.usefixtures('mock_subprocess_run', 'mock_gnupg_import') +def test_local_apt_contextmanager(mock_cache_dir): + """ + Verify that the local_apt() function creates a proper apt environment and + that it also properly restores it from cache. + """ + sources_list = local_apt.SourcesList(['deb-src sth', 'deb sth']) + + with local_apt.local_apt(sources_list, local_apt.default_keys) as apt: + apt_root = Path(apt.apt_conf).parent.parent + + assert (apt_root / 'etc' / 'trusted.gpg').read_bytes() == \ + b'' + + assert (apt_root / 'etc' / 'update_called').exists() + + assert (apt_root / 'etc' / 'apt.sources.list').read_text() == \ + 'deb-src sth\ndeb sth' + + conf_lines = (apt_root / 'etc' / 'apt.conf').read_text().split('\n') + + # check mocked keyring + assert {*local_apt.default_keys} == \ + {*(mock_cache_dir / 'master_keyring.gpg').read_text().split('\n')} + + assert not apt_root.exists() + + expected_conf = { + 'Architecture': 'amd64', + 'Dir': str(apt_root), + 'Dir::State': f'{apt_root}/var/lib/apt', + 'Dir::State::status': f'{apt_root}/var/lib/dpkg/status', + 'Dir::Etc::SourceList': f'{apt_root}/etc/apt.sources.list', + 'Dir::Etc::SourceParts': '', + 'Dir::Cache': f'{apt_root}/var/cache/apt', + 'pkgCacheGen::Essential': 'none', + 'Dir::Etc::Trusted': f'{apt_root}/etc/trusted.gpg', + } + + conf_regex = re.compile(r'^(?P<key>\S+)\s"(?P<val>\S*)";$') + assert dict([(m.group('key'), m.group('val')) + for l in conf_lines if l for m in [conf_regex.match(l)]]) == \ + expected_conf + + with ZipFile(mock_cache_dir / 
f'apt_{sources_list.identity()}.zip') as zf: + # reuse the same APT, its cached zip file should exist now + with local_apt.local_apt(sources_list, local_apt.default_keys) as apt: + apt_root = Path(apt.apt_conf).parent.parent + + expected_members = {*apt_root.rglob('*')} + expected_members.remove(apt_root / 'etc' / 'apt.conf') + expected_members.remove(apt_root / 'etc' / 'trusted.gpg') + + names = zf.namelist() + assert len(names) == len(expected_members) + + for name in names: + path = apt_root / name + assert path in expected_members + assert zf.read(name) == \ + (b'' if path.is_dir() else path.read_bytes()) + + assert not apt_root.exists() + +@pytest.mark.subprocess_run(local_apt, run_missing_executable) +@pytest.mark.usefixtures('mock_subprocess_run', 'mock_gnupg_import') +def test_local_apt_missing(mock_cache_dir): + """ + Verify that the local_apt() function raises a proper error when 'apt-get' + command is missing. + """ + sources_list = local_apt.SourcesList(['deb-src sth', 'deb sth']) + + with pytest.raises(local_apt.AptError, + match='^couldnt_execute_apt-get_is_it_installed$'): + with local_apt.local_apt(sources_list, local_apt.default_keys) as apt: + pass + +@pytest.mark.subprocess_run(local_apt, make_run_apt_get(update_code=1)) +@pytest.mark.usefixtures('mock_subprocess_run', 'mock_gnupg_import') +def test_local_apt_update_fail(mock_cache_dir): + """ + Verify that the local_apt() function raises a proper error when + 'apt-get update' command returns non-0. 
+ """ + sources_list = local_apt.SourcesList(['deb-src sth', 'deb sth']) + + error_regex = """^\ +command_apt-get -c \\S+ update_failed + +STDOUT_OUTPUT_heading + +some output + +STDERR_OUTPUT_heading + +some error output\ +$\ +""" + + with pytest.raises(local_apt.AptError, match=error_regex): + with local_apt.local_apt(sources_list, local_apt.default_keys) as apt: + pass + +@pytest.mark.subprocess_run(local_apt, make_run_apt_get()) +@pytest.mark.usefixtures('mock_subprocess_run', 'mock_gnupg_import') +def test_local_apt_download(mock_cache_dir): + """ + Verify that download_apt_packages() function properly performs the download + of .debs and sources. + """ + sources_list = local_apt.SourcesList(['deb-src sth', 'deb sth']) + destination = mock_cache_dir / 'destination' + destination.mkdir() + + local_apt.download_apt_packages(sources_list, local_apt.default_keys, + ['libjs-mathjax'], destination, False) + + libjs_mathjax_path = destination / 'libjs-mathjax_0%3a2.7.9+dfsg-1_all.deb' + fonts_mathjax_path = destination / 'fonts-mathjax_0%3a2.7.9+dfsg-1_all.deb' + + source_paths = [ + destination / 'mathjax_2.7.9+dfsg-1.debian.tar.xz', + destination / 'mathjax_2.7.9+dfsg-1.dsc', + destination / 'mathjax_2.7.9+dfsg.orig.tar.xz' + ] + + assert {*destination.iterdir()} == {libjs_mathjax_path, *source_paths} + + local_apt.download_apt_packages(sources_list, local_apt.default_keys, + ['libjs-mathjax'], destination, + with_deps=True) + + assert {*destination.iterdir()} == \ + {libjs_mathjax_path, fonts_mathjax_path, *source_paths} + +@pytest.mark.subprocess_run(local_apt, make_run_apt_get(install_code=1)) +@pytest.mark.usefixtures('mock_subprocess_run', 'mock_gnupg_import') +def test_local_apt_install_fail(mock_cache_dir): + """ + Verify that the download_apt_packages() function raises a proper error when + 'apt-get install' command returns non-0. 
+ """ + sources_list = local_apt.SourcesList(['deb-src sth', 'deb sth']) + destination = mock_cache_dir / 'destination' + destination.mkdir() + + error_regex = f"""^\ +command_apt-get -c \\S+ install --yes --just-print libjs-mathjax_failed + +STDOUT_OUTPUT_heading + +{re.escape(sample_install_stdout)} + +STDERR_OUTPUT_heading + +some error output\ +$\ +""" + + with pytest.raises(local_apt.AptError, match=error_regex): + local_apt.download_apt_packages(sources_list, local_apt.default_keys, + ['libjs-mathjax'], destination, + with_deps=True) + + assert [*destination.iterdir()] == [] + +@pytest.mark.subprocess_run(local_apt, make_run_apt_get(download_code=1)) +@pytest.mark.usefixtures('mock_subprocess_run', 'mock_gnupg_import') +def test_local_apt_download_fail(mock_cache_dir): + """ + Verify that the download_apt_packages() function raises a proper error when + 'apt-get download' command returns non-0. + """ + sources_list = local_apt.SourcesList(['deb-src sth', 'deb sth']) + destination = mock_cache_dir / 'destination' + destination.mkdir() + + error_regex = """^\ +command_apt-get -c \\S+ download libjs-mathjax_failed + +STDOUT_OUTPUT_heading + +some output + +STDERR_OUTPUT_heading + +some error output\ +$\ +""" + + with pytest.raises(local_apt.AptError, match=error_regex): + local_apt.download_apt_packages(sources_list, local_apt.default_keys, + ['libjs-mathjax'], destination, False) + + assert [*destination.iterdir()] == [] + +@pytest.fixture +def mock_bad_deb_file(monkeypatch, mock_subprocess_run): + """ + Make mocked 'apt-get download' command produce an incorrectly-named file. + """ + old_run = local_apt.subprocess.run + + def twice_mocked_run(command, **kwargs): + """ + Create an evil file if needed; then act just like the run() function + that got replaced by this one. 
+ """ + if 'download' in command: + destination = Path(kwargs.get('cwd') or Path.cwd()) + (destination / 'arbitrary-name').write_text('anything') + + return old_run(command, **kwargs) + + monkeypatch.setattr(local_apt.subprocess, 'run', twice_mocked_run) + +@pytest.mark.subprocess_run(local_apt, make_run_apt_get()) +@pytest.mark.usefixtures('mock_subprocess_run', 'mock_gnupg_import', + 'mock_bad_deb_file') +def test_local_apt_download_bad_filename(mock_cache_dir): + """ + Verify that the download_apt_packages() function raises a proper error when + 'apt-get download' command produces an incorrectly-named file. + """ + sources_list = local_apt.SourcesList([], 'nabia') + destination = mock_cache_dir / 'destination' + destination.mkdir() + + error_regex = """^\ +apt_download_gave_bad_filename_arbitrary-name + +STDOUT_OUTPUT_heading + +some output + +STDERR_OUTPUT_heading + +some error output\ +$\ +""" + + with pytest.raises(local_apt.AptError, match=error_regex): + local_apt.download_apt_packages(sources_list, local_apt.default_keys, + ['libjs-mathjax'], destination, False) + + assert [*destination.iterdir()] == [] + +@pytest.mark.subprocess_run(local_apt, make_run_apt_get(source_code=1)) +@pytest.mark.usefixtures('mock_subprocess_run', 'mock_gnupg_import') +def test_local_apt_source_fail(mock_cache_dir): + """ + Verify that the download_apt_packages() function raises a proper error when + 'apt-get source' command returns non-0. 
+ """ + sources_list = local_apt.SourcesList(['deb-src sth', 'deb sth']) + destination = mock_cache_dir / 'destination' + destination.mkdir() + + error_regex = """^\ +command_apt-get -c \\S* source --download-only \\S+_failed + +STDOUT_OUTPUT_heading + +some output + +STDERR_OUTPUT_heading + +some error output\ +$\ +""" + + with pytest.raises(local_apt.AptError, match=error_regex): + local_apt.download_apt_packages(sources_list, local_apt.default_keys, + ['libjs-mathjax'], destination, False) + + assert [*destination.iterdir()] == [] + +def test_sources_list(): + """Verify that the SourcesList class works properly.""" + list = local_apt.SourcesList([], 'nabia') + assert list.identity() == 'nabia' + + with pytest.raises(local_apt.DistroError, match='^distro_nabiał_unknown$'): + local_apt.SourcesList([], 'nabiał') + + list = local_apt.SourcesList(['deb sth', 'deb-src sth'], 'nabia') + assert list.identity() == \ + 'ef28d408b96046eae45c8ab3094ce69b2ac0c02a887e796b1d3d1a4f06fb49f1' + +def run_dpkg_deb(command, returncode=0, **kwargs): + """ + Insted of running an 'dpkg-deb -x' command just create some dummy file + in the destination directory. + """ + expected = ['dpkg-deb', '-x', '', ''] + + variables = process_run_args(command, kwargs, expected) + deb_path = Path(variables['deb_path']) + dst_path = Path(variables['dst_path']) + + package_name = re.match('^([^_]+)_.*', deb_path.name).group(1) + for path in [ + dst_path / 'etc' / f'dummy_{package_name}_config', + dst_path / 'usr/share/doc' / package_name / 'copyright' + ]: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(f'dummy {path.name}') + + return MockedCompletedProcess(command, returncode, + text_output=kwargs.get('text')) + +def download_apt_packages(list, keys, packages, destination_dir, + with_deps=False): + """ + Replacement for download_apt_packages() function in local_apt.py, for + unit-testing the piggybacked_system() function. 
+ """ + for path in [ + destination_dir / 'some-bin-package_1.1-2_all.deb', + destination_dir / 'another-package_1.1-2_all.deb', + destination_dir / 'some-source-package_1.1.orig.tar.gz', + destination_dir / 'some-source-package_1.1-1.dsc' + ]: + path.write_text(f'dummy {path.name}') + + with open(destination_dir / 'test_data.json', 'w') as out: + json.dump({ + 'list_identity': list.identity(), + 'keys': keys, + 'packages': packages, + 'with_deps': with_deps + }, out) + +@pytest.fixture +def mock_download_packages(monkeypatch): + """Mock the download_apt_packages() function in local_apt.py.""" + monkeypatch.setattr(local_apt, 'download_apt_packages', + download_apt_packages) + +@pytest.mark.subprocess_run(local_apt, run_dpkg_deb) +@pytest.mark.parametrize('params', [ + { + 'with_deps': False, + 'base_depends': True, + 'identity': 'nabia', + 'props': {'distribution': 'nabia', 'dependencies': False}, + 'all_keys': local_apt.default_keys, + 'prepared_directory': False + }, + { + 'with_deps': True, + 'base_depends': False, + 'identity': '38db0b4fa2f6610cd1398b66a2c05d9abb1285f9a055a96eb96dee0f6b72aca8', + 'props': { + 'sources_list': [f'deb{suf} http://example.com/ stable main' + for suf in ('', '-src')], + 'trusted_keys': ['AB' * 20], + 'dependencies': True, + 'depend_on_base_packages': False + }, + 'all_keys': [*local_apt.default_keys, 'AB' * 20], + 'prepared_directory': True + } +]) +@pytest.mark.usefixtures('mock_download_packages', 'mock_subprocess_run') +def test_piggybacked_system_download(params, tmpdir): + """ + Verify that the piggybacked_system() function properly downloads and unpacks + APT packages. 
+ """ + foreign_packages_dir = tmpdir if params['prepared_directory'] else None + + with local_apt.piggybacked_system({ + 'system': 'apt', + **params['props'], + 'packages': ['some-bin-package', 'another-package=1.1-2'] + }, foreign_packages_dir) as piggybacked: + expected_depends = [{'identifier': 'apt-common-licenses'}] \ + if params['base_depends'] else [] + assert piggybacked.resource_must_depend == expected_depends + + archive_files = dict(piggybacked.archive_files()) + + archive_names = [ + 'some-bin-package_1.1-2_all.deb', + 'another-package_1.1-2_all.deb', + 'some-source-package_1.1.orig.tar.gz', + 'some-source-package_1.1-1.dsc', + 'test_data.json' + ] + assert {*archive_files.keys()} == \ + {PurePosixPath('apt') / n for n in archive_names} + + for path in archive_files.values(): + if path.name == 'test_data.json': + assert json.loads(path.read_text()) == { + 'list_identity': params['identity'], + 'keys': params['all_keys'], + 'packages': ['some-bin-package', 'another-package=1.1-2'], + 'with_deps': params['with_deps'] + } + else: + assert path.read_text() == f'dummy {path.name}' + + if foreign_packages_dir is not None: + assert path.parent == foreign_packages_dir / 'apt' + + license_files = {*piggybacked.package_license_files} + + assert license_files == { + PurePosixPath('.apt-root/usr/share/doc/another-package/copyright'), + PurePosixPath('.apt-root/usr/share/doc/some-bin-package/copyright') + } + + assert ['dummy copyright'] * 2 == \ + [piggybacked.resolve_file(p).read_text() for p in license_files] + + for name in ['some-bin-package', 'another-package']: + path = PurePosixPath(f'.apt-root/etc/dummy_{name}_config') + assert piggybacked.resolve_file(path).read_text() == \ + f'dummy {path.name}' + + assert piggybacked.resolve_file(PurePosixPath('a/b/c')) == None + assert piggybacked.resolve_file(PurePosixPath('')) == None + + output_text = 'loading_.apt-root/a/../../../b_outside_piggybacked_dir' + with pytest.raises(FileReferenceError, + 
match=f'^{re.escape(output_text)}$'): + piggybacked.resolve_file(PurePosixPath('.apt-root/a/../../../b')) + + root = piggybacked.resolve_file(PurePosixPath('.apt-root/dummy')).parent + assert root.is_dir() + + assert not root.exists() + + if foreign_packages_dir: + assert [*tmpdir.iterdir()] == [tmpdir / 'apt'] + +@pytest.mark.subprocess_run(local_apt, run_dpkg_deb) +@pytest.mark.usefixtures('mock_subprocess_run') +def test_piggybacked_system_no_download(): + """ + Verify that the piggybacked_system() function is able to use pre-downloaded + APT packages. + """ + archive_names = { + f'{package}{rest}' + for package in ('some-lib_1:2.3', 'other-lib_4.45.2') + for rest in ('-1_all.deb', '.orig.tar.gz', '-1.debian.tar.xz', '-1.dsc') + } + + with TemporaryDirectory() as td: + td = Path(td) + (td / 'apt').mkdir() + for name in archive_names: + (td / 'apt' / name).write_text(f'dummy {name}') + + with local_apt.piggybacked_system({ + 'system': 'apt', + 'distribution': 'nabia', + 'dependencies': True, + 'packages': ['whatever', 'whatever2'] + }, td) as piggybacked: + archive_files = dict(piggybacked.archive_files()) + + assert {*archive_files.keys()} == \ + {PurePosixPath('apt') / name for name in archive_names} + + for path in archive_files.values(): + assert path.read_text() == f'dummy {path.name}' + + assert {*piggybacked.package_license_files} == { + PurePosixPath('.apt-root/usr/share/doc/some-lib/copyright'), + PurePosixPath('.apt-root/usr/share/doc/other-lib/copyright') + } + + for name in ['some-lib', 'other-lib']: + path = PurePosixPath(f'.apt-root/etc/dummy_{name}_config') + assert piggybacked.resolve_file(path).read_text() == \ + f'dummy {path.name}' + +@pytest.mark.subprocess_run(local_apt, run_missing_executable) +@pytest.mark.usefixtures('mock_download_packages', 'mock_subprocess_run') +def test_piggybacked_system_missing(): + """ + Verify that the piggybacked_system() function raises a proper error when + 'dpkg-deb' is missing. 
+ """ + with pytest.raises(local_apt.AptError, + match='^couldnt_execute_dpkg-deb_is_it_installed$'): + with local_apt.piggybacked_system({ + 'system': 'apt', + 'distribution': 'nabia', + 'packages': ['some-package'], + 'dependencies': False + }, None) as piggybacked: + pass + +@pytest.mark.subprocess_run(local_apt, lambda c, **kw: run_dpkg_deb(c, 1, **kw)) +@pytest.mark.usefixtures('mock_download_packages', 'mock_subprocess_run') +def test_piggybacked_system_fail(): + """ + Verify that the piggybacked_system() function raises a proper error when + 'dpkg-deb -x' command returns non-0. + """ + error_regex = """^\ +command_dpkg-deb -x \\S+\\.deb \\S+_failed + +STDOUT_OUTPUT_heading + +some output + +STDERR_OUTPUT_heading + +some error output\ +$\ +""" + + with pytest.raises(local_apt.AptError, match=error_regex): + with local_apt.piggybacked_system({ + 'system': 'apt', + 'distribution': 'nabia', + 'packages': ['some-package'], + 'dependencies': False + }, None) as piggybacked: + pass diff --git a/tests/test_pattern_tree.py b/tests/test_pattern_tree.py new file mode 100644 index 0000000..4238d66 --- /dev/null +++ b/tests/test_pattern_tree.py @@ -0,0 +1,454 @@ +# SPDX-License-Identifier: CC0-1.0 + +# Copyright (C) 2022 Wojtek Kosior +# +# Available under the terms of Creative Commons Zero v1.0 Universal. 
+ +import pytest +import re +import dataclasses as dc + +from immutables import Map + +from hydrilla import pattern_tree + +from .url_patterns_common import * + +@pytest.mark.parametrize('_in, out', [ + (Map(), True), + ({'children': Map(non_empty='non_emtpy')}, False), + ({'literal_match': 'non-None'}, False), + ({'children': Map(non_empty='non_emtpy')}, False), + ({'literal_match': 'non-None', 'children': 'non-empty'}, False) +]) +def test_pattern_tree_node_is_empty(_in, out): + """....""" + assert pattern_tree.PatternTreeNode(**_in).is_empty() == out + +def test_pattern_tree_node_update_literal_match(): + """....""" + node1 = pattern_tree.PatternTreeNode() + node2 = node1.update_literal_match('dummy match item') + + assert node1.literal_match is None + assert node2.literal_match == 'dummy match item' + +def test_pattern_tree_node_get_child(): + """....""" + node = pattern_tree.PatternTreeNode(children=Map(dummy_key='dummy_val')) + + assert node.get_child('dummy_key') == 'dummy_val' + assert node.get_child('other_key') is None + +def test_pattern_tree_node_remove_child(): + """....""" + node1 = pattern_tree.PatternTreeNode(children=Map(dummy_key='dummy_val')) + node2 = node1.remove_child('dummy_key') + + assert node1.children == Map(dummy_key='dummy_val') + assert node2.children == Map() + +def test_pattern_tree_node_set_child(): + """....""" + node1 = pattern_tree.PatternTreeNode(children=Map(dummy_key='dummy_val')) + node2 = node1.set_child('other_key', 'other_val') + + assert node1.children == Map(dummy_key='dummy_val') + assert node2.children == Map(dummy_key='dummy_val', other_key='other_val') + +@pytest.mark.parametrize('root_empty', [True, False]) +def test_pattern_tree_branch_is_empty(root_empty): + """....""" + class DummyEmptyRoot: + """....""" + is_empty = lambda: root_empty + + branch = pattern_tree.PatternTreeBranch(root_node=DummyEmptyRoot) + assert branch.is_empty() == root_empty + +# def test_pattern_tree_branch_copy(): +# """....""" +# class 
DummyRoot: +# """....""" +# pass + +# branch1 = pattern_tree.PatternTreeBranch(root_node=DummyRoot) +# branch2 = branch1.copy() + +# assert branch1 is not branch2 +# for val_b1, val_b2 in zip(dc.astuple(branch1), dc.astuple(branch2)): +# assert val_b1 is val_b2 + +@pytest.fixture +def empty_branch(): + """....""" + return pattern_tree.PatternTreeBranch( + root_node = pattern_tree.PatternTreeNode() + ) + +@pytest.fixture +def branch_with_a_b(): + """....""" + return pattern_tree.PatternTreeBranch( + root_node = pattern_tree.PatternTreeNode( + children = Map( + a = pattern_tree.PatternTreeNode( + children = Map( + b = pattern_tree.PatternTreeNode( + literal_match = frozenset({'myitem'}) + ) + ) + ) + ) + ) + ) + +def test_pattern_tree_branch_update_add_first(empty_branch, branch_with_a_b): + """....""" + updated_branch = empty_branch.update( + ['a', 'b'], + lambda s: frozenset({*(s or []), 'myitem'}) + ) + + assert updated_branch == branch_with_a_b + assert empty_branch.root_node.children == Map() + +def test_pattern_tree_branch_update_add_second(branch_with_a_b): + """....""" + updated_branch = branch_with_a_b.update( + ['a', 'b'], + lambda s: frozenset({*(s or []), 'myotheritem'}) + ) + + leaf_node = updated_branch.root_node.children['a'].children['b'] + assert leaf_node.literal_match == frozenset({'myitem', 'myotheritem'}) + +def test_pattern_tree_branch_update_add_different_path(branch_with_a_b): + """....""" + updated_branch = branch_with_a_b.update( + ['a', 'not_b'], + lambda s: frozenset({*(s or []), 'myotheritem'}) + ) + + for segment, item in [('b', 'myitem'), ('not_b', 'myotheritem')]: + leaf_node = updated_branch.root_node.children['a'].children[segment] + assert leaf_node.literal_match == frozenset({item}) + +# def test_pattern_tree_branch_update_is_value_copied(branch_with_a_b): +# """....""" +# updated_branch = branch_with_a_b.update(['a', 'b'], lambda s: s) + +# leaf_node_orig = updated_branch.root_node.children['a'].children['b'] +# leaf_node_new = 
branch_with_a_b.root_node.children['a'].children['b'] + +# assert leaf_node_orig.literal_match == leaf_node_new.literal_match +# assert leaf_node_orig.literal_match is not leaf_node_new.literal_match + +def test_pattern_tree_branch_remove(branch_with_a_b, empty_branch): + """....""" + updated_branch = branch_with_a_b.update(['a', 'b'], lambda s: None) + + assert updated_branch == empty_branch + +def test_pattern_tree_branch_search_empty(empty_branch): + """....""" + assert [*empty_branch.search(['a', 'b'])] == [] + +@pytest.fixture +def branch_with_wildcards(): + """....""" + return pattern_tree.PatternTreeBranch( + root_node = pattern_tree.PatternTreeNode( + children = Map( + a = pattern_tree.PatternTreeNode( + children = Map( + b = pattern_tree.PatternTreeNode( + children = Map({ + 'c': pattern_tree.PatternTreeNode( + literal_match = 'dummy/c' + ), + '*': pattern_tree.PatternTreeNode( + literal_match = 'dummy/*' + ), + '**': pattern_tree.PatternTreeNode( + literal_match = 'dummy/**' + ), + '***': pattern_tree.PatternTreeNode( + literal_match = 'dummy/***' + ) + }) + ) + ) + ) + ) + ) + ) + +@pytest.mark.parametrize('_in, out', [ + (['a'], []), + (['a', 'x', 'y', 'z'], []), + (['a', 'b'], ['dummy/***']), + (['a', 'b', 'c'], ['dummy/c', 'dummy/*', 'dummy/***']), + (['a', 'b', 'u'], ['dummy/*', 'dummy/***']), + (['a', 'b', '*'], ['dummy/*', 'dummy/***']), + (['a', 'b', '**'], ['dummy/**', 'dummy/*', 'dummy/***']), + (['a', 'b', '***'], ['dummy/***', 'dummy/*']), + (['a', 'b', 'u', 'l'], ['dummy/**', 'dummy/***']), + (['a', 'b', 'u', 'l', 'y'], ['dummy/**', 'dummy/***']) +]) +def test_pattern_tree_branch_search_wildcards(_in, out, branch_with_wildcards): + """....""" + assert [*branch_with_wildcards.search(_in)] == out + +def test_filter_by_trailing_slash(sample_url_parsed): + """....""" + sample_url_parsed2 = dc.replace(sample_url_parsed, has_trailing_slash=True) + item1 = pattern_tree.StoredTreeItem(sample_url_parsed, 'dummy_it1') + item2 = 
pattern_tree.StoredTreeItem(sample_url_parsed2, 'dummy_it2') + + assert pattern_tree.filter_by_trailing_slash((item1, item2), False) == \ + frozenset({item1}) + + assert pattern_tree.filter_by_trailing_slash((item1, item2), True) == \ + frozenset({item2}) + +@pytest.mark.parametrize('register_mode', [True, False]) +@pytest.mark.parametrize('empty_at_start', [True, False]) +@pytest.mark.parametrize('empty_at_end', [True, False]) +def test_pattern_tree_privatemethod_register( + register_mode, + empty_at_start, + empty_at_end, + monkeypatch, + sample_url_parsed +): + """....""" + dummy_it = pattern_tree.StoredTreeItem(sample_url_parsed, 'dummy_it') + other_dummy_it = pattern_tree.StoredTreeItem( + sample_url_parsed, + 'other_dummy_it' + ) + + class MockedTreeBranch: + """....""" + def is_empty(self): + """....""" + return empty_at_end + + def update(self, segments, item_updater): + """....""" + if segments == ('com', 'example'): + return self._update_as_domain_branch(item_updater) + else: + assert segments == ('aa', 'bb') + return self._update_as_path_branch(item_updater) + + def _update_as_domain_branch(self, item_updater): + """....""" + for updater_input in (None, MockedTreeBranch()): + updated = item_updater(updater_input) + if empty_at_end: + assert updated is None + else: + assert type(updated) is MockedTreeBranch + + return MockedTreeBranch() + + def _update_as_path_branch(self, item_updater): + """....""" + set_with_1_item = frozenset() + set_with_2_items = frozenset({dummy_it, other_dummy_it}) + for updater_input in (None, set_with_1_item, set_with_2_items): + updated = item_updater(updater_input) + if register_mode: + assert dummy_it in updated + elif updater_input is set_with_2_items: + assert dummy_it not in updated + else: + assert updated is None + + return MockedTreeBranch() + + monkeypatch.setattr(pattern_tree, 'PatternTreeBranch', MockedTreeBranch) + + initial_root = Map() if empty_at_start else \ + Map({('http', 80): MockedTreeBranch()}) + + tree = 
pattern_tree.PatternTree(_by_scheme_and_port=initial_root) + + new_tree = tree._register( + sample_url_parsed, + 'dummy_it', + register=register_mode + ) + + assert new_tree is not tree + + if empty_at_end: + assert new_tree._by_scheme_and_port == Map() + else: + assert len(new_tree._by_scheme_and_port) == 1 + assert type(new_tree._by_scheme_and_port[('http', 80)]) is \ + MockedTreeBranch + +# @pytest.mark.parametrize('register_mode', [True, False]) +# def test_pattern_tree_privatemethod_register( +# register_mode, +# monkeypatch, +# sample_url_parsed +# ): +# """....""" +# registered_count = 0 + +# def mocked_parse_pattern(url_pattern): +# """....""" +# assert url_pattern == 'dummy_pattern' + +# for _ in range(2): +# yield sample_url_parsed + +# monkeypatch.setattr(pattern_tree, 'parse_pattern', mocked_parse_pattern) + +# def mocked_reconstruct_url(self): +# """....""" +# return 'dummy_reconstructed_pattern' + +# monkeypatch.setattr(pattern_tree.ParsedUrl, 'reconstruct_url', +# mocked_reconstruct_url) + +# def mocked_register_with_parsed_pattern( +# self, +# parsed_pat, +# wrapped_item, +# register=True +# ): +# """....""" +# nonlocal registered_count + +# assert parsed_pat is sample_url_parsed +# assert wrapped_item.pattern == 'dummy_reconstructed_pattern' +# assert register == register_mode + +# registered_count += 1 + +# return 'dummy_new_tree' if registered_count == 2 else dc.replace(self) + +# monkeypatch.setattr( +# pattern_tree.PatternTree, +# '_register_with_parsed_pattern', +# mocked_register_with_parsed_pattern +# ) + +# pattern_tree = pattern_tree.PatternTree() + +# new_tree = pattern_tree._register( +# 'dummy_pattern', +# 'dummy_item', +# register_mode +# ) + +# assert new_tree == 'dummy_new_tree' + +@pytest.mark.parametrize('method_name, register_mode', [ + ('register', True), + ('deregister', False) +]) +def test_pattern_tree_register(method_name, register_mode, monkeypatch): + """....""" + def mocked_privatemethod_register(self, parsed_pat, item, 
register=True): + """....""" + assert (parsed_pat, item, register) == \ + ('dummy_pattern', 'dummy_url', register_mode) + + return 'dummy_new_tree' + + monkeypatch.setattr( + pattern_tree.PatternTree, + '_register', + mocked_privatemethod_register + ) + + method = getattr(pattern_tree.PatternTree(), method_name) + assert method('dummy_pattern', 'dummy_url') == 'dummy_new_tree' + +@pytest.fixture +def mock_parse_url(monkeypatch, sample_url_parsed): + """....""" + def mocked_parse_url(url): + """....""" + assert url == 'dummy_url' + return dc.replace( + sample_url_parsed, + **getattr(mocked_parse_url, 'url_mod', {}) + ) + + monkeypatch.setattr(pattern_tree, 'parse_url', mocked_parse_url) + + return mocked_parse_url + +@pytest.mark.usefixtures('mock_parse_url') +def test_pattern_tree_search_empty(sample_url_parsed): + """....""" + for url in ('dummy_url', sample_url_parsed): + assert [*pattern_tree.PatternTree().search(url)] == [] + +@pytest.mark.parametrize('url_mod, out', [ + ({}, + ['dummy_set_A', 'dummy_set_B', 'dummy_set_C']), + + ({'has_trailing_slash': True}, + ['dummy_set_A_with_slash', 'dummy_set_A', + 'dummy_set_B_with_slash', 'dummy_set_B', + 'dummy_set_C_with_slash', 'dummy_set_C']) +]) +def test_pattern_tree_search( + url_mod, + out, + monkeypatch, + sample_url_parsed, + mock_parse_url, +): + """....""" + mock_parse_url.url_mod = url_mod + + dummy_tree_contents = [ + ['dummy_set_A', 'dummy_set_B'], + [], + ['dummy_empty_set'] * 3, + ['dummy_set_C'] + ] + + def mocked_filter_by_trailing_slash(items, with_slash): + """....""" + if items == 'dummy_empty_set': + return frozenset() + + return items + ('_with_slash' if with_slash else '') + + monkeypatch.setattr(pattern_tree, 'filter_by_trailing_slash', + mocked_filter_by_trailing_slash) + + class MockedDomainBranch: + """....""" + def search(self, labels): + """....""" + assert labels == sample_url_parsed.domain_labels + + for item_sets in dummy_tree_contents: + class MockedPathBranch: + """....""" + def 
search(self, segments, item_sets=item_sets): + """....""" + assert segments == sample_url_parsed.path_segments + + for dummy_items_set in item_sets: + yield dummy_items_set + + yield MockedPathBranch() + + tree = pattern_tree.PatternTree( + _by_scheme_and_port = {('http', 80): MockedDomainBranch()} + ) + + for url in ('dummy_url', mock_parse_url('dummy_url')): + assert [*tree.search(url)] == out diff --git a/tests/test_server.py b/tests/test_server.py index 02b9742..854b5f0 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -24,9 +24,6 @@ # file's license. Although I request that you do not make use this code # in a proprietary program, I am not going to enforce this in court. -# Enable using with Python 3.7. -from __future__ import annotations - import pytest import sys import shutil @@ -42,9 +39,9 @@ from flask.testing import FlaskClient from markupsafe import escape from werkzeug import Response -from hydrilla import util as hydrilla_util +from hydrilla import _version, json_instances from hydrilla.builder import Build -from hydrilla.server import config, _version +from hydrilla.server import config from hydrilla.server.serve import HydrillaApp here = Path(__file__).resolve().parent @@ -125,7 +122,7 @@ def index_json_modification(modify_index_json): def handle_index_json(setup): """Modify index.json before build.""" index_path = setup.source_dir / 'index.json' - index_json, _ = hydrilla_util.load_instance_from_file(index_path) + index_json, _ = json_instances.read_instance(index_path) index_json = modify_index_json(index_json) or index_json @@ -193,8 +190,8 @@ def test_get_newest(setup: Setup, item_type: str) -> None: assert ('uuid' in definition) == (setup is not uuidless_setup) - hydrilla_util.validator_for(f'api_{item_type}_description-1.0.1.schema.json')\ - .validate(definition) + schema_name = f'api_{item_type}_description-1.0.1.schema.json' + json_instances.validator_for(schema_name).validate(definition) @pytest.mark.parametrize('item_type', 
['resource', 'mapping']) def test_get_nonexistent(item_type: str) -> None: @@ -241,8 +238,8 @@ def test_empty_query() -> None: 'generated_by': expected_generated_by } - hydrilla_util.validator_for('api_query_result-1.0.1.schema.json')\ - .validate(response_object) + schema_name = 'api_query_result-1.0.1.schema.json' + json_instances.validator_for(schema_name).validate(response_object) def test_query() -> None: """ @@ -264,8 +261,8 @@ def test_query() -> None: 'generated_by': expected_generated_by } - hydrilla_util.validator_for('api_query_result-1.schema.json')\ - .validate(response_object) + schema_name = 'api_query_result-1.schema.json' + json_instances.validator_for(schema_name).validate(response_object) def test_source() -> None: """Verify source descriptions are properly served.""" @@ -282,8 +279,8 @@ def test_source() -> None: response = def_get(f'/source/hello.zip') assert sha256(response.data).digest().hex() == zipfile_hash - hydrilla_util.validator_for('api_source_description-1.schema.json')\ - .validate(description) + schema_name = 'api_source_description-1.schema.json' + json_instances.validator_for(schema_name).validate(description) def test_missing_source() -> None: """Verify requests for nonexistent sources result in 404.""" @@ -292,8 +289,3 @@ def test_missing_source() -> None: response = def_get(f'/source/nonexistent.zip') assert response.status_code == 404 - -def test_normalize_version(): - assert hydrilla_util.normalize_version([4, 5, 3, 0, 0]) == [4, 5, 3] - assert hydrilla_util.normalize_version([1, 0, 5, 0]) == [1, 0, 5] - assert hydrilla_util.normalize_version([3, 3]) == [3, 3] diff --git a/tests/test_url_patterns.py b/tests/test_url_patterns.py new file mode 100644 index 0000000..c308f18 --- /dev/null +++ b/tests/test_url_patterns.py @@ -0,0 +1,188 @@ +# SPDX-License-Identifier: CC0-1.0 + +# Copyright (C) 2022 Wojtek Kosior +# +# Available under the terms of Creative Commons Zero v1.0 Universal. 
import pytest
import re
import dataclasses as dc

from immutables import Map

from hydrilla import url_patterns
from hydrilla.exceptions import HaketiloException

# Brings in `sample_url_str` and the `sample_url_parsed` fixture used below.
from .url_patterns_common import *

# NOTE(review): the test below is disabled; presumably ParsedUrl does not
# provide reconstruct_url() at this point - confirm before re-enabling.
#
# @pytest.mark.parametrize('_in, out', [
#     ({},                           sample_url_str),
#     ({'path_segments': ()},        'http://example.com'),
#     ({'has_trailing_slash': True}, 'http://example.com/aa/bb/'),
#     ({'orig_scheme': 'http*'},     'http*://example.com/aa/bb'),
#     ({'scheme': 'http_sth'},       'http://example.com/aa/bb'),
#     ({'port': 443},                'http://example.com:443/aa/bb'),

#     ({'path_segments': (),
#       'has_trailing_slash': True},
#      'http://example.com/'),

#     ({'orig_scheme': 'https',
#       'scheme': 'https',
#       'port': 443},
#      'https://example.com/aa/bb'),

#     ({'orig_scheme': 'ftp',
#       'scheme': 'ftp',
#       'port': 21},
#      'ftp://example.com/aa/bb'),

#     ({'orig_scheme': 'file',
#       'scheme': 'file',
#       'port': None,
#       'domain_labels': ()},
#      'file:///aa/bb')
# ])
# def test_reconstruct_parsed_url(_in, out, sample_url_parsed):
#     """Test the reconstruct_url() method of ParsedUrl class."""
#     parsed_url = dc.replace(sample_url_parsed, **_in)
#     assert parsed_url.reconstruct_url() == out

@pytest.mark.parametrize('_in, out', [
    ({'url': sample_url_str},                     {}),
    ({'url': 'http://example.com:80/aa/bb'},      {}),
    ({'url': 'http://example.com//aa///bb'},      {}),
    ({'url': 'http://example...com/aa/bb'},       {}),
    ({'url': 'http://example.com/aa/bb?c=d#ef'},  {}),
    ({'url': 'http://example.com'},               {'path_segments': ()}),
    ({'url': 'http://example.com/aa/bb/'},        {'has_trailing_slash': True}),
    ({'url': 'http://example.com:443/aa/bb'},     {'port': 443}),

    ({'url': 'http://example.com/'},
     {'path_segments': (),
      'has_trailing_slash': True}),

    ({'url': 'http://example.com/aa/bb',
      'is_pattern': True,
      'orig_url': 'http*://example.com/aa/bb/'},
     {}),

    ({'url': 'https://example.com/aa/bb'},
     {'scheme': 'https',
      'port': 443}),

    ({'url': 'ftp://example.com/aa/bb'},
     {'scheme': 'ftp',
      'port': 21}),

    ({'url': 'file:///aa/bb'},
     {'scheme': 'file',
      'port': None,
      'domain_labels': ()})
])
def test_parse_pattern_or_url(_in, out, sample_url_parsed):
    """
    Test normal use (no errors) of the _parse_pattern_or_url() function.

    '_in' holds keyword arguments for the call. 'out' holds the fields by which
    the expected result differs from the 'sample_url_parsed' fixture.
    """
    # When no explicit 'orig_url' is given, the URL doubles as the original.
    if 'orig_url' not in _in:
        _in = {**_in, 'orig_url': _in['url']}

    # The result is expected to carry the 'orig_url' that was passed in.
    out = {**out, 'orig_url': _in['orig_url']}

    parsed_url = url_patterns._parse_pattern_or_url(**_in)
    assert parsed_url == dc.replace(sample_url_parsed, **out)

@pytest.mark.parametrize('_in, err', [
    ({'url': 'file://:78/unexpected/port'},  'err.url_{}.bad'),
    ({'url': 'file://unexpected.hostname/'}, 'err.url_{}.bad'),
    ({'url': 'http:///no/hostname'},         'err.url_{}.bad'),
    ({'url': 'invalid?://example.com'},      'err.url_{}.bad'),
    ({'url': 'invalid?://example.com',
      'orig_url': 'invalid?://example.com',
      'is_pattern': True},
     'err.url_pattern_{}.bad'),

    ({'url': 'unknown://example.com'}, 'err.url_{}.bad_scheme'),
    ({'url': 'unknown://example.com',
      'orig_url': 'unknown://example.com',
      'is_pattern': True},
     'err.url_pattern_{}.bad_scheme'),

    ({'url': 'http://example.com:80',
      'orig_url': 'http*://example.com:80',
      'is_pattern': True},
     'err.url_pattern_{}.special_scheme_port'),

    ({'url': 'http://example.com:65536'}, 'err.url_{}.bad_port'),
    ({'url': 'http://example.com:0'},     'err.url_{}.bad_port'),
    ({'url': 'http://example.com:65537',
      'orig_url': 'http://example.com:65537',
      'is_pattern': True},
     'err.url_pattern_{}.bad_port'),

    ({'url': 'http://example.com/?a=b',
      'orig_url': 'http://example.com/?a=b',
      'is_pattern': True},
     'err.url_pattern_{}.has_query'),

    ({'url': 'http://example.com/#abc',
      'orig_url': 'http://example.com/#abc',
      'is_pattern': True},
     'err.url_pattern_{}.has_frag')
])
def test_parse_pattern_or_url_err(_in, err, sample_url_parsed):
    """
    Test error conditions of the _parse_pattern_or_url() function.

    '_in' holds keyword arguments for the call. 'err' is the expected message
    template; its '{}' placeholder is filled with the original URL.
    """
    if 'orig_url' not in _in:
        _in = {**_in, 'orig_url': _in['url']}

    err_url = _in['orig_url']
    # Escape the complete message, not just the URL. Otherwise the dots of the
    # template itself would act as regex wildcards and match any character.
    err_regex = re.escape(err.format(err_url))

    with pytest.raises(HaketiloException, match=f'^{err_regex}$'):
        url_patterns._parse_pattern_or_url(**_in)

def test_parse_pattern_or_url_different_urls():
    """
    Verify the _parse_pattern_or_url() function allows passed URLs to be
    different only when parsing a pattern.
    """
    urls = [sample_url_str, sample_url_str.replace('http', 'http*')]

    url_patterns._parse_pattern_or_url(*urls, is_pattern=True)

    with pytest.raises(AssertionError):
        url_patterns._parse_pattern_or_url(*urls)

@pytest.mark.parametrize('_in, out', [
    ('http://example.com',  ('mocked_pr_http://example.com',)),
    ('ftp://example.com',   ('mocked_pr_ftp://example.com',)),
    ('http*://example.com', ('mocked_pr_http://example.com',
                             'mocked_pr_https://example.com'))
])
def test_parse_pattern(monkeypatch, _in, out):
    """
    Verify that parse_pattern() delegates to _parse_pattern_or_url() and
    returns a tuple of its results, with a 'http*' pattern yielding one result
    for 'http' and one for 'https'.
    """
    def mocked_parse_pattern_or_url(url, orig_url, is_pattern=False):
        """
        Stand in for _parse_pattern_or_url(); check that the call is marked as
        a pattern parse and carries the unmodified pattern as 'orig_url'.
        """
        assert is_pattern
        assert orig_url == _in

        return f'mocked_pr_{url}'

    monkeypatch.setattr(url_patterns, '_parse_pattern_or_url',
                        mocked_parse_pattern_or_url)

    assert url_patterns.parse_pattern(_in) == out

def test_parse_url(monkeypatch):
    """
    Verify that parse_url() returns what _parse_pattern_or_url() produces for
    the given URL.
    """
    def mocked_parse_pattern_or_url(url, orig_url):
        """Stand in for _parse_pattern_or_url(); tag the URL it was given."""
        return f'mocked_pr_{url}'

    monkeypatch.setattr(url_patterns, '_parse_pattern_or_url',
                        mocked_parse_pattern_or_url)

    assert url_patterns.parse_url('https://example.com') == \
        'mocked_pr_https://example.com'

def test_parsed_url_hash(sample_url_parsed):
    """Verify that the sample parsed URL object can be hashed."""
    # hash() raises TypeError for unhashable objects, which fails the test.
    hash(sample_url_parsed)

# Patch metadata for the next file, kept verbatim:
# diff --git a/tests/test_versions.py b/tests/test_versions.py
# new file mode 100644
# index 0000000..43a3f33
# --- /dev/null
# +++ b/tests/test_versions.py
# @@ -0,0 +1,41 @@
# SPDX-License-Identifier: CC0-1.0

# Copyright (C) 2022 Wojtek Kosior
#
# Available under the terms of Creative Commons Zero v1.0 Universal.
import pytest

from hydrilla import versions

sample_version_tuples  = [(4, 5, 3), (1, 0, 5), (3,)]
sample_version_strings = ['4.5.3', '1.0.5', '3']

# Pairs of (version tuple, equivalent version string).
sample_versions = [*zip(sample_version_tuples, sample_version_strings)]

@pytest.mark.parametrize('version_tuple', sample_version_tuples)
def test_normalize_version(version_tuple):
    """
    Verify that normalize_version() produces proper results: a tuple equal to
    the input sequence, with a trailing zero component removed.
    """
    assert versions.normalize_version([*version_tuple])    == version_tuple
    assert versions.normalize_version([*version_tuple, 0]) == version_tuple

@pytest.mark.parametrize('version_tuple, string', sample_versions)
def test_parse_version(version_tuple, string):
    """
    Verify that parse_version() produces proper results, both for the plain
    version string and for one with an extra '.0' component appended.
    """
    # Compare against the expected tuple. A bare truthiness check would accept
    # any non-empty result.
    assert versions.parse_version(string)        == version_tuple
    assert versions.parse_version(string + '.0') == tuple([*version_tuple, 0])

def test_parse_version_bad_string():
    """Verify that parse_version() raises when passed an invalid string."""
    with pytest.raises(ValueError):
        versions.parse_version('i am not a valid version')

@pytest.mark.parametrize('version_tuple, string', sample_versions)
def test_version_string(version_tuple, string):
    """
    Verify that version_string() produces proper results, with and without a
    second argument (which is expected to show up as a '-<number>' suffix).
    """
    for _version_tuple, _string in [
            (version_tuple, string),
            (tuple([*version_tuple, 0]), f'{string}.0')
    ]:
        assert versions.version_string(_version_tuple)    == _string
        assert versions.version_string(_version_tuple, 5) == f'{_string}-5'

# Patch metadata for the next file, kept verbatim:
# diff --git a/tests/url_patterns_common.py b/tests/url_patterns_common.py
# new file mode 100644
# index 0000000..de6651d
# --- /dev/null
# +++ b/tests/url_patterns_common.py
# @@ -0,0 +1,23 @@
# SPDX-License-Identifier: CC0-1.0

# Copyright (C) 2022 Wojtek Kosior
#
# Available under the terms of Creative Commons Zero v1.0 Universal.
import pytest

from hydrilla import url_patterns

# URL that the `sample_url_parsed` fixture below corresponds to.
sample_url_str = 'http://example.com/aa/bb'

@pytest.fixture(scope='session')
def sample_url_parsed():
    """Provide a simple ParsedUrl object built for `sample_url_str`."""
    fields = {
        'orig_url':           sample_url_str,
        'scheme':             'http',
        'domain_labels':      ('com', 'example'),
        'path_segments':      ('aa', 'bb'),
        'has_trailing_slash': False,
        'port':               80,
    }

    return url_patterns.ParsedUrl(**fields)

# Patch trailer, kept verbatim:
# --
# cgit v1.2.3