From 6676b4ed90e19e2fd6ee5f4242cf85f64db145d8 Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Tue, 8 Feb 2022 15:29:49 +0100 Subject: rework Hydrilla to use a separate tool for building its source packages * Hydrilla now depends on "Hydrilla builder" developed at: https://git.koszko.org/hydrilla-builder/ * Hydrilla repository is now REUSE-compliant * The debian packaging is temporarily not tested and likely to be broken * JSON schemas are now in use (through 'jsonschema' Python library) * This is not yet a release and some minor changes to the API on-disk format are going to occur before that --- src/hydrilla/__init__.py | 7 + src/hydrilla/server/__init__.py | 7 + src/hydrilla/server/config.json | 24 + .../server/locales/en/LC_MESSAGES/hydrilla.po | 127 ++++ src/hydrilla/server/serve.py | 604 +++++++++++++++++ src/hydrilla/server/templates/base.html | 122 ++++ src/hydrilla/server/templates/index.html | 30 + src/hydrilla_dev_helper.py | 308 +++++++++ src/pydrilla/__init__.py | 1 - src/pydrilla/config.json | 14 - src/pydrilla/locales/en/LC_MESSAGES/pydrilla.po | 127 ---- src/pydrilla/pydrilla.py | 755 --------------------- src/pydrilla/templates/base.html | 122 ---- src/pydrilla/templates/index.html | 30 - src/pydrilla_dev_helper.py | 293 -------- src/test/__init__.py | 5 + src/test/development_config.json | 17 +- src/test/example_content/hello/bye.js | 7 - src/test/example_content/hello/cc0.txt | 121 ---- src/test/example_content/hello/hello.js | 7 - src/test/example_content/hello/index.json | 302 --------- src/test/example_content/hello/message.js | 8 - src/test/source-package-example | 1 + src/test/test_pydrilla.py | 153 ----- src/test/test_server.py | 199 ++++++ 25 files changed, 1441 insertions(+), 1950 deletions(-) create mode 100644 src/hydrilla/__init__.py create mode 100644 src/hydrilla/server/__init__.py create mode 100644 src/hydrilla/server/config.json create mode 100644 src/hydrilla/server/locales/en/LC_MESSAGES/hydrilla.po create mode 100644 
src/hydrilla/server/serve.py create mode 100644 src/hydrilla/server/templates/base.html create mode 100644 src/hydrilla/server/templates/index.html create mode 100644 src/hydrilla_dev_helper.py delete mode 100644 src/pydrilla/__init__.py delete mode 100644 src/pydrilla/config.json delete mode 100644 src/pydrilla/locales/en/LC_MESSAGES/pydrilla.po delete mode 100644 src/pydrilla/pydrilla.py delete mode 100644 src/pydrilla/templates/base.html delete mode 100644 src/pydrilla/templates/index.html delete mode 100644 src/pydrilla_dev_helper.py delete mode 100644 src/test/example_content/hello/bye.js delete mode 100644 src/test/example_content/hello/cc0.txt delete mode 100644 src/test/example_content/hello/hello.js delete mode 100644 src/test/example_content/hello/index.json delete mode 100644 src/test/example_content/hello/message.js create mode 160000 src/test/source-package-example delete mode 100644 src/test/test_pydrilla.py create mode 100644 src/test/test_server.py (limited to 'src') diff --git a/src/hydrilla/__init__.py b/src/hydrilla/__init__.py new file mode 100644 index 0000000..6aeb276 --- /dev/null +++ b/src/hydrilla/__init__.py @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: 0BSD + +# Copyright (C) 2013-2020, PyPA + +# https://packaging.python.org/en/latest/guides/packaging-namespace-packages/#pkgutil-style-namespace-packages + +__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/src/hydrilla/server/__init__.py b/src/hydrilla/server/__init__.py new file mode 100644 index 0000000..f5a799e --- /dev/null +++ b/src/hydrilla/server/__init__.py @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: CC0-1.0 + +# Copyright (C) 2022 Wojtek Kosior +# +# Available under the terms of Creative Commons Zero v1.0 Universal. 
+ +from .serve import create_app diff --git a/src/hydrilla/server/config.json b/src/hydrilla/server/config.json new file mode 100644 index 0000000..7c9f22b --- /dev/null +++ b/src/hydrilla/server/config.json @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: CC0-1.0 + +// Default Hydrilla config file. +// +// Copyright (C) 2021, 2022 Wojtek Kosior +// +// Available under the terms of Creative Commons Zero v1.0 Universal. + +{ + // Relative path to directory from which Hydrilla will load packages + // metadata and serve files. + // Deliberately avoiding word "content", see: + // http://www.gnu.org/philosophy/words-to-avoid.en.html#Content + "malcontent_dir": "/var/lib/hydrilla/malcontent", + + // Hydrilla will display this link to users as a place where they can + // obtain sources for its software. This config option is meant to ease + // compliance with the AGPL. + "hydrilla_project_url": "https://hydrillabugs.koszko.org/projects/hydrilla/wiki", + + // Tell Hydrilla to look for additional configuration in those files, in + // this order. + "try_configs": ["/etc/hydrilla/config.json"] +} diff --git a/src/hydrilla/server/locales/en/LC_MESSAGES/hydrilla.po b/src/hydrilla/server/locales/en/LC_MESSAGES/hydrilla.po new file mode 100644 index 0000000..f9e6a82 --- /dev/null +++ b/src/hydrilla/server/locales/en/LC_MESSAGES/hydrilla.po @@ -0,0 +1,127 @@ +# SPDX-License-Identifier: CC0-1.0 + +# English localization +# +# This file is part of Hydrilla +# +# Copyright (C) 2021 Wojtek Kosior +# +# This file is free cultural work: you can redistribute it with or +# without modification under the terms of the CC0 1.0 Universal License +# as published by the Creative Commons Corporation. +# +# This file is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# CC0 1.0 Universal License for more details. 
+ +msgid "" +msgstr "" +"Project-Id-Version: Hydrilla 0.2\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2021-11-13 19:03+0100\n" +"PO-Revision-Date: 2021-11-06 08:42+0100\n" +"Last-Translator: Wojtek Kosior \n" +"Language-Team: English\n" +"Language: en\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +#: pydrilla.py:97 +msgid "path_is_absolute_{}" +msgstr "Provided path '{}' is absolute." + +#: pydrilla.py:104 +#, python-brace-format +msgid "not_implemented_{what}_{where}" +msgstr "" +"Attempt to use '{what}' in '{where}' but this feature is not yet implemented." + +#: pydrilla.py:194 +#, python-brace-format +msgid "uuid_mismatch_{identifier}" +msgstr "Two different uuids were specified for item '{identifier}'." + +#: pydrilla.py:201 +#, python-brace-format +msgid "version_clash_{identifier}_{version}" +msgstr "Version '{version}' specified more than once for item '{identifier}'." + +#: pydrilla.py:297 pydrilla.py:309 +msgid "invalid_URL_{}" +msgstr "Invalid URL/pattern: '{}'." + +#: pydrilla.py:301 +msgid "disallowed_protocol_{}" +msgstr "Disallowed protocol: '{}'." + +#: pydrilla.py:391 +msgid "license_clash_{}" +msgstr "License '{}' defined more than once." + +#: pydrilla.py:408 +msgid "source_name_clash_{}" +msgstr "Source name '{}' used more than once." + +#: pydrilla.py:426 +#, python-format +msgid "couldnt_load_definition_from_%s" +msgstr "Couldn't load definition from '%s'." + +#: pydrilla.py:442 +#, python-format +msgid "no_index_license_%(source)s_%(lic)s" +msgstr "Unknown license '%(lic)s' used by index.json of '%(source)s'." + +#: pydrilla.py:449 +#, python-format +msgid "no_resource_license_%(resource)s_%(ver)s_%(lic)s" +msgstr "" +"Unknown license '%(lic)s' used by resource '%(resource)s', version '%(ver)s'." 
+ +#: pydrilla.py:451 +#, python-format +msgid "no_mapping_license_%(mapping)s_%(ver)s_%(lic)s" +msgstr "" +"Unknown license '%(lic)s' used by mapping '%(mapping)s', version '%(ver)s'." + +#: pydrilla.py:474 +#, python-format +msgid "no_dep_%(resource)s_%(ver)s_%(dep)s" +msgstr "" +"Unknown dependency '%(dep)s' of resource '%(resource)s', version '%(ver)s'." + +#: pydrilla.py:484 +#, python-format +msgid "no_payload_%(mapping)s_%(ver)s_%(payload)s" +msgstr "" +"Unknown payload '%(payload)s' of mapping '%(mapping)s', version '%(ver)s'." + +#: pydrilla.py:512 +#, python-format +msgid "couldnt_register_%(mapping)s_%(ver)s_%(pattern)s" +msgstr "" +"Couldn't register mapping '%(mapping)s', version '%(ver)s' (pattern " +"'%(pattern)s')." + +#: pydrilla.py:566 +msgid "content_dir_path_not_dir" +msgstr "Provided \"content_dir\" path does not name a direcotry." + +#: pydrilla.py:578 +#, python-format +msgid "couldnt_load_content_from_%s" +msgstr "Couldn't load content from '%s'." + +#: pydrilla.py:603 +msgid "config_key_absent_{}" +msgstr "Config key \"{}\" not provided." + +#: templates/index.html:4 +msgid "hydrilla_welcome" +msgstr "Welcome to Hydrilla!" + +#: templates/base.html:55 templates/base.html:61 +msgid "hydrilla" +msgstr "Hydrilla" diff --git a/src/hydrilla/server/serve.py b/src/hydrilla/server/serve.py new file mode 100644 index 0000000..815ac63 --- /dev/null +++ b/src/hydrilla/server/serve.py @@ -0,0 +1,604 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + +# Main repository logic. +# +# This file is part of Hydrilla +# +# Copyright (C) 2021, 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +import re +import os +import pathlib +import json +import gettext +import logging + +from pathlib import Path +from hashlib import sha256 +from abc import ABC, abstractmethod +from typing import Optional, Union, Iterable + +from flask import Flask, Blueprint, current_app, url_for, abort, request, \ + redirect, send_file +from jinja2 import Environment, PackageLoader +from werkzeug import Response + +from .. 
import util

# Directory containing this module; the default config file and the locale
# catalogs are looked up relative to it.
here = pathlib.Path(__file__).resolve().parent

def load_config(config_path: Path) -> dict:
    """
    Load the JSON config file at config_path together with every config file
    it references and return the merged result as a dict.

    A loaded file may name further files under 'try_configs' (files that are
    allowed to be missing or unreadable) and 'use_configs' (files that must
    load). Values from files loaded later override earlier ones. Both keys
    are removed from the returned dict.

    The 'malcontent_dir' value is returned as an absolute path; a relative
    value is resolved against the directory of config_path.

    Raises ValueError if 'malcontent_dir' or 'hydrilla_project_url' is absent
    after merging, and re-raises any error from reading or parsing a file
    that is not optional.
    """
    config = {}
    # Two parallel stacks: the files still to be read and, for each of them,
    # whether a failure to load it is tolerated.
    to_load = [config_path]
    failures_ok = [False]

    while to_load:
        # Paths taken from an already loaded JSON file are plain strings.
        # NOTE(review): relative paths found in 'try_configs'/'use_configs'
        # are used as-is (i.e. relative to the working directory) - confirm
        # this is what is wanted.
        path = Path(to_load.pop())
        can_fail = failures_ok.pop()

        try:
            # Read the file popped from the stack. Re-reading config_path
            # here would ignore all referenced files and, whenever
            # config_path lists further configs, would never terminate.
            json_text = util.strip_json_comments(path.read_text())
            new_config = json.loads(json_text)
        except Exception as e:
            if can_fail:
                continue
            raise e from None

        config.update(new_config)

        for key, failure_ok in [('try_configs', True), ('use_configs', False)]:
            paths = new_config.get(key, [])
            # The stack is popped from its end, so push in reverse to have
            # the files processed in the order they were listed.
            to_load.extend(reversed(paths))
            failures_ok.extend([failure_ok] * len(paths))

    for key in ('try_configs', 'use_configs'):
        config.pop(key, None)

    for key in ('malcontent_dir', 'hydrilla_project_url'):
        if key not in config:
            # NOTE(review): _() reads flask.current_app while create_app()
            # calls load_config() before any app object exists - confirm the
            # message lookup can actually work at this point.
            raise ValueError(_('config_key_absent_{}').format(key))

    malcontent_path = Path(config['malcontent_dir'])
    if not malcontent_path.is_absolute():
        malcontent_path = config_path.parent / malcontent_path

    config['malcontent_dir'] = str(malcontent_path.resolve())

    return config

class ItemInfo(ABC):
    """Shortened data of a resource/mapping."""
    def __init__(self, item_obj: dict):
        """
        Initialize ItemInfo using item definition read from JSON.

        The 'version', 'identifier', 'uuid' and 'long_name' keys of item_obj
        are required; the version is stored after being passed through
        util.normalize_version().
        """
        self.version    = util.normalize_version(item_obj['version'])
        self.identifier = item_obj['identifier']
        self.uuid       = item_obj['uuid']
        self.long_name  = item_obj['long_name']

    def path(self) -> str:
        """
        Get a relative path to this item's JSON definition with respect to
        directory containing items of this type. The path has the form
        '<identifier>/<version string>'.
        """
        return f'{self.identifier}/{util.version_string(self.version)}'

class ResourceInfo(ItemInfo):
    """Shortened data of a resource."""
    def __init__(self, resource_obj: dict):
        """Initialize ResourceInfo using resource definition read from JSON."""
        super().__init__(resource_obj)

        # Value of the optional 'dependencies' key, stored unchanged.
        self.dependencies = resource_obj.get('dependencies', [])

class MappingInfo(ItemInfo):
    """Shortened data of a mapping."""
    def __init__(self, mapping_obj: dict):
        """Initialize MappingInfo using mapping definition read from JSON."""
        super().__init__(mapping_obj)

        # Maps each URL pattern to the identifier of the resource it refers
        # to.
        self.payloads = {}
        for pattern, res_ref in mapping_obj.get('payloads', {}).items():
            self.payloads[pattern] = res_ref['identifier']

    def as_query_result(self) -> dict:
        """
        Produce a json.dump()-able object describing this mapping as one of a
        collection of query results.
        """
        return {
            'version':    self.version,
            'identifier': self.identifier,
            'long_name':  self.long_name
        }

class VersionedItemInfo:
    """Stores data of multiple versions of given resource/mapping."""
    def __init__(self):
        self.uuid           = None
        self.identifier     = None
        # Item infos keyed by version string.
        self.by_version     = {}
        # Versions in registration order; Malcontent._finalize() sorts this
        # list once loading is over.
        self.known_versions = []

    def register(self, item_info: ItemInfo) -> None:
        """
        Make item info queryable by version. Perform sanity checks for uuid.

        Raises ValueError if item_info carries a uuid different from the one
        registered first or if its version has already been registered.
        """
        if self.identifier is None:
            # The first registered version defines identifier and uuid.
            self.identifier = item_info.identifier
            self.uuid       = item_info.uuid
        elif self.uuid != item_info.uuid:
            raise ValueError(_('uuid_mismatch_{identifier}')
                             .format(identifier=self.identifier))

        ver     = item_info.version
        ver_str = util.version_string(ver)

        if ver_str in self.by_version:
            raise ValueError(_('version_clash_{identifier}_{version}')
                             .format(identifier=self.identifier,
                                     version=ver_str))

        self.by_version[ver_str] = item_info
        self.known_versions.append(ver)

    def get_by_ver(self, ver: Optional[list[int]]=None) -> Optional[ItemInfo]:
        """
        Find and return info of the newest version of item.

        If ver is specified, instead find and return info of that version of
        the item (or None if absent).

        "Newest" means the last element of known_versions, so the result is
        only meaningful once that list has been sorted.
        """
        ver = util.version_string(ver or self.known_versions[-1])

        return self.by_version.get(ver)

    def get_all(self) -> list[ItemInfo]:
        """
        Return a list of item info for all its versions, from oldest to
        newest (i.e. in the order of known_versions).
        """
        return [self.by_version[util.version_string(ver)]
                for ver in self.known_versions]

class PatternTreeNode:
    """
    "Pattern Tree" is how we refer to the data structure used for querying
    Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal
    is to make it possible for given URL to quickly retrieve all known patterns
    that match it.
    """
    def __init__(self):
        # Items for patterns that continue below this node with a final
        # '*', '**' or '***' segment (indexes 0, 1 and 2 respectively).
        self.wildcard_matches = [None, None, None]
        # Item for the pattern that ends exactly at this node.
        self.literal_match    = None
        # Child nodes keyed by segment.
        self.children         = {}

    def search(self, segments):
        """
        Yields all matches of this segments sequence against the tree that
        starts at this node. Results are produced in order from greatest to
        lowest pattern specificity.
        """
        # Walk down as far as the segments match literally, remembering the
        # whole path of visited nodes.
        nodes = [self]

        for segment in segments:
            next_node = nodes[-1].children.get(segment)
            if next_node is None:
                break

            nodes.append(next_node)

        # The conditions below are evaluated lazily, after the inspected node
        # has been popped, so len(nodes) equals that node's depth - the number
        # of segments consumed on the way to it.
        nsegments = len(segments)
        # Literal pattern: every segment has been consumed.
        cond_literal = lambda: len(nodes) == nsegments
        cond_wildcard = [
            # '*': exactly one segment left and it is not a literal '*'.
            lambda: len(nodes) + 1 == nsegments and segments[-1] != '*',
            # '**': at least two segments left.
            lambda: len(nodes) + 1 <  nsegments,
            # '***': anything except a single left segment that is literally
            # '***'.
            lambda: len(nodes) + 1 != nsegments or segments[-1] != '***'
        ]

        # Deepest nodes first.
        while nodes:
            node = nodes.pop()

            for item, condition in [(node.literal_match, cond_literal),
                                    *zip(node.wildcard_matches, cond_wildcard)]:
                if item is not None and condition():
                    yield item

    def add(self, segments, item_instantiator):
        """
        Make item queryable through (this branch of) the Pattern Tree. If there
        was not yet any item associated with the tree path designated by
        segments, create a new one using item_instantiator() function. Return
        all items matching this path (both the ones that existed and the ones
        just created).
        """
        node = self
        segment = None

        for segment in segments:
            # After the loop this refers to the wildcard slots of the parent
            # of the final node.
            wildcards = node.wildcard_matches

            child = node.children.get(segment) or PatternTreeNode()
            node.children[segment] = child
            node = child

        if node.literal_match is None:
            node.literal_match = item_instantiator()

        # Also covers empty segments (segment is still None then).
        if segment not in ('*', '**', '***'):
            return [node.literal_match]

        # The number of asterisks selects the wildcard slot.
        if wildcards[len(segment) - 1] is None:
            wildcards[len(segment) - 1] = item_instantiator()

        return [node.literal_match, wildcards[len(segment) - 1]]

# The named groups below are the ones DeconstructedUrl reads with
# match.group().
proto_regex = re.compile(r'^(?P<proto>\w+)://(?P<rest>.*)$')
user_re     = r'[^/?#@]+@' # r'(?P<user>[^/?#@]+)@' # discarded for now
query_re    = r'\??[^#]*'  # r'\??(?P<query>[^#]*)' # discarded for now
domain_re   = r'(?P<domain>[^/?#]+)'
path_re     = r'(?P<path>[^?#]*)'
http_regex  = re.compile(f'{domain_re}{path_re}{query_re}.*')
ftp_regex   = re.compile(f'(?:{user_re})?{domain_re}{path_re}.*')

class UrlError(ValueError):
    """Used to report a URL or URL pattern that is invalid or unsupported."""
    pass

class DeconstructedUrl:
    """Represents a deconstructed URL or URL pattern"""
    def __init__(self, url):
        """
        Split url into protocol, domain labels and path segments.

        Sets self.url (the original string), self.proto, self.domain (labels
        in reversed order, e.g. ['com', 'example']) and self.path (non-empty
        path segments).

        Raises UrlError if url cannot be parsed or uses a protocol other than
        http, https and ftp.
        """
        self.url = url

        match = proto_regex.match(url)
        if not match:
            raise UrlError(_('invalid_URL_{}').format(url))

        self.proto = match.group('proto')
        if self.proto not in ('http', 'https', 'ftp'):
            raise UrlError(_('disallowed_protocol_{}').format(self.proto))

        if self.proto == 'ftp':
            match = ftp_regex.match(match.group('rest'))
        elif self.proto in ('http', 'https'):
            match = http_regex.match(match.group('rest'))

        if not match:
            raise UrlError(_('invalid_URL_{}').format(url))

        self.domain = match.group('domain').split('.')
        self.domain.reverse()
        # filter(None, ...) drops the empty strings produced by leading,
        # trailing and doubled slashes.
        self.path = [*filter(None, match.group('path').split('/'))]

class PatternMapping:
    """
    A mapping info, together with one of its patterns, as stored in Pattern
    Tree.
    """
    def __init__(self, pattern: str, mapping_info: MappingInfo):
        self.pattern      = pattern
        self.mapping_info = mapping_info

    def register(self, pattern_tree: dict):
        """
        Make self queryable through the Pattern Tree passed in the argument.

        pattern_tree maps protocol names to domain trees; a domain tree's
        items are path trees and a path tree's items are lists of
        PatternMapping objects. Raises UrlError if self.pattern is not a
        supported URL pattern.
        """
        deco = DeconstructedUrl(self.pattern)

        domain_tree = pattern_tree.get(deco.proto) or PatternTreeNode()
        pattern_tree[deco.proto] = domain_tree

        for path_tree in domain_tree.add(deco.domain, PatternTreeNode):
            for match_list in path_tree.add(deco.path, list):
                match_list.append(self)

class Malcontent:
    """
    Instance of this class represents a directory with files that can be loaded
    and served by Hydrilla.
    """
    def __init__(self, malcontent_dir_path: Union[Path, str]):
        """
        When an instance of Malcontent is constructed, it searches
        malcontent_dir_path for serveable site-modifying packages and loads
        them into its data structures.

        Item definitions are expected at
        '<malcontent_dir_path>/<mapping|resource>/<identifier>/<version>'.
        An item that fails to load is logged and skipped unless the current
        app has '_hydrilla_werror' set, in which case the error is re-raised.

        Raises ValueError if malcontent_dir_path is not a directory. Reads
        flask.current_app, so an application context has to be active.
        """
        # Per item type: identifier -> VersionedItemInfo.
        self.infos = {'resource': {}, 'mapping': {}}
        # Protocol name -> root PatternTreeNode of the domain tree.
        self.pattern_tree = {}

        self.malcontent_dir_path = pathlib.Path(malcontent_dir_path).resolve()

        if not self.malcontent_dir_path.is_dir():
            raise ValueError(_('malcontent_dir_path_not_dir'))

        for item_type in ('mapping', 'resource'):
            type_path = self.malcontent_dir_path / item_type
            if not type_path.is_dir():
                continue

            for subpath in type_path.iterdir():
                if not subpath.is_dir():
                    continue

                for ver_file in subpath.iterdir():
                    try:
                        self._load_item(item_type, ver_file)
                    except Exception as e:
                        if current_app._hydrilla_werror:
                            raise e from None

                        msg = _('couldnt_load_item_from_{}').format(ver_file)
                        logging.error(msg, exc_info=True)

        self._report_missing()
        self._finalize()

    def _load_item(self, item_type: str, ver_file: Path) -> None:
        """
        Reads, validates and autocompletes serveable mapping/resource
        definition, then registers information from it in data structures.

        The file name is expected to be the item's version and the name of
        its parent directory the item's identifier; ValueError is raised if
        the definition inside the file disagrees with either.
        """
        version    = util.parse_version(ver_file.name)
        identifier = ver_file.parent.name

        with open(ver_file, 'rt') as file_handle:
            item_json = json.load(file_handle)

        util.validator_for(f'api_{item_type}_description-1.schema.json')\
            .validate(item_json)

        if item_type == 'resource':
            item_info = ResourceInfo(item_json)
        else:
            item_info = MappingInfo(item_json)

        # Named replacement fields have to be supplied to str.format() as
        # keyword arguments; a dict passed positionally raises KeyError.
        if item_info.identifier != identifier:
            msg = _('item_{item}_in_file_{file}')\
                .format(item=item_info.identifier, file=ver_file)
            raise ValueError(msg)

        if item_info.version != version:
            ver_str = util.version_string(item_info.version)
            msg = _('item_version_{ver}_in_file_{file}')\
                .format(ver=ver_str, file=ver_file)
            raise ValueError(msg)

        versioned_info = self.infos[item_type].get(identifier)
        if versioned_info is None:
            versioned_info = VersionedItemInfo()
            self.infos[item_type][identifier] = versioned_info

        versioned_info.register(item_info)

    def _all_of_type(self, item_type: str) -> Iterable[ItemInfo]:
        """Iterator over all registered versions of all mappings/resources."""
        for versioned_info in self.infos[item_type].values():
            for item_info in versioned_info.by_version.values():
                yield item_info

    def _report_missing(self) -> None:
        """
        Use logger to print information about items that are referenced but
        were not loaded.

        Covers dependencies of resources and payloads of mappings; nothing is
        raised, problems are only logged as errors.
        """
        def report_missing_dependency(info: ResourceInfo, dep: str) -> None:
            # This message uses %-style placeholders, so it has to be filled
            # in with the % operator; str.format() would leave them as they
            # are.
            msg = _('no_dep_%(resource)s_%(ver)s_%(dep)s') % {
                'dep':      dep,
                'resource': info.identifier,
                'ver':      util.version_string(info.version)
            }
            logging.error(msg)

        for resource_info in self._all_of_type('resource'):
            for dep in resource_info.dependencies:
                if dep not in self.infos['resource']:
                    report_missing_dependency(resource_info, dep)

        def report_missing_payload(info: MappingInfo, payload: str) -> None:
            msg = _('no_payload_{mapping}_{ver}_{payload}')\
                .format(mapping=info.identifier, payload=payload,
                        ver=util.version_string(info.version))
            logging.error(msg)

        for mapping_info in self._all_of_type('mapping'):
            for payload in mapping_info.payloads.values():
                if payload not in self.infos['resource']:
                    report_missing_payload(mapping_info, payload)

    def _finalize(self):
        """
        Initialize structures needed to serve queries. Called once after all
        data gets loaded.

        Sorts the known versions of every item and registers every pattern of
        every mapping in the Pattern Tree. A pattern that fails to register
        is logged and skipped unless the current app has '_hydrilla_werror'
        set, in which case the error is re-raised.
        """
        for infos_dict in self.infos.values():
            for versioned_info in infos_dict.values():
                versioned_info.known_versions.sort()

        for info in self._all_of_type('mapping'):
            for pattern in info.payloads:
                try:
                    PatternMapping(pattern, info).register(self.pattern_tree)
                except Exception as e:
                    if current_app._hydrilla_werror:
                        raise e from None
                    msg = _('couldnt_register_{mapping}_{ver}_{pattern}')\
                        .format(mapping=info.identifier, pattern=pattern,
                                ver=util.version_string(info.version))
                    logging.error(msg)

    def query(self, url: str) -> list[MappingInfo]:
        """
        Return a list of registered mappings that match url.

        If multiple versions of a mapping are applicable, only the most recent
        is included in the result.

        Raises UrlError if url cannot be deconstructed.
        """
        deco = DeconstructedUrl(url)

        # Best (newest) matching MappingInfo found so far, by identifier.
        collected = {}

        domain_tree = self.pattern_tree.get(deco.proto) or PatternTreeNode()

        def process_mapping(pattern_mapping: PatternMapping) -> None:
            # A pattern with a trailing slash only applies to URLs that have
            # one, too.
            if url[-1] != '/' and pattern_mapping.pattern[-1] == '/':
                return

            info = pattern_mapping.mapping_info

            if info.identifier not in collected or \
               info.version > collected[info.identifier].version:
                collected[info.identifier] = info

        for path_tree in domain_tree.search(deco.domain):
            for matches_list in path_tree.search(deco.path):
                for pattern_mapping in matches_list:
                    process_mapping(pattern_mapping)

        return list(collected.values())

bp = Blueprint('bp', __package__)

def create_app(config_path: Path=(here / 'config.json'), flask_config: dict={}):
    """
    Create the Flask instance.

    config_path names the Hydrilla JSON config file (see load_config()).
    flask_config is merged into the Flask app's config; its optional 'lang'
    key selects the gettext catalog (default: 'en'). flask_config is only
    read, never modified.

    Raises NotImplementedError if the config contains 'hydrilla_parent'.
    """
    config = load_config(config_path)

    app = Flask(__package__, static_url_path='/',
                static_folder=config['malcontent_dir'])
    app.config.update(flask_config)

    language = flask_config.get('lang', 'en')
    translation = gettext.translation('hydrilla', localedir=(here / 'locales'),
                                      languages=[language])

    app._hydrilla_gettext = translation.gettext

    # https://stackoverflow.com/questions/9449101/how-to-stop-flask-from-initialising-twice-in-debug-mode
    if app.debug and os.environ.get('WERKZEUG_RUN_MAIN') != 'true':
        return app

    app._hydrilla_project_url = config['hydrilla_project_url']
    app._hydrilla_werror = config.get('werror', False)
    if 'hydrilla_parent' in config:
        # Look the message up through the app object: _() goes through
        # flask.current_app and the application context is only entered
        # further down.
        msg = app._hydrilla_gettext('not_implemented_{what}_{where}')\
            .format(what='hydrilla_parent', where=config_path.name)
        raise NotImplementedError(msg)

    malcontent_dir = pathlib.Path(config['malcontent_dir'])
    if not malcontent_dir.is_absolute():
        malcontent_dir = config_path.parent / malcontent_dir
    # Malcontent reads current_app while loading, hence the context.
    with app.app_context():
        app._hydrilla_malcontent = Malcontent(malcontent_dir.resolve())

    app.register_blueprint(bp)

    return app

def _(text_key):
    """Look text_key up using the gettext function of the current app."""
    return current_app._hydrilla_gettext(text_key)

def malcontent():
    """Return the Malcontent instance attached to the current app."""
    return current_app._hydrilla_malcontent

# TODO:
override create_jinja_environment() method of Flask instead of wrapping +# Jinja environment +class MyEnvironment(Environment): + """ + A wrapper class around jinja2.Environment that causes GNU gettext function + (as '_' and '__'), url_for function and 'hydrilla_project_url' config option + to be passed to every call of each template's render() method. + """ + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def get_template(self, *args, **kwargs): + template = super().get_template(*args, **kwargs) + old_render = template.render + + def new_render(*args, **kwargs): + _ = current_app._hydrilla_gettext + project_url = current_app._hydrilla_project_url + + def escaping_gettext(text_key): + from markupsafe import escape + + return str(escape(_(text_key))) + + final_kwargs = { + '_': escaping_gettext, + '__': escaping_gettext, + 'url_for': url_for, + 'hydrilla_project_url' : project_url + } + final_kwargs.update(kwargs) + + return old_render(*args, **final_kwargs) + + template.render = new_render + + return template + +j2env = MyEnvironment(loader=PackageLoader(__package__), autoescape=False) + +indexpage = j2env.get_template('index.html') +@bp.route('/') +def index(): + return indexpage.render() + +identifier_json_re = re.compile(r'^([-0-9a-z.]+)\.json$') + +def get_resource_or_mapping(item_type: str, identifier: str) -> Response: + """ + Strip '.json' from 'identifier', look the item up and send its JSON + description. 
+ """ + match = identifier_json_re.match(identifier) + if not match: + abort(404) + + identifier = match.group(1) + + versioned_info = malcontent().infos[item_type].get(identifier) + + info = versioned_info and versioned_info.get_by_ver() + if info is None: + abort(404) + + # no need for send_from_directory(); path is safe, constructed by us + return send_file(malcontent().malcontent_dir_path / item_type / info.path()) + +@bp.route('/mapping/') +def get_newest_mapping(identifier_dot_json: str) -> Response: + return get_resource_or_mapping('mapping', identifier_dot_json) + +@bp.route('/resource/') +def get_newest_resource(identifier_dot_json: str) -> Response: + return get_resource_or_mapping('resource', identifier_dot_json) + +@bp.route('/query') +def query(): + url = request.args['url'] + + mapping_refs = [i.as_query_result() for i in malcontent().query(url)] + result = { + 'api_schema_version': [1], + 'generated_by': { + 'name': 'hydrilla' + }, + 'mappings': mapping_refs + } + + return json.dumps(result) diff --git a/src/hydrilla/server/templates/base.html b/src/hydrilla/server/templates/base.html new file mode 100644 index 0000000..f95ce54 --- /dev/null +++ b/src/hydrilla/server/templates/base.html @@ -0,0 +1,122 @@ +{# SPDX-License-Identifier: CC-BY-SA-4.0 OR AGPL-3.0-or-later + +Base HTML page template. + +This file is part of Hydrilla + +Copyright (C) 2021 Wojtek Kosior + +This file is free cultural work: you can redistribute it with or +without modification under the terms of the Creative Commons +Attribution Share Alike 4.0 International as published by the +Creative Commons Corporation. + +This file is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +Creative Commons Attribution Share Alike 4.0 International +License for more details. + + +I, Wojtek Kosior, thereby promise not to sue for violation of this +file's license. 
Although I request that you do not make use this code +in a proprietary program, I am not going to enforce this in court. +#} + +{% macro link_for(endpoint, text) -%} + + {{ text }} + +{%- endmacro %} + + + + + {% block head %} + + + {% block title %}{{ _('hydrilla') }}{% endblock %} + {% endblock %} + + + {% block body %} + + +
+ {% block content %} + {% endblock %} +
+ + + {% endblock %} + + diff --git a/src/hydrilla/server/templates/index.html b/src/hydrilla/server/templates/index.html new file mode 100644 index 0000000..3063239 --- /dev/null +++ b/src/hydrilla/server/templates/index.html @@ -0,0 +1,30 @@ +{# SPDX-License-Identifier: CC-BY-SA-4.0 OR AGPL-3.0-or-later + +HTML index page template. + +This file is part of Hydrilla + +Copyright (C) 2021 Wojtek Kosior + +This file is free cultural work: you can redistribute it with or +without modification under the terms of the Creative Commons +Attribution Share Alike 4.0 International as published by the +Creative Commons Corporation. + +This file is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +Creative Commons Attribution Share Alike 4.0 International +License for more details. + + +I, Wojtek Kosior, thereby promise not to sue for violation of this +file's license. Although I request that you do not make use this code +in a proprietary program, I am not going to enforce this in court. +#} + +{% extends 'base.html' %} +{% block content %} + {{ super() }} +

{{ _('hydrilla_welcome') }}

+{% endblock %} diff --git a/src/hydrilla_dev_helper.py b/src/hydrilla_dev_helper.py new file mode 100644 index 0000000..925f414 --- /dev/null +++ b/src/hydrilla_dev_helper.py @@ -0,0 +1,308 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Definitions of helper commands to use with setuptools +# +# This file is part of Hydrilla +# +# Copyright (C) 2021 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this file's +# license. Although I request that you do not make use this code in a +# proprietary program, I am not going to enforce this in court. 
+ +from setuptools import Command +from setuptools.command.build_py import build_py +import sys +from pathlib import Path +import subprocess +import re +import os +import json +import importlib + +def mypath(path_or_string): + return Path(path_or_string).resolve() + +class Helper: + def __init__(self, project_root, app_package_name, locales_dir, + locales=['en', 'pl'], default_locale='en', locale_domain=None, + packages_root=None, debian_dir=None, config_path=None): + self.project_root = mypath(project_root) + self.app_package_name = app_package_name + self.locales_dir = mypath(locales_dir) + self.locales = locales + self.default_locale = default_locale + self.locale_domain = locale_domain or app_package_name + self.packages_root = mypath(packages_root or project_root / 'src') + self.app_package_dir = self.packages_root / app_package_name + self.debian_dir = mypath(debian_dir or project_root / 'debian') + self.config_path = config_path and mypath(config_path) + self.locale_files_list = None + + def run_command(self, command, verbose, runner=subprocess.run, **kwargs): + cwd = kwargs.get('cwd') + if cwd: + cwd = mypath(cwd) + where = f'from {cwd} ' + else: + cwd = Path.cwd().resolve() + where = '' + + str_command = [str(command[0])] + + for arg in command[1:]: + if isinstance(arg, Path): + try: + arg = str(arg.relative_to(cwd)) + except ValueError: + arg = str(arg) + + str_command.append(arg) + + if verbose: + print(f'{where}executing {" ".join(str_command)}') + runner(str_command, **kwargs) + + def create_mo_files(self, dry_run=False, verbose=False): + self.locale_files_list = [] + + for locale in self.locales: + messages_dir = self.locales_dir / locale / 'LC_MESSAGES' + + for po_path in messages_dir.glob('*.po'): + mo_path = po_path.with_suffix('.mo') + + if not dry_run: + command = ['msgfmt', po_path, '-o', mo_path] + self.run_command(command, verbose=verbose, check=True) + + self.locale_files_list.extend([po_path, mo_path]) + + def locale_files(self): + if 
self.locale_files_list is None: + self.create_mo_files(dry_run=True) + + return self.locale_files_list + + def locale_files_relative(self, to=None): + if to is None: + to = self.app_package_dir + + return [file.relative_to(to) for file in self.locale_files()] + + def flask_run(self, locale=None): + for var, val in (('ENV', 'development'), ('DEBUG', 'True')): + os.environ[f'FLASK_{var}'] = os.environ.get(f'FLASK_{var}', val) + + config = {'lang': locale or self.default_locale} + + sys.path.insert(0, str(self.packages_root)) + package = importlib.import_module(self.app_package_name) + + # make relative paths in json config resolve from project's directory + os.chdir(self.project_root) + + kwargs = {'config_path': self.config_path} if self.config_path else {} + package.create_app(flask_config=config, **kwargs).run() + + def update_po_files(self, verbose=False): + pot_path = self.locales_dir / f'{self.locale_domain}.pot' + rglob = self.app_package_dir.rglob + command = ['xgettext', '-d', self.locale_domain, '--language=Python', + '-o', pot_path, *rglob('*.py'), *rglob('*.html')] + + self.run_command(command, verbose=verbose, check=True, + cwd=self.app_package_dir) + + for locale in self.locales: + messages_dir = self.locales_dir / locale / 'LC_MESSAGES' + + for po_path in messages_dir.glob('*.po'): + if po_path.stem != self.app_package_name: + continue; + + if po_path.exists(): + command = ['msgmerge', '--update', po_path, pot_path] + else: + command = ['cp', po_path, pot_path] + + self.run_command(command, verbose=verbose, check=True) + + if (verbose): + print('removing generated .pot file') + pot_path.unlink() + + # we exclude these from the source archive we produce + bad_file_regex = re.compile(r'^\..*|build|debian|dist') + + changelog_line_regex = re.compile(r''' + ^ # match from the beginning of each line + \s* # skip initial whitespace (if any) + (?P<source_name> # capture name + [^\s(]+ + ) + \s* # again skip whitespace (if any) + \( + (?P<version> # capture version which is 
enclosed in parentheses + [^)]+ + ) + - + (?P<debrel> # capture debrel part of version separately + [0-9]+ + ) + \) + ''', re.VERBOSE) + + def make_tarballs(self, verbose=False): + changelog_path = self.project_root / 'debian' / 'changelog' + with open(changelog_path, 'rt') as file_handle: + for line in file_handle.readlines(): + match = changelog_line_regex.match(line) + if match: + break + + if not match: + raise ValueError("Couldn't extract version from debian/changelog.") + + name, ver, debrel = \ + [match.group(gn) for gn in ('source_name', 'version', 'debrel')] + + source_dirname = f'{name}-{ver}' + source_tarball_name = f'{name}_{ver}.orig.tar.gz' + debian_tarball_name = f'{name}_{ver}-{debrel}.debian.tar.gz' + + source_args = [f'--prefix={source_dirname}/', '-o', + self.project_root.parent / source_tarball_name, 'HEAD'] + + for filepath in self.project_root.iterdir(): + if not self.bad_file_regex.search(filepath.parts[-1]): + source_args.append(filepath) + + debian_args = ['-o', self.project_root.parent / debian_tarball_name, + 'HEAD', self.debian_dir] + + for args in [source_args, debian_args]: + command = ['git', 'archive', '--format=tar.gz', *args] + self.run_command(command, verbose=verbose, check=True) + + def commands(self): + helper = self + + class MsgfmtCommand(Command): + '''A custom command to run msgfmt on all .po files below '{}'.''' + + description = 'use msgfmt to generate .mo files from .po files' + user_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + def run(self): + helper.create_mo_files(verbose=self.verbose) + + MsgfmtCommand.__doc__ = MsgfmtCommand.__doc__.format(helper.locales_dir) + + class RunCommand(Command): + ''' + A custom command to run the app using flask. 
+ + This is similar in effect to: + PYTHONPATH='{packages_root}' FLASK_APP={app_package_name} \\ + FLASK_ENV=development flask run + ''' + + description = 'run the Flask app from source directory' + + user_options = [ + ('locale=', 'l', + "app locale (one of: %s; default: '%s')" % + (', '.join([f"'{l}'" for l in helper.locales]), + helper.default_locale)) + ] + + def initialize_options(self): + self.locale = helper.default_locale + + def finalize_options(self): + if self.locale not in helper.locales: + raise ValueError("Locale '%s' not supported" % self.lang) + + def run(self): + helper.flask_run(locale=self.locale) + + RunCommand.__doc__ = RunCommand.__doc__.format( + packages_root=self.packages_root, + app_package_name=self.app_package_name + ) + + class MsgmergeCommand(Command): + ''' + A custom command to run xgettext and msgmerge to update project's + .po files below '{}'. + ''' + + description = 'use xgettext and msgmerge to update (or generate) .po files for this project' + user_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + def run(self): + helper.update_po_files(verbose=self.verbose) + + MsgmergeCommand.__doc__ = \ + MsgmergeCommand.__doc__.format(helper.locales_dir) + + class TarballsCommand(Command): + ''' + A custom command to run git archive to create debian tarballs of + this project. + ''' + + description = 'use git archive to create .orig.tar.gz and .debian.tar.gz files for this project' + user_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + def run(self): + helper.make_tarballs(verbose=self.verbose) + + class BuildCommand(build_py): + ''' + The build command but runs the custom msgfmt command before build. 
+ ''' + def run(self, *args, **kwargs): + self.run_command('msgfmt') + super().run(*args, **kwargs) + + return { + 'msgfmt': MsgfmtCommand, + 'run': RunCommand, + 'msgmerge': MsgmergeCommand, + 'tarballs': TarballsCommand, + 'build_py': BuildCommand + } diff --git a/src/pydrilla/__init__.py b/src/pydrilla/__init__.py deleted file mode 100644 index 8d1565b..0000000 --- a/src/pydrilla/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .pydrilla import create_app diff --git a/src/pydrilla/config.json b/src/pydrilla/config.json deleted file mode 100644 index 6bb5440..0000000 --- a/src/pydrilla/config.json +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: CC0-1.0 - -// Example Hydrilla config file. -// -// Copyright (C) 2021 Wojtek Kosior -// -// Available under the terms of Creative Commons Zero v1.0 Universal. - -{ - "content_dir": "/var/lib/hydrilla/content", - "static_resource_uri": "http://localhost:8000/", - "hydrilla_sources_uri": "https://git.koszko.org/pydrilla/", - "try_configs": ["/etc/pydrilla/config.json"] -} diff --git a/src/pydrilla/locales/en/LC_MESSAGES/pydrilla.po b/src/pydrilla/locales/en/LC_MESSAGES/pydrilla.po deleted file mode 100644 index f9e6a82..0000000 --- a/src/pydrilla/locales/en/LC_MESSAGES/pydrilla.po +++ /dev/null @@ -1,127 +0,0 @@ -# SPDX-License-Identifier: CC0-1.0 - -# English localization -# -# This file is part of Hydrilla -# -# Copyright (C) 2021 Wojtek Kosior -# -# This file is free cultural work: you can redistribute it with or -# without modification under the terms of the CC0 1.0 Universal License -# as published by the Creative Commons Corporation. -# -# This file is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# CC0 1.0 Universal License for more details. 
- -msgid "" -msgstr "" -"Project-Id-Version: Hydrilla 0.2\n" -"Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2021-11-13 19:03+0100\n" -"PO-Revision-Date: 2021-11-06 08:42+0100\n" -"Last-Translator: Wojtek Kosior \n" -"Language-Team: English\n" -"Language: en\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=UTF-8\n" -"Content-Transfer-Encoding: 8bit\n" - -#: pydrilla.py:97 -msgid "path_is_absolute_{}" -msgstr "Provided path '{}' is absolute." - -#: pydrilla.py:104 -#, python-brace-format -msgid "not_implemented_{what}_{where}" -msgstr "" -"Attempt to use '{what}' in '{where}' but this feature is not yet implemented." - -#: pydrilla.py:194 -#, python-brace-format -msgid "uuid_mismatch_{identifier}" -msgstr "Two different uuids were specified for item '{identifier}'." - -#: pydrilla.py:201 -#, python-brace-format -msgid "version_clash_{identifier}_{version}" -msgstr "Version '{version}' specified more than once for item '{identifier}'." - -#: pydrilla.py:297 pydrilla.py:309 -msgid "invalid_URL_{}" -msgstr "Invalid URL/pattern: '{}'." - -#: pydrilla.py:301 -msgid "disallowed_protocol_{}" -msgstr "Disallowed protocol: '{}'." - -#: pydrilla.py:391 -msgid "license_clash_{}" -msgstr "License '{}' defined more than once." - -#: pydrilla.py:408 -msgid "source_name_clash_{}" -msgstr "Source name '{}' used more than once." - -#: pydrilla.py:426 -#, python-format -msgid "couldnt_load_definition_from_%s" -msgstr "Couldn't load definition from '%s'." - -#: pydrilla.py:442 -#, python-format -msgid "no_index_license_%(source)s_%(lic)s" -msgstr "Unknown license '%(lic)s' used by index.json of '%(source)s'." - -#: pydrilla.py:449 -#, python-format -msgid "no_resource_license_%(resource)s_%(ver)s_%(lic)s" -msgstr "" -"Unknown license '%(lic)s' used by resource '%(resource)s', version '%(ver)s'." 
- -#: pydrilla.py:451 -#, python-format -msgid "no_mapping_license_%(mapping)s_%(ver)s_%(lic)s" -msgstr "" -"Unknown license '%(lic)s' used by mapping '%(mapping)s', version '%(ver)s'." - -#: pydrilla.py:474 -#, python-format -msgid "no_dep_%(resource)s_%(ver)s_%(dep)s" -msgstr "" -"Unknown dependency '%(dep)s' of resource '%(resource)s', version '%(ver)s'." - -#: pydrilla.py:484 -#, python-format -msgid "no_payload_%(mapping)s_%(ver)s_%(payload)s" -msgstr "" -"Unknown payload '%(payload)s' of mapping '%(mapping)s', version '%(ver)s'." - -#: pydrilla.py:512 -#, python-format -msgid "couldnt_register_%(mapping)s_%(ver)s_%(pattern)s" -msgstr "" -"Couldn't register mapping '%(mapping)s', version '%(ver)s' (pattern " -"'%(pattern)s')." - -#: pydrilla.py:566 -msgid "content_dir_path_not_dir" -msgstr "Provided \"content_dir\" path does not name a direcotry." - -#: pydrilla.py:578 -#, python-format -msgid "couldnt_load_content_from_%s" -msgstr "Couldn't load content from '%s'." - -#: pydrilla.py:603 -msgid "config_key_absent_{}" -msgstr "Config key \"{}\" not provided." - -#: templates/index.html:4 -msgid "hydrilla_welcome" -msgstr "Welcome to Hydrilla!" - -#: templates/base.html:55 templates/base.html:61 -msgid "hydrilla" -msgstr "Hydrilla" diff --git a/src/pydrilla/pydrilla.py b/src/pydrilla/pydrilla.py deleted file mode 100644 index d7aef76..0000000 --- a/src/pydrilla/pydrilla.py +++ /dev/null @@ -1,755 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later - -# Main repository logic. -# -# This file is part of Hydrilla -# -# Copyright (C) 2021 Wojtek Kosior -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <https://www.gnu.org/licenses/>. -# -# -# I, Wojtek Kosior, thereby promise not to sue for violation of this -# file's license. Although I request that you do not make use this code -# in a proprietary program, I am not going to enforce this in court. - -from flask import Flask, Blueprint, current_app, url_for, abort, request, \ - redirect -from jinja2 import Environment, PackageLoader -import re -from hashlib import sha256 -import os -import pathlib -import json -import gettext -import logging - -SCHEMA_VERSION = [0, 2] - -strip_comment_re = re.compile(r''' -^ # match from the beginning of each line -( # catch the part before '//' comment - (?: # this group matches either a string or a single out-of-string character - [^"/] | - " - (?: # this group matches any in-a-string character - [^"\\] | # match any normal character - \\[^u] | # match any escaped character like '\f' or '\n' - \\u[a-fA-F0-9]{4} # match an escape - )* - " - )* -) -# expect either end-of-line or a comment: -# * unterminated strings will cause matching to fail -# * bad comment (with '/' instead of '//') will be indicated by second group -# having length 1 instead of 2 or 0 -(//?|$) -''', re.VERBOSE) - -def strip_json_comments(text): - processed = 0 - stripped_text = [] - for line in text.split('\n'): - match = strip_comment_re.match(line) - - if match is None: # unterminated string - # ignore this error, let json module report it - stripped = line - elif len(match[2]) == 1: - raise json.JSONDecodeError('bad comment', text, - processed + len(match[1])) - else: - stripped = match[1] - - stripped_text.append(stripped) - processed += len(line) + 1 - - return 
'\n'.join(stripped_text) - -here = pathlib.Path(__file__).resolve().parent - -bp = Blueprint('bp', __package__) - -def load_config(config_path): - config = {} - to_load = [config_path] - failures_ok = [False] - - while to_load: - path = to_load.pop() - can_fail = failures_ok.pop() - - try: - with open(config_path) as config_file: - new_config = json.loads(strip_json_comments(config_file.read())) - except Exception as e: - if can_fail: - continue - raise e from None - - config.update(new_config) - - for key, failure_ok in [('try_configs', True), ('use_configs', False)]: - paths = new_config.get(key, []) - paths.reverse() - to_load.extend(paths) - failures_ok.extend([failure_ok] * len(paths)) - - for key in ['try_configs', 'use_configs']: - if key in config: - config.pop(key) - - return config - -def get_content_file_path(path): - if os.path.sep != '/': - path.replace('/', os.path.sep) - - path = pathlib.Path(path) - if path.is_absolute(): - raise ValueError(_('path_is_absolute_{}').format(path)) - - return path - -class MyNotImplError(NotImplementedError): - '''Raised when a planned but not-yet-completed feature is used.''' - def __init__(self, what, where): - super().__init__(_('not_implemented_{what}_{where}') - .format(what=what, where=where)) - -def normalize_version(ver): - ''' - ver is an array of integers. Strip right-most zeroes from ver. - - Returns a *new* array. Doesn't modify its argument. - ''' - new_len = 0 - for i, num in enumerate(ver): - if num != 0: - new_len = i + 1 - - return ver[:new_len] - -def parse_version(ver_str): - ''' - Convert ver_str into an array representation, e.g. for ver_str="4.6.13.0" - return [4, 6, 13, 0]. - ''' - return [int(num) for num in ver_str.split('.')] - -def version_string(ver, rev=None): - ''' - ver is an array of integers. rev is an optional integer. Produce string - representation of version (optionally with revision number), like: - 1.2.3-5 - No version normalization is performed. 
- ''' - return '.'.join([str(n) for n in ver]) + ('' if rev is None else f'-{rev}') - -class VersionedContentItem: - '''Stores definitions of multiple versions of website content item.''' - def __init__(self): - self.uuid = None - self.identifier = None - self.by_version = {} - self.known_versions = [] - - def register_item(self, item): - '''Make item queryable by version. Perform sanity checks for uuid.''' - if self.identifier is None: - self.identifier = item['identifier'] - self.uuid = item['uuid'] - elif self.uuid != item['uuid']: - raise ValueError(_('uuid_mismatch_{identifier}') - .format(identifier=self.identifier)) - - ver = item['version'] - ver_str = version_string(ver) - - if ver_str in self.by_version: - raise ValueError(_('version_clash_{identifier}_{version}') - .format(identifier=self.identifier, - version=ver_str)) - - self.by_version[ver_str] = item - self.known_versions.append(ver) - - def get_by_ver(self, ver=None): - ''' - Find and return definition of the newest version of item. - - If ver is specified, instead find and return definition of that version - of the item (or None is absent). - ''' - ver = version_string(ver or self.known_versions[-1]) - - return self.by_version.get(ver) - - def get_all(self): - '''Return a list of all definitions of item, ordered by version.''' - return [self.by_version[version_string(ver)] - for ver in self.known_versions] - -class PatternTreeNode: - ''' - "Pattern Tree" is how we refer to the data structure used for querying - Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal - is to make it possible for given URL to quickly retrieve all known patterns - that match it. - ''' - def __init__(self): - self.wildcard_matches = [None, None, None] - self.literal_match = None - self.children = {} - - def search(self, segments): - ''' - Yields all matches of this segments sequence against the tree that - starts at this node. 
Results are produces in order from greatest to - lowest pattern specificity. - ''' - nodes = [self] - - for segment in segments: - next_node = nodes[-1].children.get(segment) - if next_node is None: - break - - nodes.append(next_node) - - nsegments = len(segments) - cond_literal = lambda: len(nodes) == nsegments - cond_wildcard = [ - lambda: len(nodes) + 1 == nsegments and segments[-1] != '*', - lambda: len(nodes) + 1 < nsegments, - lambda: len(nodes) + 1 != nsegments or segments[-1] != '***' - ] - - while nodes: - node = nodes.pop() - - for item, condition in [(node.literal_match, cond_literal), - *zip(node.wildcard_matches, cond_wildcard)]: - if item is not None and condition(): - yield item - - def add(self, segments, item_instantiator): - ''' - Make item queryable through (this branch of) the Pattern Tree. If there - was not yet any item associated with the tree path designated by - segments, create a new one using item_instantiator() function. Return - all items matching this path (both the ones that existed and the ones - just created). 
- ''' - node = self - segment = None - - for segment in segments: - wildcards = node.wildcard_matches - - child = node.children.get(segment) or PatternTreeNode() - node.children[segment] = child - node = child - - if node.literal_match is None: - node.literal_match = item_instantiator() - - if segment not in ('*', '**', '***'): - return [node.literal_match] - - if wildcards[len(segment) - 1] is None: - wildcards[len(segment) - 1] = item_instantiator() - - return [node.literal_match, wildcards[len(segment) - 1]] - -proto_regex = re.compile(r'^(?P<proto>\w+)://(?P<rest>.*)$') -user_re = r'[^/?#@]+@' # r'(?P<user>[^/?#@]+)@' # discarded for now -query_re = r'\??[^#]*' # r'\??(?P<query>[^#]*)' # discarded for now -domain_re = r'(?P<domain>[^/?#]+)' -path_re = r'(?P<path>[^?#]*)' -http_regex = re.compile(f'{domain_re}{path_re}{query_re}.*') -ftp_regex = re.compile(f'(?:{user_re})?{domain_re}{path_re}.*') - -class UrlError(ValueError): - pass - -class DeconstructedUrl: - '''Represents a deconstructed URL or URL pattern''' - def __init__(self, url): - self.url = url - - match = proto_regex.match(url) - if not match: - raise UrlError(_('invalid_URL_{}').format(url)) - - self.proto = match.group('proto') - if self.proto not in ('http', 'https', 'ftp'): - raise UrlError(_('disallowed_protocol_{}').format(proto)) - - if self.proto == 'ftp': - match = ftp_regex.match(match.group('rest')) - elif self.proto in ('http', 'https'): - match = http_regex.match(match.group('rest')) - - if not match: - raise UrlError(_('invalid_URL_{}').format(url)) - - self.domain = match.group('domain').split('.') - self.domain.reverse() - self.path = [*filter(None, match.group('path').split('/'))] - -class MappingItem: - ''' - A mapping, together with one of its patterns, as stored in Pattern Tree. 
- ''' - def __init__(self, pattern, mapping): - self.pattern = pattern - self.mapping = mapping - - def register(self, patterns_by_proto): - ''' - Make self queryable through the Pattern Tree that starts with the - protocols dictionary passed in the argument. - ''' - deco = DeconstructedUrl(self.pattern) - - domain_tree = patterns_by_proto.get(deco.proto) or PatternTreeNode() - patterns_by_proto[deco.proto] = domain_tree - - for path_tree in domain_tree.add(deco.domain, PatternTreeNode): - for match_list in path_tree.add(deco.path, list): - match_list.append(self) - -class Content: - '''Stores serveable website content.''' - def __init__(self, content_dir_path): - ''' - When an instance of Content is constructed, it searches - content_dir_path for custom serveable site content and loads it. - ''' - self.resources = {} - self.mappings = {} - self.licenses = {} - self.indexes = {} - self.definition_processors = { - 'resource': self._process_resource_or_mapping, - 'mapping': self._process_resource_or_mapping, - 'license': self._process_license - } - self.patterns_by_proto = {} - self.file_sha256sums = {} - - self.content_dir_path = pathlib.Path(content_dir_path).resolve() - - if not self.content_dir_path.is_dir(): - raise ValueError(_('content_dir_path_not_dir')) - - for subdir_path in self.content_dir_path.iterdir(): - if not subdir_path.is_dir(): - continue - try: - self._load_content_from_subdir(subdir_path, subdir_path.name) - except Exception as e: - if current_app._pydrilla_werror: - raise e from None - logging.error(_('couldnt_load_content_from_%s'), subdir_path, - exc_info=True) - - self._report_missing() - self._finalize() - - def _load_content_from_subdir(self, subdir_path, source_name): - ''' - Helper function used to load definitions from index.json of a - subdirectory of the content direcotory. 
- ''' - index_path = subdir_path / 'index.json' - with open(index_path) as index_file: - index = json.loads(strip_json_comments(index_file.read())) - - self._process_index(index, source_name) - - @staticmethod - def register_item(dict, item): - ''' - Helper function used to add a versioned item definition to content - data structures. - ''' - identifier = item['identifier'] - versioned_item = dict.get(identifier) - if versioned_item is None: - versioned_item = VersionedContentItem() - dict[identifier] = versioned_item - - versioned_item.register_item(item) - - @staticmethod - def _process_copyright_and_license(definition): - '''Helper function used by other _process_*() methods.''' - for field in ['copyright', 'licenses']: - if definition[field] == 'auto': - raise MyNotImplError(f'"{{field}}": "auto"', - definition['source_name']) - - def _get_file_sha256sum(self, path): - ''' - Compute sha256 of the file at path. Cache results on this Content - object. - ''' - path = path.resolve() - sha256sum = self.file_sha256sums.get(path) - - if sha256sum is None: - with open(path, mode='rb') as hashed_file: - sha256sum = sha256(hashed_file.read()).digest().hex() - self.file_sha256sums[path] = sha256sum - - return sha256sum - - def _add_file_sha256sum(self, source_name, file_object): - ''' - Expect file_object to be a dict with field "file" holding a file path - relative to content directory's subdirectory source_name. Compute or - fetch from cache the sha256 sum of that file and put it in file_object's - "sha256" field. - ''' - file_path = self.content_dir_path / source_name / file_object['file'] - file_object['sha256'] = self._get_file_sha256sum(file_path) - - def _process_resource_or_mapping(self, definition, index): - ''' - Sanitizes, autocompletes and registers serveable mapping/resource - definition. 
- ''' - definition['version'] = normalize_version(definition['version']) - - if definition['type'] == 'resource': - self._process_copyright_and_license(definition) - definition['dependencies'] = definition.get('dependencies', []) - self.register_item(self.resources, definition) - source_name = definition['source_name'] - for script in definition['scripts']: - self._add_file_sha256sum(source_name, script) - else: - self.register_item(self.mappings, definition) - - def _process_license(self, license, index): - '''Sanitizes and registers serveable license definition.''' - identifier = license['identifier'] - if identifier in self.licenses: - raise ValueError(_('license_clash_{}').format(identifier)) - - self.licenses[identifier] = license - - source_name = license['source_name'] - for legal_text in license['legal_text']: - self._add_file_sha256sum(source_name, legal_text) - - notice = license.get('notice') - if notice is not None: - self._add_file_sha256sum(source_name, notice) - - def _process_index(self, index, source_name): - ''' - Sanitizes, autocompletes and registers data from a loaded index.json - file. 
- ''' - schema_ver = normalize_version(index['schema_version']) - index['schema_version'] = schema_ver - if schema_ver != SCHEMA_VERSION: - raise ValueError('index_json_schema_mismatch_{found}_{required}' - .format(found=version_string(schema_ver), - required=version_string(SCHEMA_VERSION))) - - if source_name in self.indexes: - raise ValueError(_('source_name_clash_{}').format(source_name)) - - index['source_name'] = source_name - - self._process_copyright_and_license(index) - - self.indexes[source_name] = index - - for definition in index['definitions']: - try: - definition['source_name'] = source_name - definition['source_copyright'] = index['copyright'] - definition['source_licenses'] = index['licenses'] - processor = self.definition_processors[definition['type']] - processor(definition, index) - except Exception as e: - if current_app._pydrilla_werror: - raise e from None - logging.error(_('couldnt_load_definition_from_%s'), subdir_path, - exc_info=True) - @staticmethod - def all_items(versioned_items_dict): - '''Iterator over all registered versions of all items.''' - for versioned_item in versioned_items_dict.values(): - for item in versioned_item.by_version.values(): - yield item - - def _report_missing(self): - ''' - Use logger to print information about items that are referenced but - were not loaded. 
- ''' - def report_missing_license(object, object_type, lic): - if object_type == 'index': - logging.error(_('no_index_license_%(source)s_%(lic)s'), - source=object['source_name'], lic=lic) - return - - ver_str = version_string(object['version']) - kwargs = {object_type: object['identifier'], ver: ver_str, lic: lic} - if object_type == 'resource': - fmt = _('no_resource_license_%(resource)s_%(ver)s_%(lic)s') - else: - fmt = _('no_mapping_license_%(mapping)s_%(ver)s_%(lic)s') - - logging.error(fmt, **kwargs) - - for object_type, iterable in [ - ('index', self.indexes.values()), - ('resource', self.all_items(self.resources)) - ]: - for object in iterable: - to_process = [object['licenses']] - licenses = [] - while to_process: - term = to_process.pop() - - if type(term) is str: - if term not in ['or', 'and'] and \ - term not in self.licenses: - report_missing_license(object, object_type, lic) - continue - - to_process.extend(term) - - def report_missing_dependency(resource, dep): - logging.error(_('no_dep_%(resource)s_%(ver)s_%(dep)s'), - dep=dep, resource=resource['identifier'], - ver=version_string(resource['version'])) - - for resource in self.all_items(self.resources): - for dep in resource['dependencies']: - if dep not in self.resources: - report_missing_dependency(resource, dep) - - def report_missing_payload(mapping, payload): - logging.error(_('no_payload_%(mapping)s_%(ver)s_%(payload)s'), - mapping=mapping['identifier'], payload=payload, - ver=version_string(mapping['version'])) - - for mapping in self.all_items(self.mappings): - for payload in mapping['payloads']: - payload = payload['payload'] - if payload not in self.resources: - report_missing_payload(mapping, payload) - - def _finalize(self): - ''' - Initialize structures needed to serve queries. Called once after all - data gets loaded. 
- ''' - for dict in [self.resources, self.mappings]: - for versioned_item in dict.values(): - versioned_item.known_versions.sort() - - for mapping in self.all_items(self.mappings): - for payload in mapping['payloads']: - pattern = payload['pattern'] - try: - MappingItem(pattern, mapping)\ - .register(self.patterns_by_proto) - except Exception as e: - if current_app._pydrilla_werror: - raise e from None - logging.error( - _('couldnt_register_%(mapping)s_%(ver)s_%(pattern)s'), - mapping=mapping['identifier'], pattern=pattern, - ver=version_string(mapping['version']) - ) - - def query(self, url): - ''' - Return a list of registered mappings that match url. - - If multiple versions of a mapping are applicable, only the most recent - is included in the result. - ''' - deco = DeconstructedUrl(url) - - mappings = {} - - domain_tree = self.patterns_by_proto.get(deco.proto) \ - or PatternTreeNode() - - def process_item(item): - if url[-1] != '/' and item.pattern[-1] == '/': - return - - identifier = item.mapping['identifier'] - - if identifier not in mappings or \ - item.mapping['version'] > mappings[identifier]['version']: - mappings[identifier] = item.mapping - - for path_tree in domain_tree.search(deco.domain): - for item_list in path_tree.search(deco.path): - for item in item_list: - process_item(item) - - return list(mappings.values()) - -def create_app(config_path=(here / 'config.json'), flask_config={}): - app = Flask(__package__) - app.config.update(flask_config) - - language = flask_config.get('lang', 'en') - translation = gettext.translation('pydrilla', localedir=(here / 'locales'), - languages=[language]) - - app._pydrilla_gettext = translation.gettext - - # https://stackoverflow.com/questions/9449101/how-to-stop-flask-from-initialising-twice-in-debug-mode - if app.debug and os.environ.get('WERKZEUG_RUN_MAIN') != 'true': - return app - - config = load_config(config_path) - for key in ['static_resource_uri', 'content_dir', 'hydrilla_sources_uri']: - if key not in 
config: - raise ValueError(_('config_key_absent_{}').format(key)) - - app._pydrilla_static_resource_uri = config['static_resource_uri'] - if app._pydrilla_static_resource_uri[-1] != '/': - app._pydrilla_static_resource_uri += '/' - app._pydrilla_hydrilla_sources_uri = config['hydrilla_sources_uri'] - app._pydrilla_werror = config.get('werror', False) - if 'hydrilla_parent' in config: - raise MyNotImplError('hydrilla_parent', config_path.name) - - content_dir = pathlib.Path(config['content_dir']) - if not content_dir.is_absolute(): - content_dir = config_path.parent / content_dir - with app.app_context(): - app._pydrilla_content = Content(content_dir.resolve()) - - app.register_blueprint(bp) - - return app - -def _(text_key): - return current_app._pydrilla_gettext(text_key) - -def content(): - return current_app._pydrilla_content - -class MyEnvironment(Environment): - ''' - A wrapper class around jinja2.Environment that causes GNU gettext function - (as '_' and '__'), url_for function and 'hydrilla_sources_uri' config option - to be passed to every call of each template's render() method. 
- ''' - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def get_template(self, *args, **kwargs): - template = super().get_template(*args, **kwargs) - old_render = template.render - - def new_render(*args, **kwargs): - _ = current_app._pydrilla_gettext - sources_uri = current_app._pydrilla_hydrilla_sources_uri - - def escaping_gettext(text_key): - from markupsafe import escape - - return str(escape(_(text_key))) - - final_kwargs = { - '_': escaping_gettext, - '__': escaping_gettext, - 'url_for': url_for, - 'hydrilla_sources_uri' : sources_uri - } - final_kwargs.update(kwargs) - - return old_render(*args, **final_kwargs) - - template.render = new_render - - return template - -j2env = MyEnvironment(loader=PackageLoader(__package__), autoescape=False) - -indexpage = j2env.get_template('index.html') -@bp.route('/') -def index(): - return indexpage.render() - -def get_resource_or_mapping(identifier, get_dict): - ver = request.args.get('ver') - versioned_item = get_dict().get(identifier) - - if ver == 'all': - definition = versioned_item.get_all() if versioned_item else [] - else: - if ver is not None: - try: - ver = normalize_version(parse_version(ver)) - except: - abort(400) - - definition = versioned_item and versioned_item.get_by_ver(ver) - if definition is None: - abort(404) - - return json.dumps(definition) - -def get_license_or_source(identifier, get_dict): - definition = get_dict().get(identifier) - if definition is None: - abort(404) - - return json.dumps(definition) - -for item_type, get_dict, get_item in [ - ('resource', lambda: content().resources, get_resource_or_mapping), - ('mapping', lambda: content().mappings, get_resource_or_mapping), - ('license', lambda: content().licenses, get_license_or_source), - ('source', lambda: content().indexes, get_license_or_source) -]: - def _get_item(identifier, get_dict=get_dict, get_item=get_item): - return get_item(identifier, get_dict) - - bp.add_url_rule(f'/{item_type}s/', item_type, 
_get_item) - -@bp.route('/query') -def query(): - url = request.args['url'] - - return json.dumps(content().query(url)) - -@bp.route('/sources//') -def get_file(identifier, path): - if identifier not in content().indexes: - abort(404) - - new_uri = f'{current_app._pydrilla_static_resource_uri}{identifier}/{path}' - - return redirect(new_uri, code=301) diff --git a/src/pydrilla/templates/base.html b/src/pydrilla/templates/base.html deleted file mode 100644 index 7b26b64..0000000 --- a/src/pydrilla/templates/base.html +++ /dev/null @@ -1,122 +0,0 @@ -{# SPDX-License-Identifier: CC-BY-NC-SA-4.0 - -Base HTML page template. - -This file is part of Hydrilla - -Copyright (C) 2021 Wojtek Kosior - -This file is free cultural work: you can redistribute it with or -without modification under the terms of the Creative Commons -Attribution Share Alike 4.0 International as published by the -Creative Commons Corporation. - -This file is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -Creative Commons Attribution Share Alike 4.0 International -License for more details. - - -I, Wojtek Kosior, thereby promise not to sue for violation of this -file's license. Although I request that you do not make use this code -in a proprietary program, I am not going to enforce this in court. -#} - -{% macro link_for(endpoint, text) -%} - - {{ text }} - -{%- endmacro %} - - - - - {% block head %} - - - {% block title %}{{ _('hydrilla') }}{% endblock %} - {% endblock %} - - - {% block body %} - - -
- {% block content %} - {% endblock %} -
- - - {% endblock %} - - diff --git a/src/pydrilla/templates/index.html b/src/pydrilla/templates/index.html deleted file mode 100644 index 2555df0..0000000 --- a/src/pydrilla/templates/index.html +++ /dev/null @@ -1,30 +0,0 @@ -{# SPDX-License-Identifier: CC-BY-NC-SA-4.0 - -HTML index page template. - -This file is part of Hydrilla - -Copyright (C) 2021 Wojtek Kosior - -This file is free cultural work: you can redistribute it with or -without modification under the terms of the Creative Commons -Attribution Share Alike 4.0 International as published by the -Creative Commons Corporation. - -This file is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -Creative Commons Attribution Share Alike 4.0 International -License for more details. - - -I, Wojtek Kosior, thereby promise not to sue for violation of this -file's license. Although I request that you do not make use this code -in a proprietary program, I am not going to enforce this in court. -#} - -{% extends 'base.html' %} -{% block content %} - {{ super() }} -

{{ _('hydrilla_welcome') }}

-{% endblock %} diff --git a/src/pydrilla_dev_helper.py b/src/pydrilla_dev_helper.py deleted file mode 100644 index 88dc63e..0000000 --- a/src/pydrilla_dev_helper.py +++ /dev/null @@ -1,293 +0,0 @@ -# SPDX-License-Identifier: GPL-3.0-or-later - -# Definitions of helper commands to use with setuptools -# -# This file is part of Hydrilla -# -# Copyright (C) 2021 Wojtek Kosior -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -# -# -# I, Wojtek Kosior, thereby promise not to sue for violation of this file's -# license. Although I request that you do not make use this code in a -# proprietary program, I am not going to enforce this in court. - -from setuptools import Command -from setuptools.command.build_py import build_py -import sys -from pathlib import Path -import subprocess -import re -import os -import json -import importlib - -def mypath(path_or_string): - return Path(path_or_string).resolve() - -debrel_regex = re.compile(r'^[^(]*\([^-]*-([^)]*)\)') - -def extract_debrel(debian_dir): - changelog_path = mypath(debian_dir) / 'changelog' - with open(changelog_path) as changelog_file: - try: - return debrel_regex.match(changelog_file.readline())[1] - except TypeError: - raise RuntimeException('Cannot extract debrel from %s.' 
% - changelog_path) - -class Helper: - def __init__(self, project_root, app_package_name, version, locales_dir, - locales=['en', 'pl'], default_locale='en', locale_domain=None, - packages_root=None, debian_dir=None, config_path=None): - self.project_root = mypath(project_root) - self.app_package_name = app_package_name - self.version = version - self.locales_dir = mypath(locales_dir) - self.locales = locales - self.default_locale = default_locale - self.locale_domain = locale_domain or app_package_name - self.packages_root = mypath(packages_root or project_root / 'src') - self.app_package_dir = self.packages_root / app_package_name - self.debian_dir = mypath(debian_dir or project_root / 'debian') - self.config_path = config_path and mypath(config_path) - self.locale_files_list = None - - def run_command(self, command, verbose, runner=subprocess.run, **kwargs): - cwd = kwargs.get('cwd') - if cwd: - cwd = mypath(cwd) - where = f'from {cwd} ' - else: - cwd = Path.cwd().resolve() - where = '' - - str_command = [str(command[0])] - - for arg in command[1:]: - if isinstance(arg, Path): - try: - arg = str(arg.relative_to(cwd)) - except ValueError: - arg = str(arg) - - str_command.append(arg) - - if verbose: - print(f'{where}executing {" ".join(str_command)}') - runner(str_command, **kwargs) - - def create_mo_files(self, dry_run=False, verbose=False): - self.locale_files_list = [] - - for locale in self.locales: - messages_dir = self.locales_dir / locale / 'LC_MESSAGES' - - for po_path in messages_dir.glob('*.po'): - mo_path = po_path.with_suffix('.mo') - - if not dry_run: - command = ['msgfmt', po_path, '-o', mo_path] - self.run_command(command, verbose=verbose, check=True) - - self.locale_files_list.extend([po_path, mo_path]) - - def locale_files(self): - if self.locale_files_list is None: - self.create_mo_files(dry_run=True) - - return self.locale_files_list - - def locale_files_relative(self, to=None): - if to is None: - to = self.app_package_dir - - return 
[file.relative_to(to) for file in self.locale_files()] - - def flask_run(self, locale=None): - for var, val in (('ENV', 'development'), ('DEBUG', 'True')): - os.environ[f'FLASK_{var}'] = os.environ.get(f'FLASK_{var}', val) - - config = {'lang': locale or self.default_locale} - - sys.path.insert(0, str(self.packages_root)) - package = importlib.import_module(self.app_package_name) - - # make relative paths in json config resolve from project's directory - os.chdir(self.project_root) - - kwargs = {'config_path': self.config_path} if self.config_path else {} - package.create_app(flask_config=config, **kwargs).run() - - def update_po_files(self, verbose=False): - pot_path = self.locales_dir / f'{self.locale_domain}.pot' - rglob = self.app_package_dir.rglob - command = ['xgettext', '-d', self.locale_domain, '--language=Python', - '-o', pot_path, *rglob('*.py'), *rglob('*.html')] - - self.run_command(command, verbose=verbose, check=True, - cwd=self.app_package_dir) - - for locale in self.locales: - messages_dir = self.locales_dir / locale / 'LC_MESSAGES' - - for po_path in messages_dir.glob('*.po'): - if po_path.stem != self.app_package_name: - continue; - - if po_path.exists(): - command = ['msgmerge', '--update', po_path, pot_path] - else: - command = ['cp', po_path, pot_path] - - self.run_command(command, verbose=verbose, check=True) - - if (verbose): - print('removing generated .pot file') - pot_path.unlink() - - # we exclude these from the source archive we produce - bad_file_regex = re.compile(r'^\..*|build|debian|dist') - - def make_tarballs(self, verbose=False): - name=self.app_package_name - ver=self.version - debrel=extract_debrel(self.debian_dir) - - source_dirname = f'{name}-{ver}' - source_tarball_name = f'{name}_{ver}.orig.tar.gz' - debian_tarball_name = f'{name}_{ver}-{debrel}.debian.tar.gz' - - source_args = [f'--prefix={source_dirname}/', '-o', - self.project_root.parent / source_tarball_name, 'HEAD'] - - for filepath in self.project_root.iterdir(): - if 
not self.bad_file_regex.search(filepath.parts[-1]): - source_args.append(filepath) - - debian_args = ['-o', self.project_root.parent / debian_tarball_name, - 'HEAD', self.debian_dir] - - for args in [source_args, debian_args]: - command = ['git', 'archive', '--format=tar.gz', *args] - self.run_command(command, verbose=verbose, check=True) - - def commands(self): - helper = self - - class MsgfmtCommand(Command): - '''A custom command to run msgfmt on all .po files below '{}'.''' - - description = 'use msgfmt to generate .mo files from .po files' - user_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - helper.create_mo_files(verbose=self.verbose) - - MsgfmtCommand.__doc__ = MsgfmtCommand.__doc__.format(helper.locales_dir) - - class RunCommand(Command): - ''' - A custom command to run the app using flask. - - This is similar in effect to: - PYTHONPATH='{packages_root}' FLASK_APP={app_package_name} \\ - FLASK_ENV=development flask run - ''' - - description = 'run the Flask app from source directory' - - user_options = [ - ('locale=', 'l', - "app locale (one of: %s; default: '%s')" % - (', '.join([f"'{l}'" for l in helper.locales]), - helper.default_locale)) - ] - - def initialize_options(self): - self.locale = helper.default_locale - - def finalize_options(self): - if self.locale not in helper.locales: - raise ValueError("Locale '%s' not supported" % self.lang) - - def run(self): - helper.flask_run(locale=self.locale) - - RunCommand.__doc__ = RunCommand.__doc__.format( - packages_root=self.packages_root, - app_package_name=self.app_package_name - ) - - class MsgmergeCommand(Command): - ''' - A custom command to run xgettext and msgmerge to update project's - .po files below '{}'. 
- ''' - - description = 'use xgettext and msgmerge to update (or generate) .po files for this project' - user_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - helper.update_po_files(verbose=self.verbose) - - MsgmergeCommand.__doc__ = \ - MsgmergeCommand.__doc__.format(helper.locales_dir) - - class TarballsCommand(Command): - ''' - A custom command to run git archive to create debian tarballs of - this project. - ''' - - description = 'use git archive to create .orig.tar.gz and .debian.tar.gz files for this project' - user_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - helper.make_tarballs(verbose=self.verbose) - - class BuildCommand(build_py): - ''' - The build command but runs the custom msgfmt command before build. - ''' - def run(self, *args, **kwargs): - self.run_command('msgfmt') - super().run(*args, **kwargs) - - return { - 'msgfmt': MsgfmtCommand, - 'run': RunCommand, - 'msgmerge': MsgmergeCommand, - 'tarballs': TarballsCommand, - 'build_py': BuildCommand - } diff --git a/src/test/__init__.py b/src/test/__init__.py index e69de29..d382ead 100644 --- a/src/test/__init__.py +++ b/src/test/__init__.py @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: CC0-1.0 + +# Copyright (C) 2022 Wojtek Kosior +# +# Available under the terms of Creative Commons Zero v1.0 Universal. diff --git a/src/test/development_config.json b/src/test/development_config.json index 30cf10d..c2382f7 100644 --- a/src/test/development_config.json +++ b/src/test/development_config.json @@ -2,7 +2,7 @@ // Hydrilla development config file. // -// Copyright (C) 2021 Wojtek Kosior +// Copyright (C) 2021, 2022 Wojtek Kosior // // Available under the terms of Creative Commons Zero v1.0 Universal. @@ -10,21 +10,18 @@ // unlike config.json, it shall not be included in distribution { // Relative paths now get resolved from config's containing directory.
- "content_dir": "./example_content", - - // Expect files from content_dir to be served there (used to redirect - // clients). - "static_resource_uri": "http://localhost:8000/", + "malcontent_dir": "./sample_malcontent", // Hydrilla will display this link to users as a place where they can // obtain sources for its software. This config option is meant to ease // compliance with the AGPL. - "hydrilla_sources_uri": "https://git.koszko.org/pydrilla/", + "hydrilla_project_url": "https://hydrillabugs.koszko.org/projects/hydrilla/wiki", // Make Pydrilla error out on any warning + // Make Hydrilla error out on any warning "werror": true - // With the below we can make Pydrilla look for missing content items in + // With the below we can make hydrilla look for missing content items in // another instance instead of just erroring/warning. - // ,"hydrilla_parent": "https://api.hachette-hydrilla.org/0.2/" + // TODO: feature not implemented + // ,"hydrilla_parent": "https://api.hydrilla.koszko.org/1.0/" } diff --git a/src/test/example_content/hello/bye.js b/src/test/example_content/hello/bye.js deleted file mode 100644 index e6fd70c..0000000 --- a/src/test/example_content/hello/bye.js +++ /dev/null @@ -1,7 +0,0 @@ -// SPDX-License-Identifier: CC0-1.0 - -// Copyright (C) 2021 Wojtek Kosior -// -// Available under the terms of Creative Commons Zero v1.0 Universal. - -console.log(bye_message + "apple!"); diff --git a/src/test/example_content/hello/cc0.txt b/src/test/example_content/hello/cc0.txt deleted file mode 100644 index 0e259d4..0000000 --- a/src/test/example_content/hello/cc0.txt +++ /dev/null @@ -1,121 +0,0 @@ -Creative Commons Legal Code - -CC0 1.0 Universal - - CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE - LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN - ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS - INFORMATION ON AN "AS-IS" BASIS.
CREATIVE COMMONS MAKES NO WARRANTIES - REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS - PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM - THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED - HEREUNDER. - -Statement of Purpose - -The laws of most jurisdictions throughout the world automatically confer -exclusive Copyright and Related Rights (defined below) upon the creator -and subsequent owner(s) (each and all, an "owner") of an original work of -authorship and/or a database (each, a "Work"). - -Certain owners wish to permanently relinquish those rights to a Work for -the purpose of contributing to a commons of creative, cultural and -scientific works ("Commons") that the public can reliably and without fear -of later claims of infringement build upon, modify, incorporate in other -works, reuse and redistribute as freely as possible in any form whatsoever -and for any purposes, including without limitation commercial purposes. -These owners may contribute to the Commons to promote the ideal of a free -culture and the further production of creative, cultural and scientific -works, or to gain reputation or greater distribution for their Work in -part through the use and efforts of others. - -For these and/or other purposes and motivations, and without any -expectation of additional consideration or compensation, the person -associating CC0 with a Work (the "Affirmer"), to the extent that he or she -is an owner of Copyright and Related Rights in the Work, voluntarily -elects to apply CC0 to the Work and publicly distribute the Work under its -terms, with knowledge of his or her Copyright and Related Rights in the -Work and the meaning and intended legal effect of CC0 on those rights. - -1. Copyright and Related Rights. A Work made available under CC0 may be -protected by copyright and related or neighboring rights ("Copyright and -Related Rights"). 
Copyright and Related Rights include, but are not -limited to, the following: - - i. the right to reproduce, adapt, distribute, perform, display, - communicate, and translate a Work; - ii. moral rights retained by the original author(s) and/or performer(s); -iii. publicity and privacy rights pertaining to a person's image or - likeness depicted in a Work; - iv. rights protecting against unfair competition in regards to a Work, - subject to the limitations in paragraph 4(a), below; - v. rights protecting the extraction, dissemination, use and reuse of data - in a Work; - vi. database rights (such as those arising under Directive 96/9/EC of the - European Parliament and of the Council of 11 March 1996 on the legal - protection of databases, and under any national implementation - thereof, including any amended or successor version of such - directive); and -vii. other similar, equivalent or corresponding rights throughout the - world based on applicable law or treaty, and any national - implementations thereof. - -2. Waiver. To the greatest extent permitted by, but not in contravention -of, applicable law, Affirmer hereby overtly, fully, permanently, -irrevocably and unconditionally waives, abandons, and surrenders all of -Affirmer's Copyright and Related Rights and associated claims and causes -of action, whether now known or unknown (including existing as well as -future claims and causes of action), in the Work (i) in all territories -worldwide, (ii) for the maximum duration provided by applicable law or -treaty (including future time extensions), (iii) in any current or future -medium and for any number of copies, and (iv) for any purpose whatsoever, -including without limitation commercial, advertising or promotional -purposes (the "Waiver"). 
Affirmer makes the Waiver for the benefit of each -member of the public at large and to the detriment of Affirmer's heirs and -successors, fully intending that such Waiver shall not be subject to -revocation, rescission, cancellation, termination, or any other legal or -equitable action to disrupt the quiet enjoyment of the Work by the public -as contemplated by Affirmer's express Statement of Purpose. - -3. Public License Fallback. Should any part of the Waiver for any reason -be judged legally invalid or ineffective under applicable law, then the -Waiver shall be preserved to the maximum extent permitted taking into -account Affirmer's express Statement of Purpose. In addition, to the -extent the Waiver is so judged Affirmer hereby grants to each affected -person a royalty-free, non transferable, non sublicensable, non exclusive, -irrevocable and unconditional license to exercise Affirmer's Copyright and -Related Rights in the Work (i) in all territories worldwide, (ii) for the -maximum duration provided by applicable law or treaty (including future -time extensions), (iii) in any current or future medium and for any number -of copies, and (iv) for any purpose whatsoever, including without -limitation commercial, advertising or promotional purposes (the -"License"). The License shall be deemed effective as of the date CC0 was -applied by Affirmer to the Work. Should any part of the License for any -reason be judged legally invalid or ineffective under applicable law, such -partial invalidity or ineffectiveness shall not invalidate the remainder -of the License, and in such case Affirmer hereby affirms that he or she -will not (i) exercise any of his or her remaining Copyright and Related -Rights in the Work or (ii) assert any associated claims and causes of -action with respect to the Work, in either case contrary to Affirmer's -express Statement of Purpose. - -4. Limitations and Disclaimers. - - a. 
No trademark or patent rights held by Affirmer are waived, abandoned, - surrendered, licensed or otherwise affected by this document. - b. Affirmer offers the Work as-is and makes no representations or - warranties of any kind concerning the Work, express, implied, - statutory or otherwise, including without limitation warranties of - title, merchantability, fitness for a particular purpose, non - infringement, or the absence of latent or other defects, accuracy, or - the present or absence of errors, whether or not discoverable, all to - the greatest extent permissible under applicable law. - c. Affirmer disclaims responsibility for clearing rights of other persons - that may apply to the Work or any use thereof, including without - limitation any person's Copyright and Related Rights in the Work. - Further, Affirmer disclaims responsibility for obtaining any necessary - consents, permissions or other rights required for any use of the - Work. - d. Affirmer understands and acknowledges that Creative Commons is not a - party to this document and has no duty or obligation with respect to - this CC0 or use of the Work. diff --git a/src/test/example_content/hello/hello.js b/src/test/example_content/hello/hello.js deleted file mode 100644 index d87ea7f..0000000 --- a/src/test/example_content/hello/hello.js +++ /dev/null @@ -1,7 +0,0 @@ -// SPDX-License-Identifier: CC0-1.0 - -// Copyright (C) 2021 Wojtek Kosior -// -// Available under the terms of Creative Commons Zero v1.0 Universal. - -console.log(hello_message + "apple!"); diff --git a/src/test/example_content/hello/index.json b/src/test/example_content/hello/index.json deleted file mode 100644 index 16843cb..0000000 --- a/src/test/example_content/hello/index.json +++ /dev/null @@ -1,302 +0,0 @@ -// SPDX-License-Identifier: CC0-1.0 - -// Copyright (C) 2021 Wojtek Kosior -// Available under the terms of Creative Commons Zero v1.0 Universal. - -// This is an example index.json file describing Hydrilla site content. 
As you -// can see, for storing site content information Hydrilla utilizes JSON with an -// additional extension in the form of '//' comments support. Hydrilla shall -// look into each direct subdirectory of the content directory passed to it -// (via a config file option). If such subdirectory contains an index.json file, -// Hydrilla shall process it. - -// An index.json file conveys definitions of site resources, pattern->payload -// mappings and licenses thereof. The definitions may reference files under -// index.json's containing directory, using relative paths. This is how scripts, -// license texts, etc. are included. Unix paths (using '/' as separator) are -// assumed. It is not allowed for an index.json file to reference files outside -// its directory. - -// Certain objects are allowed to contain a "comment" field. Although '//' -// comments can be used in index.json files, they will be stripped when the file -// is processed. If a comment should be included in the JSON definitions served -// by Hydrilla API, it should be put in a "comment" field of the proper object. - -// Various kinds of objects contain version information. Version is always an -// array of integers, with major version number being the first array item. When -// applicable, a version is accompanied by a revision field which contains a -// positive integer. If versions specified by arrays of different length need to -// be compared, the shorter array gets padded with zeroes on the right. This -// means that for example version 1.3 could be given as both [1, 3] and -// [1, 3, 0, 0] (aka 1.3.0.0) and either would mean the same. - -{ - // Once our json schema changes, this version will change. Our software will - // be able to handle both current and older formats thanks to this - // information present in every index.json file. Different schema versions - // are always incompatible (e.g. a Hydrilla instance that understands schema - // version 0.2.0.0 will not understand version 0.2.0.1).
Schemas that are - // backwards-compatible will be denoted by a different revision. - // We will try to make schema version match the version of Hydrilla software - // that introduced it. - "schema_version": [0, 2], - "schema_revision": 1, - - // Copyright of this json file. It's a list of copyright holder information - // objects. Alternatively, "auto" can be used to make Hydrilla attempt to - // extract copyright info from the comment at the beginning of the file. - "copyright": [ - // There can be multiple entries, one for each co-holder of the - // copyright. - { - // There can also be multiple years, like ["2021","2023-2024"]. - "years": ["2021"], - // Name of the copyright holder. Depending on the situation it can - // be just the first name, name+surname, a company name, a - // pseudonym, etc. - "holder": "Wojtek Kosior" - } - ], - - // License of this json file. Identifier has to be known to Hydrilla. Can - // be defined either in the same or another index.json file as a "license" - // item. It is possible to specify license combinations, like: - // [["Expat", "and", "Apache-2.0"], "or", "GPL-3.0-only"] - // Alternatively, "auto" can be used to make Hydrilla attempt to extract - // copyright info from this file's SPDX license identifier. - "licenses": "CC0-1.0", - - // Where this software/work initially comes from. In some cases (i.e. when - // the developer of content is also the one who packages it for Hydrilla) - // this might be the same as "package_url". - "upstream_url": "https://git.koszko.org/pydrilla/tree/src/test/example_content/hello", - - // Where sources for the packaging of this content can be found. - "package_url": "https://git.koszko.org/pydrilla/tree/src/test/example_content/hello", - - // Additional "comment" field can be used if needed. - // "comment": "" - - // List of actual site resources, pattern->payload mappings and licenses. - // Each of them is represented by an object. 
Meta-sites and replacement site - // interfaces will also belong here once they get implemented. - "definitions": [ - { - // Value of "type" can currently be one of: "resource", "license" - // and "mapping". The one we have here, "resource", defines a list - // of injectable scripts that can be used as a payload or as a - // dependency of another "resource". In the future CSS style sheets - // and WASM modules will also be composite parts of a "resource" as - // scripts are now. - "type": "resource", - - // Used when referring to this resource in "dependencies" list of - // another resource or in "payload" field of a mapping. Should - // be concise and can only use a restricted set of characters. It - // has to match: [-0-9a-zA-Z] - "identifier": "helloapple", - - // "long_name" should be used to specify a user-friendly alternative - // to an identifier. It should generally not collide with a long - // name of some resource with a different uuid and also shouldn't - // change in-between versions of the same resource, although - // exceptions to both rules might be considered. Long name is - // allowed to contain arbitrary unicode characters (within reason!). - "long_name": "Hello Apple", - - // Different versions (e.g. 1.0 and 1.3) of the same resource can be - // defined in separate index.json files. This makes it easy to - // accidentally cause an identifier clash. To help detect it, we - // require that each resource has a uuid associated with it. Attempt - // to define multiple resources with the same identifier and - // different uuids will result in an error being reported. Defining - // multiple resources with different identifiers and the same uuid - // is disallowed for now (it may be later permitted if we consider - // it good for some use-case). - "uuid": "a6754dcb-58d8-4b7a-a245-24fd7ad4cd68", - - // Version should match the upstream version of the resource (e.g. a - // version of javascript library).
Revision number starts as 1 for - // each new resource version and gets incremented by 1 each time a - // modification to the packaging of this version is done. Hydrilla - // will allow multiple definitions of the same resource to load, as - // long as their versions differ. Thanks to the "version" and - // "revision" fields, clients will know they have to update certain - // resource after it has been updated. If multiple definitions of - // the same version of given resource are provided, an error is - // generated (even if those definitions differ by revision number). - "version": [2021, 11, 10], - "revision": 1, - - // A short, meaningful description of what the resource is and/or - // what it does. - "description": "greets an apple", - - // If needed, a "comment" field can be added to provide some - // additional information. - // "comment": "this resource something something", - - // One should specify the copyright and licensing terms of the - // entire package. The format is the same as when specifying these - // for the index.json file, except "auto" cannot be used. - "copyright": [{"years": ["2021"], "holder": "Wojtek Kosior"}], - "licenses": "CC0-1.0", - - // Resource's "dependencies" array shall contain names of other - // resources that (in case of scripts at least) should get evaluated - // on a page before this resource's own scripts. - "dependencies": ["hello-message"], - - // Array of javascript files that belong to this resource. - "scripts": [ - { - // Script name. It should also be a valid file path relative - // to index.json's containing directory. - "file": "hello.js", - // Copyright and license info of a script file can be - // specified using the same format as in the case of the - // index.json file itself. If "copyright" or "license" is - // not provided, Hydrilla assumes it to be the same as the - // value specified for the resource itself. 
- "copyright": "auto", - "licenses": "auto" - }, { - "file": "bye.js" - } - ] - }, { - "type": "resource", - "identifier": "hello-message", - "long_name": "Hello Message", - "uuid": "1ec36229-298c-4b35-8105-c4f2e1b9811e", - "version": [2021, 11, 10], - "revision": 2, - "description": "define messages for saying hello and bye", - "copyright": [{"years": ["2021"], "holder": "Wojtek Kosior"}], - "licenses": "CC0-1.0", - // If "dependencies" is empty, it can also be omitted. - // "dependencies": [], - "scripts": [{"file": "message.js"}] - }, { - "type": "mapping", - - // Has similar function to resource's identifier. Should be concise - // and can only use a restricted set of characters. It has to match: - // [-0-9a-zA-Z] - // It can be the same as some resource identifier (those are - // different entities and are treated separately). - "identifier": "helloapple", - - // "long_name" and "uuid" have the same meaning as in the case of - // resources. Uuids of a resource and a mapping can technically be - // the same, but it is recommended to avoid even this kind of - // repetition. - "long_name": "Hello Apple", - "uuid": "54d23bba-472e-42f5-9194-eaa24c0e3ee7", - - // "version" differs from its counterpart in resource in that it has - // no accompanying revision number. - "version": [2021, 11, 10], - - // A short, meaningful description of what the mapping does. - "description": "causes apple to get greeted on Hydrillabugs issue tracker", - - // A comment, if necessary. - // "comment": "blah blah because bleh" - - // The "payloads" array specifies which payloads are to be - // applied to which URLs. - "payloads": [ - { - // Should be a valid Haketilo URL pattern. - "pattern": "https://hydrillabugs.koszko.org/***", - // Should be the name of an existing resource. The resource - // may, but doesn't have to, be defined in the same - // index.json file. - "payload": "helloapple" - }, - // More associations may follow.
- { - "pattern": "https://hachettebugs.koszko.org/***", - "payload": "helloapple" - } - ] - }, { - "type": "license", - - // Will be used to refer to this license in other places. Should - // match the SPDX identifier if possible (despite that, please use - // "Expat" instead of "MIT" where possible). Unlike other definition - // types, "license" does not allow uuids to be used to avoid license - // id clashes. Any attempt to define multiple licenses with the same - // id will result in an error being reported. - "identifier": "CC0-1.0", - - // This long name must also be unique among all license definitions. - "long_name": "Creative Commons Zero v1.0 Universal", - - // We don't use "version" in license definitions. We do, however, - // use "revision" to indicate changes to the packaging of a license. - // Revision should be increased by 1 at each such change. - "revision": 2, - - "legal_text": [ - // Legal text can be available in multiple forms. Usually just - // plain .txt file is enough, though. - { - // "format" should match an agreed-upon MIME type if - // possible. - "format": "text/plain", - // Value of "file" should be a path relative to the - // directory of index.json file. - "file": "cc0.txt" - } - // If a markdown version of CC0 was provided, we could add this: - // { - // "format": "text/markdown", - // "file": "cc0.md" - // } - ] - - // If needed, a "comment" field can be added to clarify something. - // For example, when defining "Expat" license we could add: - // - // "comment": "Expat license is the most common form of the license often called \"MIT\". Many other forms of \"MIT\" license exist. Here the name \"Expat\" is used to avoid ambiguity." - - // If applicable, a "notice" can be included. It shall then be an - // object with "file" field containing a path (relative to - // index.json's directory) to a plain text file with that notice.
- // - // "notice": { - // "file": "license-notice.txt" - // } - // - // This is needed for example in case of GNU licenses (both with and - // without exceptions). For instance, - // "GPL-3.0-or-later-with-html-exception" could have the following - // in its notice file: - // - // This program is free software: you can redistribute it and/or - // modify it under the terms of the GNU General Public License as - // published by the Free Software Foundation, either version 3 of - // the License, or (at your option) any later version. - // - // This program is distributed in the hope that it will be useful, - // but WITHOUT ANY WARRANTY; without even the implied warranty of - // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - // GNU General Public License for more details. - // - // As a special exception to the GPL, any HTML file which merely - // makes function calls to this code, and for that purpose - // includes it by reference shall be deemed a separate work for - // copyright law purposes. If you modify this code, you may extend - // this exception to your version of the code, but you are not - // obligated to do so. If you do not wish to do so, delete this - // exception statement from your version. - // - // You should have received a copy of the GNU General Public License - // along with this program. If not, see - // . - } - ] -} diff --git a/src/test/example_content/hello/message.js b/src/test/example_content/hello/message.js deleted file mode 100644 index da5966d..0000000 --- a/src/test/example_content/hello/message.js +++ /dev/null @@ -1,8 +0,0 @@ -// SPDX-License-Identifier: CC0-1.0 - -// Copyright (C) 2021 Wojtek Kosior -// -// Available under the terms of Creative Commons Zero v1.0 Universal. 
- -var hello_message = "hello, " -var bye_message = "bye, " diff --git a/src/test/source-package-example b/src/test/source-package-example new file mode 160000 index 0000000..e571b39 --- /dev/null +++ b/src/test/source-package-example @@ -0,0 +1 @@ +Subproject commit e571b3911f198e3feccc8d06390c79131f9cf09d diff --git a/src/test/test_pydrilla.py b/src/test/test_pydrilla.py deleted file mode 100644 index 50757a7..0000000 --- a/src/test/test_pydrilla.py +++ /dev/null @@ -1,153 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later - -# Repository tests -# -# This file is part of Hydrilla -# -# Copyright (C) 2021 Wojtek Kosior -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# -# -# I, Wojtek Kosior, thereby promise not to sue for violation of this -# file's license. Although I request that you do not make use this code -# in a proprietary program, I am not going to enforce this in court. 
- -import pytest -import sys -import shutil -from pathlib import Path -from hashlib import sha256 -from os import mkdir, unlink, environ -import json -from markupsafe import escape - -from pydrilla import pydrilla, create_app - -test_dir = Path(__file__).resolve().parent -packages_dir = test_dir.parent -development_config_path = test_dir / 'development_config.json' -example_content_dir = test_dir / 'example_content' - -@pytest.fixture -def client(): - app = create_app(development_config_path, flask_config={'TESTING': True}) - - with app.test_client() as client: - yield client - -@pytest.fixture -def development_config(): - with open(development_config_path) as config_file: - yield json.loads(pydrilla.strip_json_comments(config_file.read())) - -def test_api_basic(client, development_config): - def verify_sha256sum(source_name, file_object): - with open(example_content_dir / source_name / file_object['file'], - mode='rb') as file: - assert sha256(file.read()).digest().hex() == file_object['sha256'] - - response = client.get('/') - assert b'html' in response.data - sources_uri = development_config['hydrilla_sources_uri'] - assert escape(sources_uri).encode() in response.data - - for item_type in ['mapping', 'resource']: - response = client.get(f'/{item_type}s/helloapple') - assert response.status_code == 200 - definition = json.loads(response.data.decode()) - assert definition['type'] == item_type - assert definition['source_name'] == 'hello' - assert definition['version'] == [2021, 11, 10] - if item_type == 'resource': - assert type(definition['scripts']) is list - assert len(definition['scripts']) > 0 - for script_file in definition['scripts']: - verify_sha256sum(definition['source_name'], script_file) - - response = client.get(f'/{item_type}s/helloapple?ver=2021.11.10.0') - assert response.status_code == 200 - assert definition == json.loads(response.data.decode()) - - response = client.get(f'/{item_type}s/helloapple?ver=2021.11.10.999') - assert 
response.status_code == 404 - - response = client.get(f'/{item_type}s/helloapple?ver=random_bad_input') - assert response.status_code == 400 - - response = client.get(f'/{item_type}s/random-bad-identifier') - assert response.status_code == 404 - - response = client.get(f'/{item_type}s/helloapple?ver=all') - assert response.status_code == 200 - definitions = json.loads(response.data.decode()) - assert type(definitions) is list - assert all([d['type'] == item_type for d in definitions]) - assert any([d['version'] == [2021, 11, 10] for d in definitions]) - - response = client.get('/licenses/CC0-1.0') - assert response.status_code == 200 - definition = json.loads(response.data.decode()) - assert definition['type'] == 'license' - assert definition['long_name'] == 'Creative Commons Zero v1.0 Universal' - assert definition['source_name'] == 'hello' - - assert type(definition['legal_text']) is list - assert len(definition['legal_text']) > 0 - for license_file in definition['legal_text']: - verify_sha256sum(definition['source_name'], license_file) - - response = client.get('/licenses/random-bad-identifier') - assert response.status_code == 404 - - response = client.get('/sources/hello') - assert response.status_code == 200 - definition = json.loads(response.data.decode()) - - assert definition['source_name'] == 'hello' - assert type(definition['schema_version']) is list - - response = client.get('/sources/random-bad-identifier') - assert response.status_code == 404 - - response = client.get('/query?url=https://hachettebugs.koszko.org') - assert response.status_code == 200 - definitions = json.loads(response.data.decode()) - assert type(definitions) is list - assert all([d['type'] == 'mapping' for d in definitions]) - assert any([p['pattern'] == 'https://hachettebugs.koszko.org/***' - for d in definitions for p in d['payloads']]) - - response = client.get('/query?url=https://random_bad_domain.org/something') - assert response.status_code == 200 - definitions = 
json.loads(response.data.decode()) - assert definitions == [] - - resource_uri = development_config['static_resource_uri'] - response = client.get('/sources/hello/hello.js') - assert response.status_code == 301 - assert response.location == resource_uri + 'hello/hello.js' - response = client.get('/sources/random-bad-identifier/hello.js') - assert response.status_code == 404 - response = client.get('/sources/hello/random/bad/path') - assert response.status_code == 301 - assert response.location == resource_uri + 'hello/random/bad/path' - -def test_normalize_version(): - assert pydrilla.normalize_version([4, 5, 3, 0, 0]) == [4, 5, 3] - assert pydrilla.normalize_version([1, 0, 5, 0]) == [1, 0, 5] - assert pydrilla.normalize_version([3, 3]) == [3, 3] - -def test_strip_json_comments(development_config): - assert development_config['static_resource_uri'] == 'http://localhost:8000/' diff --git a/src/test/test_server.py b/src/test/test_server.py new file mode 100644 index 0000000..def48dc --- /dev/null +++ b/src/test/test_server.py @@ -0,0 +1,199 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + +# Repository tests +# +# This file is part of Hydrilla +# +# Copyright (C) 2021, 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. 
Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +import pytest +import sys +import shutil +import json + +from pathlib import Path +from hashlib import sha256 +from tempfile import TemporaryDirectory +from typing import Iterable, Callable + +from flask.testing import FlaskClient +from markupsafe import escape + +from hydrilla import util as hydrilla_util +from hydrilla.builder import Build +from hydrilla.server import create_app + +here = Path(__file__).resolve().parent +config_path = here / 'development_config.json' +source_path = here / 'source-package-example' + +@pytest.fixture(scope="session") +def default_setup() -> Iterable[dict[str, Path]]: + with TemporaryDirectory() as tmpdir: + setup = { + 'malcontent_dir': Path(tmpdir) / 'sample_malcontent', + 'config_path': Path(tmpdir) / 'config.json', + 'containing_dir': Path(tmpdir) + } + + setup['config_path'].symlink_to(config_path) + + build = Build(source_path, Path('index.json')) + build.write_package_files(setup['malcontent_dir']) + + yield setup + +@pytest.fixture(scope="session") +def client(default_setup: dict[str, Path]) -> Iterable[FlaskClient]: + """Provide app client that serves the object from built sample package.""" + app = create_app(default_setup['config_path'], + flask_config={'TESTING': True}) + + with app.test_client() as client: + yield client + +@pytest.fixture(scope="session") +def development_config(default_setup) -> Iterable[dict]: + """Provide the contents of JSON config file fed to the client.""" + contents = default_setup['config_path'].read_text() + yield json.loads(hydrilla_util.strip_json_comments(contents)) + +def test_project_url(client: FlaskClient, development_config: dict) -> None: + """Fetch index.html and verify project URL from config is present there.""" + response = client.get('/') + assert b'html' in response.data + project_url = development_config['hydrilla_project_url'] + assert
escape(project_url).encode() in response.data + +@pytest.mark.parametrize('item_type', ['resource', 'mapping']) +def test_get_newest(client: FlaskClient, item_type: str) -> None: + """ + Verify that + GET '/{item_type}/{item_identifier}.json' + returns proper definition that is also served at: + GET '/{item_type}/{item_identifier}/{item_version}' + """ + response = client.get(f'/{item_type}/helloapple.json') + assert response.status_code == 200 + definition = json.loads(response.data.decode()) + assert definition['type'] == item_type + assert definition['identifier'] == 'helloapple' + + response = client.get(f'/{item_type}/helloapple/2021.11.10') + assert response.status_code == 200 + assert definition == json.loads(response.data.decode()) + + hydrilla_util.validator_for(f'api_{item_type}_description-1.schema.json')\ + .validate(definition) + +@pytest.mark.parametrize('item_type', ['resource', 'mapping']) +def test_get_nonexistent(client: FlaskClient, item_type: str) -> None: + """ + Verify that attempts to GET a JSON definition of a nonexistent item or item + version result in 404. + """ + response = client.get(f'/{item_type}/nonexistentapple.json') + assert response.status_code == 404 + response = client.get(f'/{item_type}/helloapple/1.2.3.999') + assert response.status_code == 404 + +@pytest.mark.parametrize('item_type', ['resource', 'mapping']) +def test_file_refs(client: FlaskClient, item_type: str) -> None: + """ + Verify that files referenced by definitions are accessible under their + proper URLs and that their hashes match. 
+ """ + response = client.get(f'/{item_type}/helloapple/2021.11.10') + assert response.status_code == 200 + definition = json.loads(response.data.decode()) + + for file_ref in [*definition.get('scripts', []), + *definition['source_copyright']]: + hash_sum = file_ref["sha256"] + response = client.get(f'/file/sha256-{hash_sum}') + + assert response.status_code == 200 + assert sha256(response.data).digest().hex() == hash_sum + +def test_empty_query(client: FlaskClient) -> None: + """ + Verify that querying mappings for URL gives an empty list when there're no + matches. + """ + response = client.get(f'/query?url=https://nonexiste.nt/example') + assert response.status_code == 200 + + response_object = json.loads(response.data.decode()) + + assert response_object['mappings'] == [] + + hydrilla_util.validator_for('api_query_result-1.schema.json')\ + .validate(response_object) + +def test_query(client: FlaskClient) -> None: + """ + Verify that querying mappings for URL gives a list with reference(s) to the
+ """ + response = client.get(f'/query?url=https://hydrillabugs.koszko.org/') + assert response.status_code == 200 + + response_object = json.loads(response.data.decode()) + + assert response_object['mappings'] == [{ + 'identifier': 'helloapple', + 'long_name': 'Hello Apple', + 'version': [2021, 11, 10] + }] + + hydrilla_util.validator_for('api_query_result-1.schema.json')\ + .validate(response_object) + +def test_source(client: FlaskClient) -> None: + """Verify source descriptions are properly served.""" + response = client.get(f'/source/hello.json') + assert response.status_code == 200 + + description = json.loads(response.data.decode()) + assert description['source_name'] == 'hello' + + assert sorted([d['identifier'] for d in description['definitions']]) == \ + ['hello-message', 'helloapple', 'helloapple'] + + zipfile_hash = description['source_archives']['zip']['sha256'] + response = client.get(f'/source/hello.zip') + assert sha256(response.data).digest().hex() == zipfile_hash + + hydrilla_util.validator_for('api_source_description-1.schema.json')\ + .validate(description) + +def test_missing_source(client: FlaskClient) -> None: + """Verify requests for nonexistent sources result in 404.""" + response = client.get(f'/source/nonexistent.json') + assert response.status_code == 404 + + response = client.get(f'/source/nonexistent.zip') + assert response.status_code == 404 + +def test_normalize_version(): + assert hydrilla_util.normalize_version([4, 5, 3, 0, 0]) == [4, 5, 3] + assert hydrilla_util.normalize_version([1, 0, 5, 0]) == [1, 0, 5] + assert hydrilla_util.normalize_version([3, 3]) == [3, 3] -- cgit v1.2.3