diff options
Diffstat (limited to 'src/hydrilla/server')
-rw-r--r-- | src/hydrilla/server/config.json | 3 | ||||
-rw-r--r-- | src/hydrilla/server/config.py | 43 | ||||
-rw-r--r-- | src/hydrilla/server/locales/en_US/LC_MESSAGES/hydrilla-messages.po | 147 | ||||
-rw-r--r-- | src/hydrilla/server/malcontent.py | 252 | ||||
-rw-r--r-- | src/hydrilla/server/serve.py | 560 | ||||
-rw-r--r-- | src/hydrilla/server/templates/base.html | 5 | ||||
-rw-r--r-- | src/hydrilla/server/templates/index.html | 5 |
7 files changed, 379 insertions, 636 deletions
diff --git a/src/hydrilla/server/config.json b/src/hydrilla/server/config.json index bde341c..e307548 100644 --- a/src/hydrilla/server/config.json +++ b/src/hydrilla/server/config.json @@ -28,9 +28,6 @@ // What port to listen on (if not being run through WSGI). "port": 10112, - // What localization to use for console messages and served HTML files. - "language": "en_US", - // Whether to exit upon emitting a warning. "werror": false } diff --git a/src/hydrilla/server/config.py b/src/hydrilla/server/config.py index 1edd070..42aabab 100644 --- a/src/hydrilla/server/config.py +++ b/src/hydrilla/server/config.py @@ -21,19 +21,20 @@ # # # I, Wojtek Kosior, thereby promise not to sue for violation of this -# file's license. Although I request that you do not make use this code -# in a proprietary program, I am not going to enforce this in court. - -# Enable using with Python 3.7. -from __future__ import annotations +# file's license. Although I request that you do not make use of this +# code in a proprietary program, I am not going to enforce this in +# court. import json +import typing as t from pathlib import Path -import jsonschema +import jsonschema # type: ignore -from .. import util +from ..translations import smart_gettext as _ +from ..exceptions import HaketiloException +from .. import json_instances config_schema = { '$schema': 'http://json-schema.org/draft-07/schema#', @@ -42,9 +43,6 @@ config_schema = { 'malcontent_dir': { 'type': 'string' }, - 'malcontent_dir': { - 'type': 'string' - }, 'hydrilla_project_url': { 'type': 'string' }, @@ -67,15 +65,18 @@ config_schema = { }, 'werror': { 'type': 'boolean' + }, + 'verify_files': { + 'type': 'boolean' } } } here = Path(__file__).resolve().parent -def load(config_paths: list[Path]=[here / 'config.json'], - can_fail: list[bool]=[]) -> dict: - config = {} +def load(config_paths: t.List[Path]=[here / 'config.json'], + can_fail: t.List[bool]=[]) -> t.Dict[str, t.Any]: + config: t.Dict[str, t.Any] = {} bools_missing = max(0, len(config_paths) - len(can_fail)) config_paths = [*config_paths] @@ -92,17 +93,13 @@ def load(config_paths: list[Path]=[here / 'config.json'], continue raise e from None - new_config = json.loads(util.strip_json_comments(json_text)) + new_config = json.loads(json_instances.strip_json_comments(json_text)) jsonschema.validate(new_config, config_schema) config.update(new_config) - if 'malcontent_dir' in config: - malcontent_dir = Path(config['malcontent_dir']) - if not malcontent_dir.is_absolute(): - malcontent_dir = path.parent / malcontent_dir - - config['malcontent_dir'] = str(malcontent_dir.resolve()) + if 'malcontent_dir' in new_config: + malcontent_path_relative_to = path.parent for key, failure_ok in [('try_configs', True), ('use_configs', False)]: paths = new_config.get(key, []) @@ -110,6 +107,12 @@ def load(config_paths: list[Path]=[here / 'config.json'], config_paths.extend(paths) can_fail.extend([failure_ok] * len(paths)) + + if 'malcontent_dir' in config: + malcontent_dir_str = config['malcontent_dir'] + malcontent_dir_path = malcontent_path_relative_to / malcontent_dir_str + config['malcontent_dir'] = str(malcontent_dir_path) + for key in ('try_configs', 'use_configs'): if key in config: config.pop(key) diff --git a/src/hydrilla/server/locales/en_US/LC_MESSAGES/hydrilla-messages.po b/src/hydrilla/server/locales/en_US/LC_MESSAGES/hydrilla-messages.po deleted file mode 100644 index 7ea930a..0000000 --- a/src/hydrilla/server/locales/en_US/LC_MESSAGES/hydrilla-messages.po +++ /dev/null @@ -1,147 +0,0 @@ -# SPDX-License-Identifier: CC0-1.0 -# -# English (United States) translations for hydrilla. -# Copyright (C) 2021, 2022 Wojtek Kosior <koszko@koszko.org> -# Available under the terms of Creative Commons Zero v1.0 Universal. -msgid "" -msgstr "" -"Project-Id-Version: hydrilla.builder 0.1\n" -"Report-Msgid-Bugs-To: koszko@koszko.org\n" -"POT-Creation-Date: 2022-04-22 17:09+0200\n" -"PO-Revision-Date: 2022-02-12 00:00+0000\n" -"Last-Translator: Wojtek Kosior <koszko@koszko.org>\n" -"Language: en_US\n" -"Language-Team: en_US <koszko@koszko.org>\n" -"Plural-Forms: nplurals=2; plural=(n != 1)\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel 2.8.0\n" - -#: src/hydrilla/server/serve.py:122 -#, python-brace-format -msgid "uuid_mismatch_{identifier}" -msgstr "Two different uuids were specified for item '{identifier}'." - -#: src/hydrilla/server/serve.py:129 -#, python-brace-format -msgid "version_clash_{identifier}_{version}" -msgstr "Version '{version}' specified more than once for item '{identifier}'." - -#: src/hydrilla/server/serve.py:245 src/hydrilla/server/serve.py:257 -msgid "invalid_URL_{}" -msgstr "Invalid URL/pattern: '{}'." - -#: src/hydrilla/server/serve.py:249 -msgid "disallowed_protocol_{}" -msgstr "Disallowed protocol: '{}'." - -#: src/hydrilla/server/serve.py:302 -msgid "malcontent_dir_path_not_dir_{}" -msgstr "Provided 'malcontent_dir' path does not name a directory: {}" - -#: src/hydrilla/server/serve.py:321 -msgid "couldnt_load_item_from_{}" -msgstr "Couldn't load item from {}." - -#: src/hydrilla/server/serve.py:347 -msgid "item_{item}_in_file_{file}" -msgstr "Item {item} incorrectly present under {file}." - -#: src/hydrilla/server/serve.py:353 -msgid "item_version_{ver}_in_file_{file}" -msgstr "Item version {ver} incorrectly present under {file}." - -#: src/hydrilla/server/serve.py:376 -msgid "no_dep_{resource}_{ver}_{dep}" -msgstr "Unknown dependency '{dep}' of resource '{resource}', version '{ver}'." - -#: src/hydrilla/server/serve.py:387 -msgid "no_payload_{mapping}_{ver}_{payload}" -msgstr "Unknown payload '{payload}' of mapping '{mapping}', version '{ver}'." - -#: src/hydrilla/server/serve.py:413 -msgid "couldnt_register_{mapping}_{ver}_{pattern}" -msgstr "" -"Couldn't register mapping '{mapping}', version '{ver}' (pattern " -"'{pattern}')." - -#: src/hydrilla/server/serve.py:566 src/hydrilla/server/serve.py:588 -#: src/hydrilla/server/serve.py:626 -#, python-format -msgid "%(prog)s_%(version)s_license" -msgstr "" -"%(prog)s %(version)s\n" -"Copyright (C) 2021,2022 Wojtek Kosior and contributors.\n" -"License GPLv3+: GNU AGPL version 3 or later " -"<https://gnu.org/licenses/gpl.html>\n" -"This is free software: you are free to change and redistribute it.\n" -"There is NO WARRANTY, to the extent permitted by law." - -#: src/hydrilla/server/serve.py:577 -msgid "directory_to_serve_from_overrides_config" -msgstr "" -"Directory to serve files from. Overrides value from the config file (if " -"any)." - -#: src/hydrilla/server/serve.py:579 -msgid "project_url_to_display_overrides_config" -msgstr "" -"Project url to display on generated HTML pages. Overrides value from the " -"config file (if any)." - -#: src/hydrilla/server/serve.py:581 -msgid "tcp_port_to_listen_on_overrides_config" -msgstr "" -"TCP port number to listen on (0-65535). Overrides value from the config " -"file (if any)." - -#: src/hydrilla/server/serve.py:584 -msgid "path_to_config_file_explain_default" -msgstr "" -"Path to Hydrilla server configuration file (optional, by default Hydrilla" -" loads its own config file, which in turn tries to load " -"/etc/hydrilla/config.json)." - -#: src/hydrilla/server/serve.py:586 -msgid "language_to_use_overrides_config" -msgstr "" -"Language to use (also affects served HTML files). Overrides value from " -"the config file (if any)." - -#: src/hydrilla/server/serve.py:589 src/hydrilla/server/serve.py:627 -msgid "version_printing" -msgstr "Print version information and exit." - -#: src/hydrilla/server/serve.py:617 -msgid "config_option_{}_not_supplied" -msgstr "Missing configuration option '{}'." - -#: src/hydrilla/server/serve.py:621 -msgid "serve_hydrilla_packages_explain_wsgi_considerations" -msgstr "" -"Serve Hydrilla packages.\n" -"\n" -"This command is meant to be a quick way to run a local or development " -"Hydrilla instance. For better performance, consider deployment using " -"WSGI." - -#: src/hydrilla/server/serve.py:632 -msgid "serve_hydrilla_packages_wsgi_help" -msgstr "" -"Serve Hydrilla packages.\n" -"\n" -"This program is a WSGI script that runs Hydrilla repository behind an " -"HTTP server like Apache2 or Nginx. You can configure Hydrilla through the" -" /etc/hydrilla/config.json file." - -#. 'hydrilla' as a title -#: src/hydrilla/server/templates/base.html:99 -#: src/hydrilla/server/templates/base.html:105 -msgid "hydrilla" -msgstr "Hydrilla" - -#: src/hydrilla/server/templates/index.html:29 -msgid "hydrilla_welcome" -msgstr "Welcome to Hydrilla!" - diff --git a/src/hydrilla/server/malcontent.py b/src/hydrilla/server/malcontent.py new file mode 100644 index 0000000..9bdf6dc --- /dev/null +++ b/src/hydrilla/server/malcontent.py @@ -0,0 +1,252 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + +# Processing of repository packages. +# +# This file is part of Hydrilla +# +# Copyright (C) 2021, 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use of this +# code in a proprietary program, I am not going to enforce this in +# court. + +import logging +import dataclasses as dc +import typing as t + +from pathlib import Path + +from immutables import Map + +from ..translations import smart_gettext as _ +from ..exceptions import HaketiloException +from .. import versions +from .. import item_infos +from .. import pattern_tree + + +MappingTree = pattern_tree.PatternTree[item_infos.MappingInfo] + +# VersionedType = t.TypeVar( +# 'VersionedType', +# item_infos.ResourceInfo, +# item_infos.MappingInfo +# ) + +class Malcontent: + """ + Represent a directory with files that can be loaded and served by Hydrilla. + """ + def __init__( + self, + malcontent_dir_path: Path, + werror: bool, + verify_files: bool + ): + """ + When an instance of Malcontent is constructed, it searches + malcontent_dir_path for serveable site-modifying packages and loads + them into its data structures. + """ + self.werror: bool = werror + self.verify_files: bool = verify_files + + self.resource_infos: item_infos.VersionedResourceInfoMap = Map() + self.mapping_infos: item_infos.VersionedMappingInfoMap = Map() + + self.mapping_tree: MappingTree = MappingTree() + + self.malcontent_dir_path = malcontent_dir_path + + if not self.malcontent_dir_path.is_dir(): + fmt = _('err.server.malcontent_path_not_dir_{}') + raise HaketiloException(fmt.format(malcontent_dir_path)) + + for type in [item_infos.ItemType.RESOURCE, item_infos.ItemType.MAPPING]: + type_path = self.malcontent_dir_path / type.value + if not type_path.is_dir(): + continue + + for subpath in type_path.iterdir(): + if not subpath.is_dir(): + continue + + for ver_file in subpath.iterdir(): + try: + self._load_item(type, ver_file) + except: + if self.werror: + raise + + fmt = _('err.server.couldnt_load_item_from_{}') + logging.error(fmt.format(ver_file), exc_info=True) + + self._report_missing() + self._finalize() + + def _check_package_files(self, info: item_infos.AnyInfo) -> None: + by_sha256_dir = self.malcontent_dir_path / 'file' / 'sha256' + + for file_spec in info.files: + if (by_sha256_dir / file_spec.sha256).is_file(): + continue + + fmt = _('err.server.no_file_{required_by}_{ver}_{file}_{sha256}') + msg = fmt.format( + required_by = info.identifier, + ver = versions.version_string(info.version), + file = file_spec.name, + sha256 = file_spec.sha256 + ) + if (self.werror): + raise HaketiloException(msg) + else: + logging.error(msg) + + def _load_item(self, type: item_infos.ItemType, ver_file: Path) \ + -> None: + """ + Reads, validates and autocompletes serveable mapping/resource + definition, then registers information from it in data structures. + """ + version = versions.parse(ver_file.name) + identifier = ver_file.parent.name + + item_info = type.info_class.load(ver_file) + + if item_info.identifier != identifier: + fmt = _('err.server.item_{item}_in_file_{file}') + msg = fmt.format({'item': item_info.identifier, 'file': ver_file}) + raise HaketiloException(msg) + + if item_info.version != version: + ver_str = versions.version_string(item_info.version) + fmt = _('item_version_{ver}_in_file_{file}') + msg = fmt.format({'ver': ver_str, 'file': ver_file}) + raise HaketiloException(msg) + + if self.verify_files: + self._check_package_files(item_info) + + if isinstance(item_info, item_infos.ResourceInfo): + self.resource_infos = item_infos.register_in_versioned_map( + map = self.resource_infos, + info = item_info + ) + else: + self.mapping_infos = item_infos.register_in_versioned_map( + map = self.mapping_infos, + info = item_info + ) + + def _report_missing(self) -> None: + """ + Use logger to print information about items that are referenced but + were not loaded. + """ + def report_missing_dependency( + info: item_infos.ResourceInfo, + dep: str + ) -> None: + msg = _('err.server.no_dep_{resource}_{ver}_{dep}')\ + .format(dep=dep, resource=info.identifier, + ver=versions.version_string(info.version)) + logging.error(msg) + + for resource_info in item_infos.all_map_infos(self.resource_infos): + for dep_specifier in resource_info.dependencies: + identifier = dep_specifier.identifier + if identifier not in self.resource_infos: + report_missing_dependency(resource_info, identifier) + + def report_missing_payload( + info: item_infos.MappingInfo, + payload: str + ) -> None: + msg = _('err.server.no_payload_{mapping}_{ver}_{payload}')\ + .format(mapping=info.identifier, payload=payload, + ver=versions.version_string(info.version)) + logging.error(msg) + + for mapping_info in item_infos.all_map_infos(self.mapping_infos): + for resource_specifier in mapping_info.payloads.values(): + identifier = resource_specifier.identifier + if identifier not in self.resource_infos: + report_missing_payload(mapping_info, identifier) + + def report_missing_mapping( + info: item_infos.AnyInfo, + required: str + ) -> None: + msg = _('err.server.no_mapping_{required_by}_{ver}_{required}')\ + .format(required_by=info.identifier, required=required, + ver=versions.version_string(info.version)) + logging.error(msg) + + infos: t.Iterable[item_infos.AnyInfo] = ( + *item_infos.all_map_infos(self.mapping_infos), + *item_infos.all_map_infos(self.resource_infos) + ) + for item_info in infos: + for mapping_specifier in item_info.required_mappings: + identifier = mapping_specifier.identifier + if identifier not in self.mapping_infos: + report_missing_mapping(item_info, identifier) + + def _finalize(self): + """ + Initialize structures needed to serve queries. Called once after all + data gets loaded. + """ + for info in item_infos.all_map_infos(self.mapping_infos): + for pattern in info.payloads: + try: + self.mapping_tree = \ + self.mapping_tree.register(pattern, info) + except: + if self.werror: + raise + msg = _('server.err.couldnt_register_{mapping}_{ver}_{pattern}')\ + .format(mapping=info.identifier, pattern=pattern, + ver=util.version_string(info.version)) + logging.error(msg) + + def query(self, url: str) -> t.Sequence[item_infos.MappingInfo]: + """ + Return a list of registered mappings that match url. + + If multiple versions of a mapping are applicable, only the most recent + is included in the result. + """ + collected: t.Dict[str, item_infos.MappingInfo] = {} + for result_set in self.mapping_tree.search(url): + for wrapped_mapping_info in result_set: + info = wrapped_mapping_info.item + previous = collected.get(info.identifier) + if previous and previous.version > info.version: + continue + + collected[info.identifier] = info + + return list(collected.values()) + + def get_all_resources(self) -> t.Sequence[item_infos.ResourceInfo]: + return tuple(item_infos.all_map_infos(self.resource_infos)) + + def get_all_mappings(self) -> t.Sequence[item_infos.MappingInfo]: + return tuple(item_infos.all_map_infos(self.mapping_infos)) diff --git a/src/hydrilla/server/serve.py b/src/hydrilla/server/serve.py index a6a1204..68dde7a 100644 --- a/src/hydrilla/server/serve.py +++ b/src/hydrilla/server/serve.py @@ -21,429 +21,35 @@ # # # I, Wojtek Kosior, thereby promise not to sue for violation of this -# file's license. Although I request that you do not make use this code -# in a proprietary program, I am not going to enforce this in court. - -# Enable using with Python 3.7. -from __future__ import annotations +# file's license. Although I request that you do not make use of this +# code in a proprietary program, I am not going to enforce this in +# court. import re import os -import pathlib import json -import logging +import typing as t from pathlib import Path -from hashlib import sha256 -from abc import ABC, abstractmethod -from typing import Optional, Union, Iterable import click import flask +import werkzeug -from werkzeug import Response - -from .. import util +from ..exceptions import HaketiloException +from .. import _version +from ..translations import smart_gettext as _, translation as make_translation +from .. import versions +from .. import item_infos from . import config -from . import _version +from . import malcontent -here = Path(__file__).resolve().parent generated_by = { 'name': 'hydrilla.server', 'version': _version.version } -class ItemInfo(ABC): - """Shortened data of a resource/mapping.""" - def __init__(self, item_obj: dict): - """Initialize ItemInfo using item definition read from JSON.""" - self.version = util.normalize_version(item_obj['version']) - self.identifier = item_obj['identifier'] - self.uuid = item_obj.get('uuid') - self.long_name = item_obj['long_name'] - - def path(self) -> str: - """ - Get a relative path to this item's JSON definition with respect to - directory containing items of this type. - """ - return f'{self.identifier}/{util.version_string(self.version)}' - -class ResourceInfo(ItemInfo): - """Shortened data of a resource.""" - def __init__(self, resource_obj: dict): - """Initialize ResourceInfo using resource definition read from JSON.""" - super().__init__(resource_obj) - - dependencies = resource_obj.get('dependencies', []) - self.dependencies = [res_ref['identifier'] for res_ref in dependencies] - -class MappingInfo(ItemInfo): - """Shortened data of a mapping.""" - def __init__(self, mapping_obj: dict): - """Initialize MappingInfo using mapping definition read from JSON.""" - super().__init__(mapping_obj) - - self.payloads = {} - for pattern, res_ref in mapping_obj.get('payloads', {}).items(): - self.payloads[pattern] = res_ref['identifier'] - - def as_query_result(self) -> str: - """ - Produce a json.dump()-able object describing this mapping as one of a - collection of query results. - """ - return { - 'version': self.version, - 'identifier': self.identifier, - 'long_name': self.long_name - } - -class VersionedItemInfo: - """Stores data of multiple versions of given resource/mapping.""" - def __init__(self): - self.uuid = None - self.identifier = None - self.by_version = {} - self.known_versions = [] - - def register(self, item_info: ItemInfo) -> None: - """ - Make item info queryable by version. Perform sanity checks for uuid. - """ - if self.identifier is None: - self.identifier = item_info.identifier - - if self.uuid is None: - self.uuid = item_info.uuid - - if self.uuid is not None and self.uuid != item_info.uuid: - raise ValueError(f_('uuid_mismatch_{identifier}') - .format(identifier=self.identifier)) - - ver = item_info.version - ver_str = util.version_string(ver) - - if ver_str in self.by_version: - raise ValueError(f_('version_clash_{identifier}_{version}') - .format(identifier=self.identifier, - version=ver_str)) - - self.by_version[ver_str] = item_info - self.known_versions.append(ver) - - def get_by_ver(self, ver: Optional[list[int]]=None) -> Optional[ItemInfo]: - """ - Find and return info of the newest version of item. - - If ver is specified, instead find and return info of that version of the - item (or None if absent). - """ - ver = util.version_string(ver or self.known_versions[-1]) - - return self.by_version.get(ver) - - def get_all(self) -> list[ItemInfo]: - """ - Return a list of item info for all its versions, from oldest ot newest. - """ - return [self.by_version[util.version_string(ver)] - for ver in self.known_versions] - -class PatternTreeNode: - """ - "Pattern Tree" is how we refer to the data structure used for querying - Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal - is to make it possible for given URL to quickly retrieve all known patterns - that match it. - """ - def __init__(self): - self.wildcard_matches = [None, None, None] - self.literal_match = None - self.children = {} - - def search(self, segments): - """ - Yields all matches of this segments sequence against the tree that - starts at this node. Results are produces in order from greatest to - lowest pattern specificity. - """ - nodes = [self] - - for segment in segments: - next_node = nodes[-1].children.get(segment) - if next_node is None: - break - - nodes.append(next_node) - - nsegments = len(segments) - cond_literal = lambda: len(nodes) == nsegments - cond_wildcard = [ - lambda: len(nodes) + 1 == nsegments and segments[-1] != '*', - lambda: len(nodes) + 1 < nsegments, - lambda: len(nodes) + 1 != nsegments or segments[-1] != '***' - ] - - while nodes: - node = nodes.pop() - - for item, condition in [(node.literal_match, cond_literal), - *zip(node.wildcard_matches, cond_wildcard)]: - if item is not None and condition(): - yield item - - def add(self, segments, item_instantiator): - """ - Make item queryable through (this branch of) the Pattern Tree. If there - was not yet any item associated with the tree path designated by - segments, create a new one using item_instantiator() function. Return - all items matching this path (both the ones that existed and the ones - just created). - """ - node = self - segment = None - - for segment in segments: - wildcards = node.wildcard_matches - - child = node.children.get(segment) or PatternTreeNode() - node.children[segment] = child - node = child - - if node.literal_match is None: - node.literal_match = item_instantiator() - - if segment not in ('*', '**', '***'): - return [node.literal_match] - - if wildcards[len(segment) - 1] is None: - wildcards[len(segment) - 1] = item_instantiator() - - return [node.literal_match, wildcards[len(segment) - 1]] - -proto_regex = re.compile(r'^(?P<proto>\w+)://(?P<rest>.*)$') -user_re = r'[^/?#@]+@' # r'(?P<user>[^/?#@]+)@' # discarded for now -query_re = r'\??[^#]*' # r'\??(?P<query>[^#]*)' # discarded for now -domain_re = r'(?P<domain>[^/?#]+)' -path_re = r'(?P<path>[^?#]*)' -http_regex = re.compile(f'{domain_re}{path_re}{query_re}.*') -ftp_regex = re.compile(f'(?:{user_re})?{domain_re}{path_re}.*') - -class UrlError(ValueError): - """Used to report a URL or URL pattern that is invalid or unsupported.""" - pass - -class DeconstructedUrl: - """Represents a deconstructed URL or URL pattern""" - def __init__(self, url): - self.url = url - - match = proto_regex.match(url) - if not match: - raise UrlError(f_('invalid_URL_{}').format(url)) - - self.proto = match.group('proto') - if self.proto not in ('http', 'https', 'ftp'): - raise UrlError(f_('disallowed_protocol_{}').format(proto)) - - if self.proto == 'ftp': - match = ftp_regex.match(match.group('rest')) - elif self.proto in ('http', 'https'): - match = http_regex.match(match.group('rest')) - - if not match: - raise UrlError(f_('invalid_URL_{}').format(url)) - - self.domain = match.group('domain').split('.') - self.domain.reverse() - self.path = [*filter(None, match.group('path').split('/'))] - -class PatternMapping: - """ - A mapping info, together with one of its patterns, as stored in Pattern - Tree. - """ - def __init__(self, pattern: str, mapping_info: MappingInfo): - self.pattern = pattern - self.mapping_info = mapping_info - - def register(self, pattern_tree: dict): - """ - Make self queryable through the Pattern Tree passed in the argument. - """ - deco = DeconstructedUrl(self.pattern) - - domain_tree = pattern_tree.get(deco.proto) or PatternTreeNode() - pattern_tree[deco.proto] = domain_tree - - for path_tree in domain_tree.add(deco.domain, PatternTreeNode): - for match_list in path_tree.add(deco.path, list): - match_list.append(self) - -class Malcontent: - """ - Instance of this class represents a directory with files that can be loaded - and served by Hydrilla. - """ - def __init__(self, malcontent_dir_path: Path): - """ - When an instance of Malcontent is constructed, it searches - malcontent_dir_path for serveable site-modifying packages and loads - them into its data structures. - """ - self.infos = {'resource': {}, 'mapping': {}} - self.pattern_tree = {} - - self.malcontent_dir_path = malcontent_dir_path - - if not self.malcontent_dir_path.is_dir(): - raise ValueError(f_('malcontent_dir_path_not_dir_{}') - .format(malcontent_dir_path)) - - for item_type in ('mapping', 'resource'): - type_path = self.malcontent_dir_path / item_type - if not type_path.is_dir(): - continue - - for subpath in type_path.iterdir(): - if not subpath.is_dir(): - continue - - for ver_file in subpath.iterdir(): - try: - self._load_item(item_type, ver_file) - except Exception as e: - if flask.current_app._hydrilla_werror: - raise e from None - - msg = f_('couldnt_load_item_from_{}').format(ver_file) - logging.error(msg, exc_info=True) - - self._report_missing() - self._finalize() - - def _load_item(self, item_type: str, ver_file: Path) -> None: - """ - Reads, validates and autocompletes serveable mapping/resource - definition, then registers information from it in data structures. - """ - version = util.parse_version(ver_file.name) - identifier = ver_file.parent.name - - with open(ver_file, 'rt') as file_handle: - item_json = json.load(file_handle) - - util.validator_for(f'api_{item_type}_description-1.0.1.schema.json')\ - .validate(item_json) - - if item_type == 'resource': - item_info = ResourceInfo(item_json) - else: - item_info = MappingInfo(item_json) - - if item_info.identifier != identifier: - msg = f_('item_{item}_in_file_{file}')\ - .format({'item': item_info.identifier, 'file': ver_file}) - raise ValueError(msg) - - if item_info.version != version: - ver_str = util.version_string(item_info.version) - msg = f_('item_version_{ver}_in_file_{file}')\ - .format({'ver': ver_str, 'file': ver_file}) - raise ValueError(msg) - - versioned_info = self.infos[item_type].get(identifier) - if versioned_info is None: - versioned_info = VersionedItemInfo() - self.infos[item_type][identifier] = versioned_info - - versioned_info.register(item_info) - - def _all_of_type(self, item_type: str) -> Iterable[ItemInfo]: - """Iterator over all registered versions of all mappings/resources.""" - for versioned_info in self.infos[item_type].values(): - for item_info in versioned_info.by_version.values(): - yield item_info - - def _report_missing(self) -> None: - """ - Use logger to print information about items that are referenced but - were not loaded. - """ - def report_missing_dependency(info: ResourceInfo, dep: str) -> None: - msg = f_('no_dep_{resource}_{ver}_{dep}')\ - .format(dep=dep, resource=info.identifier, - ver=util.version_string(info.version)) - logging.error(msg) - - for resource_info in self._all_of_type('resource'): - for dep in resource_info.dependencies: - if dep not in self.infos['resource']: - report_missing_dependency(resource_info, dep) - - def report_missing_payload(info: MappingInfo, payload: str) -> None: - msg = f_('no_payload_{mapping}_{ver}_{payload}')\ - .format(mapping=info.identifier, payload=payload, - ver=util.version_string(info.version)) - logging.error(msg) - - for mapping_info in self._all_of_type('mapping'): - for payload in mapping_info.payloads.values(): - if payload not in self.infos['resource']: - report_missing_payload(mapping_info, payload) - - def _finalize(self): - """ - Initialize structures needed to serve queries. Called once after all - data gets loaded. - """ - for infos_dict in self.infos.values(): - for versioned_info in infos_dict.values(): - versioned_info.known_versions.sort() - - for info in self._all_of_type('mapping'): - for pattern in info.payloads: - try: - PatternMapping(pattern, info).register(self.pattern_tree) - except Exception as e: - if flask.current_app._hydrilla_werror: - raise e from None - msg = f_('couldnt_register_{mapping}_{ver}_{pattern}')\ - .format(mapping=info.identifier, pattern=pattern, - ver=util.version_string(info.version)) - logging.error(msg) - - def query(self, url: str) -> list[MappingInfo]: - """ - Return a list of registered mappings that match url. - - If multiple versions of a mapping are applicable, only the most recent - is included in the result. - """ - deco = DeconstructedUrl(url) - - collected = {} - - domain_tree = self.pattern_tree.get(deco.proto) or PatternTreeNode() - - def process_mapping(pattern_mapping: PatternMapping) -> None: - if url[-1] != '/' and pattern_mapping.pattern[-1] == '/': - return - - info = pattern_mapping.mapping_info - - if info.identifier not in collected or \ - info.version > collected[info.identifier].version: - collected[info.identifier] = info - - for path_tree in domain_tree.search(deco.domain): - for matches_list in path_tree.search(deco.path): - for pattern_mapping in matches_list: - process_mapping(pattern_mapping) - - return list(collected.values()) bp = flask.Blueprint('bp', __package__) @@ -467,46 +73,36 @@ class HydrillaApp(flask.Flask): ] } - self._hydrilla_translation = \ - util.translation(here / 'locales', hydrilla_config['language']) - self._hydrilla_project_url = hydrilla_config['hydrilla_project_url'] self._hydrilla_port = hydrilla_config['port'] self._hydrilla_werror = hydrilla_config.get('werror', False) + verify_files = hydrilla_config.get('verify_files', True) if 'hydrilla_parent' in hydrilla_config: - raise ValueError("Option 'hydrilla_parent' is not implemented.") + raise HaketiloException(_('err.server.opt_hydrilla_parent_not_implemented')) - malcontent_dir = Path(hydrilla_config['malcontent_dir']).resolve() - with self.app_context(): - self._hydrilla_malcontent = Malcontent(malcontent_dir) + malcontent_dir_path = Path(hydrilla_config['malcontent_dir']).resolve() + self._hydrilla_malcontent = malcontent.Malcontent( + malcontent_dir_path = malcontent_dir_path, + werror = self._hydrilla_werror, + verify_files = verify_files + ) - self.register_blueprint(bp) + self.jinja_env.install_gettext_translations(make_translation()) - def create_jinja_environment(self, *args, **kwargs) \ - -> flask.templating.Environment: - """ - Flask's create_jinja_environment(), but tweaked to always include the - 'hydrilla_project_url' global variable and to install proper - translations. - """ - env = super().create_jinja_environment(*args, **kwargs) - env.install_gettext_translations(self._hydrilla_translation) - env.globals['hydrilla_project_url'] = self._hydrilla_project_url + self.jinja_env.globals['hydrilla_project_url'] = \ + hydrilla_config['hydrilla_project_url'] - return env + self.register_blueprint(bp) def run(self, *args, **kwargs): """ - Flask's run(), but tweaked to use the port from hydrilla configuration - by default. + Flask's run() but tweaked to use the port from hydrilla configuration by + default. """ return super().run(*args, port=self._hydrilla_port, **kwargs) -def f_(text_key): - return flask.current_app._hydrilla_translation.gettext(text_key) - -def malcontent(): - return flask.current_app._hydrilla_malcontent +def get_malcontent() -> malcontent.Malcontent: + return t.cast(HydrillaApp, flask.current_app)._hydrilla_malcontent @bp.route('/') def index(): @@ -514,7 +110,8 @@ def index(): identifier_json_re = re.compile(r'^([-0-9a-z.]+)\.json$') -def get_resource_or_mapping(item_type: str, identifier: str) -> Response: +def get_resource_or_mapping(item_type: str, identifier: str) \ + -> werkzeug.Response: """ Strip '.json' from 'identifier', look the item up and send its JSON description. @@ -525,36 +122,84 @@ def get_resource_or_mapping(item_type: str, identifier: str) -> Response: identifier = match.group(1) - versioned_info = malcontent().infos[item_type].get(identifier) + infos: t.Mapping[str, item_infos.VersionedItemInfo] + if item_type == 'resource': + infos = get_malcontent().resource_infos + else: + infos = get_malcontent().mapping_infos - info = versioned_info and versioned_info.get_by_ver() - if info is None: + versioned_info = infos.get(identifier) + + if versioned_info is None: flask.abort(404) + info = versioned_info.newest_info + # no need for send_from_directory(); path is safe, constructed by us - file_path = malcontent().malcontent_dir_path / item_type / info.path() - return flask.send_file(open(file_path, 'rb'), mimetype='application/json') + info_path = f'{info.identifier}/{versions.version_string(info.version)}' + file_path = get_malcontent().malcontent_dir_path / item_type / info_path + + if flask.__version__[0:2] in ('0.', '1.'): + caching_args = {'add_etags': False, 'cache_timeout': 0} + else: + caching_args = {'etag': False} + + return flask.send_file( + str(file_path), + mimetype = 'application/json', + conditional = False, + **caching_args # type: ignore + ) @bp.route('/mapping/<string:identifier_dot_json>') -def get_newest_mapping(identifier_dot_json: str) -> Response: +def get_newest_mapping(identifier_dot_json: str) -> werkzeug.Response: return get_resource_or_mapping('mapping', identifier_dot_json) @bp.route('/resource/<string:identifier_dot_json>') -def get_newest_resource(identifier_dot_json: str) -> Response: +def get_newest_resource(identifier_dot_json: str) -> werkzeug.Response: return get_resource_or_mapping('resource', identifier_dot_json) +def make_ref(info: item_infos.AnyInfo) -> t.Dict[str, t.Any]: + ref: t.Dict[str, t.Any] = { + 'version': info.version, + 'identifier': info.identifier, + 'long_name': info.long_name + } + + if isinstance(info, item_infos.ResourceInfo): + ref['revision'] = info.revision + + return ref + @bp.route('/query') def query(): url = flask.request.args['url'] - mapping_refs = [i.as_query_result() for i in malcontent().query(url)] + mapping_refs = [make_ref(info) for info in get_malcontent().query(url)] + result = { '$schema': 'https://hydrilla.koszko.org/schemas/api_query_result-1.schema.json', 'mappings': mapping_refs, 'generated_by': generated_by } - return Response(json.dumps(result), mimetype='application/json') + return werkzeug.Response(json.dumps(result), mimetype='application/json') + +@bp.route('/list_all') +def list_all_packages(): + malcontent = get_malcontent() + + resource_refs = [make_ref(info) for info in malcontent.get_all_resources()] + mapping_refs = [make_ref(info) for info in malcontent.get_all_mappings()] + + result = { + '$schema': 'https://hydrilla.koszko.org/schemas/api_package_list-2.schema.json', + 'resources': resource_refs, + 'mappings': mapping_refs, + 'generated_by': generated_by + } + + return werkzeug.Response(json.dumps(result), mimetype='application/json') @bp.route('/--help') def mm_help(): @@ -569,9 +214,6 @@ default_config_path = Path('/etc/hydrilla/config.json') default_malcontent_dir = '/var/lib/hydrilla/malcontent' default_project_url = 'https://hydrillabugs.koszko.org/projects/hydrilla/wiki' -console_gettext = util.translation(here / 'locales').gettext -_ = console_gettext - @click.command(help=_('serve_hydrilla_packages_explain_wsgi_considerations')) @click.option('-m', '--malcontent-dir', type=click.Path(exists=True, file_okay=False), @@ -583,24 +225,25 @@ _ = console_gettext @click.option('-c', '--config', 'config_path', type=click.Path(exists=True, dir_okay=False, resolve_path=True), help=_('path_to_config_file_explain_default')) -@click.option('-l', '--language', type=click.STRING, - help=_('language_to_use_overrides_config')) @click.version_option(version=_version.version, prog_name='Hydrilla', message=_('%(prog)s_%(version)s_license'), help=_('version_printing')) -def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str], - port: Optional[int], config_path: Optional[str], - language: Optional[str]) -> None: +def start( + malcontent_dir: t.Optional[str], + hydrilla_project_url: t.Optional[str], + port: t.Optional[int], + config_path: t.Optional[str] +) -> None: """ Run a development Hydrilla server. This command is meant to be the entry point of hydrilla command exported by this package. """ - config_load_opts = {} if config_path is None \ - else {'config_path': [Path(config_path)]} - - hydrilla_config = config.load(**config_load_opts) + if config_path is None: + hydrilla_config = config.load() + else: + hydrilla_config = config.load(config_paths=[Path(config_path)]) if malcontent_dir is not None: hydrilla_config['malcontent_dir'] = str(Path(malcontent_dir).resolve()) @@ -611,14 +254,7 @@ def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str], if port is not None: hydrilla_config['port'] = port - if language is not None: - hydrilla_config['language'] = language - - lang = hydrilla_config.get('language') - _ = console_gettext if lang is None else \ - util.translation(here / 'locales', lang).gettext - - for opt in ('malcontent_dir', 'hydrilla_project_url', 'port', 'language'): + for opt in ('malcontent_dir', 'hydrilla_project_url', 'port'): if opt not in hydrilla_config: raise ValueError(_('config_option_{}_not_supplied').format(opt)) @@ -632,7 +268,7 @@ def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str], @click.version_option(version=_version.version, prog_name='Hydrilla', message=_('%(prog)s_%(version)s_license'), help=_('version_printing')) -def start_wsgi() -> None: +def start_wsgi() -> flask.Flask: """ Create application object for use in WSGI deployment. diff --git a/src/hydrilla/server/templates/base.html b/src/hydrilla/server/templates/base.html index 34cb214..7d8c3a6 100644 --- a/src/hydrilla/server/templates/base.html +++ b/src/hydrilla/server/templates/base.html @@ -19,8 +19,9 @@ License for more details. I, Wojtek Kosior, thereby promise not to sue for violation of this -file's license. Although I request that you do not make use this code -in a proprietary program, I am not going to enforce this in court. +file's license. Although I request that you do not make use of this +code in a proprietary program, I am not going to enforce this in +court. #} {% macro link_for(endpoint, text) -%} diff --git a/src/hydrilla/server/templates/index.html b/src/hydrilla/server/templates/index.html index 3063239..b3a1325 100644 --- a/src/hydrilla/server/templates/index.html +++ b/src/hydrilla/server/templates/index.html @@ -19,8 +19,9 @@ License for more details. I, Wojtek Kosior, thereby promise not to sue for violation of this -file's license. Although I request that you do not make use this code -in a proprietary program, I am not going to enforce this in court. +file's license. Although I request that you do not make use of this +code in a proprietary program, I am not going to enforce this in +court. #} {% extends 'base.html' %} |