From a36677eb70b92cf64fccb16075b7fec55660157f Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Wed, 17 Aug 2022 21:33:05 +0200 Subject: bring Hydrilla server part back to a usable state --- README.md | 8 +- src/hydrilla/item_infos.py | 265 ++++++++++---------------------- src/hydrilla/json_instances.py | 4 +- src/hydrilla/schemas/2.x | 2 +- src/hydrilla/server/config.py | 27 ++-- src/hydrilla/server/malcontent.py | 312 ++++++++++++++++++++++++++++++++++++++ src/hydrilla/server/serve.py | 304 +++++++++---------------------------- 7 files changed, 483 insertions(+), 439 deletions(-) create mode 100644 src/hydrilla/server/malcontent.py diff --git a/README.md b/README.md index de97673..12e2eff 100644 --- a/README.md +++ b/README.md @@ -8,10 +8,10 @@ user scripts repository server, Hydrilla. Haketilo is now being rewritten as an HTTP proxy. This repository contains combined sources of Hydrilla and Haketilo which are going to be distributed together. -This source tree is a work in progress and is thus a complete mess. You can see -some unused or inadequate source files (and even UML diagrams) flying around. -The Hydrilla part is currently not functional. You can play with the proxy part -if you really want (for brave people only 😉). +This source tree is a work in progress. The server part has similar functionality +as in earlier Hydrilla releases and should be mostly usable at this point. The +proxy part is still being developed, but you can play with it nevertheless if +you're brave 😉 You're going to need at least Python 3.7 plus the following Python libraries * mitmproxy 8 (sadly, not in Debian yet) diff --git a/src/hydrilla/item_infos.py b/src/hydrilla/item_infos.py index a26e57a..a01fe3a 100644 --- a/src/hydrilla/item_infos.py +++ b/src/hydrilla/item_infos.py @@ -51,8 +51,6 @@ from .url_patterns import parse_pattern, ParsedUrl, ParsedPattern from .exceptions import HaketiloException from .translations import smart_gettext as _ -VerTuple = t.Tuple[int, ...] 
- @dc.dataclass(frozen=True, unsafe_hash=True) class ItemSpecifier: """....""" @@ -162,34 +160,15 @@ class ItemInfoBase(ABC, ItemIdentity, Categorizable): required_mappings: tuple[ItemSpecifier, ...] = dc.field(hash=False, compare=False) generated_by: t.Optional[GeneratedBy] = dc.field(hash=False, compare=False) - # def path_relative_to_type(self) -> str: - # """ - # Get a relative path to this item's JSON definition with respect to - # directory containing items of this type. - # """ - # return f'{self.identifier}/{versions.version_string(self.version)}' - - # def path(self) -> str: - # """ - # Get a relative path to this item's JSON definition with respect to - # malcontent directory containing loadable items. - # """ - # return f'{self.type_name}/{self.path_relative_to_type()}' - - # @property - # def identity(self): - # """....""" - # return ItemIdentity( - # repository = self.repository, - # version = self.version, - # identifier = self.identifier - # ) - @property - def versioned_identifier(self): + def versioned_identifier(self) -> str: """....""" return f'{self.identifier}-{versions.version_string(self.version)}' + @property + def files(self) -> tuple[FileSpecifier, ...]: + return self.source_copyright + @staticmethod def _get_base_init_kwargs( item_obj: t.Mapping[str, t.Any], @@ -242,9 +221,13 @@ class ResourceInfo(ItemInfoBase): scripts: tuple[FileSpecifier, ...] 
= dc.field(hash=False, compare=False) @property - def versioned_identifier(self): + def versioned_identifier(self) -> str: """....""" - return f'{super().versioned_identifier()}-{self.revision}' + return f'{super().versioned_identifier}-{self.revision}' + + @property + def files(self) -> tuple[FileSpecifier, ...]: + return tuple((*self.source_copyright, *self.scripts)) @staticmethod def make( @@ -291,19 +274,6 @@ class ResourceInfo(ItemInfoBase): repo_iteration ) - # def __lt__(self, other: 'ResourceInfo') -> bool: - # """....""" - # return ( - # self.identifier, - # self.version, - # self.revision, - # self.repository - # ) < ( - # other.identifier, - # other.version, - # other.revision, - # other.repository - # ) def make_payloads(payloads_obj: t.Mapping[str, t.Any]) \ -> t.Mapping[ParsedPattern, ItemSpecifier]: @@ -400,151 +370,68 @@ def _load_item_info( ) -# CategorizedType = t.TypeVar( -# 'CategorizedType', -# bound=Categorizable -# ) - -# CategorizedUpdater = t.Callable[ -# [t.Optional[CategorizedType]], -# t.Optional[CategorizedType] -# ] - -# CategoryKeyType = t.TypeVar('CategoryKeyType', bound=t.Hashable) - -# @dc.dataclass(frozen=True) -# class CategorizedItemInfo(Categorizable, t.Generic[CategorizedType, CategoryKeyType]): -# """....""" -# SelfType = t.TypeVar( -# 'SelfType', -# bound = 'CategorizedItemInfo[CategorizedType, CategoryKeyType]' -# ) - -# uuid: t.Optional[str] = None -# identifier: str = '' -# items: Map[CategoryKeyType, CategorizedType] = Map() -# _initialized: bool = False - -# def _update( -# self: 'SelfType', -# key: CategoryKeyType, -# updater: CategorizedUpdater -# ) -> 'SelfType': -# """...... 
Perform sanity checks for uuid.""" -# uuid = self.uuid - -# items = self.items.mutate() - -# updated = updater(items.get(key)) -# if updated is None: -# items.pop(key, None) - -# identifier = self.identifier -# else: -# items[key] = updated - -# identifier = updated.identifier -# if self._initialized: -# assert identifier == self.identifier - -# if uuid is not None: -# if updated.uuid is not None and uuid != updated.uuid: -# raise HaketiloException(_('uuid_mismatch_{identifier}') -# .format(identifier=identifier)) -# else: -# uuid = updated.uuid - -# return dc.replace( -# self, -# identifier = identifier, -# uuid = uuid, -# items = items.finish(), -# _initialized = self._initialized or updated is not None -# ) - -# def is_empty(self) -> bool: -# """....""" -# return len(self.items) == 0 - - -# VersionedType = t.TypeVar('VersionedType', ResourceInfo, MappingInfo) - -# class VersionedItemInfo( -# CategorizedItemInfo[VersionedType, VerTuple], -# t.Generic[VersionedType] -# ): -# """Stores data of multiple versions of given resource/mapping.""" -# SelfType = t.TypeVar('SelfType', bound='VersionedItemInfo[VersionedType]') - -# def register(self: 'SelfType', item_info: VersionedType) -> 'SelfType': -# """ -# Make item info queryable by version. Perform sanity checks for uuid. -# """ -# return self._update(item_info.version, lambda old_info: item_info) - -# def unregister(self: 'SelfType', version: VerTuple) -> 'SelfType': -# """....""" -# return self._update(version, lambda old_info: None) - -# def newest_version(self) -> VerTuple: -# """....""" -# assert not self.is_empty() - -# return max(self.items.keys()) - -# def get_newest(self) -> VersionedType: -# """Find and return info of the newest version of item.""" -# newest = self.items[self.newest_version()] -# assert newest is not None -# return newest - -# def get_by_ver(self, ver: t.Iterable[int]) -> t.Optional[VersionedType]: -# """ -# Find and return info of the specified version of the item (or None if -# absent). 
-# """ -# return self.items.get(tuple(ver)) - -# def get_all(self) -> t.Iterator[VersionedType]: -# """Generate item info for all its versions, from oldest to newest.""" -# for version in sorted(self.items.keys()): -# yield self.items[version] - - -# MultiRepoType = t.TypeVar('MultiRepoType', ResourceInfo, MappingInfo) -# MultiRepoVersioned = VersionedItemInfo[MultiRepoType] - -# class MultiRepoItemInfo( -# CategorizedItemInfo[MultiRepoVersioned, str], -# t.Generic[MultiRepoType] -# ): -# SelfType = t.TypeVar('SelfType', bound='MultiRepoItemInfo[MultiRepoType]') - -# def register(self: 'SelfType', item_info: MultiRepoType) -> 'SelfType': -# """ -# Make item info queryable by version. Perform sanity checks for uuid. -# """ -# def updater(old_item: t.Optional[MultiRepoVersioned]) \ -# -> MultiRepoVersioned: -# """....""" -# if old_item is None: -# old_item = VersionedItemInfo() - -# return old_item.register(item_info) - -# return self._update(item_info.repository, updater) - -# def unregister(self: 'SelfType', version: VerTuple, repository: str) \ -# -> 'SelfType': -# """....""" -# def updater(old_item: t.Optional[MultiRepoVersioned]) -> \ -# t.Optional[MultiRepoVersioned]: -# """....""" -# if old_item is None: -# return None - -# new_item = old_item.unregister(version) - -# return None if new_item.is_empty() else new_item - -# return self._update(repository, updater) +CategorizedType = t.TypeVar( + 'CategorizedType', + bound=Categorizable +) + +CategorizedUpdater = t.Callable[ + [t.Optional[CategorizedType]], + t.Optional[CategorizedType] +] + +CategoryKeyType = t.TypeVar('CategoryKeyType', bound=t.Hashable) + +@dc.dataclass(frozen=True) +class CategorizedItemInfo(Categorizable, t.Generic[CategorizedType, CategoryKeyType]): + """....""" + SelfType = t.TypeVar( + 'SelfType', + bound = 'CategorizedItemInfo[CategorizedType, CategoryKeyType]' + ) + + uuid: t.Optional[str] = None + identifier: str = '' + items: Map[CategoryKeyType, CategorizedType] = Map() + 
_initialized: bool = False + + def _update( + self: 'SelfType', + key: CategoryKeyType, + updater: CategorizedUpdater + ) -> 'SelfType': + """...... Perform sanity checks for uuid.""" + uuid = self.uuid + + items = self.items.mutate() + + updated = updater(items.get(key)) + if updated is None: + items.pop(key, None) + + identifier = self.identifier + else: + items[key] = updated + + identifier = updated.identifier + if self._initialized: + assert identifier == self.identifier + + if uuid is not None: + if updated.uuid is not None and uuid != updated.uuid: + raise HaketiloException(_('uuid_mismatch_{identifier}') + .format(identifier=identifier)) + else: + uuid = updated.uuid + + return dc.replace( + self, + identifier = identifier, + uuid = uuid, + items = items.finish(), + _initialized = self._initialized or updated is not None + ) + + def is_empty(self) -> bool: + """....""" + return len(self.items) == 0 diff --git a/src/hydrilla/json_instances.py b/src/hydrilla/json_instances.py index fc8f975..be8dbc6 100644 --- a/src/hydrilla/json_instances.py +++ b/src/hydrilla/json_instances.py @@ -179,8 +179,8 @@ def read_instance(instance_or_path: InstanceOrPathOrIO) -> object: try: return parse_instance(text) except: - raise HaketiloException(_('text_in_{}_not_valid_json')\ - .format(instance_or_path)) + fmt = _('err.util.text_in_{}_not_valid_json') + raise HaketiloException(fmt.format(instance_or_path)) def get_schema_version(instance: object) -> tuple[int, ...]: """ diff --git a/src/hydrilla/schemas/2.x b/src/hydrilla/schemas/2.x index 7206db4..d94ef45 160000 --- a/src/hydrilla/schemas/2.x +++ b/src/hydrilla/schemas/2.x @@ -1 +1 @@ -Subproject commit 7206db45f277c10c34d1b7ed9bd35343ac742d30 +Subproject commit d94ef4544faac662f49bed41700c9010804b2450 diff --git a/src/hydrilla/server/config.py b/src/hydrilla/server/config.py index c7c5657..7109eb1 100644 --- a/src/hydrilla/server/config.py +++ b/src/hydrilla/server/config.py @@ -28,20 +28,20 @@ from __future__ import 
annotations import json +import typing as t from pathlib import Path import jsonschema # type: ignore +from ..translations import smart_gettext as _ +from ..exceptions import HaketiloException from .. import json_instances config_schema = { '$schema': 'http://json-schema.org/draft-07/schema#', 'type': 'object', 'properties': { - 'malcontent_dir': { - 'type': 'string' - }, 'malcontent_dir': { 'type': 'string' }, @@ -67,6 +67,9 @@ config_schema = { }, 'werror': { 'type': 'boolean' + }, + 'verify_files': { + 'type': 'boolean' } } } @@ -75,7 +78,7 @@ here = Path(__file__).resolve().parent def load(config_paths: list[Path]=[here / 'config.json'], can_fail: list[bool]=[]) -> dict: - config = {} + config: dict[str, t.Any] = {} bools_missing = max(0, len(config_paths) - len(can_fail)) config_paths = [*config_paths] @@ -92,17 +95,13 @@ def load(config_paths: list[Path]=[here / 'config.json'], continue raise e from None - new_config = json_instances.strip_json_comments(json_text) + new_config = json.loads(json_instances.strip_json_comments(json_text)) jsonschema.validate(new_config, config_schema) config.update(new_config) - if 'malcontent_dir' in config: - malcontent_dir = Path(config['malcontent_dir']) - if not malcontent_dir.is_absolute(): - malcontent_dir = path.parent / malcontent_dir - - config['malcontent_dir'] = str(malcontent_dir.resolve()) + if 'malcontent_dir' in new_config: + malcontent_path_relative_to = path.parent for key, failure_ok in [('try_configs', True), ('use_configs', False)]: paths = new_config.get(key, []) @@ -110,6 +109,12 @@ def load(config_paths: list[Path]=[here / 'config.json'], config_paths.extend(paths) can_fail.extend([failure_ok] * len(paths)) + + if 'malcontent_dir' in config: + malcontent_dir_str = config['malcontent_dir'] + malcontent_dir_path = malcontent_path_relative_to / malcontent_dir_str + config['malcontent_dir'] = str(malcontent_dir_path) + for key in ('try_configs', 'use_configs'): if key in config: config.pop(key) diff --git 
a/src/hydrilla/server/malcontent.py b/src/hydrilla/server/malcontent.py new file mode 100644 index 0000000..49c0fb4 --- /dev/null +++ b/src/hydrilla/server/malcontent.py @@ -0,0 +1,312 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + +# Processing of repository packages. +# +# This file is part of Hydrilla +# +# Copyright (C) 2021, 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +# Enable using with Python 3.7. +from __future__ import annotations + +import logging +import dataclasses as dc +import typing as t + +from pathlib import Path + +from ..translations import smart_gettext as _ +from ..exceptions import HaketiloException +from .. import versions +from .. import item_infos +from .. 
import pattern_tree + +VersionedType = t.TypeVar( + 'VersionedType', + item_infos.ResourceInfo, + item_infos.MappingInfo +) + +class VersionedItemInfo( + item_infos.CategorizedItemInfo[VersionedType, versions.VerTuple], + t.Generic[VersionedType] +): + """Stores data of multiple versions of given resource/mapping.""" + SelfType = t.TypeVar('SelfType', bound='VersionedItemInfo[VersionedType]') + + def register(self: 'SelfType', item_info: VersionedType) -> 'SelfType': + """ + Make item info queryable by version. Perform sanity checks for uuid. + """ + return self._update(item_info.version, lambda old_info: item_info) + + def unregister(self: 'SelfType', version: versions.VerTuple) -> 'SelfType': + """....""" + return self._update(version, lambda old_info: None) + + @property + def newest_version(self) -> versions.VerTuple: + """....""" + assert not self.is_empty() + + return max(self.items.keys()) + + @property + def newest_info(self) -> VersionedType: + """Find and return info of the newest version of item.""" + return self.items[self.newest_version] + + def get_by_ver(self, ver: t.Sequence[int]) -> t.Optional[VersionedType]: + """ + Find and return info of the specified version of the item (or None if + absent). + """ + return self.items.get(versions.normalize_version(ver)) + + def get_all(self) -> t.Iterable[VersionedType]: + """Generate item info for all its versions, from oldest to newest.""" + return [self.items[version] for version in sorted(self.items.keys())] + + +MappingTree = pattern_tree.PatternTree[item_infos.MappingInfo] + +ResourceInfos = dict[str, VersionedItemInfo[item_infos.ResourceInfo]] +MappingInfos = dict[str, VersionedItemInfo[item_infos.MappingInfo]] + +class Malcontent: + """ + Represent a directory with files that can be loaded and served by Hydrilla. 
+ """ + def __init__( + self, + malcontent_dir_path: Path, + werror: bool, + verify_files: bool + ): + """ + When an instance of Malcontent is constructed, it searches + malcontent_dir_path for serveable site-modifying packages and loads + them into its data structures. + """ + self.werror: bool = werror + self.verify_files: bool = verify_files + + self.resource_infos: ResourceInfos = {} + self.mapping_infos: MappingInfos = {} + + self.mapping_tree: MappingTree = MappingTree() + + self.malcontent_dir_path = malcontent_dir_path + + if not self.malcontent_dir_path.is_dir(): + fmt = _('err.server.malcontent_path_not_dir_{}') + raise HaketiloException(fmt.format(malcontent_dir_path)) + + types: t.Iterable[t.Type[item_infos.AnyInfo]] = ( + item_infos.MappingInfo, + item_infos.ResourceInfo + ) + for info_type in types: + type_path = self.malcontent_dir_path / info_type.type_name + if not type_path.is_dir(): + continue + + for subpath in type_path.iterdir(): + if not subpath.is_dir(): + continue + + for ver_file in subpath.iterdir(): + try: + self._load_item(info_type, ver_file) + except: + if self.werror: + raise + + fmt = _('err.server.couldnt_load_item_from_{}') + logging.error(fmt.format(ver_file), exc_info=True) + + self._report_missing() + self._finalize() + + def _check_package_files(self, info: item_infos.AnyInfo) -> None: + by_sha256_dir = self.malcontent_dir_path / 'file' / 'sha256' + + for file_spec in info.files: + if (by_sha256_dir / file_spec.sha256).is_file(): + continue + + fmt = _('err.server.no_file_{required_by}_{ver}_{file}_{sha256}') + msg = fmt.format( + required_by = info.identifier, + ver = versions.version_string(info.version), + file = file_spec.name, + sha256 = file_spec.sha256 + ) + if (self.werror): + raise HaketiloException(msg) + else: + logging.error(msg) + + @staticmethod + def _register_info( + infos: dict[str, VersionedItemInfo[VersionedType]], + identifier: str, + item_info: VersionedType + ) -> None: + versioned_info = 
infos.get(identifier, VersionedItemInfo()) + infos[identifier] = versioned_info.register(item_info) + + def _load_item( + self, + info_type: t.Type[item_infos.AnyInfo], + ver_file: Path + ) -> None: + """ + Reads, validates and autocompletes serveable mapping/resource + definition, then registers information from it in data structures. + """ + version = versions.parse(ver_file.name) + identifier = ver_file.parent.name + + item_info = info_type.load(ver_file) + + if item_info.identifier != identifier: + fmt = _('err.server.item_{item}_in_file_{file}') + msg = fmt.format(item=item_info.identifier, file=ver_file) + raise HaketiloException(msg) + + if item_info.version != version: + ver_str = versions.version_string(item_info.version) + fmt = _('item_version_{ver}_in_file_{file}') + msg = fmt.format(ver=ver_str, file=ver_file) + raise HaketiloException(msg) + + if self.verify_files: + self._check_package_files(item_info) + + if isinstance(item_info, item_infos.ResourceInfo): + self._register_info(self.resource_infos, identifier, item_info) + else: + self._register_info(self.mapping_infos, identifier, item_info) + + @staticmethod + def _all_infos(infos: dict[str, VersionedItemInfo[VersionedType]]) \ + -> t.Iterator[VersionedType]: + for versioned_info in infos.values(): + for item_info in versioned_info.get_all(): + yield item_info + + def _report_missing(self) -> None: + """ + Use logger to print information about items that are referenced but + were not loaded. 
+ """ + def report_missing_dependency( + info: item_infos.ResourceInfo, + dep: str + ) -> None: + msg = _('err.server.no_dep_{resource}_{ver}_{dep}')\ + .format(dep=dep, resource=info.identifier, + ver=versions.version_string(info.version)) + logging.error(msg) + + for resource_info in self._all_infos(self.resource_infos): + for dep_specifier in resource_info.dependencies: + identifier = dep_specifier.identifier + if identifier not in self.resource_infos: + report_missing_dependency(resource_info, identifier) + + def report_missing_payload( + info: item_infos.MappingInfo, + payload: str + ) -> None: + msg = _('err.server.no_payload_{mapping}_{ver}_{payload}')\ + .format(mapping=info.identifier, payload=payload, + ver=versions.version_string(info.version)) + logging.error(msg) + + for mapping_info in self._all_infos(self.mapping_infos): + for resource_specifier in mapping_info.payloads.values(): + identifier = resource_specifier.identifier + if identifier not in self.resource_infos: + report_missing_payload(mapping_info, identifier) + + def report_missing_mapping( + info: item_infos.AnyInfo, + required: str + ) -> None: + msg = _('err.server.no_mapping_{required_by}_{ver}_{required}')\ + .format(required_by=info.identifier, required=required, + ver=versions.version_string(info.version)) + logging.error(msg) + + infos: t.Iterable[item_infos.AnyInfo] = ( + *self._all_infos(self.mapping_infos), + *self._all_infos(self.resource_infos) + ) + for item_info in infos: + for mapping_specifier in item_info.required_mappings: + identifier = mapping_specifier.identifier + if identifier not in self.mapping_infos: + report_missing_mapping(item_info, identifier) + + def _finalize(self): + """ + Initialize structures needed to serve queries. Called once after all + data gets loaded. 
+ """ + for info in self._all_infos(self.mapping_infos): + for pattern in info.payloads: + try: + self.mapping_tree = \ + self.mapping_tree.register(pattern, info) + except: + if self.werror: + raise + msg = _('server.err.couldnt_register_{mapping}_{ver}_{pattern}')\ + .format(mapping=info.identifier, pattern=pattern, + ver=versions.version_string(info.version)) + logging.error(msg) + + def query(self, url: str) -> t.Sequence[item_infos.MappingInfo]: + """ + Return a list of registered mappings that match url. + + If multiple versions of a mapping are applicable, only the most recent + is included in the result. + """ + collected: dict[str, item_infos.MappingInfo] = {} + for result_set in self.mapping_tree.search(url): + for wrapped_mapping_info in result_set: + info = wrapped_mapping_info.item + previous = collected.get(info.identifier) + if previous and previous.version > info.version: + continue + + collected[info.identifier] = info + + return list(collected.values()) + + def get_all_resources(self) -> t.Sequence[item_infos.ResourceInfo]: + return tuple(self._all_infos(self.resource_infos)) + + def get_all_mappings(self) -> t.Sequence[item_infos.MappingInfo]: + return tuple(self._all_infos(self.mapping_infos)) diff --git a/src/hydrilla/server/serve.py b/src/hydrilla/server/serve.py index 8f0d557..823437a 100644 --- a/src/hydrilla/server/serve.py +++ b/src/hydrilla/server/serve.py @@ -29,221 +29,29 @@ from __future__ import annotations import re import os -import pathlib import json -import logging +import typing as t from pathlib import Path -from hashlib import sha256 -from abc import ABC, abstractmethod -from typing import Optional, Union, Iterable, TypeVar, Generic import click import flask +import werkzeug -from werkzeug import Response - -from .. import _version, versions, json_instances -from ..item_infos import ResourceInfo, MappingInfo, VersionedItemInfo +from ..exceptions import HaketiloException +from .. 
import _version from ..translations import smart_gettext as _, translation as make_translation -#from ..url_patterns import PatternTree +from .. import versions +from .. import item_infos from . import config +from . import malcontent -here = Path(__file__).resolve().parent generated_by = { 'name': 'hydrilla.server', 'version': _version.version } - # def as_query_result(self) -> dict[str, Union[str, list[int]]]: - # """ - # Produce a json.dump()-able object describing this mapping as one of a - # collection of query results. - # """ - # return { - # 'version': self.version, - # 'identifier': self.identifier, - # 'long_name': self.long_name - # } - -class Malcontent: - """ - Represent a directory with files that can be loaded and served by Hydrilla. - """ - def __init__(self, malcontent_dir_path: Path): - """ - When an instance of Malcontent is constructed, it searches - malcontent_dir_path for serveable site-modifying packages and loads - them into its data structures. - """ - self.resource_infos: dict[str, VersionedItemInfo[ResourceInfo]] = {} - self.mapping_infos: dict[str, VersionedItemInfo[MappingInfo]] = {} - - self.pattern_tree: PatternTree[MappingInfo] = PatternTree() - - self.malcontent_dir_path = malcontent_dir_path - - if not self.malcontent_dir_path.is_dir(): - raise ValueError(_('malcontent_dir_path_not_dir_{}') - .format(malcontent_dir_path)) - - for item_type in ('mapping', 'resource'): - type_path = self.malcontent_dir_path / item_type - if not type_path.is_dir(): - continue - - for subpath in type_path.iterdir(): - if not subpath.is_dir(): - continue - - for ver_file in subpath.iterdir(): - try: - self._load_item(item_type, ver_file) - except Exception as e: - if flask.current_app._hydrilla_werror: - raise e from None - - msg = _('couldnt_load_item_from_{}').format(ver_file) - logging.error(msg, exc_info=True) - - self._report_missing() - self._finalize() - - @staticmethod - def _register_info(infos: dict[str, VersionedItemInfo[VersionedType]], - 
identifier: str, item_info: VersionedType) -> None: - """ - ........... - """ - infos.setdefault(identifier, VersionedItemInfo())\ - .register(item_info) - - def _load_item(self, item_type: str, ver_file: Path) -> None: - """ - Reads, validates and autocompletes serveable mapping/resource - definition, then registers information from it in data structures. - """ - version = versions.parse_version(ver_file.name) - identifier = ver_file.parent.name - - item_json, major = util.load_instance_from_file(ver_file) - - util.validator_for(f'api_{item_type}_description-{major}.schema.json')\ - .validate(item_json) - - # Assertion needed for mypy. If validation passed, this should not fail. - assert major is not None - - item_info: ItemInfo = ResourceInfo(item_json, major) \ - if item_type == 'resource' else MappingInfo(item_json, major) - - if item_info.identifier != identifier: - msg = _('item_{item}_in_file_{file}')\ - .format({'item': item_info.identifier, 'file': ver_file}) - raise ValueError(msg) - - if item_info.version != version: - ver_str = util.version_string(item_info.version) - msg = _('item_version_{ver}_in_file_{file}')\ - .format({'ver': ver_str, 'file': ver_file}) - raise ValueError(msg) - - if isinstance(item_info, ResourceInfo): - self._register_info(self.resource_infos, identifier, item_info) - elif isinstance(item_info, MappingInfo): - self._register_info(self.mapping_infos, identifier, item_info) - - @staticmethod - def _all_infos(infos: dict[str, VersionedItemInfo[VersionedType]]) \ - -> Iterable[VersionedType]: - """ - ........... - """ - for versioned_info in infos.values(): - for item_info in versioned_info.by_version.values(): - yield item_info - - def _report_missing(self) -> None: - """ - Use logger to print information about items that are referenced but - were not loaded. 
- """ - def report_missing_dependency(info: ResourceInfo, dep: str) -> None: - msg = _('no_dep_{resource}_{ver}_{dep}')\ - .format(dep=dep, resource=info.identifier, - ver=util.version_string(info.version)) - logging.error(msg) - - for resource_info in self._all_infos(self.resource_infos): - for dep in resource_info.dependencies: - if dep not in self.resource_infos: - report_missing_dependency(resource_info, dep) - - def report_missing_payload(info: MappingInfo, payload: str) -> None: - msg = _('no_payload_{mapping}_{ver}_{payload}')\ - .format(mapping=info.identifier, payload=payload, - ver=util.version_string(info.version)) - logging.error(msg) - - for mapping_info in self._all_infos(self.mapping_infos): - for payload in mapping_info.payloads.values(): - if payload not in self.resource_infos: - report_missing_payload(mapping_info, payload) - - def report_missing_mapping(info: ItemInfo, - required_mapping: str) -> None: - msg = _('no_mapping_{required_by}_{ver}_{required}')\ - .format(required_by=info.identifier, required=required_mapping, - ver=util.version_string(info.version)) - logging.error(msg) - - for item_info in (*self._all_infos(self.mapping_infos), - *self._all_infos(self.resource_infos)): - for required in item_info.required_mappings: - if required not in self.mapping_infos: - report_missing_mapping(item_info, required) - - def _finalize(self): - """ - Initialize structures needed to serve queries. Called once after all - data gets loaded. 
- """ - for versioned_info in (*self.mapping_infos.values(), - *self.resource_infos.values()): - versioned_info.known_versions.sort() - - for info in self._all_infos(self.mapping_infos): - for pattern in info.payloads: - try: - self.pattern_tree = \ - self.pattern_tree.register(pattern, info) - except Exception as e: - if flask.current_app._hydrilla_werror: - raise e from None - msg = _('couldnt_register_{mapping}_{ver}_{pattern}')\ - .format(mapping=info.identifier, pattern=pattern, - ver=util.version_string(info.version)) - logging.error(msg) - - def query(self, url: str) -> list[MappingInfo]: - """ - Return a list of registered mappings that match url. - - If multiple versions of a mapping are applicable, only the most recent - is included in the result. - """ - collected: dict[str, MappingInfo] = {} - for result_set in self.pattern_tree.search(url): - for wrapped_mapping_info in result_set: - info = wrapped_mapping_info.item - previous = collected.get(info.identifier) - if previous and previous.version > info.version: - continue - - collected[info.identifier] = info - - return list(collected.values()) bp = flask.Blueprint('bp', __package__) @@ -267,41 +75,36 @@ class HydrillaApp(flask.Flask): ] } - self._hydrilla_project_url = hydrilla_config['hydrilla_project_url'] self._hydrilla_port = hydrilla_config['port'] self._hydrilla_werror = hydrilla_config.get('werror', False) + verify_files = hydrilla_config.get('verify_files', True) if 'hydrilla_parent' in hydrilla_config: - raise ValueError("Option 'hydrilla_parent' is not implemented.") + raise HaketiloException(_('err.server.opt_hydrilla_parent_not_implemented')) - malcontent_dir = Path(hydrilla_config['malcontent_dir']).resolve() - with self.app_context(): - self._hydrilla_malcontent = Malcontent(malcontent_dir) + malcontent_dir_path = Path(hydrilla_config['malcontent_dir']).resolve() + self._hydrilla_malcontent = malcontent.Malcontent( + malcontent_dir_path = malcontent_dir_path, + werror = 
self._hydrilla_werror, + verify_files = verify_files + ) - self.register_blueprint(bp) + self.jinja_env.install_gettext_translations(make_translation()) - def create_jinja_environment(self, *args, **kwargs) \ - -> flask.templating.Environment: - """ - Flask's create_jinja_environment(), but tweaked to always include the - 'hydrilla_project_url' global variable and to install proper - translations. - """ - env = super().create_jinja_environment(*args, **kwargs) # type: ignore - env.install_gettext_translations(make_translation()) - env.globals['hydrilla_project_url'] = self._hydrilla_project_url + self.jinja_env.globals['hydrilla_project_url'] = \ + hydrilla_config['hydrilla_project_url'] - return env + self.register_blueprint(bp) def run(self, *args, **kwargs): """ - Flask's run(), but tweaked to use the port from hydrilla configuration - by default. + Flask's run() but tweaked to use the port from hydrilla configuration by + default. """ return super().run(*args, port=self._hydrilla_port, **kwargs) -def malcontent(): - return flask.current_app._hydrilla_malcontent +def get_malcontent() -> malcontent.Malcontent: + return t.cast(HydrillaApp, flask.current_app)._hydrilla_malcontent @bp.route('/') def index(): @@ -309,7 +112,8 @@ def index(): identifier_json_re = re.compile(r'^([-0-9a-z.]+)\.json$') -def get_resource_or_mapping(item_type: str, identifier: str) -> Response: +def get_resource_or_mapping(item_type: str, identifier: str) \ + -> werkzeug.Response: """ Strip '.json' from 'identifier', look the item up and send its JSON description. 
@@ -320,41 +124,73 @@ def get_resource_or_mapping(item_type: str, identifier: str) -> Response: identifier = match.group(1) + infos: t.Mapping[str, malcontent.VersionedItemInfo] if item_type == 'resource': - infos = malcontent().resource_infos + infos = get_malcontent().resource_infos else: - infos = malcontent().mapping_infos + infos = get_malcontent().mapping_infos versioned_info = infos.get(identifier) - info = versioned_info and versioned_info.get_by_ver() - if info is None: + if versioned_info is None: flask.abort(404) + info = versioned_info.newest_info + # no need for send_from_directory(); path is safe, constructed by us - file_path = malcontent().malcontent_dir_path / item_type / info.path() + info_path = f'{info.identifier}/{versions.version_string(info.version)}' + file_path = get_malcontent().malcontent_dir_path / item_type / info_path return flask.send_file(open(file_path, 'rb'), mimetype='application/json') @bp.route('/mapping/') -def get_newest_mapping(identifier_dot_json: str) -> Response: +def get_newest_mapping(identifier_dot_json: str) -> werkzeug.Response: return get_resource_or_mapping('mapping', identifier_dot_json) @bp.route('/resource/') -def get_newest_resource(identifier_dot_json: str) -> Response: +def get_newest_resource(identifier_dot_json: str) -> werkzeug.Response: return get_resource_or_mapping('resource', identifier_dot_json) +def make_ref(info: item_infos.AnyInfo) -> dict[str, t.Any]: + ref: dict[str, t.Any] = { + 'version': info.version, + 'identifier': info.identifier, + 'long_name': info.long_name + } + + if isinstance(info, item_infos.ResourceInfo): + ref['revision'] = info.revision + + return ref + @bp.route('/query') def query(): url = flask.request.args['url'] - mapping_refs = [i.as_query_result() for i in malcontent().query(url)] + mapping_refs = [make_ref(info) for info in get_malcontent().query(url)] + result = { '$schema': 'https://hydrilla.koszko.org/schemas/api_query_result-1.schema.json', 'mappings': mapping_refs, 
'generated_by': generated_by } - return Response(json.dumps(result), mimetype='application/json') + return werkzeug.Response(json.dumps(result), mimetype='application/json') + +@bp.route('/list_all') +def list_all_packages(): + malcontent = get_malcontent() + + resource_refs = [make_ref(info) for info in malcontent.get_all_resources()] + mapping_refs = [make_ref(info) for info in malcontent.get_all_mappings()] + + result = { + '$schema': 'https://hydrilla.koszko.org/schemas/api_package_list-2.schema.json', + 'resources': resource_refs, + 'mappings': mapping_refs, + 'generated_by': generated_by + } + + return werkzeug.Response(json.dumps(result), mimetype='application/json') @bp.route('/--help') def mm_help(): @@ -383,8 +219,12 @@ default_project_url = 'https://hydrillabugs.koszko.org/projects/hydrilla/wiki' @click.version_option(version=_version.version, prog_name='Hydrilla', message=_('%(prog)s_%(version)s_license'), help=_('version_printing')) -def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str], - port: Optional[int], config_path: Optional[str]) -> None: +def start( + malcontent_dir: t.Optional[str], + hydrilla_project_url: t.Optional[str], + port: t.Optional[int], + config_path: t.Optional[str] +) -> None: """ Run a development Hydrilla server. -- cgit v1.2.3