From 52d12a4fa124daa1595529e3e7008276a7986d95 Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Mon, 13 Jun 2022 11:06:49 +0200 Subject: unfinished partial work --- src/hydrilla/server/serve.py | 406 ++++++++++--------------------------------- 1 file changed, 88 insertions(+), 318 deletions(-) (limited to 'src/hydrilla/server/serve.py') diff --git a/src/hydrilla/server/serve.py b/src/hydrilla/server/serve.py index 779f3d2..8f0d557 100644 --- a/src/hydrilla/server/serve.py +++ b/src/hydrilla/server/serve.py @@ -36,16 +36,18 @@ import logging from pathlib import Path from hashlib import sha256 from abc import ABC, abstractmethod -from typing import Optional, Union, Iterable +from typing import Optional, Union, Iterable, TypeVar, Generic import click import flask from werkzeug import Response -from .. import util +from .. import _version, versions, json_instances +from ..item_infos import ResourceInfo, MappingInfo, VersionedItemInfo +from ..translations import smart_gettext as _, translation as make_translation +#from ..url_patterns import PatternTree from . import config -from . import _version here = Path(__file__).resolve().parent @@ -54,243 +56,20 @@ generated_by = { 'version': _version.version } -class ItemInfo(ABC): - """Shortened data of a resource/mapping.""" - def __init__(self, item_obj: dict, major_schema_version: int): - """Initialize ItemInfo using item definition read from JSON.""" - self.version = util.normalize_version(item_obj['version']) - self.identifier = item_obj['identifier'] - self.uuid = item_obj.get('uuid') - self.long_name = item_obj['long_name'] - - self.required_mappings = [] - if major_schema_version >= 2: - self.required_mappings = [map_ref['identifier'] for map_ref in - item_obj.get('required_mappings', [])] - - def path(self) -> str: - """ - Get a relative path to this item's JSON definition with respect to - directory containing items of this type. - """ - return f'{self.identifier}/{util.version_string(self.version)}' - -class ResourceInfo(ItemInfo): - """Shortened data of a resource.""" - def __init__(self, resource_obj: dict, major_schema_version: int): - """Initialize ResourceInfo using resource definition read from JSON.""" - super().__init__(resource_obj, major_schema_version) - - dependencies = resource_obj.get('dependencies', []) - self.dependencies = [res_ref['identifier'] for res_ref in dependencies] - -class MappingInfo(ItemInfo): - """Shortened data of a mapping.""" - def __init__(self, mapping_obj: dict, major_schema_version: int): - """Initialize MappingInfo using mapping definition read from JSON.""" - super().__init__(mapping_obj, major_schema_version) - - self.payloads = {} - for pattern, res_ref in mapping_obj.get('payloads', {}).items(): - self.payloads[pattern] = res_ref['identifier'] - - def as_query_result(self) -> str: - """ - Produce a json.dump()-able object describing this mapping as one of a - collection of query results. - """ - return { - 'version': self.version, - 'identifier': self.identifier, - 'long_name': self.long_name - } - -class VersionedItemInfo: - """Stores data of multiple versions of given resource/mapping.""" - def __init__(self): - self.uuid = None - self.identifier = None - self.by_version = {} - self.known_versions = [] - - def register(self, item_info: ItemInfo) -> None: - """ - Make item info queryable by version. Perform sanity checks for uuid. - """ - if self.identifier is None: - self.identifier = item_info.identifier - - if self.uuid is None: - self.uuid = item_info.uuid - - if self.uuid is not None and self.uuid != item_info.uuid: - raise ValueError(f_('uuid_mismatch_{identifier}') - .format(identifier=self.identifier)) - - ver = item_info.version - ver_str = util.version_string(ver) - - if ver_str in self.by_version: - raise ValueError(f_('version_clash_{identifier}_{version}') - .format(identifier=self.identifier, - version=ver_str)) - - self.by_version[ver_str] = item_info - self.known_versions.append(ver) - - def get_by_ver(self, ver: Optional[list[int]]=None) -> Optional[ItemInfo]: - """ - Find and return info of the newest version of item. - - If ver is specified, instead find and return info of that version of the - item (or None if absent). - """ - ver = util.version_string(ver or self.known_versions[-1]) - - return self.by_version.get(ver) - - def get_all(self) -> list[ItemInfo]: - """ - Return a list of item info for all its versions, from oldest ot newest. - """ - return [self.by_version[util.version_string(ver)] - for ver in self.known_versions] - -class PatternTreeNode: - """ - "Pattern Tree" is how we refer to the data structure used for querying - Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal - is to make it possible for given URL to quickly retrieve all known patterns - that match it. - """ - def __init__(self): - self.wildcard_matches = [None, None, None] - self.literal_match = None - self.children = {} - - def search(self, segments): - """ - Yields all matches of this segments sequence against the tree that - starts at this node. Results are produces in order from greatest to - lowest pattern specificity. - """ - nodes = [self] - - for segment in segments: - next_node = nodes[-1].children.get(segment) - if next_node is None: - break - - nodes.append(next_node) - - nsegments = len(segments) - cond_literal = lambda: len(nodes) == nsegments - cond_wildcard = [ - lambda: len(nodes) + 1 == nsegments and segments[-1] != '*', - lambda: len(nodes) + 1 < nsegments, - lambda: len(nodes) + 1 != nsegments or segments[-1] != '***' - ] - - while nodes: - node = nodes.pop() - - for item, condition in [(node.literal_match, cond_literal), - *zip(node.wildcard_matches, cond_wildcard)]: - if item is not None and condition(): - yield item - - def add(self, segments, item_instantiator): - """ - Make item queryable through (this branch of) the Pattern Tree. If there - was not yet any item associated with the tree path designated by - segments, create a new one using item_instantiator() function. Return - all items matching this path (both the ones that existed and the ones - just created). - """ - node = self - segment = None - - for segment in segments: - wildcards = node.wildcard_matches - - child = node.children.get(segment) or PatternTreeNode() - node.children[segment] = child - node = child - - if node.literal_match is None: - node.literal_match = item_instantiator() - - if segment not in ('*', '**', '***'): - return [node.literal_match] - - if wildcards[len(segment) - 1] is None: - wildcards[len(segment) - 1] = item_instantiator() - - return [node.literal_match, wildcards[len(segment) - 1]] - -proto_regex = re.compile(r'^(?P\w+)://(?P.*)$') -user_re = r'[^/?#@]+@' # r'(?P[^/?#@]+)@' # discarded for now -query_re = r'\??[^#]*' # r'\??(?P[^#]*)' # discarded for now -domain_re = r'(?P[^/?#]+)' -path_re = r'(?P[^?#]*)' -http_regex = re.compile(f'{domain_re}{path_re}{query_re}.*') -ftp_regex = re.compile(f'(?:{user_re})?{domain_re}{path_re}.*') - -class UrlError(ValueError): - """Used to report a URL or URL pattern that is invalid or unsupported.""" - pass - -class DeconstructedUrl: - """Represents a deconstructed URL or URL pattern""" - def __init__(self, url): - self.url = url - - match = proto_regex.match(url) - if not match: - raise UrlError(f_('invalid_URL_{}').format(url)) - - self.proto = match.group('proto') - if self.proto not in ('http', 'https', 'ftp'): - raise UrlError(f_('disallowed_protocol_{}').format(proto)) - - if self.proto == 'ftp': - match = ftp_regex.match(match.group('rest')) - elif self.proto in ('http', 'https'): - match = http_regex.match(match.group('rest')) - - if not match: - raise UrlError(f_('invalid_URL_{}').format(url)) - - self.domain = match.group('domain').split('.') - self.domain.reverse() - self.path = [*filter(None, match.group('path').split('/'))] - -class PatternMapping: - """ - A mapping info, together with one of its patterns, as stored in Pattern - Tree. - """ - def __init__(self, pattern: str, mapping_info: MappingInfo): - self.pattern = pattern - self.mapping_info = mapping_info - - def register(self, pattern_tree: dict): - """ - Make self queryable through the Pattern Tree passed in the argument. - """ - deco = DeconstructedUrl(self.pattern) - - domain_tree = pattern_tree.get(deco.proto) or PatternTreeNode() - pattern_tree[deco.proto] = domain_tree - - for path_tree in domain_tree.add(deco.domain, PatternTreeNode): - for match_list in path_tree.add(deco.path, list): - match_list.append(self) + # def as_query_result(self) -> dict[str, Union[str, list[int]]]: + # """ + # Produce a json.dump()-able object describing this mapping as one of a + # collection of query results. + # """ + # return { + # 'version': self.version, + # 'identifier': self.identifier, + # 'long_name': self.long_name + # } class Malcontent: """ - Instance of this class represents a directory with files that can be loaded - and served by Hydrilla. + Represent a directory with files that can be loaded and served by Hydrilla. """ def __init__(self, malcontent_dir_path: Path): """ @@ -298,13 +77,15 @@ class Malcontent: malcontent_dir_path for serveable site-modifying packages and loads them into its data structures. """ - self.infos = {'resource': {}, 'mapping': {}} - self.pattern_tree = {} + self.resource_infos: dict[str, VersionedItemInfo[ResourceInfo]] = {} + self.mapping_infos: dict[str, VersionedItemInfo[MappingInfo]] = {} + + self.pattern_tree: PatternTree[MappingInfo] = PatternTree() self.malcontent_dir_path = malcontent_dir_path if not self.malcontent_dir_path.is_dir(): - raise ValueError(f_('malcontent_dir_path_not_dir_{}') + raise ValueError(_('malcontent_dir_path_not_dir_{}') .format(malcontent_dir_path)) for item_type in ('mapping', 'resource'): @@ -323,18 +104,27 @@ class Malcontent: if flask.current_app._hydrilla_werror: raise e from None - msg = f_('couldnt_load_item_from_{}').format(ver_file) + msg = _('couldnt_load_item_from_{}').format(ver_file) logging.error(msg, exc_info=True) self._report_missing() self._finalize() + @staticmethod + def _register_info(infos: dict[str, VersionedItemInfo[VersionedType]], + identifier: str, item_info: VersionedType) -> None: + """ + ........... + """ + infos.setdefault(identifier, VersionedItemInfo())\ + .register(item_info) + def _load_item(self, item_type: str, ver_file: Path) -> None: """ Reads, validates and autocompletes serveable mapping/resource definition, then registers information from it in data structures. """ - version = util.parse_version(ver_file.name) + version = versions.parse_version(ver_file.name) identifier = ver_file.parent.name item_json, major = util.load_instance_from_file(ver_file) @@ -342,32 +132,35 @@ class Malcontent: util.validator_for(f'api_{item_type}_description-{major}.schema.json')\ .validate(item_json) - if item_type == 'resource': - item_info = ResourceInfo(item_json, major) - else: - item_info = MappingInfo(item_json, major) + # Assertion needed for mypy. If validation passed, this should not fail. + assert major is not None + + item_info: ItemInfo = ResourceInfo(item_json, major) \ + if item_type == 'resource' else MappingInfo(item_json, major) if item_info.identifier != identifier: - msg = f_('item_{item}_in_file_{file}')\ + msg = _('item_{item}_in_file_{file}')\ .format({'item': item_info.identifier, 'file': ver_file}) raise ValueError(msg) if item_info.version != version: ver_str = util.version_string(item_info.version) - msg = f_('item_version_{ver}_in_file_{file}')\ + msg = _('item_version_{ver}_in_file_{file}')\ .format({'ver': ver_str, 'file': ver_file}) raise ValueError(msg) - versioned_info = self.infos[item_type].get(identifier) - if versioned_info is None: - versioned_info = VersionedItemInfo() - self.infos[item_type][identifier] = versioned_info + if isinstance(item_info, ResourceInfo): + self._register_info(self.resource_infos, identifier, item_info) + elif isinstance(item_info, MappingInfo): + self._register_info(self.mapping_infos, identifier, item_info) - versioned_info.register(item_info) - - def _all_of_type(self, item_type: str) -> Iterable[ItemInfo]: - """Iterator over all registered versions of all mappings/resources.""" - for versioned_info in self.infos[item_type].values(): + @staticmethod + def _all_infos(infos: dict[str, VersionedItemInfo[VersionedType]]) \ + -> Iterable[VersionedType]: + """ + ........... + """ + for versioned_info in infos.values(): for item_info in versioned_info.by_version.values(): yield item_info @@ -377,38 +170,38 @@ class Malcontent: were not loaded. """ def report_missing_dependency(info: ResourceInfo, dep: str) -> None: - msg = f_('no_dep_{resource}_{ver}_{dep}')\ + msg = _('no_dep_{resource}_{ver}_{dep}')\ .format(dep=dep, resource=info.identifier, ver=util.version_string(info.version)) logging.error(msg) - for resource_info in self._all_of_type('resource'): + for resource_info in self._all_infos(self.resource_infos): for dep in resource_info.dependencies: - if dep not in self.infos['resource']: + if dep not in self.resource_infos: report_missing_dependency(resource_info, dep) def report_missing_payload(info: MappingInfo, payload: str) -> None: - msg = f_('no_payload_{mapping}_{ver}_{payload}')\ + msg = _('no_payload_{mapping}_{ver}_{payload}')\ .format(mapping=info.identifier, payload=payload, ver=util.version_string(info.version)) logging.error(msg) - for mapping_info in self._all_of_type('mapping'): + for mapping_info in self._all_infos(self.mapping_infos): for payload in mapping_info.payloads.values(): - if payload not in self.infos['resource']: + if payload not in self.resource_infos: report_missing_payload(mapping_info, payload) - def report_missing_mapping(info: Union[MappingInfo, ResourceInfo], + def report_missing_mapping(info: ItemInfo, required_mapping: str) -> None: msg = _('no_mapping_{required_by}_{ver}_{required}')\ .format(required_by=info.identifier, required=required_mapping, ver=util.version_string(info.version)) logging.error(msg) - for item_info in (*self._all_of_type('mapping'), - *self._all_of_type('resource')): + for item_info in (*self._all_infos(self.mapping_infos), + *self._all_infos(self.resource_infos)): for required in item_info.required_mappings: - if required not in self.infos['mapping']: + if required not in self.mapping_infos: report_missing_mapping(item_info, required) def _finalize(self): @@ -416,18 +209,19 @@ class Malcontent: Initialize structures needed to serve queries. Called once after all data gets loaded. """ - for infos_dict in self.infos.values(): - for versioned_info in infos_dict.values(): + for versioned_info in (*self.mapping_infos.values(), + *self.resource_infos.values()): versioned_info.known_versions.sort() - for info in self._all_of_type('mapping'): + for info in self._all_infos(self.mapping_infos): for pattern in info.payloads: try: - PatternMapping(pattern, info).register(self.pattern_tree) + self.pattern_tree = \ + self.pattern_tree.register(pattern, info) except Exception as e: if flask.current_app._hydrilla_werror: raise e from None - msg = f_('couldnt_register_{mapping}_{ver}_{pattern}')\ + msg = _('couldnt_register_{mapping}_{ver}_{pattern}')\ .format(mapping=info.identifier, pattern=pattern, ver=util.version_string(info.version)) logging.error(msg) @@ -439,27 +233,16 @@ class Malcontent: If multiple versions of a mapping are applicable, only the most recent is included in the result. """ - deco = DeconstructedUrl(url) - - collected = {} - - domain_tree = self.pattern_tree.get(deco.proto) or PatternTreeNode() - - def process_mapping(pattern_mapping: PatternMapping) -> None: - if url[-1] != '/' and pattern_mapping.pattern[-1] == '/': - return - - info = pattern_mapping.mapping_info + collected: dict[str, MappingInfo] = {} + for result_set in self.pattern_tree.search(url): + for wrapped_mapping_info in result_set: + info = wrapped_mapping_info.item + previous = collected.get(info.identifier) + if previous and previous.version > info.version: + continue - if info.identifier not in collected or \ - info.version > collected[info.identifier].version: collected[info.identifier] = info - for path_tree in domain_tree.search(deco.domain): - for matches_list in path_tree.search(deco.path): - for pattern_mapping in matches_list: - process_mapping(pattern_mapping) - return list(collected.values()) bp = flask.Blueprint('bp', __package__) @@ -484,8 +267,6 @@ class HydrillaApp(flask.Flask): ] } - self._hydrilla_translation = \ - util.translation(here / 'locales', hydrilla_config['language']) self._hydrilla_project_url = hydrilla_config['hydrilla_project_url'] self._hydrilla_port = hydrilla_config['port'] self._hydrilla_werror = hydrilla_config.get('werror', False) @@ -506,8 +287,8 @@ class HydrillaApp(flask.Flask): 'hydrilla_project_url' global variable and to install proper translations. """ - env = super().create_jinja_environment(*args, **kwargs) - env.install_gettext_translations(self._hydrilla_translation) + env = super().create_jinja_environment(*args, **kwargs) # type: ignore + env.install_gettext_translations(make_translation()) env.globals['hydrilla_project_url'] = self._hydrilla_project_url return env @@ -519,9 +300,6 @@ class HydrillaApp(flask.Flask): """ return super().run(*args, port=self._hydrilla_port, **kwargs) -def f_(text_key): - return flask.current_app._hydrilla_translation.gettext(text_key) - def malcontent(): return flask.current_app._hydrilla_malcontent @@ -542,7 +320,12 @@ def get_resource_or_mapping(item_type: str, identifier: str) -> Response: identifier = match.group(1) - versioned_info = malcontent().infos[item_type].get(identifier) + if item_type == 'resource': + infos = malcontent().resource_infos + else: + infos = malcontent().mapping_infos + + versioned_info = infos.get(identifier) info = versioned_info and versioned_info.get_by_ver() if info is None: @@ -586,9 +369,6 @@ default_config_path = Path('/etc/hydrilla/config.json') default_malcontent_dir = '/var/lib/hydrilla/malcontent' default_project_url = 'https://hydrillabugs.koszko.org/projects/hydrilla/wiki' -console_gettext = util.translation(here / 'locales').gettext -_ = console_gettext - @click.command(help=_('serve_hydrilla_packages_explain_wsgi_considerations')) @click.option('-m', '--malcontent-dir', type=click.Path(exists=True, file_okay=False), @@ -600,24 +380,21 @@ _ = console_gettext @click.option('-c', '--config', 'config_path', type=click.Path(exists=True, dir_okay=False, resolve_path=True), help=_('path_to_config_file_explain_default')) -@click.option('-l', '--language', type=click.STRING, - help=_('language_to_use_overrides_config')) @click.version_option(version=_version.version, prog_name='Hydrilla', message=_('%(prog)s_%(version)s_license'), help=_('version_printing')) def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str], - port: Optional[int], config_path: Optional[str], - language: Optional[str]) -> None: + port: Optional[int], config_path: Optional[str]) -> None: """ Run a development Hydrilla server. This command is meant to be the entry point of hydrilla command exported by this package. """ - config_load_opts = {} if config_path is None \ - else {'config_path': [Path(config_path)]} - - hydrilla_config = config.load(**config_load_opts) + if config_path is None: + hydrilla_config = config.load() + else: + hydrilla_config = config.load(config_paths=[Path(config_path)]) if malcontent_dir is not None: hydrilla_config['malcontent_dir'] = str(Path(malcontent_dir).resolve()) @@ -628,14 +405,7 @@ def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str], if port is not None: hydrilla_config['port'] = port - if language is not None: - hydrilla_config['language'] = language - - lang = hydrilla_config.get('language') - _ = console_gettext if lang is None else \ - util.translation(here / 'locales', lang).gettext - - for opt in ('malcontent_dir', 'hydrilla_project_url', 'port', 'language'): + for opt in ('malcontent_dir', 'hydrilla_project_url', 'port'): if opt not in hydrilla_config: raise ValueError(_('config_option_{}_not_supplied').format(opt)) @@ -649,7 +419,7 @@ def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str], @click.version_option(version=_version.version, prog_name='Hydrilla', message=_('%(prog)s_%(version)s_license'), help=_('version_printing')) -def start_wsgi() -> None: +def start_wsgi() -> flask.Flask: """ Create application object for use in WSGI deployment. -- cgit v1.2.3