diff options
Diffstat (limited to 'src/hydrilla/server/serve.py')
-rw-r--r-- | src/hydrilla/server/serve.py | 560 |
1 files changed, 98 insertions, 462 deletions
diff --git a/src/hydrilla/server/serve.py b/src/hydrilla/server/serve.py index a6a1204..68dde7a 100644 --- a/src/hydrilla/server/serve.py +++ b/src/hydrilla/server/serve.py @@ -21,429 +21,35 @@ # # # I, Wojtek Kosior, thereby promise not to sue for violation of this -# file's license. Although I request that you do not make use this code -# in a proprietary program, I am not going to enforce this in court. - -# Enable using with Python 3.7. -from __future__ import annotations +# file's license. Although I request that you do not make use of this +# code in a proprietary program, I am not going to enforce this in +# court. import re import os -import pathlib import json -import logging +import typing as t from pathlib import Path -from hashlib import sha256 -from abc import ABC, abstractmethod -from typing import Optional, Union, Iterable import click import flask +import werkzeug -from werkzeug import Response - -from .. import util +from ..exceptions import HaketiloException +from .. import _version +from ..translations import smart_gettext as _, translation as make_translation +from .. import versions +from .. import item_infos from . import config -from . import _version +from . import malcontent -here = Path(__file__).resolve().parent generated_by = { 'name': 'hydrilla.server', 'version': _version.version } -class ItemInfo(ABC): - """Shortened data of a resource/mapping.""" - def __init__(self, item_obj: dict): - """Initialize ItemInfo using item definition read from JSON.""" - self.version = util.normalize_version(item_obj['version']) - self.identifier = item_obj['identifier'] - self.uuid = item_obj.get('uuid') - self.long_name = item_obj['long_name'] - - def path(self) -> str: - """ - Get a relative path to this item's JSON definition with respect to - directory containing items of this type. - """ - return f'{self.identifier}/{util.version_string(self.version)}' - -class ResourceInfo(ItemInfo): - """Shortened data of a resource.""" - def __init__(self, resource_obj: dict): - """Initialize ResourceInfo using resource definition read from JSON.""" - super().__init__(resource_obj) - - dependencies = resource_obj.get('dependencies', []) - self.dependencies = [res_ref['identifier'] for res_ref in dependencies] - -class MappingInfo(ItemInfo): - """Shortened data of a mapping.""" - def __init__(self, mapping_obj: dict): - """Initialize MappingInfo using mapping definition read from JSON.""" - super().__init__(mapping_obj) - - self.payloads = {} - for pattern, res_ref in mapping_obj.get('payloads', {}).items(): - self.payloads[pattern] = res_ref['identifier'] - - def as_query_result(self) -> str: - """ - Produce a json.dump()-able object describing this mapping as one of a - collection of query results. - """ - return { - 'version': self.version, - 'identifier': self.identifier, - 'long_name': self.long_name - } - -class VersionedItemInfo: - """Stores data of multiple versions of given resource/mapping.""" - def __init__(self): - self.uuid = None - self.identifier = None - self.by_version = {} - self.known_versions = [] - - def register(self, item_info: ItemInfo) -> None: - """ - Make item info queryable by version. Perform sanity checks for uuid. - """ - if self.identifier is None: - self.identifier = item_info.identifier - - if self.uuid is None: - self.uuid = item_info.uuid - - if self.uuid is not None and self.uuid != item_info.uuid: - raise ValueError(f_('uuid_mismatch_{identifier}') - .format(identifier=self.identifier)) - - ver = item_info.version - ver_str = util.version_string(ver) - - if ver_str in self.by_version: - raise ValueError(f_('version_clash_{identifier}_{version}') - .format(identifier=self.identifier, - version=ver_str)) - - self.by_version[ver_str] = item_info - self.known_versions.append(ver) - - def get_by_ver(self, ver: Optional[list[int]]=None) -> Optional[ItemInfo]: - """ - Find and return info of the newest version of item. - - If ver is specified, instead find and return info of that version of the - item (or None if absent). - """ - ver = util.version_string(ver or self.known_versions[-1]) - - return self.by_version.get(ver) - - def get_all(self) -> list[ItemInfo]: - """ - Return a list of item info for all its versions, from oldest ot newest. - """ - return [self.by_version[util.version_string(ver)] - for ver in self.known_versions] - -class PatternTreeNode: - """ - "Pattern Tree" is how we refer to the data structure used for querying - Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal - is to make it possible for given URL to quickly retrieve all known patterns - that match it. - """ - def __init__(self): - self.wildcard_matches = [None, None, None] - self.literal_match = None - self.children = {} - - def search(self, segments): - """ - Yields all matches of this segments sequence against the tree that - starts at this node. Results are produces in order from greatest to - lowest pattern specificity. - """ - nodes = [self] - - for segment in segments: - next_node = nodes[-1].children.get(segment) - if next_node is None: - break - - nodes.append(next_node) - - nsegments = len(segments) - cond_literal = lambda: len(nodes) == nsegments - cond_wildcard = [ - lambda: len(nodes) + 1 == nsegments and segments[-1] != '*', - lambda: len(nodes) + 1 < nsegments, - lambda: len(nodes) + 1 != nsegments or segments[-1] != '***' - ] - - while nodes: - node = nodes.pop() - - for item, condition in [(node.literal_match, cond_literal), - *zip(node.wildcard_matches, cond_wildcard)]: - if item is not None and condition(): - yield item - - def add(self, segments, item_instantiator): - """ - Make item queryable through (this branch of) the Pattern Tree. If there - was not yet any item associated with the tree path designated by - segments, create a new one using item_instantiator() function. Return - all items matching this path (both the ones that existed and the ones - just created). - """ - node = self - segment = None - - for segment in segments: - wildcards = node.wildcard_matches - - child = node.children.get(segment) or PatternTreeNode() - node.children[segment] = child - node = child - - if node.literal_match is None: - node.literal_match = item_instantiator() - - if segment not in ('*', '**', '***'): - return [node.literal_match] - - if wildcards[len(segment) - 1] is None: - wildcards[len(segment) - 1] = item_instantiator() - - return [node.literal_match, wildcards[len(segment) - 1]] - -proto_regex = re.compile(r'^(?P<proto>\w+)://(?P<rest>.*)$') -user_re = r'[^/?#@]+@' # r'(?P<user>[^/?#@]+)@' # discarded for now -query_re = r'\??[^#]*' # r'\??(?P<query>[^#]*)' # discarded for now -domain_re = r'(?P<domain>[^/?#]+)' -path_re = r'(?P<path>[^?#]*)' -http_regex = re.compile(f'{domain_re}{path_re}{query_re}.*') -ftp_regex = re.compile(f'(?:{user_re})?{domain_re}{path_re}.*') - -class UrlError(ValueError): - """Used to report a URL or URL pattern that is invalid or unsupported.""" - pass - -class DeconstructedUrl: - """Represents a deconstructed URL or URL pattern""" - def __init__(self, url): - self.url = url - - match = proto_regex.match(url) - if not match: - raise UrlError(f_('invalid_URL_{}').format(url)) - - self.proto = match.group('proto') - if self.proto not in ('http', 'https', 'ftp'): - raise UrlError(f_('disallowed_protocol_{}').format(proto)) - - if self.proto == 'ftp': - match = ftp_regex.match(match.group('rest')) - elif self.proto in ('http', 'https'): - match = http_regex.match(match.group('rest')) - - if not match: - raise UrlError(f_('invalid_URL_{}').format(url)) - - self.domain = match.group('domain').split('.') - self.domain.reverse() - self.path = [*filter(None, match.group('path').split('/'))] - -class PatternMapping: - """ - A mapping info, together with one of its patterns, as stored in Pattern - Tree. - """ - def __init__(self, pattern: str, mapping_info: MappingInfo): - self.pattern = pattern - self.mapping_info = mapping_info - - def register(self, pattern_tree: dict): - """ - Make self queryable through the Pattern Tree passed in the argument. - """ - deco = DeconstructedUrl(self.pattern) - - domain_tree = pattern_tree.get(deco.proto) or PatternTreeNode() - pattern_tree[deco.proto] = domain_tree - - for path_tree in domain_tree.add(deco.domain, PatternTreeNode): - for match_list in path_tree.add(deco.path, list): - match_list.append(self) - -class Malcontent: - """ - Instance of this class represents a directory with files that can be loaded - and served by Hydrilla. - """ - def __init__(self, malcontent_dir_path: Path): - """ - When an instance of Malcontent is constructed, it searches - malcontent_dir_path for serveable site-modifying packages and loads - them into its data structures. - """ - self.infos = {'resource': {}, 'mapping': {}} - self.pattern_tree = {} - - self.malcontent_dir_path = malcontent_dir_path - - if not self.malcontent_dir_path.is_dir(): - raise ValueError(f_('malcontent_dir_path_not_dir_{}') - .format(malcontent_dir_path)) - - for item_type in ('mapping', 'resource'): - type_path = self.malcontent_dir_path / item_type - if not type_path.is_dir(): - continue - - for subpath in type_path.iterdir(): - if not subpath.is_dir(): - continue - - for ver_file in subpath.iterdir(): - try: - self._load_item(item_type, ver_file) - except Exception as e: - if flask.current_app._hydrilla_werror: - raise e from None - - msg = f_('couldnt_load_item_from_{}').format(ver_file) - logging.error(msg, exc_info=True) - - self._report_missing() - self._finalize() - - def _load_item(self, item_type: str, ver_file: Path) -> None: - """ - Reads, validates and autocompletes serveable mapping/resource - definition, then registers information from it in data structures. - """ - version = util.parse_version(ver_file.name) - identifier = ver_file.parent.name - - with open(ver_file, 'rt') as file_handle: - item_json = json.load(file_handle) - - util.validator_for(f'api_{item_type}_description-1.0.1.schema.json')\ - .validate(item_json) - - if item_type == 'resource': - item_info = ResourceInfo(item_json) - else: - item_info = MappingInfo(item_json) - - if item_info.identifier != identifier: - msg = f_('item_{item}_in_file_{file}')\ - .format({'item': item_info.identifier, 'file': ver_file}) - raise ValueError(msg) - - if item_info.version != version: - ver_str = util.version_string(item_info.version) - msg = f_('item_version_{ver}_in_file_{file}')\ - .format({'ver': ver_str, 'file': ver_file}) - raise ValueError(msg) - - versioned_info = self.infos[item_type].get(identifier) - if versioned_info is None: - versioned_info = VersionedItemInfo() - self.infos[item_type][identifier] = versioned_info - - versioned_info.register(item_info) - - def _all_of_type(self, item_type: str) -> Iterable[ItemInfo]: - """Iterator over all registered versions of all mappings/resources.""" - for versioned_info in self.infos[item_type].values(): - for item_info in versioned_info.by_version.values(): - yield item_info - - def _report_missing(self) -> None: - """ - Use logger to print information about items that are referenced but - were not loaded. - """ - def report_missing_dependency(info: ResourceInfo, dep: str) -> None: - msg = f_('no_dep_{resource}_{ver}_{dep}')\ - .format(dep=dep, resource=info.identifier, - ver=util.version_string(info.version)) - logging.error(msg) - - for resource_info in self._all_of_type('resource'): - for dep in resource_info.dependencies: - if dep not in self.infos['resource']: - report_missing_dependency(resource_info, dep) - - def report_missing_payload(info: MappingInfo, payload: str) -> None: - msg = f_('no_payload_{mapping}_{ver}_{payload}')\ - .format(mapping=info.identifier, payload=payload, - ver=util.version_string(info.version)) - logging.error(msg) - - for mapping_info in self._all_of_type('mapping'): - for payload in mapping_info.payloads.values(): - if payload not in self.infos['resource']: - report_missing_payload(mapping_info, payload) - - def _finalize(self): - """ - Initialize structures needed to serve queries. Called once after all - data gets loaded. - """ - for infos_dict in self.infos.values(): - for versioned_info in infos_dict.values(): - versioned_info.known_versions.sort() - - for info in self._all_of_type('mapping'): - for pattern in info.payloads: - try: - PatternMapping(pattern, info).register(self.pattern_tree) - except Exception as e: - if flask.current_app._hydrilla_werror: - raise e from None - msg = f_('couldnt_register_{mapping}_{ver}_{pattern}')\ - .format(mapping=info.identifier, pattern=pattern, - ver=util.version_string(info.version)) - logging.error(msg) - - def query(self, url: str) -> list[MappingInfo]: - """ - Return a list of registered mappings that match url. - - If multiple versions of a mapping are applicable, only the most recent - is included in the result. - """ - deco = DeconstructedUrl(url) - - collected = {} - - domain_tree = self.pattern_tree.get(deco.proto) or PatternTreeNode() - - def process_mapping(pattern_mapping: PatternMapping) -> None: - if url[-1] != '/' and pattern_mapping.pattern[-1] == '/': - return - - info = pattern_mapping.mapping_info - - if info.identifier not in collected or \ - info.version > collected[info.identifier].version: - collected[info.identifier] = info - - for path_tree in domain_tree.search(deco.domain): - for matches_list in path_tree.search(deco.path): - for pattern_mapping in matches_list: - process_mapping(pattern_mapping) - - return list(collected.values()) bp = flask.Blueprint('bp', __package__) @@ -467,46 +73,36 @@ class HydrillaApp(flask.Flask): ] } - self._hydrilla_translation = \ - util.translation(here / 'locales', hydrilla_config['language']) - self._hydrilla_project_url = hydrilla_config['hydrilla_project_url'] self._hydrilla_port = hydrilla_config['port'] self._hydrilla_werror = hydrilla_config.get('werror', False) + verify_files = hydrilla_config.get('verify_files', True) if 'hydrilla_parent' in hydrilla_config: - raise ValueError("Option 'hydrilla_parent' is not implemented.") + raise HaketiloException(_('err.server.opt_hydrilla_parent_not_implemented')) - malcontent_dir = Path(hydrilla_config['malcontent_dir']).resolve() - with self.app_context(): - self._hydrilla_malcontent = Malcontent(malcontent_dir) + malcontent_dir_path = Path(hydrilla_config['malcontent_dir']).resolve() + self._hydrilla_malcontent = malcontent.Malcontent( + malcontent_dir_path = malcontent_dir_path, + werror = self._hydrilla_werror, + verify_files = verify_files + ) - self.register_blueprint(bp) + self.jinja_env.install_gettext_translations(make_translation()) - def create_jinja_environment(self, *args, **kwargs) \ - -> flask.templating.Environment: - """ - Flask's create_jinja_environment(), but tweaked to always include the - 'hydrilla_project_url' global variable and to install proper - translations. - """ - env = super().create_jinja_environment(*args, **kwargs) - env.install_gettext_translations(self._hydrilla_translation) - env.globals['hydrilla_project_url'] = self._hydrilla_project_url + self.jinja_env.globals['hydrilla_project_url'] = \ + hydrilla_config['hydrilla_project_url'] - return env + self.register_blueprint(bp) def run(self, *args, **kwargs): """ - Flask's run(), but tweaked to use the port from hydrilla configuration - by default. + Flask's run() but tweaked to use the port from hydrilla configuration by + default. """ return super().run(*args, port=self._hydrilla_port, **kwargs) -def f_(text_key): - return flask.current_app._hydrilla_translation.gettext(text_key) - -def malcontent(): - return flask.current_app._hydrilla_malcontent +def get_malcontent() -> malcontent.Malcontent: + return t.cast(HydrillaApp, flask.current_app)._hydrilla_malcontent @bp.route('/') def index(): @@ -514,7 +110,8 @@ def index(): identifier_json_re = re.compile(r'^([-0-9a-z.]+)\.json$') -def get_resource_or_mapping(item_type: str, identifier: str) -> Response: +def get_resource_or_mapping(item_type: str, identifier: str) \ + -> werkzeug.Response: """ Strip '.json' from 'identifier', look the item up and send its JSON description. @@ -525,36 +122,84 @@ def get_resource_or_mapping(item_type: str, identifier: str) -> Response: identifier = match.group(1) - versioned_info = malcontent().infos[item_type].get(identifier) + infos: t.Mapping[str, item_infos.VersionedItemInfo] + if item_type == 'resource': + infos = get_malcontent().resource_infos + else: + infos = get_malcontent().mapping_infos - info = versioned_info and versioned_info.get_by_ver() - if info is None: + versioned_info = infos.get(identifier) + + if versioned_info is None: flask.abort(404) + info = versioned_info.newest_info + # no need for send_from_directory(); path is safe, constructed by us - file_path = malcontent().malcontent_dir_path / item_type / info.path() - return flask.send_file(open(file_path, 'rb'), mimetype='application/json') + info_path = f'{info.identifier}/{versions.version_string(info.version)}' + file_path = get_malcontent().malcontent_dir_path / item_type / info_path + + if flask.__version__[0:2] in ('0.', '1.'): + caching_args = {'add_etags': False, 'cache_timeout': 0} + else: + caching_args = {'etag': False} + + return flask.send_file( + str(file_path), + mimetype = 'application/json', + conditional = False, + **caching_args # type: ignore + ) @bp.route('/mapping/<string:identifier_dot_json>') -def get_newest_mapping(identifier_dot_json: str) -> Response: +def get_newest_mapping(identifier_dot_json: str) -> werkzeug.Response: return get_resource_or_mapping('mapping', identifier_dot_json) @bp.route('/resource/<string:identifier_dot_json>') -def get_newest_resource(identifier_dot_json: str) -> Response: +def get_newest_resource(identifier_dot_json: str) -> werkzeug.Response: return get_resource_or_mapping('resource', identifier_dot_json) +def make_ref(info: item_infos.AnyInfo) -> t.Dict[str, t.Any]: + ref: t.Dict[str, t.Any] = { + 'version': info.version, + 'identifier': info.identifier, + 'long_name': info.long_name + } + + if isinstance(info, item_infos.ResourceInfo): + ref['revision'] = info.revision + + return ref + @bp.route('/query') def query(): url = flask.request.args['url'] - mapping_refs = [i.as_query_result() for i in malcontent().query(url)] + mapping_refs = [make_ref(info) for info in get_malcontent().query(url)] + result = { '$schema': 'https://hydrilla.koszko.org/schemas/api_query_result-1.schema.json', 'mappings': mapping_refs, 'generated_by': generated_by } - return Response(json.dumps(result), mimetype='application/json') + return werkzeug.Response(json.dumps(result), mimetype='application/json') + +@bp.route('/list_all') +def list_all_packages(): + malcontent = get_malcontent() + + resource_refs = [make_ref(info) for info in malcontent.get_all_resources()] + mapping_refs = [make_ref(info) for info in malcontent.get_all_mappings()] + + result = { + '$schema': 'https://hydrilla.koszko.org/schemas/api_package_list-2.schema.json', + 'resources': resource_refs, + 'mappings': mapping_refs, + 'generated_by': generated_by + } + + return werkzeug.Response(json.dumps(result), mimetype='application/json') @bp.route('/--help') def mm_help(): @@ -569,9 +214,6 @@ default_config_path = Path('/etc/hydrilla/config.json') default_malcontent_dir = '/var/lib/hydrilla/malcontent' default_project_url = 'https://hydrillabugs.koszko.org/projects/hydrilla/wiki' -console_gettext = util.translation(here / 'locales').gettext -_ = console_gettext - @click.command(help=_('serve_hydrilla_packages_explain_wsgi_considerations')) @click.option('-m', '--malcontent-dir', type=click.Path(exists=True, file_okay=False), @@ -583,24 +225,25 @@ _ = console_gettext @click.option('-c', '--config', 'config_path', type=click.Path(exists=True, dir_okay=False, resolve_path=True), help=_('path_to_config_file_explain_default')) -@click.option('-l', '--language', type=click.STRING, - help=_('language_to_use_overrides_config')) @click.version_option(version=_version.version, prog_name='Hydrilla', message=_('%(prog)s_%(version)s_license'), help=_('version_printing')) -def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str], - port: Optional[int], config_path: Optional[str], - language: Optional[str]) -> None: +def start( + malcontent_dir: t.Optional[str], + hydrilla_project_url: t.Optional[str], + port: t.Optional[int], + config_path: t.Optional[str] +) -> None: """ Run a development Hydrilla server. This command is meant to be the entry point of hydrilla command exported by this package. """ - config_load_opts = {} if config_path is None \ - else {'config_path': [Path(config_path)]} - - hydrilla_config = config.load(**config_load_opts) + if config_path is None: + hydrilla_config = config.load() + else: + hydrilla_config = config.load(config_paths=[Path(config_path)]) if malcontent_dir is not None: hydrilla_config['malcontent_dir'] = str(Path(malcontent_dir).resolve()) @@ -611,14 +254,7 @@ def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str], if port is not None: hydrilla_config['port'] = port - if language is not None: - hydrilla_config['language'] = language - - lang = hydrilla_config.get('language') - _ = console_gettext if lang is None else \ - util.translation(here / 'locales', lang).gettext - - for opt in ('malcontent_dir', 'hydrilla_project_url', 'port', 'language'): + for opt in ('malcontent_dir', 'hydrilla_project_url', 'port'): if opt not in hydrilla_config: raise ValueError(_('config_option_{}_not_supplied').format(opt)) @@ -632,7 +268,7 @@ def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str], @click.version_option(version=_version.version, prog_name='Hydrilla', message=_('%(prog)s_%(version)s_license'), help=_('version_printing')) -def start_wsgi() -> None: +def start_wsgi() -> flask.Flask: """ Create application object for use in WSGI deployment. |