diff options
Diffstat (limited to 'src/hydrilla/server')
-rw-r--r-- | src/hydrilla/server/__init__.py | 8 | ||||
-rw-r--r-- | src/hydrilla/server/__main__.py | 9 | ||||
-rw-r--r-- | src/hydrilla/server/_version.py | 5 | ||||
-rw-r--r-- | src/hydrilla/server/config.json | 36 | ||||
-rw-r--r-- | src/hydrilla/server/config.py | 117 | ||||
-rw-r--r-- | src/hydrilla/server/locales/en_US/LC_MESSAGES/hydrilla-messages.po | 147 | ||||
-rw-r--r-- | src/hydrilla/server/serve.py | 642 | ||||
-rw-r--r-- | src/hydrilla/server/templates/base.html | 123 | ||||
-rw-r--r-- | src/hydrilla/server/templates/index.html | 30 |
9 files changed, 1117 insertions, 0 deletions
diff --git a/src/hydrilla/server/__init__.py b/src/hydrilla/server/__init__.py new file mode 100644 index 0000000..7bd71ea --- /dev/null +++ b/src/hydrilla/server/__init__.py @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: CC0-1.0 + +# Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org> +# +# Available under the terms of Creative Commons Zero v1.0 Universal. + +from . import config +from .serve import start, start_wsgi diff --git a/src/hydrilla/server/__main__.py b/src/hydrilla/server/__main__.py new file mode 100644 index 0000000..037b388 --- /dev/null +++ b/src/hydrilla/server/__main__.py @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: CC0-1.0 + +# Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org> +# +# Available under the terms of Creative Commons Zero v1.0 Universal. + +from . import serve + +serve.start() diff --git a/src/hydrilla/server/_version.py b/src/hydrilla/server/_version.py new file mode 100644 index 0000000..d953eef --- /dev/null +++ b/src/hydrilla/server/_version.py @@ -0,0 +1,5 @@ +# coding: utf-8 +# file generated by setuptools_scm +# don't change, don't track in version control +version = '1.0' +version_tuple = (1, 0) diff --git a/src/hydrilla/server/config.json b/src/hydrilla/server/config.json new file mode 100644 index 0000000..bde341c --- /dev/null +++ b/src/hydrilla/server/config.json @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: CC0-1.0 + +// Default Hydrilla config file. +// +// Copyright (C) 2021, 2022 Wojtek Kosior +// +// Available under the terms of Creative Commons Zero v1.0 Universal. + +{ + // Relative path to directory from which Hydrilla will load packages + // metadata and serve files. + // Deliberately avoiding word "content", see: + // http://www.gnu.org/philosophy/words-to-avoid.en.html#Content + "malcontent_dir": "/var/lib/hydrilla/malcontent", + + // Hydrilla will display this link to users as a place where they can + // obtain sources for its software. This config option is meant to ease + // compliance with the AGPL. 
+ "hydrilla_project_url": "https://hydrillabugs.koszko.org/projects/hydrilla/wiki", + + // Tell Hydrilla to look for additional configuration in those files, in + // this order. Raise an error if the file does not exist. + //"use_configs": ["/etc/hydrilla/config.json"], + + // Same as above but don't raise an error if the file does not exist. + "try_configs": ["/etc/hydrilla/config.json"], + + // What port to listen on (if not being run through WSGI). + "port": 10112, + + // What localization to use for console messages and served HTML files. + "language": "en_US", + + // Whether to exit upon emitting a warning. + "werror": false +} diff --git a/src/hydrilla/server/config.py b/src/hydrilla/server/config.py new file mode 100644 index 0000000..1edd070 --- /dev/null +++ b/src/hydrilla/server/config.py @@ -0,0 +1,117 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + +# Loading Hydrilla server configuration file. +# +# This file is part of Hydrilla +# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +# Enable using with Python 3.7. 
+from __future__ import annotations + +import json + +from pathlib import Path + +import jsonschema + +from .. import util + +config_schema = { + '$schema': 'http://json-schema.org/draft-07/schema#', + 'type': 'object', + 'properties': { + 'malcontent_dir': { + 'type': 'string' + }, + 'malcontent_dir': { + 'type': 'string' + }, + 'hydrilla_project_url': { + 'type': 'string' + }, + 'try_configs': { + 'type': 'array', + 'items': { + 'type': 'string' + } + }, + 'use_configs': { + 'type': 'array', + 'items': { + 'type': 'string' + } + }, + 'port': { + 'type': 'integer', + 'minimum': 0, + 'maximum': 65535 + }, + 'werror': { + 'type': 'boolean' + } + } +} + +here = Path(__file__).resolve().parent + +def load(config_paths: list[Path]=[here / 'config.json'], + can_fail: list[bool]=[]) -> dict: + config = {} + + bools_missing = max(0, len(config_paths) - len(can_fail)) + config_paths = [*config_paths] + can_fail = [*can_fail[:len(config_paths)], *([False] * bools_missing)] + + while config_paths: + path = config_paths.pop() + fail_ok = can_fail.pop() + + try: + json_text = path.read_text() + except Exception as e: + if fail_ok: + continue + raise e from None + + new_config = json.loads(util.strip_json_comments(json_text)) + jsonschema.validate(new_config, config_schema) + + config.update(new_config) + + if 'malcontent_dir' in config: + malcontent_dir = Path(config['malcontent_dir']) + if not malcontent_dir.is_absolute(): + malcontent_dir = path.parent / malcontent_dir + + config['malcontent_dir'] = str(malcontent_dir.resolve()) + + for key, failure_ok in [('try_configs', True), ('use_configs', False)]: + paths = new_config.get(key, []) + paths.reverse() + config_paths.extend(paths) + can_fail.extend([failure_ok] * len(paths)) + + for key in ('try_configs', 'use_configs'): + if key in config: + config.pop(key) + + return config diff --git a/src/hydrilla/server/locales/en_US/LC_MESSAGES/hydrilla-messages.po b/src/hydrilla/server/locales/en_US/LC_MESSAGES/hydrilla-messages.po 
new file mode 100644 index 0000000..7ea930a --- /dev/null +++ b/src/hydrilla/server/locales/en_US/LC_MESSAGES/hydrilla-messages.po @@ -0,0 +1,147 @@ +# SPDX-License-Identifier: CC0-1.0 +# +# English (United States) translations for hydrilla. +# Copyright (C) 2021, 2022 Wojtek Kosior <koszko@koszko.org> +# Available under the terms of Creative Commons Zero v1.0 Universal. +msgid "" +msgstr "" +"Project-Id-Version: hydrilla.builder 0.1\n" +"Report-Msgid-Bugs-To: koszko@koszko.org\n" +"POT-Creation-Date: 2022-04-22 17:09+0200\n" +"PO-Revision-Date: 2022-02-12 00:00+0000\n" +"Last-Translator: Wojtek Kosior <koszko@koszko.org>\n" +"Language: en_US\n" +"Language-Team: en_US <koszko@koszko.org>\n" +"Plural-Forms: nplurals=2; plural=(n != 1)\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.8.0\n" + +#: src/hydrilla/server/serve.py:122 +#, python-brace-format +msgid "uuid_mismatch_{identifier}" +msgstr "Two different uuids were specified for item '{identifier}'." + +#: src/hydrilla/server/serve.py:129 +#, python-brace-format +msgid "version_clash_{identifier}_{version}" +msgstr "Version '{version}' specified more than once for item '{identifier}'." + +#: src/hydrilla/server/serve.py:245 src/hydrilla/server/serve.py:257 +msgid "invalid_URL_{}" +msgstr "Invalid URL/pattern: '{}'." + +#: src/hydrilla/server/serve.py:249 +msgid "disallowed_protocol_{}" +msgstr "Disallowed protocol: '{}'." + +#: src/hydrilla/server/serve.py:302 +msgid "malcontent_dir_path_not_dir_{}" +msgstr "Provided 'malcontent_dir' path does not name a directory: {}" + +#: src/hydrilla/server/serve.py:321 +msgid "couldnt_load_item_from_{}" +msgstr "Couldn't load item from {}." + +#: src/hydrilla/server/serve.py:347 +msgid "item_{item}_in_file_{file}" +msgstr "Item {item} incorrectly present under {file}." 
+ +#: src/hydrilla/server/serve.py:353 +msgid "item_version_{ver}_in_file_{file}" +msgstr "Item version {ver} incorrectly present under {file}." + +#: src/hydrilla/server/serve.py:376 +msgid "no_dep_{resource}_{ver}_{dep}" +msgstr "Unknown dependency '{dep}' of resource '{resource}', version '{ver}'." + +#: src/hydrilla/server/serve.py:387 +msgid "no_payload_{mapping}_{ver}_{payload}" +msgstr "Unknown payload '{payload}' of mapping '{mapping}', version '{ver}'." + +#: src/hydrilla/server/serve.py:413 +msgid "couldnt_register_{mapping}_{ver}_{pattern}" +msgstr "" +"Couldn't register mapping '{mapping}', version '{ver}' (pattern " +"'{pattern}')." + +#: src/hydrilla/server/serve.py:566 src/hydrilla/server/serve.py:588 +#: src/hydrilla/server/serve.py:626 +#, python-format +msgid "%(prog)s_%(version)s_license" +msgstr "" +"%(prog)s %(version)s\n" +"Copyright (C) 2021,2022 Wojtek Kosior and contributors.\n" +"License AGPLv3+: GNU AGPL version 3 or later " +"<https://gnu.org/licenses/agpl.html>\n" +"This is free software: you are free to change and redistribute it.\n" +"There is NO WARRANTY, to the extent permitted by law." + +#: src/hydrilla/server/serve.py:577 +msgid "directory_to_serve_from_overrides_config" +msgstr "" +"Directory to serve files from. Overrides value from the config file (if " +"any)." + +#: src/hydrilla/server/serve.py:579 +msgid "project_url_to_display_overrides_config" +msgstr "" +"Project url to display on generated HTML pages. Overrides value from the " +"config file (if any)." + +#: src/hydrilla/server/serve.py:581 +msgid "tcp_port_to_listen_on_overrides_config" +msgstr "" +"TCP port number to listen on (0-65535). Overrides value from the config " +"file (if any)." + +#: src/hydrilla/server/serve.py:584 +msgid "path_to_config_file_explain_default" +msgstr "" +"Path to Hydrilla server configuration file (optional, by default Hydrilla" +" loads its own config file, which in turn tries to load " +"/etc/hydrilla/config.json)." 
+ +#: src/hydrilla/server/serve.py:586 +msgid "language_to_use_overrides_config" +msgstr "" +"Language to use (also affects served HTML files). Overrides value from " +"the config file (if any)." + +#: src/hydrilla/server/serve.py:589 src/hydrilla/server/serve.py:627 +msgid "version_printing" +msgstr "Print version information and exit." + +#: src/hydrilla/server/serve.py:617 +msgid "config_option_{}_not_supplied" +msgstr "Missing configuration option '{}'." + +#: src/hydrilla/server/serve.py:621 +msgid "serve_hydrilla_packages_explain_wsgi_considerations" +msgstr "" +"Serve Hydrilla packages.\n" +"\n" +"This command is meant to be a quick way to run a local or development " +"Hydrilla instance. For better performance, consider deployment using " +"WSGI." + +#: src/hydrilla/server/serve.py:632 +msgid "serve_hydrilla_packages_wsgi_help" +msgstr "" +"Serve Hydrilla packages.\n" +"\n" +"This program is a WSGI script that runs Hydrilla repository behind an " +"HTTP server like Apache2 or Nginx. You can configure Hydrilla through the" +" /etc/hydrilla/config.json file." + +#. 'hydrilla' as a title +#: src/hydrilla/server/templates/base.html:99 +#: src/hydrilla/server/templates/base.html:105 +msgid "hydrilla" +msgstr "Hydrilla" + +#: src/hydrilla/server/templates/index.html:29 +msgid "hydrilla_welcome" +msgstr "Welcome to Hydrilla!" + diff --git a/src/hydrilla/server/serve.py b/src/hydrilla/server/serve.py new file mode 100644 index 0000000..a6a1204 --- /dev/null +++ b/src/hydrilla/server/serve.py @@ -0,0 +1,642 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + +# Main repository logic. +# +# This file is part of Hydrilla +# +# Copyright (C) 2021, 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +# Enable using with Python 3.7. +from __future__ import annotations + +import re +import os +import pathlib +import json +import logging + +from pathlib import Path +from hashlib import sha256 +from abc import ABC, abstractmethod +from typing import Optional, Union, Iterable + +import click +import flask + +from werkzeug import Response + +from .. import util +from . import config +from . import _version + +here = Path(__file__).resolve().parent + +generated_by = { + 'name': 'hydrilla.server', + 'version': _version.version +} + +class ItemInfo(ABC): + """Shortened data of a resource/mapping.""" + def __init__(self, item_obj: dict): + """Initialize ItemInfo using item definition read from JSON.""" + self.version = util.normalize_version(item_obj['version']) + self.identifier = item_obj['identifier'] + self.uuid = item_obj.get('uuid') + self.long_name = item_obj['long_name'] + + def path(self) -> str: + """ + Get a relative path to this item's JSON definition with respect to + directory containing items of this type. 
+ """ + return f'{self.identifier}/{util.version_string(self.version)}' + +class ResourceInfo(ItemInfo): + """Shortened data of a resource.""" + def __init__(self, resource_obj: dict): + """Initialize ResourceInfo using resource definition read from JSON.""" + super().__init__(resource_obj) + + dependencies = resource_obj.get('dependencies', []) + self.dependencies = [res_ref['identifier'] for res_ref in dependencies] + +class MappingInfo(ItemInfo): + """Shortened data of a mapping.""" + def __init__(self, mapping_obj: dict): + """Initialize MappingInfo using mapping definition read from JSON.""" + super().__init__(mapping_obj) + + self.payloads = {} + for pattern, res_ref in mapping_obj.get('payloads', {}).items(): + self.payloads[pattern] = res_ref['identifier'] + + def as_query_result(self) -> str: + """ + Produce a json.dump()-able object describing this mapping as one of a + collection of query results. + """ + return { + 'version': self.version, + 'identifier': self.identifier, + 'long_name': self.long_name + } + +class VersionedItemInfo: + """Stores data of multiple versions of given resource/mapping.""" + def __init__(self): + self.uuid = None + self.identifier = None + self.by_version = {} + self.known_versions = [] + + def register(self, item_info: ItemInfo) -> None: + """ + Make item info queryable by version. Perform sanity checks for uuid. 
+ """ + if self.identifier is None: + self.identifier = item_info.identifier + + if self.uuid is None: + self.uuid = item_info.uuid + + if self.uuid is not None and self.uuid != item_info.uuid: + raise ValueError(f_('uuid_mismatch_{identifier}') + .format(identifier=self.identifier)) + + ver = item_info.version + ver_str = util.version_string(ver) + + if ver_str in self.by_version: + raise ValueError(f_('version_clash_{identifier}_{version}') + .format(identifier=self.identifier, + version=ver_str)) + + self.by_version[ver_str] = item_info + self.known_versions.append(ver) + + def get_by_ver(self, ver: Optional[list[int]]=None) -> Optional[ItemInfo]: + """ + Find and return info of the newest version of item. + + If ver is specified, instead find and return info of that version of the + item (or None if absent). + """ + ver = util.version_string(ver or self.known_versions[-1]) + + return self.by_version.get(ver) + + def get_all(self) -> list[ItemInfo]: + """ + Return a list of item info for all its versions, from oldest ot newest. + """ + return [self.by_version[util.version_string(ver)] + for ver in self.known_versions] + +class PatternTreeNode: + """ + "Pattern Tree" is how we refer to the data structure used for querying + Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal + is to make it possible for given URL to quickly retrieve all known patterns + that match it. + """ + def __init__(self): + self.wildcard_matches = [None, None, None] + self.literal_match = None + self.children = {} + + def search(self, segments): + """ + Yields all matches of this segments sequence against the tree that + starts at this node. Results are produces in order from greatest to + lowest pattern specificity. 
+ """ + nodes = [self] + + for segment in segments: + next_node = nodes[-1].children.get(segment) + if next_node is None: + break + + nodes.append(next_node) + + nsegments = len(segments) + cond_literal = lambda: len(nodes) == nsegments + cond_wildcard = [ + lambda: len(nodes) + 1 == nsegments and segments[-1] != '*', + lambda: len(nodes) + 1 < nsegments, + lambda: len(nodes) + 1 != nsegments or segments[-1] != '***' + ] + + while nodes: + node = nodes.pop() + + for item, condition in [(node.literal_match, cond_literal), + *zip(node.wildcard_matches, cond_wildcard)]: + if item is not None and condition(): + yield item + + def add(self, segments, item_instantiator): + """ + Make item queryable through (this branch of) the Pattern Tree. If there + was not yet any item associated with the tree path designated by + segments, create a new one using item_instantiator() function. Return + all items matching this path (both the ones that existed and the ones + just created). + """ + node = self + segment = None + + for segment in segments: + wildcards = node.wildcard_matches + + child = node.children.get(segment) or PatternTreeNode() + node.children[segment] = child + node = child + + if node.literal_match is None: + node.literal_match = item_instantiator() + + if segment not in ('*', '**', '***'): + return [node.literal_match] + + if wildcards[len(segment) - 1] is None: + wildcards[len(segment) - 1] = item_instantiator() + + return [node.literal_match, wildcards[len(segment) - 1]] + +proto_regex = re.compile(r'^(?P<proto>\w+)://(?P<rest>.*)$') +user_re = r'[^/?#@]+@' # r'(?P<user>[^/?#@]+)@' # discarded for now +query_re = r'\??[^#]*' # r'\??(?P<query>[^#]*)' # discarded for now +domain_re = r'(?P<domain>[^/?#]+)' +path_re = r'(?P<path>[^?#]*)' +http_regex = re.compile(f'{domain_re}{path_re}{query_re}.*') +ftp_regex = re.compile(f'(?:{user_re})?{domain_re}{path_re}.*') + +class UrlError(ValueError): + """Used to report a URL or URL pattern that is invalid or 
unsupported.""" + pass + +class DeconstructedUrl: + """Represents a deconstructed URL or URL pattern""" + def __init__(self, url): + self.url = url + + match = proto_regex.match(url) + if not match: + raise UrlError(f_('invalid_URL_{}').format(url)) + + self.proto = match.group('proto') + if self.proto not in ('http', 'https', 'ftp'): + raise UrlError(f_('disallowed_protocol_{}').format(proto)) + + if self.proto == 'ftp': + match = ftp_regex.match(match.group('rest')) + elif self.proto in ('http', 'https'): + match = http_regex.match(match.group('rest')) + + if not match: + raise UrlError(f_('invalid_URL_{}').format(url)) + + self.domain = match.group('domain').split('.') + self.domain.reverse() + self.path = [*filter(None, match.group('path').split('/'))] + +class PatternMapping: + """ + A mapping info, together with one of its patterns, as stored in Pattern + Tree. + """ + def __init__(self, pattern: str, mapping_info: MappingInfo): + self.pattern = pattern + self.mapping_info = mapping_info + + def register(self, pattern_tree: dict): + """ + Make self queryable through the Pattern Tree passed in the argument. + """ + deco = DeconstructedUrl(self.pattern) + + domain_tree = pattern_tree.get(deco.proto) or PatternTreeNode() + pattern_tree[deco.proto] = domain_tree + + for path_tree in domain_tree.add(deco.domain, PatternTreeNode): + for match_list in path_tree.add(deco.path, list): + match_list.append(self) + +class Malcontent: + """ + Instance of this class represents a directory with files that can be loaded + and served by Hydrilla. + """ + def __init__(self, malcontent_dir_path: Path): + """ + When an instance of Malcontent is constructed, it searches + malcontent_dir_path for serveable site-modifying packages and loads + them into its data structures. 
+ """ + self.infos = {'resource': {}, 'mapping': {}} + self.pattern_tree = {} + + self.malcontent_dir_path = malcontent_dir_path + + if not self.malcontent_dir_path.is_dir(): + raise ValueError(f_('malcontent_dir_path_not_dir_{}') + .format(malcontent_dir_path)) + + for item_type in ('mapping', 'resource'): + type_path = self.malcontent_dir_path / item_type + if not type_path.is_dir(): + continue + + for subpath in type_path.iterdir(): + if not subpath.is_dir(): + continue + + for ver_file in subpath.iterdir(): + try: + self._load_item(item_type, ver_file) + except Exception as e: + if flask.current_app._hydrilla_werror: + raise e from None + + msg = f_('couldnt_load_item_from_{}').format(ver_file) + logging.error(msg, exc_info=True) + + self._report_missing() + self._finalize() + + def _load_item(self, item_type: str, ver_file: Path) -> None: + """ + Reads, validates and autocompletes serveable mapping/resource + definition, then registers information from it in data structures. + """ + version = util.parse_version(ver_file.name) + identifier = ver_file.parent.name + + with open(ver_file, 'rt') as file_handle: + item_json = json.load(file_handle) + + util.validator_for(f'api_{item_type}_description-1.0.1.schema.json')\ + .validate(item_json) + + if item_type == 'resource': + item_info = ResourceInfo(item_json) + else: + item_info = MappingInfo(item_json) + + if item_info.identifier != identifier: + msg = f_('item_{item}_in_file_{file}')\ + .format({'item': item_info.identifier, 'file': ver_file}) + raise ValueError(msg) + + if item_info.version != version: + ver_str = util.version_string(item_info.version) + msg = f_('item_version_{ver}_in_file_{file}')\ + .format({'ver': ver_str, 'file': ver_file}) + raise ValueError(msg) + + versioned_info = self.infos[item_type].get(identifier) + if versioned_info is None: + versioned_info = VersionedItemInfo() + self.infos[item_type][identifier] = versioned_info + + versioned_info.register(item_info) + + def _all_of_type(self, 
item_type: str) -> Iterable[ItemInfo]: + """Iterator over all registered versions of all mappings/resources.""" + for versioned_info in self.infos[item_type].values(): + for item_info in versioned_info.by_version.values(): + yield item_info + + def _report_missing(self) -> None: + """ + Use logger to print information about items that are referenced but + were not loaded. + """ + def report_missing_dependency(info: ResourceInfo, dep: str) -> None: + msg = f_('no_dep_{resource}_{ver}_{dep}')\ + .format(dep=dep, resource=info.identifier, + ver=util.version_string(info.version)) + logging.error(msg) + + for resource_info in self._all_of_type('resource'): + for dep in resource_info.dependencies: + if dep not in self.infos['resource']: + report_missing_dependency(resource_info, dep) + + def report_missing_payload(info: MappingInfo, payload: str) -> None: + msg = f_('no_payload_{mapping}_{ver}_{payload}')\ + .format(mapping=info.identifier, payload=payload, + ver=util.version_string(info.version)) + logging.error(msg) + + for mapping_info in self._all_of_type('mapping'): + for payload in mapping_info.payloads.values(): + if payload not in self.infos['resource']: + report_missing_payload(mapping_info, payload) + + def _finalize(self): + """ + Initialize structures needed to serve queries. Called once after all + data gets loaded. + """ + for infos_dict in self.infos.values(): + for versioned_info in infos_dict.values(): + versioned_info.known_versions.sort() + + for info in self._all_of_type('mapping'): + for pattern in info.payloads: + try: + PatternMapping(pattern, info).register(self.pattern_tree) + except Exception as e: + if flask.current_app._hydrilla_werror: + raise e from None + msg = f_('couldnt_register_{mapping}_{ver}_{pattern}')\ + .format(mapping=info.identifier, pattern=pattern, + ver=util.version_string(info.version)) + logging.error(msg) + + def query(self, url: str) -> list[MappingInfo]: + """ + Return a list of registered mappings that match url. 
+ + If multiple versions of a mapping are applicable, only the most recent + is included in the result. + """ + deco = DeconstructedUrl(url) + + collected = {} + + domain_tree = self.pattern_tree.get(deco.proto) or PatternTreeNode() + + def process_mapping(pattern_mapping: PatternMapping) -> None: + if url[-1] != '/' and pattern_mapping.pattern[-1] == '/': + return + + info = pattern_mapping.mapping_info + + if info.identifier not in collected or \ + info.version > collected[info.identifier].version: + collected[info.identifier] = info + + for path_tree in domain_tree.search(deco.domain): + for matches_list in path_tree.search(deco.path): + for pattern_mapping in matches_list: + process_mapping(pattern_mapping) + + return list(collected.values()) + +bp = flask.Blueprint('bp', __package__) + +class HydrillaApp(flask.Flask): + """Flask app that implements a Hydrilla server.""" + def __init__(self, hydrilla_config: dict, flask_config: dict={}): + """Create the Flask instance according to the configuration""" + super().__init__(__package__, static_url_path='/', + static_folder=hydrilla_config['malcontent_dir']) + self.config.update(flask_config) + + # https://stackoverflow.com/questions/9449101/how-to-stop-flask-from-initialising-twice-in-debug-mode + if self.debug and os.environ.get('WERKZEUG_RUN_MAIN') != 'true': + return + + self.jinja_options = { + **self.jinja_options, + 'extensions': [ + *self.jinja_options.get('extensions', []), + 'jinja2.ext.i18n' + ] + } + + self._hydrilla_translation = \ + util.translation(here / 'locales', hydrilla_config['language']) + self._hydrilla_project_url = hydrilla_config['hydrilla_project_url'] + self._hydrilla_port = hydrilla_config['port'] + self._hydrilla_werror = hydrilla_config.get('werror', False) + + if 'hydrilla_parent' in hydrilla_config: + raise ValueError("Option 'hydrilla_parent' is not implemented.") + + malcontent_dir = Path(hydrilla_config['malcontent_dir']).resolve() + with self.app_context(): + 
self._hydrilla_malcontent = Malcontent(malcontent_dir) + + self.register_blueprint(bp) + + def create_jinja_environment(self, *args, **kwargs) \ + -> flask.templating.Environment: + """ + Flask's create_jinja_environment(), but tweaked to always include the + 'hydrilla_project_url' global variable and to install proper + translations. + """ + env = super().create_jinja_environment(*args, **kwargs) + env.install_gettext_translations(self._hydrilla_translation) + env.globals['hydrilla_project_url'] = self._hydrilla_project_url + + return env + + def run(self, *args, **kwargs): + """ + Flask's run(), but tweaked to use the port from hydrilla configuration + by default. + """ + return super().run(*args, port=self._hydrilla_port, **kwargs) + +def f_(text_key): + return flask.current_app._hydrilla_translation.gettext(text_key) + +def malcontent(): + return flask.current_app._hydrilla_malcontent + +@bp.route('/') +def index(): + return flask.render_template('index.html') + +identifier_json_re = re.compile(r'^([-0-9a-z.]+)\.json$') + +def get_resource_or_mapping(item_type: str, identifier: str) -> Response: + """ + Strip '.json' from 'identifier', look the item up and send its JSON + description. 
    """
    match = identifier_json_re.match(identifier)
    if not match:
        flask.abort(404)

    identifier = match.group(1)

    versioned_info = malcontent().infos[item_type].get(identifier)

    info = versioned_info and versioned_info.get_by_ver()
    if info is None:
        flask.abort(404)

    # no need for send_from_directory(); path is safe, constructed by us
    file_path = malcontent().malcontent_dir_path / item_type / info.path()
    return flask.send_file(open(file_path, 'rb'), mimetype='application/json')

@bp.route('/mapping/<string:identifier_dot_json>')
def get_newest_mapping(identifier_dot_json: str) -> Response:
    """
    Serve a mapping definition.

    Thin route wrapper: the last path component of the request is handed to
    get_resource_or_mapping() together with item type 'mapping'.
    """
    return get_resource_or_mapping('mapping', identifier_dot_json)

@bp.route('/resource/<string:identifier_dot_json>')
def get_newest_resource(identifier_dot_json: str) -> Response:
    """
    Serve a resource definition.

    Thin route wrapper: the last path component of the request is handed to
    get_resource_or_mapping() together with item type 'resource'.
    """
    return get_resource_or_mapping('resource', identifier_dot_json)

@bp.route('/query')
def query() -> Response:
    """
    Answer a `/query?url=...` request.

    The `url` query parameter is passed to `malcontent().query()`. Every item
    it yields is converted with `as_query_result()` and the resulting list is
    returned as a JSON document carrying `$schema`, `mappings` and
    `generated_by` fields.
    """
    # Indexing (rather than .get()) means a request lacking `url` never
    # reaches the query below. Presumably Werkzeug turns the resulting
    # KeyError into a 400 response - TODO confirm.
    url = flask.request.args['url']

    mapping_refs = [i.as_query_result() for i in malcontent().query(url)]
    result = {
        '$schema': 'https://hydrilla.koszko.org/schemas/api_query_result-1.schema.json',
        'mappings': mapping_refs,
        'generated_by': generated_by
    }

    return Response(json.dumps(result), mimetype='application/json')

@bp.route('/--help')
def mm_help() -> str:
    """
    Return command-line help text (newline-terminated) as the response body.
    """
    # NOTE(review): the help text is rendered from `start`, while the click
    # context is built around `start_wsgi` - confirm this pairing is intended.
    return start.get_help(click.Context(start_wsgi)) + '\n'

@bp.route('/--version')
def mm_version() -> str:
    """
    Return the translated version/license notice (newline-terminated).

    Uses the same message id as the `--version` option of the commands below.
    """
    prog_info = {'prog': 'Hydrilla', 'version': _version.version}
    return _('%(prog)s_%(version)s_license') % prog_info + '\n'

# Built-in default values.
# NOTE(review): none of the three is referenced in the visible part of this
# file - presumably they are consumed elsewhere; verify before removing.
default_config_path = Path('/etc/hydrilla/config.json')
default_malcontent_dir = '/var/lib/hydrilla/malcontent'
default_project_url = 'https://hydrillabugs.koszko.org/projects/hydrilla/wiki'

# Translation function obtained without naming a language. `_` has to be bound
# at module level before the click decorators below are evaluated, because
# they call `_()` while this module is being imported.
console_gettext = util.translation(here / 'locales').gettext
_ = console_gettext

@click.command(help=_('serve_hydrilla_packages_explain_wsgi_considerations'))
@click.option('-m', '--malcontent-dir',
              type=click.Path(exists=True, file_okay=False),
              help=_('directory_to_serve_from_overrides_config'))
@click.option('-h', '--hydrilla-project-url', type=click.STRING,
              help=_('project_url_to_display_overrides_config'))
@click.option('-p', '--port', type=click.INT,
              help=_('tcp_port_to_listen_on_overrides_config'))
@click.option('-c', '--config', 'config_path',
              type=click.Path(exists=True, dir_okay=False, resolve_path=True),
              help=_('path_to_config_file_explain_default'))
@click.option('-l', '--language', type=click.STRING,
              help=_('language_to_use_overrides_config'))
@click.version_option(version=_version.version, prog_name='Hydrilla',
                      message=_('%(prog)s_%(version)s_license'),
                      help=_('version_printing'))
def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str],
          port: Optional[int], config_path: Optional[str],
          language: Optional[str]) -> None:
    """
    Run a development Hydrilla server.

    This command is meant to be the entry point of hydrilla command exported by
    this package.

    Values given on the command line replace the corresponding entries of the
    loaded configuration. The help text shown by click comes from the explicit
    `help=` argument of the decorator, not from this docstring.

    Parameters (each is checked against None before being applied):
        malcontent_dir        stored under 'malcontent_dir' as a resolved path
        hydrilla_project_url  stored under 'hydrilla_project_url'
        port                  stored under 'port'
        config_path           config file handed to config.load()
        language              stored under 'language'

    Raises ValueError when, after merging, any of 'malcontent_dir',
    'hydrilla_project_url', 'port' or 'language' is missing from the config.
    """
    # Only name a config file when one was requested; otherwise config.load()
    # is called without arguments and applies its own defaults.
    config_load_opts = {} if config_path is None \
        else {'config_path': [Path(config_path)]}

    hydrilla_config = config.load(**config_load_opts)

    if malcontent_dir is not None:
        hydrilla_config['malcontent_dir'] = str(Path(malcontent_dir).resolve())

    if hydrilla_project_url is not None:
        hydrilla_config['hydrilla_project_url'] = hydrilla_project_url

    if port is not None:
        hydrilla_config['port'] = port

    if language is not None:
        hydrilla_config['language'] = language

    # Local rebinding of `_`: the error message below is produced in the
    # configured language when there is one.
    lang = hydrilla_config.get('language')
    _ = console_gettext if lang is None else \
        util.translation(here / 'locales', lang).gettext

    for opt in ('malcontent_dir', 'hydrilla_project_url', 'port', 'language'):
        if opt not in hydrilla_config:
            raise ValueError(_('config_option_{}_not_supplied').format(opt))

    HydrillaApp(hydrilla_config).run()

# Unknown options and extra arguments are tolerated here - presumably so that
# arguments meant for the hosting WSGI server do not make click fail (confirm).
@click.command(help=_('serve_hydrilla_packages_wsgi_help'),
               context_settings={
                   'ignore_unknown_options': True,
                   'allow_extra_args': True
               })
@click.version_option(version=_version.version, prog_name='Hydrilla',
                      message=_('%(prog)s_%(version)s_license'),
                      help=_('version_printing'))
def start_wsgi() -> 'HydrillaApp':
    """
    Create application object for use in WSGI deployment.

    This command also handles --help and --version options in case it gets
    called outside WSGI environment.

    The configuration is taken from the `obj` attribute of the current click
    context when that is set and from config.load() otherwise.
    """
    return HydrillaApp(click.get_current_context().obj or config.load())
diff --git a/src/hydrilla/server/templates/base.html b/src/hydrilla/server/templates/base.html
new file mode 100644
index 0000000..34cb214
--- /dev/null
+++ b/src/hydrilla/server/templates/base.html
@@ -0,0 +1,123 @@
{# SPDX-License-Identifier: CC-BY-SA-4.0 OR AGPL-3.0-or-later

Base HTML page template.

This file is part of Hydrilla

Copyright (C) 2021 Wojtek Kosior

This file is free cultural work: you can redistribute it with or
without modification under the terms of the Creative Commons
Attribution Share Alike 4.0 International as published by the
Creative Commons Corporation.

This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Creative Commons Attribution Share Alike 4.0 International
License for more details.


I, Wojtek Kosior, thereby promise not to sue for violation of this
file's license. Although I request that you do not make use this code
in a proprietary program, I am not going to enforce this in court.
#}

{#
  link_for(endpoint, text): render an anchor whose href is
  url_for(endpoint, **kwargs), so extra keyword arguments given to the macro
  are forwarded to url_for(). When the macro is invoked through a call block,
  the block's body is emitted inside the opening <a> tag, i.e. it supplies
  additional HTML attributes (see the home link below).
#}
{% macro link_for(endpoint, text) -%}
  <a href="{{ url_for(endpoint, **kwargs) }}"
     {{ caller() if caller is defined }}>
    {{ text }}
  </a>
{%- endmacro %}

<!DOCTYPE html>
<html>
  <head>
    {% block head %}
      {# The policy below names 'none' as the only allowed script source. #}
      <meta http-equiv="Content-Security-Policy" content="script-src 'none';">
      <style>
        {% block styles %}
          html, body, div, h1, h2, h3, h4, h5, h6 {
              margin: 0;
              padding: 0;
          }

          * {
              color: #444;
          }

          aside {
              display: inline-block;
              border-left: 0.2em solid #e44;
              background-color: #edc;
              padding: 0.2em;
          }

          .nav {
              background-color: #ddd;
          }

          .nav>*:hover {
              background-color: #999;
          }

          .nav>* {
              display: inline-block;
              padding: 1em;
          }

          .nav a {
              text-decoration: none;
          }

          .home_link {
              font-weight: bold;
              font-size: 1.5em;
              padding: 0.5em;
          }

          .content {
              margin: auto;
              margin-top: 2em;
              margin-bottom: 2em;
              max-width: 700px;
              padding-left: 1em;
              padding-right: 1em;
              border-left: 1px #999 solid;
              border-right: 1px #999 solid;
          }

          .footer {
              font-size: 0.8em;
              padding: 1em;
              border-top: 1px #777 solid;
              text-align: center;
          }
        {% endblock %}
      </style>
      {# TRANSLATORS: 'hydrilla' as a title#}
      <title>{% block title %}{{ _('hydrilla') }}{% endblock %}</title>
    {% endblock %}
  </head>
  <body>
    {% block body %}
      <div class="nav">
        {# The call body becomes an attribute of the generated <a> element. #}
        {% call link_for('bp.index', _('hydrilla')) %}
          class="home_link"
        {% endcall %}
      </div>

      <div class="content">
        {# Empty here; child templates fill this block in. #}
        {% block content %}
        {% endblock %}
      </div>

      {#
        NOTE(review): the footer text is hard-coded English, unlike the title
        and the home link which go through _() - confirm whether it should be
        made translatable.
      #}
      <div class="footer">
        Copyright © Wojtek Kosior.
        <br>
        This page was generated by Hydrilla which is free/libre software.
        You can get a copy <a href="{{ hydrilla_project_url|e }}">here</a>.
      </div>
    {% endblock %}
  </body>
</html>
diff --git a/src/hydrilla/server/templates/index.html b/src/hydrilla/server/templates/index.html
new file mode 100644
index 0000000..3063239
--- /dev/null
+++ b/src/hydrilla/server/templates/index.html
@@ -0,0 +1,30 @@
{# SPDX-License-Identifier: CC-BY-SA-4.0 OR AGPL-3.0-or-later

HTML index page template.

This file is part of Hydrilla

Copyright (C) 2021 Wojtek Kosior

This file is free cultural work: you can redistribute it with or
without modification under the terms of the Creative Commons
Attribution Share Alike 4.0 International as published by the
Creative Commons Corporation.

This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Creative Commons Attribution Share Alike 4.0 International
License for more details.


I, Wojtek Kosior, thereby promise not to sue for violation of this
file's license. Although I request that you do not make use this code
in a proprietary program, I am not going to enforce this in court.
#}

{#
  Index page: overrides only the `content` block of base.html. super()
  first emits the parent's version of that block (empty in base.html), then
  the translated welcome heading follows.
#}
{% extends 'base.html' %}
{% block content %}
  {{ super() }}
  <h2>{{ _('hydrilla_welcome') }}</h2>
{% endblock %}
|