diff options
Diffstat (limited to 'src')
21 files changed, 888 insertions, 1398 deletions
diff --git a/src/hydrilla/__init__.py b/src/hydrilla/__init__.py new file mode 100644 index 0000000..6aeb276 --- /dev/null +++ b/src/hydrilla/__init__.py @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: 0BSD + +# Copyright (C) 2013-2020, PyPA + +# https://packaging.python.org/en/latest/guides/packaging-namespace-packages/#pkgutil-style-namespace-packages + +__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/src/hydrilla/server/__init__.py b/src/hydrilla/server/__init__.py new file mode 100644 index 0000000..f5a799e --- /dev/null +++ b/src/hydrilla/server/__init__.py @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: CC0-1.0 + +# Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org> +# +# Available under the terms of Creative Commons Zero v1.0 Universal. + +from .serve import create_app diff --git a/src/hydrilla/server/config.json b/src/hydrilla/server/config.json new file mode 100644 index 0000000..7c9f22b --- /dev/null +++ b/src/hydrilla/server/config.json @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: CC0-1.0 + +// Default Hydrilla config file. +// +// Copyright (C) 2021, 2022 Wojtek Kosior +// +// Available under the terms of Creative Commons Zero v1.0 Universal. + +{ + // Relative path to directory from which Hydrilla will load packages + // metadata and serve files. + // Deliberately avoiding word "content", see: + // http://www.gnu.org/philosophy/words-to-avoid.en.html#Content + "malcontent_dir": "/var/lib/hydrilla/malcontent", + + // Hydrilla will display this link to users as a place where they can + // obtain sources for its software. This config option is meant to ease + // compliance with the AGPL. + "hydrilla_project_url": "https://hydrillabugs.koszko.org/projects/hydrilla/wiki", + + // Tell Hydrilla to look for additional configuration in those files, in + // this order. 
+ "try_configs": ["/etc/hydrilla/config.json"] +} diff --git a/src/pydrilla/locales/en/LC_MESSAGES/pydrilla.po b/src/hydrilla/server/locales/en/LC_MESSAGES/hydrilla.po index f9e6a82..f9e6a82 100644 --- a/src/pydrilla/locales/en/LC_MESSAGES/pydrilla.po +++ b/src/hydrilla/server/locales/en/LC_MESSAGES/hydrilla.po diff --git a/src/hydrilla/server/serve.py b/src/hydrilla/server/serve.py new file mode 100644 index 0000000..815ac63 --- /dev/null +++ b/src/hydrilla/server/serve.py @@ -0,0 +1,604 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + +# Main repository logic. +# +# This file is part of Hydrilla +# +# Copyright (C) 2021, 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +import re +import os +import pathlib +import json +import gettext +import logging + +from pathlib import Path +from hashlib import sha256 +from abc import ABC, abstractmethod +from typing import Optional, Union, Iterable + +from flask import Flask, Blueprint, current_app, url_for, abort, request, \ + redirect, send_file +from jinja2 import Environment, PackageLoader +from werkzeug import Response + +from .. 
import util + +here = pathlib.Path(__file__).resolve().parent + +def load_config(config_path: Path) -> dict: + config = {} + to_load = [config_path] + failures_ok = [False] + + while to_load: + path = to_load.pop() + can_fail = failures_ok.pop() + + try: + json_text = util.strip_json_comments(config_path.read_text()) + new_config = json.loads(json_text) + except Exception as e: + if can_fail: + continue + raise e from None + + config.update(new_config) + + for key, failure_ok in [('try_configs', True), ('use_configs', False)]: + paths = new_config.get(key, []) + paths.reverse() + to_load.extend(paths) + failures_ok.extend([failure_ok] * len(paths)) + + for key in ('try_configs', 'use_configs'): + if key in config: + config.pop(key) + + for key in ('malcontent_dir', 'hydrilla_project_url'): + if key not in config: + raise ValueError(_('config_key_absent_{}').format(key)) + + malcontent_path = Path(config['malcontent_dir']) + if not malcontent_path.is_absolute(): + malcontent_path = config_path.parent / malcontent_path + + config['malcontent_dir'] = str(malcontent_path.resolve()) + + return config + +class ItemInfo(ABC): + """Shortened data of a resource/mapping.""" + def __init__(self, item_obj: dict): + """Initialize ItemInfo using item definition read from JSON.""" + self.version = util.normalize_version(item_obj['version']) + self.identifier = item_obj['identifier'] + self.uuid = item_obj['uuid'] + self.long_name = item_obj['long_name'] + + def path(self) -> str: + """ + Get a relative path to this item's JSON definition with respect to + directory containing items of this type. 
+ """ + return f'{self.identifier}/{util.version_string(self.version)}' + +class ResourceInfo(ItemInfo): + """Shortened data of a resource.""" + def __init__(self, resource_obj: dict): + """Initialize ResourceInfo using resource definition read from JSON.""" + super().__init__(resource_obj) + + self.dependencies = resource_obj.get('dependencies', []) + +class MappingInfo(ItemInfo): + """Shortened data of a mapping.""" + def __init__(self, mapping_obj: dict): + """Initialize MappingInfo using mapping definition read from JSON.""" + super().__init__(mapping_obj) + + self.payloads = {} + for pattern, res_ref in mapping_obj.get('payloads', {}).items(): + self.payloads[pattern] = res_ref['identifier'] + + def as_query_result(self) -> str: + """ + Produce a json.dump()-able object describing this mapping as one of a + collection of query results. + """ + return { + 'version': self.version, + 'identifier': self.identifier, + 'long_name': self.long_name + } + +class VersionedItemInfo: + """Stores data of multiple versions of given resource/mapping.""" + def __init__(self): + self.uuid = None + self.identifier = None + self.by_version = {} + self.known_versions = [] + + def register(self, item_info: ItemInfo) -> None: + """ + Make item info queryable by version. Perform sanity checks for uuid. + """ + if self.identifier is None: + self.identifier = item_info.identifier + self.uuid = item_info.uuid + elif self.uuid != item_info.uuid: + raise ValueError(_('uuid_mismatch_{identifier}') + .format(identifier=self.identifier)) + + ver = item_info.version + ver_str = util.version_string(ver) + + if ver_str in self.by_version: + raise ValueError(_('version_clash_{identifier}_{version}') + .format(identifier=self.identifier, + version=ver_str)) + + self.by_version[ver_str] = item_info + self.known_versions.append(ver) + + def get_by_ver(self, ver: Optional[list[int]]=None) -> Optional[ItemInfo]: + """ + Find and return info of the newest version of item. 
+ + If ver is specified, instead find and return info of that version of the + item (or None if absent). + """ + ver = util.version_string(ver or self.known_versions[-1]) + + return self.by_version.get(ver) + + def get_all(self) -> list[ItemInfo]: + """ + Return a list of item info for all its versions, from oldest to newest. + """ + return [self.by_version[util.version_string(ver)] + for ver in self.known_versions] + +class PatternTreeNode: + """ + "Pattern Tree" is how we refer to the data structure used for querying + Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal + is to make it possible for given URL to quickly retrieve all known patterns + that match it. + """ + def __init__(self): + self.wildcard_matches = [None, None, None] + self.literal_match = None + self.children = {} + + def search(self, segments): + """ + Yields all matches of this segments sequence against the tree that + starts at this node. Results are produced in order from greatest to + lowest pattern specificity. + """ + nodes = [self] + + for segment in segments: + next_node = nodes[-1].children.get(segment) + if next_node is None: + break + + nodes.append(next_node) + + nsegments = len(segments) + cond_literal = lambda: len(nodes) == nsegments + cond_wildcard = [ + lambda: len(nodes) + 1 == nsegments and segments[-1] != '*', + lambda: len(nodes) + 1 < nsegments, + lambda: len(nodes) + 1 != nsegments or segments[-1] != '***' + ] + + while nodes: + node = nodes.pop() + + for item, condition in [(node.literal_match, cond_literal), + *zip(node.wildcard_matches, cond_wildcard)]: + if item is not None and condition(): + yield item + + def add(self, segments, item_instantiator): + """ + Make item queryable through (this branch of) the Pattern Tree. If there + was not yet any item associated with the tree path designated by + segments, create a new one using item_instantiator() function. 
Return + all items matching this path (both the ones that existed and the ones + just created). + """ + node = self + segment = None + + for segment in segments: + wildcards = node.wildcard_matches + + child = node.children.get(segment) or PatternTreeNode() + node.children[segment] = child + node = child + + if node.literal_match is None: + node.literal_match = item_instantiator() + + if segment not in ('*', '**', '***'): + return [node.literal_match] + + if wildcards[len(segment) - 1] is None: + wildcards[len(segment) - 1] = item_instantiator() + + return [node.literal_match, wildcards[len(segment) - 1]] + +proto_regex = re.compile(r'^(?P<proto>\w+)://(?P<rest>.*)$') +user_re = r'[^/?#@]+@' # r'(?P<user>[^/?#@]+)@' # discarded for now +query_re = r'\??[^#]*' # r'\??(?P<query>[^#]*)' # discarded for now +domain_re = r'(?P<domain>[^/?#]+)' +path_re = r'(?P<path>[^?#]*)' +http_regex = re.compile(f'{domain_re}{path_re}{query_re}.*') +ftp_regex = re.compile(f'(?:{user_re})?{domain_re}{path_re}.*') + +class UrlError(ValueError): + """Used to report a URL or URL pattern that is invalid or unsupported.""" + pass + +class DeconstructedUrl: + """Represents a deconstructed URL or URL pattern""" + def __init__(self, url): + self.url = url + + match = proto_regex.match(url) + if not match: + raise UrlError(_('invalid_URL_{}').format(url)) + + self.proto = match.group('proto') + if self.proto not in ('http', 'https', 'ftp'): + raise UrlError(_('disallowed_protocol_{}').format(proto)) + + if self.proto == 'ftp': + match = ftp_regex.match(match.group('rest')) + elif self.proto in ('http', 'https'): + match = http_regex.match(match.group('rest')) + + if not match: + raise UrlError(_('invalid_URL_{}').format(url)) + + self.domain = match.group('domain').split('.') + self.domain.reverse() + self.path = [*filter(None, match.group('path').split('/'))] + +class PatternMapping: + """ + A mapping info, together with one of its patterns, as stored in Pattern + Tree. 
+ """ + def __init__(self, pattern: str, mapping_info: MappingInfo): + self.pattern = pattern + self.mapping_info = mapping_info + + def register(self, pattern_tree: dict): + """ + Make self queryable through the Pattern Tree passed in the argument. + """ + deco = DeconstructedUrl(self.pattern) + + domain_tree = pattern_tree.get(deco.proto) or PatternTreeNode() + pattern_tree[deco.proto] = domain_tree + + for path_tree in domain_tree.add(deco.domain, PatternTreeNode): + for match_list in path_tree.add(deco.path, list): + match_list.append(self) + +class Malcontent: + """ + Instance of this class represents a directory with files that can be loaded + and served by Hydrilla. + """ + def __init__(self, malcontent_dir_path: Union[Path, str]): + """ + When an instance of Malcontent is constructed, it searches + malcontent_dir_path for serveable site-modifying packages and loads + them into its data structures. + """ + self.infos = {'resource': {}, 'mapping': {}} + self.pattern_tree = {} + + self.malcontent_dir_path = pathlib.Path(malcontent_dir_path).resolve() + + if not self.malcontent_dir_path.is_dir(): + raise ValueError(_('malcontent_dir_path_not_dir')) + + for item_type in ('mapping', 'resource'): + type_path = self.malcontent_dir_path / item_type + if not type_path.is_dir(): + continue + + for subpath in type_path.iterdir(): + if not subpath.is_dir(): + continue + + for ver_file in subpath.iterdir(): + try: + self._load_item(item_type, ver_file) + except Exception as e: + if current_app._hydrilla_werror: + raise e from None + + msg = _('couldnt_load_item_from_{}').format(ver_file) + logging.error(msg, exc_info=True) + + self._report_missing() + self._finalize() + + def _load_item(self, item_type: str, ver_file: Path) -> None: + """ + Reads, validates and autocompletes serveable mapping/resource + definition, then registers information from it in data structures. 
+ """ + version = util.parse_version(ver_file.name) + identifier = ver_file.parent.name + + with open(ver_file, 'rt') as file_handle: + item_json = json.load(file_handle) + + util.validator_for(f'api_{item_type}_description-1.schema.json')\ + .validate(item_json) + + if item_type == 'resource': + item_info = ResourceInfo(item_json) + else: + item_info = MappingInfo(item_json) + + if item_info.identifier != identifier: + msg = _('item_{item}_in_file_{file}')\ + .format({'item': item_info.identifier, 'file': ver_file}) + raise ValueError(msg) + + if item_info.version != version: + ver_str = util.version_string(item_info.version) + msg = _('item_version_{ver}_in_file_{file}')\ + .format({'ver': ver_str, 'file': ver_file}) + raise ValueError(msg) + + versioned_info = self.infos[item_type].get(identifier) + if versioned_info is None: + versioned_info = VersionedItemInfo() + self.infos[item_type][identifier] = versioned_info + + versioned_info.register(item_info) + + def _all_of_type(self, item_type: str) -> Iterable[ItemInfo]: + """Iterator over all registered versions of all mappings/resources.""" + for versioned_info in self.infos[item_type].values(): + for item_info in versioned_info.by_version.values(): + yield item_info + + def _report_missing(self) -> None: + """ + Use logger to print information about items that are referenced but + were not loaded. 
+ """ + def report_missing_dependency(info: ResourceInfo, dep: str) -> None: + msg = _('no_dep_%(resource)s_%(ver)s_%(dep)s')\ + .format(dep=dep, resource=info.identifier, + ver=util.version_string(info.version)) + logging.error(msg) + + for resource_info in self._all_of_type('resource'): + for dep in resource_info.dependencies: + if dep not in self.infos['resource']: + report_missing_dependency(resource_info, dep) + + def report_missing_payload(info: MappingInfo, payload: str) -> None: + msg = _('no_payload_{mapping}_{ver}_{payload}')\ + .format(mapping=info.identifier, payload=payload, + ver=util.version_string(info.version)) + logging.error(msg) + + for mapping_info in self._all_of_type('mapping'): + for payload in mapping_info.payloads.values(): + if payload not in self.infos['resource']: + report_missing_payload(mapping_info, payload) + + def _finalize(self): + """ + Initialize structures needed to serve queries. Called once after all + data gets loaded. + """ + for infos_dict in self.infos.values(): + for versioned_info in infos_dict.values(): + versioned_info.known_versions.sort() + + for info in self._all_of_type('mapping'): + for pattern in info.payloads: + try: + PatternMapping(pattern, info).register(self.pattern_tree) + except Exception as e: + if current_app._hydrilla_werror: + raise e from None + msg = _('couldnt_register_{mapping}_{ver}_{pattern}')\ + .format(mapping=info.identifier, pattern=pattern, + ver=util.version_string(info.version)) + logging.error(msg) + + def query(self, url: str) -> list[MappingInfo]: + """ + Return a list of registered mappings that match url. + + If multiple versions of a mapping are applicable, only the most recent + is included in the result. 
+ """ + deco = DeconstructedUrl(url) + + collected = {} + + domain_tree = self.pattern_tree.get(deco.proto) or PatternTreeNode() + + def process_mapping(pattern_mapping: PatternMapping) -> None: + if url[-1] != '/' and pattern_mapping.pattern[-1] == '/': + return + + info = pattern_mapping.mapping_info + + if info.identifier not in collected or \ + info.version > collected[info.identifier].version: + collected[info.identifier] = info + + for path_tree in domain_tree.search(deco.domain): + for matches_list in path_tree.search(deco.path): + for pattern_mapping in matches_list: + process_mapping(pattern_mapping) + + return list(collected.values()) + +bp = Blueprint('bp', __package__) + +def create_app(config_path: Path=(here / 'config.json'), flask_config: dict={}): + """Create the Flask instance.""" + config = load_config(config_path) + + app = Flask(__package__, static_url_path='/', + static_folder=config['malcontent_dir']) + app.config.update(flask_config) + + language = flask_config.get('lang', 'en') + translation = gettext.translation('hydrilla', localedir=(here / 'locales'), + languages=[language]) + + app._hydrilla_gettext = translation.gettext + + # https://stackoverflow.com/questions/9449101/how-to-stop-flask-from-initialising-twice-in-debug-mode + if app.debug and os.environ.get('WERKZEUG_RUN_MAIN') != 'true': + return app + + app._hydrilla_project_url = config['hydrilla_project_url'] + app._hydrilla_werror = config.get('werror', False) + if 'hydrilla_parent' in config: + raise MyNotImplError('hydrilla_parent', config_path.name) + + malcontent_dir = pathlib.Path(config['malcontent_dir']) + if not malcontent_dir.is_absolute(): + malcontent_dir = config_path.parent / malcontent_dir + with app.app_context(): + app._hydrilla_malcontent = Malcontent(malcontent_dir.resolve()) + + app.register_blueprint(bp) + + return app + +def _(text_key): + return current_app._hydrilla_gettext(text_key) + +def malcontent(): + return current_app._hydrilla_malcontent + +# TODO: 
override create_jinja_environment() method of Flask instead of wrapping +# Jinja environment +class MyEnvironment(Environment): + """ + A wrapper class around jinja2.Environment that causes GNU gettext function + (as '_' and '__'), url_for function and 'hydrilla_project_url' config option + to be passed to every call of each template's render() method. + """ + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def get_template(self, *args, **kwargs): + template = super().get_template(*args, **kwargs) + old_render = template.render + + def new_render(*args, **kwargs): + _ = current_app._hydrilla_gettext + project_url = current_app._hydrilla_project_url + + def escaping_gettext(text_key): + from markupsafe import escape + + return str(escape(_(text_key))) + + final_kwargs = { + '_': escaping_gettext, + '__': escaping_gettext, + 'url_for': url_for, + 'hydrilla_project_url' : project_url + } + final_kwargs.update(kwargs) + + return old_render(*args, **final_kwargs) + + template.render = new_render + + return template + +j2env = MyEnvironment(loader=PackageLoader(__package__), autoescape=False) + +indexpage = j2env.get_template('index.html') +@bp.route('/') +def index(): + return indexpage.render() + +identifier_json_re = re.compile(r'^([-0-9a-z.]+)\.json$') + +def get_resource_or_mapping(item_type: str, identifier: str) -> Response: + """ + Strip '.json' from 'identifier', look the item up and send its JSON + description. 
+ """ + match = identifier_json_re.match(identifier) + if not match: + abort(404) + + identifier = match.group(1) + + versioned_info = malcontent().infos[item_type].get(identifier) + + info = versioned_info and versioned_info.get_by_ver() + if info is None: + abort(404) + + # no need for send_from_directory(); path is safe, constructed by us + return send_file(malcontent().malcontent_dir_path / item_type / info.path()) + +@bp.route('/mapping/<string:identifier_dot_json>') +def get_newest_mapping(identifier_dot_json: str) -> Response: + return get_resource_or_mapping('mapping', identifier_dot_json) + +@bp.route('/resource/<string:identifier_dot_json>') +def get_newest_resource(identifier_dot_json: str) -> Response: + return get_resource_or_mapping('resource', identifier_dot_json) + +@bp.route('/query') +def query(): + url = request.args['url'] + + mapping_refs = [i.as_query_result() for i in malcontent().query(url)] + result = { + 'api_schema_version': [1], + 'generated_by': { + 'name': 'hydrilla' + }, + 'mappings': mapping_refs + } + + return json.dumps(result) diff --git a/src/pydrilla/templates/base.html b/src/hydrilla/server/templates/base.html index 7b26b64..f95ce54 100644 --- a/src/pydrilla/templates/base.html +++ b/src/hydrilla/server/templates/base.html @@ -1,4 +1,4 @@ -{# SPDX-License-Identifier: CC-BY-NC-SA-4.0 +{# SPDX-License-Identifier: CC-BY-SA-4.0 OR AGPL-3.0-or-later Base HTML page template. @@ -115,7 +115,7 @@ in a proprietary program, I am not going to enforce this in court. Copyright © Wojtek Kosior. <br> This page was generated by Hydrilla which is free/libre software. - You can get a copy <a href="{{ hydrilla_sources_uri|e }}">here</a>. + You can get a copy <a href="{{ hydrilla_project_url|e }}">here</a>. 
</div> {% endblock %} </body> diff --git a/src/pydrilla/templates/index.html b/src/hydrilla/server/templates/index.html index 2555df0..3063239 100644 --- a/src/pydrilla/templates/index.html +++ b/src/hydrilla/server/templates/index.html @@ -1,4 +1,4 @@ -{# SPDX-License-Identifier: CC-BY-NC-SA-4.0 +{# SPDX-License-Identifier: CC-BY-SA-4.0 OR AGPL-3.0-or-later HTML index page template. diff --git a/src/pydrilla_dev_helper.py b/src/hydrilla_dev_helper.py index 88dc63e..925f414 100644 --- a/src/pydrilla_dev_helper.py +++ b/src/hydrilla_dev_helper.py @@ -37,24 +37,12 @@ import importlib def mypath(path_or_string): return Path(path_or_string).resolve() -debrel_regex = re.compile(r'^[^(]*\([^-]*-([^)]*)\)') - -def extract_debrel(debian_dir): - changelog_path = mypath(debian_dir) / 'changelog' - with open(changelog_path) as changelog_file: - try: - return debrel_regex.match(changelog_file.readline())[1] - except TypeError: - raise RuntimeException('Cannot extract debrel from %s.' % - changelog_path) - class Helper: - def __init__(self, project_root, app_package_name, version, locales_dir, + def __init__(self, project_root, app_package_name, locales_dir, locales=['en', 'pl'], default_locale='en', locale_domain=None, packages_root=None, debian_dir=None, config_path=None): self.project_root = mypath(project_root) self.app_package_name = app_package_name - self.version = version self.locales_dir = mypath(locales_dir) self.locales = locales self.default_locale = default_locale @@ -102,7 +90,7 @@ class Helper: command = ['msgfmt', po_path, '-o', mo_path] self.run_command(command, verbose=verbose, check=True) - self.locale_files_list.extend([po_path, mo_path]) + self.locale_files_list.extend([po_path, mo_path]) def locale_files(self): if self.locale_files_list is None: @@ -161,10 +149,37 @@ class Helper: # we exclude these from the source archive we produce bad_file_regex = re.compile(r'^\..*|build|debian|dist') + changelog_line_regex = re.compile(r''' + ^ # match from the 
+ beginning of each line + \s* # skip initial whitespace (if any) + (?P<source_name> # capture name + [^\s(]+ + ) + \s* # again skip whitespace (if any) + \( + (?P<version> # capture version which is enclosed in parentheses + [^)]+ + ) + - + (?P<debrel> # capture debrel part of version separately + [0-9]+ + ) + \) + ''', re.VERBOSE) + def make_tarballs(self, verbose=False): - name=self.app_package_name - ver=self.version - debrel=extract_debrel(self.debian_dir) + changelog_path = self.project_root / 'debian' / 'changelog' + with open(changelog_path, 'rt') as file_handle: + for line in file_handle.readlines(): + match = changelog_line_regex.match(line) + if match: + break + + if not match: + raise ValueError("Couldn't extract version from debian/changelog.") + + name, ver, debrel = \ + [match.group(gn) for gn in ('source_name', 'version', 'debrel')] source_dirname = f'{name}-{ver}' source_tarball_name = f'{name}_{ver}.orig.tar.gz' diff --git a/src/pydrilla/__init__.py b/src/pydrilla/__init__.py deleted file mode 100644 index 8d1565b..0000000 --- a/src/pydrilla/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .pydrilla import create_app diff --git a/src/pydrilla/config.json b/src/pydrilla/config.json deleted file mode 100644 index 6bb5440..0000000 --- a/src/pydrilla/config.json +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: CC0-1.0 - -// Example Hydrilla config file. -// -// Copyright (C) 2021 Wojtek Kosior -// -// Available under the terms of Creative Commons Zero v1.0 Universal. - -{ - "content_dir": "/var/lib/hydrilla/content", - "static_resource_uri": "http://localhost:8000/", - "hydrilla_sources_uri": "https://git.koszko.org/pydrilla/", - "try_configs": ["/etc/pydrilla/config.json"] -} diff --git a/src/pydrilla/pydrilla.py b/src/pydrilla/pydrilla.py deleted file mode 100644 index d7aef76..0000000 --- a/src/pydrilla/pydrilla.py +++ /dev/null @@ -1,755 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later - -# Main repository logic. 
-# -# This file is part of Hydrilla -# -# Copyright (C) 2021 Wojtek Kosior -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <https://www.gnu.org/licenses/>. -# -# -# I, Wojtek Kosior, thereby promise not to sue for violation of this -# file's license. Although I request that you do not make use this code -# in a proprietary program, I am not going to enforce this in court. - -from flask import Flask, Blueprint, current_app, url_for, abort, request, \ - redirect -from jinja2 import Environment, PackageLoader -import re -from hashlib import sha256 -import os -import pathlib -import json -import gettext -import logging - -SCHEMA_VERSION = [0, 2] - -strip_comment_re = re.compile(r''' -^ # match from the beginning of each line -( # catch the part before '//' comment - (?: # this group matches either a string or a single out-of-string character - [^"/] | - " - (?: # this group matches any in-a-string character - [^"\\] | # match any normal character - \\[^u] | # match any escaped character like '\f' or '\n' - \\u[a-fA-F0-9]{4} # match an escape - )* - " - )* -) -# expect either end-of-line or a comment: -# * unterminated strings will cause matching to fail -# * bad comment (with '/' instead of '//') will be indicated by second group -# having length 1 instead of 2 or 0 -(//?|$) -''', re.VERBOSE) - -def strip_json_comments(text): - processed = 0 - stripped_text = [] - for line in 
text.split('\n'): - match = strip_comment_re.match(line) - - if match is None: # unterminated string - # ignore this error, let json module report it - stripped = line - elif len(match[2]) == 1: - raise json.JSONDecodeError('bad comment', text, - processed + len(match[1])) - else: - stripped = match[1] - - stripped_text.append(stripped) - processed += len(line) + 1 - - return '\n'.join(stripped_text) - -here = pathlib.Path(__file__).resolve().parent - -bp = Blueprint('bp', __package__) - -def load_config(config_path): - config = {} - to_load = [config_path] - failures_ok = [False] - - while to_load: - path = to_load.pop() - can_fail = failures_ok.pop() - - try: - with open(config_path) as config_file: - new_config = json.loads(strip_json_comments(config_file.read())) - except Exception as e: - if can_fail: - continue - raise e from None - - config.update(new_config) - - for key, failure_ok in [('try_configs', True), ('use_configs', False)]: - paths = new_config.get(key, []) - paths.reverse() - to_load.extend(paths) - failures_ok.extend([failure_ok] * len(paths)) - - for key in ['try_configs', 'use_configs']: - if key in config: - config.pop(key) - - return config - -def get_content_file_path(path): - if os.path.sep != '/': - path.replace('/', os.path.sep) - - path = pathlib.Path(path) - if path.is_absolute(): - raise ValueError(_('path_is_absolute_{}').format(path)) - - return path - -class MyNotImplError(NotImplementedError): - '''Raised when a planned but not-yet-completed feature is used.''' - def __init__(self, what, where): - super().__init__(_('not_implemented_{what}_{where}') - .format(what=what, where=where)) - -def normalize_version(ver): - ''' - ver is an array of integers. Strip right-most zeroes from ver. - - Returns a *new* array. Doesn't modify its argument. 
- ''' - new_len = 0 - for i, num in enumerate(ver): - if num != 0: - new_len = i + 1 - - return ver[:new_len] - -def parse_version(ver_str): - ''' - Convert ver_str into an array representation, e.g. for ver_str="4.6.13.0" - return [4, 6, 13, 0]. - ''' - return [int(num) for num in ver_str.split('.')] - -def version_string(ver, rev=None): - ''' - ver is an array of integers. rev is an optional integer. Produce string - representation of version (optionally with revision number), like: - 1.2.3-5 - No version normalization is performed. - ''' - return '.'.join([str(n) for n in ver]) + ('' if rev is None else f'-{rev}') - -class VersionedContentItem: - '''Stores definitions of multiple versions of website content item.''' - def __init__(self): - self.uuid = None - self.identifier = None - self.by_version = {} - self.known_versions = [] - - def register_item(self, item): - '''Make item queryable by version. Perform sanity checks for uuid.''' - if self.identifier is None: - self.identifier = item['identifier'] - self.uuid = item['uuid'] - elif self.uuid != item['uuid']: - raise ValueError(_('uuid_mismatch_{identifier}') - .format(identifier=self.identifier)) - - ver = item['version'] - ver_str = version_string(ver) - - if ver_str in self.by_version: - raise ValueError(_('version_clash_{identifier}_{version}') - .format(identifier=self.identifier, - version=ver_str)) - - self.by_version[ver_str] = item - self.known_versions.append(ver) - - def get_by_ver(self, ver=None): - ''' - Find and return definition of the newest version of item. - - If ver is specified, instead find and return definition of that version - of the item (or None is absent). 
- ''' - ver = version_string(ver or self.known_versions[-1]) - - return self.by_version.get(ver) - - def get_all(self): - '''Return a list of all definitions of item, ordered by version.''' - return [self.by_version[version_string(ver)] - for ver in self.known_versions] - -class PatternTreeNode: - ''' - "Pattern Tree" is how we refer to the data structure used for querying - Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal - is to make it possible for given URL to quickly retrieve all known patterns - that match it. - ''' - def __init__(self): - self.wildcard_matches = [None, None, None] - self.literal_match = None - self.children = {} - - def search(self, segments): - ''' - Yields all matches of this segments sequence against the tree that - starts at this node. Results are produces in order from greatest to - lowest pattern specificity. - ''' - nodes = [self] - - for segment in segments: - next_node = nodes[-1].children.get(segment) - if next_node is None: - break - - nodes.append(next_node) - - nsegments = len(segments) - cond_literal = lambda: len(nodes) == nsegments - cond_wildcard = [ - lambda: len(nodes) + 1 == nsegments and segments[-1] != '*', - lambda: len(nodes) + 1 < nsegments, - lambda: len(nodes) + 1 != nsegments or segments[-1] != '***' - ] - - while nodes: - node = nodes.pop() - - for item, condition in [(node.literal_match, cond_literal), - *zip(node.wildcard_matches, cond_wildcard)]: - if item is not None and condition(): - yield item - - def add(self, segments, item_instantiator): - ''' - Make item queryable through (this branch of) the Pattern Tree. If there - was not yet any item associated with the tree path designated by - segments, create a new one using item_instantiator() function. Return - all items matching this path (both the ones that existed and the ones - just created). 
- ''' - node = self - segment = None - - for segment in segments: - wildcards = node.wildcard_matches - - child = node.children.get(segment) or PatternTreeNode() - node.children[segment] = child - node = child - - if node.literal_match is None: - node.literal_match = item_instantiator() - - if segment not in ('*', '**', '***'): - return [node.literal_match] - - if wildcards[len(segment) - 1] is None: - wildcards[len(segment) - 1] = item_instantiator() - - return [node.literal_match, wildcards[len(segment) - 1]] - -proto_regex = re.compile(r'^(?P<proto>\w+)://(?P<rest>.*)$') -user_re = r'[^/?#@]+@' # r'(?P<user>[^/?#@]+)@' # discarded for now -query_re = r'\??[^#]*' # r'\??(?P<query>[^#]*)' # discarded for now -domain_re = r'(?P<domain>[^/?#]+)' -path_re = r'(?P<path>[^?#]*)' -http_regex = re.compile(f'{domain_re}{path_re}{query_re}.*') -ftp_regex = re.compile(f'(?:{user_re})?{domain_re}{path_re}.*') - -class UrlError(ValueError): - pass - -class DeconstructedUrl: - '''Represents a deconstructed URL or URL pattern''' - def __init__(self, url): - self.url = url - - match = proto_regex.match(url) - if not match: - raise UrlError(_('invalid_URL_{}').format(url)) - - self.proto = match.group('proto') - if self.proto not in ('http', 'https', 'ftp'): - raise UrlError(_('disallowed_protocol_{}').format(proto)) - - if self.proto == 'ftp': - match = ftp_regex.match(match.group('rest')) - elif self.proto in ('http', 'https'): - match = http_regex.match(match.group('rest')) - - if not match: - raise UrlError(_('invalid_URL_{}').format(url)) - - self.domain = match.group('domain').split('.') - self.domain.reverse() - self.path = [*filter(None, match.group('path').split('/'))] - -class MappingItem: - ''' - A mapping, together with one of its patterns, as stored in Pattern Tree. 
- ''' - def __init__(self, pattern, mapping): - self.pattern = pattern - self.mapping = mapping - - def register(self, patterns_by_proto): - ''' - Make self queryable through the Pattern Tree that starts with the - protocols dictionary passed in the argument. - ''' - deco = DeconstructedUrl(self.pattern) - - domain_tree = patterns_by_proto.get(deco.proto) or PatternTreeNode() - patterns_by_proto[deco.proto] = domain_tree - - for path_tree in domain_tree.add(deco.domain, PatternTreeNode): - for match_list in path_tree.add(deco.path, list): - match_list.append(self) - -class Content: - '''Stores serveable website content.''' - def __init__(self, content_dir_path): - ''' - When an instance of Content is constructed, it searches - content_dir_path for custom serveable site content and loads it. - ''' - self.resources = {} - self.mappings = {} - self.licenses = {} - self.indexes = {} - self.definition_processors = { - 'resource': self._process_resource_or_mapping, - 'mapping': self._process_resource_or_mapping, - 'license': self._process_license - } - self.patterns_by_proto = {} - self.file_sha256sums = {} - - self.content_dir_path = pathlib.Path(content_dir_path).resolve() - - if not self.content_dir_path.is_dir(): - raise ValueError(_('content_dir_path_not_dir')) - - for subdir_path in self.content_dir_path.iterdir(): - if not subdir_path.is_dir(): - continue - try: - self._load_content_from_subdir(subdir_path, subdir_path.name) - except Exception as e: - if current_app._pydrilla_werror: - raise e from None - logging.error(_('couldnt_load_content_from_%s'), subdir_path, - exc_info=True) - - self._report_missing() - self._finalize() - - def _load_content_from_subdir(self, subdir_path, source_name): - ''' - Helper function used to load definitions from index.json of a - subdirectory of the content direcotory. 
- ''' - index_path = subdir_path / 'index.json' - with open(index_path) as index_file: - index = json.loads(strip_json_comments(index_file.read())) - - self._process_index(index, source_name) - - @staticmethod - def register_item(dict, item): - ''' - Helper function used to add a versioned item definition to content - data structures. - ''' - identifier = item['identifier'] - versioned_item = dict.get(identifier) - if versioned_item is None: - versioned_item = VersionedContentItem() - dict[identifier] = versioned_item - - versioned_item.register_item(item) - - @staticmethod - def _process_copyright_and_license(definition): - '''Helper function used by other _process_*() methods.''' - for field in ['copyright', 'licenses']: - if definition[field] == 'auto': - raise MyNotImplError(f'"{{field}}": "auto"', - definition['source_name']) - - def _get_file_sha256sum(self, path): - ''' - Compute sha256 of the file at path. Cache results on this Content - object. - ''' - path = path.resolve() - sha256sum = self.file_sha256sums.get(path) - - if sha256sum is None: - with open(path, mode='rb') as hashed_file: - sha256sum = sha256(hashed_file.read()).digest().hex() - self.file_sha256sums[path] = sha256sum - - return sha256sum - - def _add_file_sha256sum(self, source_name, file_object): - ''' - Expect file_object to be a dict with field "file" holding a file path - relative to content directory's subdirectory source_name. Compute or - fetch from cache the sha256 sum of that file and put it in file_object's - "sha256" field. - ''' - file_path = self.content_dir_path / source_name / file_object['file'] - file_object['sha256'] = self._get_file_sha256sum(file_path) - - def _process_resource_or_mapping(self, definition, index): - ''' - Sanitizes, autocompletes and registers serveable mapping/resource - definition. 
- ''' - definition['version'] = normalize_version(definition['version']) - - if definition['type'] == 'resource': - self._process_copyright_and_license(definition) - definition['dependencies'] = definition.get('dependencies', []) - self.register_item(self.resources, definition) - source_name = definition['source_name'] - for script in definition['scripts']: - self._add_file_sha256sum(source_name, script) - else: - self.register_item(self.mappings, definition) - - def _process_license(self, license, index): - '''Sanitizes and registers serveable license definition.''' - identifier = license['identifier'] - if identifier in self.licenses: - raise ValueError(_('license_clash_{}').format(identifier)) - - self.licenses[identifier] = license - - source_name = license['source_name'] - for legal_text in license['legal_text']: - self._add_file_sha256sum(source_name, legal_text) - - notice = license.get('notice') - if notice is not None: - self._add_file_sha256sum(source_name, notice) - - def _process_index(self, index, source_name): - ''' - Sanitizes, autocompletes and registers data from a loaded index.json - file. 
- ''' - schema_ver = normalize_version(index['schema_version']) - index['schema_version'] = schema_ver - if schema_ver != SCHEMA_VERSION: - raise ValueError('index_json_schema_mismatch_{found}_{required}' - .format(found=version_string(schema_ver), - required=version_string(SCHEMA_VERSION))) - - if source_name in self.indexes: - raise ValueError(_('source_name_clash_{}').format(source_name)) - - index['source_name'] = source_name - - self._process_copyright_and_license(index) - - self.indexes[source_name] = index - - for definition in index['definitions']: - try: - definition['source_name'] = source_name - definition['source_copyright'] = index['copyright'] - definition['source_licenses'] = index['licenses'] - processor = self.definition_processors[definition['type']] - processor(definition, index) - except Exception as e: - if current_app._pydrilla_werror: - raise e from None - logging.error(_('couldnt_load_definition_from_%s'), subdir_path, - exc_info=True) - @staticmethod - def all_items(versioned_items_dict): - '''Iterator over all registered versions of all items.''' - for versioned_item in versioned_items_dict.values(): - for item in versioned_item.by_version.values(): - yield item - - def _report_missing(self): - ''' - Use logger to print information about items that are referenced but - were not loaded. 
- ''' - def report_missing_license(object, object_type, lic): - if object_type == 'index': - logging.error(_('no_index_license_%(source)s_%(lic)s'), - source=object['source_name'], lic=lic) - return - - ver_str = version_string(object['version']) - kwargs = {object_type: object['identifier'], ver: ver_str, lic: lic} - if object_type == 'resource': - fmt = _('no_resource_license_%(resource)s_%(ver)s_%(lic)s') - else: - fmt = _('no_mapping_license_%(mapping)s_%(ver)s_%(lic)s') - - logging.error(fmt, **kwargs) - - for object_type, iterable in [ - ('index', self.indexes.values()), - ('resource', self.all_items(self.resources)) - ]: - for object in iterable: - to_process = [object['licenses']] - licenses = [] - while to_process: - term = to_process.pop() - - if type(term) is str: - if term not in ['or', 'and'] and \ - term not in self.licenses: - report_missing_license(object, object_type, lic) - continue - - to_process.extend(term) - - def report_missing_dependency(resource, dep): - logging.error(_('no_dep_%(resource)s_%(ver)s_%(dep)s'), - dep=dep, resource=resource['identifier'], - ver=version_string(resource['version'])) - - for resource in self.all_items(self.resources): - for dep in resource['dependencies']: - if dep not in self.resources: - report_missing_dependency(resource, dep) - - def report_missing_payload(mapping, payload): - logging.error(_('no_payload_%(mapping)s_%(ver)s_%(payload)s'), - mapping=mapping['identifier'], payload=payload, - ver=version_string(mapping['version'])) - - for mapping in self.all_items(self.mappings): - for payload in mapping['payloads']: - payload = payload['payload'] - if payload not in self.resources: - report_missing_payload(mapping, payload) - - def _finalize(self): - ''' - Initialize structures needed to serve queries. Called once after all - data gets loaded. 
- ''' - for dict in [self.resources, self.mappings]: - for versioned_item in dict.values(): - versioned_item.known_versions.sort() - - for mapping in self.all_items(self.mappings): - for payload in mapping['payloads']: - pattern = payload['pattern'] - try: - MappingItem(pattern, mapping)\ - .register(self.patterns_by_proto) - except Exception as e: - if current_app._pydrilla_werror: - raise e from None - logging.error( - _('couldnt_register_%(mapping)s_%(ver)s_%(pattern)s'), - mapping=mapping['identifier'], pattern=pattern, - ver=version_string(mapping['version']) - ) - - def query(self, url): - ''' - Return a list of registered mappings that match url. - - If multiple versions of a mapping are applicable, only the most recent - is included in the result. - ''' - deco = DeconstructedUrl(url) - - mappings = {} - - domain_tree = self.patterns_by_proto.get(deco.proto) \ - or PatternTreeNode() - - def process_item(item): - if url[-1] != '/' and item.pattern[-1] == '/': - return - - identifier = item.mapping['identifier'] - - if identifier not in mappings or \ - item.mapping['version'] > mappings[identifier]['version']: - mappings[identifier] = item.mapping - - for path_tree in domain_tree.search(deco.domain): - for item_list in path_tree.search(deco.path): - for item in item_list: - process_item(item) - - return list(mappings.values()) - -def create_app(config_path=(here / 'config.json'), flask_config={}): - app = Flask(__package__) - app.config.update(flask_config) - - language = flask_config.get('lang', 'en') - translation = gettext.translation('pydrilla', localedir=(here / 'locales'), - languages=[language]) - - app._pydrilla_gettext = translation.gettext - - # https://stackoverflow.com/questions/9449101/how-to-stop-flask-from-initialising-twice-in-debug-mode - if app.debug and os.environ.get('WERKZEUG_RUN_MAIN') != 'true': - return app - - config = load_config(config_path) - for key in ['static_resource_uri', 'content_dir', 'hydrilla_sources_uri']: - if key not in 
config: - raise ValueError(_('config_key_absent_{}').format(key)) - - app._pydrilla_static_resource_uri = config['static_resource_uri'] - if app._pydrilla_static_resource_uri[-1] != '/': - app._pydrilla_static_resource_uri += '/' - app._pydrilla_hydrilla_sources_uri = config['hydrilla_sources_uri'] - app._pydrilla_werror = config.get('werror', False) - if 'hydrilla_parent' in config: - raise MyNotImplError('hydrilla_parent', config_path.name) - - content_dir = pathlib.Path(config['content_dir']) - if not content_dir.is_absolute(): - content_dir = config_path.parent / content_dir - with app.app_context(): - app._pydrilla_content = Content(content_dir.resolve()) - - app.register_blueprint(bp) - - return app - -def _(text_key): - return current_app._pydrilla_gettext(text_key) - -def content(): - return current_app._pydrilla_content - -class MyEnvironment(Environment): - ''' - A wrapper class around jinja2.Environment that causes GNU gettext function - (as '_' and '__'), url_for function and 'hydrilla_sources_uri' config option - to be passed to every call of each template's render() method. 
- ''' - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def get_template(self, *args, **kwargs): - template = super().get_template(*args, **kwargs) - old_render = template.render - - def new_render(*args, **kwargs): - _ = current_app._pydrilla_gettext - sources_uri = current_app._pydrilla_hydrilla_sources_uri - - def escaping_gettext(text_key): - from markupsafe import escape - - return str(escape(_(text_key))) - - final_kwargs = { - '_': escaping_gettext, - '__': escaping_gettext, - 'url_for': url_for, - 'hydrilla_sources_uri' : sources_uri - } - final_kwargs.update(kwargs) - - return old_render(*args, **final_kwargs) - - template.render = new_render - - return template - -j2env = MyEnvironment(loader=PackageLoader(__package__), autoescape=False) - -indexpage = j2env.get_template('index.html') -@bp.route('/') -def index(): - return indexpage.render() - -def get_resource_or_mapping(identifier, get_dict): - ver = request.args.get('ver') - versioned_item = get_dict().get(identifier) - - if ver == 'all': - definition = versioned_item.get_all() if versioned_item else [] - else: - if ver is not None: - try: - ver = normalize_version(parse_version(ver)) - except: - abort(400) - - definition = versioned_item and versioned_item.get_by_ver(ver) - if definition is None: - abort(404) - - return json.dumps(definition) - -def get_license_or_source(identifier, get_dict): - definition = get_dict().get(identifier) - if definition is None: - abort(404) - - return json.dumps(definition) - -for item_type, get_dict, get_item in [ - ('resource', lambda: content().resources, get_resource_or_mapping), - ('mapping', lambda: content().mappings, get_resource_or_mapping), - ('license', lambda: content().licenses, get_license_or_source), - ('source', lambda: content().indexes, get_license_or_source) -]: - def _get_item(identifier, get_dict=get_dict, get_item=get_item): - return get_item(identifier, get_dict) - - 
bp.add_url_rule(f'/{item_type}s/<string:identifier>', item_type, _get_item) - -@bp.route('/query') -def query(): - url = request.args['url'] - - return json.dumps(content().query(url)) - -@bp.route('/sources/<string:identifier>/<path:path>') -def get_file(identifier, path): - if identifier not in content().indexes: - abort(404) - - new_uri = f'{current_app._pydrilla_static_resource_uri}{identifier}/{path}' - - return redirect(new_uri, code=301) diff --git a/src/test/__init__.py b/src/test/__init__.py index e69de29..d382ead 100644 --- a/src/test/__init__.py +++ b/src/test/__init__.py @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: CC0-1.0 + +# Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org> +# +# Available under the terms of Creative Commons Zero v1.0 Universal. diff --git a/src/test/development_config.json b/src/test/development_config.json index 30cf10d..c2382f7 100644 --- a/src/test/development_config.json +++ b/src/test/development_config.json @@ -2,7 +2,7 @@ // Hydrilla development config file. // -// Copyright (C) 2021 Wojtek Kosior +// Copyright (C) 2021, 2022 Wojtek Kosior // // Available under the terms of Creative Commons Zero v1.0 Universal. @@ -10,21 +10,18 @@ // unlike config.json, it shall not be included in distribution { // Relative paths now get resolved from config's containing direcotry. - "content_dir": "./example_content", - - // Except files from content_dir to be served there (used to redirect - // clients). - "static_resource_uri": "http://localhost:8000/", + "malcontent_dir": "./sample_malcontent", // Hydrilla will display this link to users as a place where they can // obtain sources for its software. This config option is meant to ease // compliance with the AGPL. 
- "hydrilla_sources_uri": "https://git.koszko.org/pydrilla/", + "hydrilla_project_url": "https://hydrillabugs.koszko.org/projects/hydrilla/wiki", - // Make Pydrilla error out on any warning + // Make Hydrilla error out on any warning "werror": true - // With the below we can make Pydrilla look for missing content items in + // With the below we can make hydrilla look for missing content items in // another instance instead of just erroring/warning. - // ,"hydrilla_parent": "https://api.hachette-hydrilla.org/0.2/" + // TODO: feature not implemented + // ,"hydrilla_parent": "https://api.hydrilla.koszko.org/1.0/" } diff --git a/src/test/example_content/hello/bye.js b/src/test/example_content/hello/bye.js deleted file mode 100644 index e6fd70c..0000000 --- a/src/test/example_content/hello/bye.js +++ /dev/null @@ -1,7 +0,0 @@ -// SPDX-License-Identifier: CC0-1.0 - -// Copyright (C) 2021 Wojtek Kosior -// -// Available under the terms of Creative Commons Zero v1.0 Universal. - -console.log(bye_message + "apple!"); diff --git a/src/test/example_content/hello/cc0.txt b/src/test/example_content/hello/cc0.txt deleted file mode 100644 index 0e259d4..0000000 --- a/src/test/example_content/hello/cc0.txt +++ /dev/null @@ -1,121 +0,0 @@ -Creative Commons Legal Code - -CC0 1.0 Universal - - CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE - LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN - ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS - INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES - REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS - PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM - THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED - HEREUNDER. 
- -Statement of Purpose - -The laws of most jurisdictions throughout the world automatically confer -exclusive Copyright and Related Rights (defined below) upon the creator -and subsequent owner(s) (each and all, an "owner") of an original work of -authorship and/or a database (each, a "Work"). - -Certain owners wish to permanently relinquish those rights to a Work for -the purpose of contributing to a commons of creative, cultural and -scientific works ("Commons") that the public can reliably and without fear -of later claims of infringement build upon, modify, incorporate in other -works, reuse and redistribute as freely as possible in any form whatsoever -and for any purposes, including without limitation commercial purposes. -These owners may contribute to the Commons to promote the ideal of a free -culture and the further production of creative, cultural and scientific -works, or to gain reputation or greater distribution for their Work in -part through the use and efforts of others. - -For these and/or other purposes and motivations, and without any -expectation of additional consideration or compensation, the person -associating CC0 with a Work (the "Affirmer"), to the extent that he or she -is an owner of Copyright and Related Rights in the Work, voluntarily -elects to apply CC0 to the Work and publicly distribute the Work under its -terms, with knowledge of his or her Copyright and Related Rights in the -Work and the meaning and intended legal effect of CC0 on those rights. - -1. Copyright and Related Rights. A Work made available under CC0 may be -protected by copyright and related or neighboring rights ("Copyright and -Related Rights"). Copyright and Related Rights include, but are not -limited to, the following: - - i. the right to reproduce, adapt, distribute, perform, display, - communicate, and translate a Work; - ii. moral rights retained by the original author(s) and/or performer(s); -iii. 
publicity and privacy rights pertaining to a person's image or - likeness depicted in a Work; - iv. rights protecting against unfair competition in regards to a Work, - subject to the limitations in paragraph 4(a), below; - v. rights protecting the extraction, dissemination, use and reuse of data - in a Work; - vi. database rights (such as those arising under Directive 96/9/EC of the - European Parliament and of the Council of 11 March 1996 on the legal - protection of databases, and under any national implementation - thereof, including any amended or successor version of such - directive); and -vii. other similar, equivalent or corresponding rights throughout the - world based on applicable law or treaty, and any national - implementations thereof. - -2. Waiver. To the greatest extent permitted by, but not in contravention -of, applicable law, Affirmer hereby overtly, fully, permanently, -irrevocably and unconditionally waives, abandons, and surrenders all of -Affirmer's Copyright and Related Rights and associated claims and causes -of action, whether now known or unknown (including existing as well as -future claims and causes of action), in the Work (i) in all territories -worldwide, (ii) for the maximum duration provided by applicable law or -treaty (including future time extensions), (iii) in any current or future -medium and for any number of copies, and (iv) for any purpose whatsoever, -including without limitation commercial, advertising or promotional -purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each -member of the public at large and to the detriment of Affirmer's heirs and -successors, fully intending that such Waiver shall not be subject to -revocation, rescission, cancellation, termination, or any other legal or -equitable action to disrupt the quiet enjoyment of the Work by the public -as contemplated by Affirmer's express Statement of Purpose. - -3. Public License Fallback. 
Should any part of the Waiver for any reason -be judged legally invalid or ineffective under applicable law, then the -Waiver shall be preserved to the maximum extent permitted taking into -account Affirmer's express Statement of Purpose. In addition, to the -extent the Waiver is so judged Affirmer hereby grants to each affected -person a royalty-free, non transferable, non sublicensable, non exclusive, -irrevocable and unconditional license to exercise Affirmer's Copyright and -Related Rights in the Work (i) in all territories worldwide, (ii) for the -maximum duration provided by applicable law or treaty (including future -time extensions), (iii) in any current or future medium and for any number -of copies, and (iv) for any purpose whatsoever, including without -limitation commercial, advertising or promotional purposes (the -"License"). The License shall be deemed effective as of the date CC0 was -applied by Affirmer to the Work. Should any part of the License for any -reason be judged legally invalid or ineffective under applicable law, such -partial invalidity or ineffectiveness shall not invalidate the remainder -of the License, and in such case Affirmer hereby affirms that he or she -will not (i) exercise any of his or her remaining Copyright and Related -Rights in the Work or (ii) assert any associated claims and causes of -action with respect to the Work, in either case contrary to Affirmer's -express Statement of Purpose. - -4. Limitations and Disclaimers. - - a. No trademark or patent rights held by Affirmer are waived, abandoned, - surrendered, licensed or otherwise affected by this document. - b. 
Affirmer offers the Work as-is and makes no representations or - warranties of any kind concerning the Work, express, implied, - statutory or otherwise, including without limitation warranties of - title, merchantability, fitness for a particular purpose, non - infringement, or the absence of latent or other defects, accuracy, or - the present or absence of errors, whether or not discoverable, all to - the greatest extent permissible under applicable law. - c. Affirmer disclaims responsibility for clearing rights of other persons - that may apply to the Work or any use thereof, including without - limitation any person's Copyright and Related Rights in the Work. - Further, Affirmer disclaims responsibility for obtaining any necessary - consents, permissions or other rights required for any use of the - Work. - d. Affirmer understands and acknowledges that Creative Commons is not a - party to this document and has no duty or obligation with respect to - this CC0 or use of the Work. diff --git a/src/test/example_content/hello/hello.js b/src/test/example_content/hello/hello.js deleted file mode 100644 index d87ea7f..0000000 --- a/src/test/example_content/hello/hello.js +++ /dev/null @@ -1,7 +0,0 @@ -// SPDX-License-Identifier: CC0-1.0 - -// Copyright (C) 2021 Wojtek Kosior -// -// Available under the terms of Creative Commons Zero v1.0 Universal. - -console.log(hello_message + "apple!"); diff --git a/src/test/example_content/hello/index.json b/src/test/example_content/hello/index.json deleted file mode 100644 index 16843cb..0000000 --- a/src/test/example_content/hello/index.json +++ /dev/null @@ -1,302 +0,0 @@ -// SPDX-License-Identifier: CC0-1.0 - -// Copyright (C) 2021 Wojtek Kosior -// Available under the terms of Creative Commons Zero v1.0 Universal. - -// This is an example index.json file describing Hydrilla site content. 
As you -// can see, for storing site content information Hydrilla utilizes JSON with an -// additional extension in the form of '//' comments support. Hydrilla shall -// look into each direct subdirectory of the content directory passed to it -// (via a cofig file option). If such subsirectory contains an index.json file, -// Hydrilla shall process it. - -// An index.json file conveys definitions of site resources, pattern->payload -// mappings and licenses thereof. The definitions may reference files under -// index.json's containing directory, using relative paths. This is how scripts, -// license texts, etc. are included. Unix paths (using '/' as separator) are -// assumed. It is not allowed for an index.json file to reference files outside -// its directory. - -// Certain objects are allowed to contain a "comment" field. Although '//' -// comments can be used in index.json files, they will be stripped when the file -// is processed. If a comment should be included in the JSON definitions served -// by Hydrilla API, it should be put in a "comment" field of the proper object. - -// Various kinds of objects contain version information. Version is always an -// array of integers, with major version number being the first array item. When -// applicable, a version is accompanied by a revision field which contains a -// positive integer. If versions specified by arrays of different length need to -// be compared, the shorter array gets padded with zeroes on the right. This -// means that for example version 1.3 could be given as both [1, 3] and -// [1, 3, 0, 0] (aka 1.3.0.0) and either would mean the same. - -{ - // Once our json schema changes, this version will change. Our software will - // be able to handle both current and older formats thanks to this - // information present in every index.json file. Different schema versions - // are always incompatible (e.g. a Hydrilla instance that understands schema - // version 0.2.0.0 will not understand version 0.2.0.1). 
Schemas that are - // backwards-compatible will be denoted by a different revision. - // We will try to make schema version match the version of Hydrilla software - // that introduced it. - "schema_version": [0, 2], - "schema_revision": 1, - - // Copyright of this json file. It's a list of copyright holder information - // objects. Alternatively, "auto" can be used to make Hydrilla attempt to - // extract copyright info from the comment at the beginning of the file. - "copyright": [ - // There can be multiple entries, one for each co-holder of the - // copyright. - { - // There can also be multiple years, like ["2021","2023-2024"]. - "years": ["2021"], - // Name of the copyright holder. Depending on the situation it can - // be just the first name, name+surname, a company name, a - // pseudonym, etc. - "holder": "Wojtek Kosior" - } - ], - - // License of this json file. Identifier has to be known to Hydrilla. Can - // be defined either in the same or another index.json file as a "license" - // item. It is possible to specify license combinations, like: - // [["Expat", "and", "Apache-2.0"], "or", "GPL-3.0-only"] - // Alternatively, "auto" can be used to make Hydrilla attempt to extract - // copyright info from this file's SPDX license identifier. - "licenses": "CC0-1.0", - - // Where this software/work initially comes from. In some cases (i.e. when - // the developer of content is also the one who packages it for Hydrilla) - // this might be the same as "package_url". - "upstream_url": "https://git.koszko.org/pydrilla/tree/src/test/example_content/hello", - - // Where sources for the packaging of this content can be found. - "package_url": "https://git.koszko.org/pydrilla/tree/src/test/example_content/hello", - - // Additional "comment" field can be used if needed. - // "comment": "" - - // List of actual site resources, pattern->payload mappings and licenses. - // Each of them is represented by an object. 
Meta-sites and replacement site - // interfaces will also belong here once they get implemented. - "definitions": [ - { - // Value of "type" can currently be one of: "resource", "license" - // and "mapping". The one we have here, "resource", defines a list - // of injectable scripts that can be used as a payload or as a - // dependency of another "resource". In the future CSS style sheets - // and WASM modules will also be composite parts of a "resource" as - // scripts are now. - "type": "resource", - - // Used when referring to this resource in "dependencies" list of - // another resource or in "payload" field of a mapping. Should - // be consize and can only use a restricted set of characters. It - // has to match: [-0-9a-zA-Z] - "identifier": "helloapple", - - // "long_name" should be used to specify a user-friendly alternative - // to an identifier. It should generally not collide with a long - // name of some resource with a different uuid and also shouldn't - // change in-between versions of the same resource, although - // exceptions to both rules might be considered. Long name is - // allowed to contain arbitrary unicode characters (within reason!). - "long_name": "Hello Apple", - - // Different versions (e.g. 1.0 and 1.3) of the same resource can be - // defined in separate index.json files. This makes it easy to - // accidently cause an identifier clash. To help detect it, we - // require that each resource has a uuid associated with it. Attempt - // to define multiple resources with the same identifier and - // different uuids will result in an error being reported. Defining - // multiple resources with different identifiers and the same uuid - // is disallowed for now (it may be later permitted if we consider - // it good for some use-case). - "uuid": "a6754dcb-58d8-4b7a-a245-24fd7ad4cd68", - - // Version should match the upstream version of the resource (e.g. a - // version of javascript library). 
Revision number starts as 1 for - // each new resource version and gets incremented by 1 each time a - // modification to the packaging of this version is done. Hydrilla - // will allow multiple definitions of the same resource to load, as - // long as their versions differ. Thanks to the "version" and - // "revision" fields, clients will know they have to update certain - // resource after it has been updated. If multiple definitions of - // the same version of given resource are provided, an error is - // generated (even if those definitions differ by revision number). - "version": [2021, 11, 10], - "revision": 1, - - // A short, meaningful description of what the resource is and/or - // what it does. - "description": "greets an apple", - - // If needed, a "comment" field can be added to provide some - // additional information. - // "comment": "this resource something something", - - // One should specify the copyright and licensing terms of the - // entire package. The format is the same as when specifying these - // for the index.json file, except "auto" cannot be used. - "copyright": [{"years": ["2021"], "holder": "Wojtek Kosior"}], - "licenses": "CC0-1.0", - - // Resource's "dependencies" array shall contain names of other - // resources that (in case of scripts at least) should get evaluated - // on a page before this resource's own scripts. - "dependencies": ["hello-message"], - - // Array of javascript files that belong to this resource. - "scripts": [ - { - // Script name. It should also be a valid file path relative - // to index.json's containing directory. - "file": "hello.js", - // Copyright and license info of a script file can be - // specified using the same format as in the case of the - // index.json file itself. If "copyright" or "license" is - // not provided, Hydrilla assumes it to be the same as the - // value specified for the resource itself. 
- "copyright": "auto", - "licenses": "auto" - }, { - "file": "bye.js" - } - ] - }, { - "type": "resource", - "identifier": "hello-message", - "long_name": "Hello Message", - "uuid": "1ec36229-298c-4b35-8105-c4f2e1b9811e", - "version": [2021, 11, 10], - "revision": 2, - "description": "define messages for saying hello and bye", - "copyright": [{"years": ["2021"], "holder": "Wojtek Kosior"}], - "licenses": "CC0-1.0", - // If "dependencies" is empty, it can also be omitted. - // "dependencies": [], - "scripts": [{"file": "message.js"}] - }, { - "type": "mapping", - - // Has similar function to resource's identifier. Should be consize - // and can only use a restricted set of characters. It has to match: - // [-0-9a-zA-Z] - // It can be the same as some resource identifier (those are - // different entities and are treated separately). - "identifier": "helloapple", - - // "long name" and "uuid" have the same meaning as in the case of - // resources. Uuids of a resource and a mapping can technically be - // the same, but it is recommended to avoid even this kind of - // repetition. - "long_name": "Hello Apple", - "uuid": "54d23bba-472e-42f5-9194-eaa24c0e3ee7", - - // "version" differs from its counterpart in resource in that it has - // no accompanying revision number. - "version": [2021, 11, 10], - - // A short, meaningful description of what the mapping does. - "description": "causes apple to get greeted on Hydrillabugs issue tracker", - - // A comment, if necessary. - // "comment": "blah blah because bleh" - - // The "payloads" array specifies, which payloads are to be - // applied to which URLs. - "payloads": [ - { - // Should be a valid Haketilo URL pattern. - "pattern": "https://hydrillabugs.koszko.org/***", - // Should be the name of an existing resource. The resource - // may, but doesn't have to, be defined in the same - // index.json file. - "payload": "helloapple" - }, - // More associations may follow. 
- { - "pattern": "https://hachettebugs.koszko.org/***", - "payload": "helloapple" - } - ] - }, { - "type": "license", - - // Will be used to refer to this license in other places. Should - // match the SPDX identifier if possible (despite that, please use - // "Expat" instead of "MIT" where possible). Unlike other definition - // types, "license" does not allow uuids to be used to avoid license - // id clashes. Any attempt to define multiple licenses with the same - // id will result in an error being reported. - "identifier": "CC0-1.0", - - // This long name must also be unique among all license definitions. - "long_name": "Creative Commons Zero v1.0 Universal", - - // We don't use "version" in license definitions. We do, however, - // use "revision" to indicate changes to the packaging of a license. - // Revision should be increased by 1 at each such change. - "revision": 2, - - "legal_text": [ - // Legal text can be available in multiple forms. Usually just - // plain .txt file is enough, though. - { - // "format" should match an agreed-upon MIME type if - // possible. - "format": "text/plain", - // Value of "file" should be a path relative to the - // directory of index.json file. - "file": "cc0.txt" - } - // If a markdown version of CC0 was provided, we could add this: - // { - // "format": "text/markdown", - // "file": "cc0.md" - // } - ] - - // If needed, a "comment" field can be added to clarify something. - // For example, when definind "Expat" license we could add: - // - // "comment": "Expat license is the most common form of the license often called \"MIT\". Many other forms of \"MIT\" license exist. Here the name \"Expat\" is used to avoid ambiguity." - - // If applicable, a "notice" can be included. It shall then be an - // object with "file" field containing a path (relative to - // index.json's directory) to a plain text file with that notice. 
- // - // "notice": { - // "file": "license-notice.txt" - // } - // - // This is needed for example in case of GNU licenses (both with and - // without exceptions). For instance, - // "GPL-3.0-or-later-with-html-exception" could have the following - // in its notice file: - // - // This program is free software: you can redistribute it and/or - // modify it under the terms of the GNU General Public License as - // published by the Free Software Foundation, either version 3 of - // the License, or (at your option) any later version. - // - // This program is distributed in the hope that it will be useful, - // but WITHOUT ANY WARRANTY; without even the implied warranty of - // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - // GNU General Public License for more details. - // - // As a special exception to the GPL, any HTML file which merely - // makes function calls to this code, and for that purpose - // includes it by reference shall be deemed a separate work for - // copyright law purposes. If you modify this code, you may extend - // this exception to your version of the code, but you are not - // obligated to do so. If you do not wish to do so, delete this - // exception statement from your version. - // - // You should have received a copy of the GNU General Public License - // along with this program. If not, see - // <https://www.gnu.org/licenses/>. - } - ] -} diff --git a/src/test/example_content/hello/message.js b/src/test/example_content/hello/message.js deleted file mode 100644 index da5966d..0000000 --- a/src/test/example_content/hello/message.js +++ /dev/null @@ -1,8 +0,0 @@ -// SPDX-License-Identifier: CC0-1.0 - -// Copyright (C) 2021 Wojtek Kosior -// -// Available under the terms of Creative Commons Zero v1.0 Universal. 
- -var hello_message = "hello, " -var bye_message = "bye, " diff --git a/src/test/source-package-example b/src/test/source-package-example new file mode 160000 +Subproject e571b3911f198e3feccc8d06390c79131f9cf09 diff --git a/src/test/test_pydrilla.py b/src/test/test_pydrilla.py deleted file mode 100644 index 50757a7..0000000 --- a/src/test/test_pydrilla.py +++ /dev/null @@ -1,153 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later - -# Repository tests -# -# This file is part of Hydrilla -# -# Copyright (C) 2021 Wojtek Kosior -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <https://www.gnu.org/licenses/>. -# -# -# I, Wojtek Kosior, thereby promise not to sue for violation of this -# file's license. Although I request that you do not make use this code -# in a proprietary program, I am not going to enforce this in court. 
- -import pytest -import sys -import shutil -from pathlib import Path -from hashlib import sha256 -from os import mkdir, unlink, environ -import json -from markupsafe import escape - -from pydrilla import pydrilla, create_app - -test_dir = Path(__file__).resolve().parent -packages_dir = test_dir.parent -development_config_path = test_dir / 'development_config.json' -example_content_dir = test_dir / 'example_content' - -@pytest.fixture -def client(): - app = create_app(development_config_path, flask_config={'TESTING': True}) - - with app.test_client() as client: - yield client - -@pytest.fixture -def development_config(): - with open(development_config_path) as config_file: - yield json.loads(pydrilla.strip_json_comments(config_file.read())) - -def test_api_basic(client, development_config): - def verify_sha256sum(source_name, file_object): - with open(example_content_dir / source_name / file_object['file'], - mode='rb') as file: - assert sha256(file.read()).digest().hex() == file_object['sha256'] - - response = client.get('/') - assert b'html' in response.data - sources_uri = development_config['hydrilla_sources_uri'] - assert escape(sources_uri).encode() in response.data - - for item_type in ['mapping', 'resource']: - response = client.get(f'/{item_type}s/helloapple') - assert response.status_code == 200 - definition = json.loads(response.data.decode()) - assert definition['type'] == item_type - assert definition['source_name'] == 'hello' - assert definition['version'] == [2021, 11, 10] - if item_type == 'resource': - assert type(definition['scripts']) is list - assert len(definition['scripts']) > 0 - for script_file in definition['scripts']: - verify_sha256sum(definition['source_name'], script_file) - - response = client.get(f'/{item_type}s/helloapple?ver=2021.11.10.0') - assert response.status_code == 200 - assert definition == json.loads(response.data.decode()) - - response = client.get(f'/{item_type}s/helloapple?ver=2021.11.10.999') - assert 
response.status_code == 404 - - response = client.get(f'/{item_type}s/helloapple?ver=random_bad_input') - assert response.status_code == 400 - - response = client.get(f'/{item_type}s/random-bad-identifier') - assert response.status_code == 404 - - response = client.get(f'/{item_type}s/helloapple?ver=all') - assert response.status_code == 200 - definitions = json.loads(response.data.decode()) - assert type(definitions) is list - assert all([d['type'] == item_type for d in definitions]) - assert any([d['version'] == [2021, 11, 10] for d in definitions]) - - response = client.get('/licenses/CC0-1.0') - assert response.status_code == 200 - definition = json.loads(response.data.decode()) - assert definition['type'] == 'license' - assert definition['long_name'] == 'Creative Commons Zero v1.0 Universal' - assert definition['source_name'] == 'hello' - - assert type(definition['legal_text']) is list - assert len(definition['legal_text']) > 0 - for license_file in definition['legal_text']: - verify_sha256sum(definition['source_name'], license_file) - - response = client.get('/licenses/random-bad-identifier') - assert response.status_code == 404 - - response = client.get('/sources/hello') - assert response.status_code == 200 - definition = json.loads(response.data.decode()) - - assert definition['source_name'] == 'hello' - assert type(definition['schema_version']) is list - - response = client.get('/sources/random-bad-identifier') - assert response.status_code == 404 - - response = client.get('/query?url=https://hachettebugs.koszko.org') - assert response.status_code == 200 - definitions = json.loads(response.data.decode()) - assert type(definitions) is list - assert all([d['type'] == 'mapping' for d in definitions]) - assert any([p['pattern'] == 'https://hachettebugs.koszko.org/***' - for d in definitions for p in d['payloads']]) - - response = client.get('/query?url=https://random_bad_domain.org/something') - assert response.status_code == 200 - definitions = 
json.loads(response.data.decode()) - assert definitions == [] - - resource_uri = development_config['static_resource_uri'] - response = client.get('/sources/hello/hello.js') - assert response.status_code == 301 - assert response.location == resource_uri + 'hello/hello.js' - response = client.get('/sources/random-bad-identifier/hello.js') - assert response.status_code == 404 - response = client.get('/sources/hello/random/bad/path') - assert response.status_code == 301 - assert response.location == resource_uri + 'hello/random/bad/path' - -def test_normalize_version(): - assert pydrilla.normalize_version([4, 5, 3, 0, 0]) == [4, 5, 3] - assert pydrilla.normalize_version([1, 0, 5, 0]) == [1, 0, 5] - assert pydrilla.normalize_version([3, 3]) == [3, 3] - -def test_strip_json_comments(development_config): - assert development_config['static_resource_uri'] == 'http://localhost:8000/' diff --git a/src/test/test_server.py b/src/test/test_server.py new file mode 100644 index 0000000..def48dc --- /dev/null +++ b/src/test/test_server.py @@ -0,0 +1,199 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + +# Repository tests +# +# This file is part of Hydrilla +# +# Copyright (C) 2021, 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. 
Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +import pytest +import sys +import shutil +import json + +from pathlib import Path +from hashlib import sha256 +from tempfile import TemporaryDirectory +from typing import Iterable, Callable + +from flask.testing import FlaskClient +from markupsafe import escape + +from hydrilla import util as hydrilla_util +from hydrilla.builder import Build +from hydrilla.server import create_app + +here = Path(__file__).resolve().parent +config_path = here / 'development_config.json' +source_path = here / 'source-package-example' + +@pytest.fixture(scope="session") +def default_setup() -> Iterable[dict[str, Path]]: + with TemporaryDirectory() as tmpdir: + setup = { + 'malcontent_dir': Path(tmpdir) / 'sample_malcontent', + 'config_path': Path(tmpdir) / 'config.json', + 'containing_dir': Path(tmpdir) + } + + setup['config_path'].symlink_to(config_path) + + build = Build(source_path, Path('index.json')) + build.write_package_files(setup['malcontent_dir']) + + yield setup + +@pytest.fixture(scope="session") +def client(default_setup: dict[str, Path]) -> Iterable[FlaskClient]: + """Provide app client that serves the object from built sample package.""" + app = create_app(default_setup['config_path'], + flask_config={'TESTING': True}) + + with app.test_client() as client: + yield client + +@pytest.fixture(scope="session") +def development_config(default_setup) -> Iterable[dict]: + """Provide the contents of JSON config file fed to the client.""" + contents = default_setup['config_path'].read_text() + yield json.loads(hydrilla_util.strip_json_comments(contents)) + +def test_project_url(client: FlaskClient, development_config: dict) -> None: + """Fetch index.html and verify project URL from config is present there.""" + response = client.get('/') + assert b'html' in response.data + project_url = development_config['hydrilla_project_url'] + assert
escape(project_url).encode() in response.data + +@pytest.mark.parametrize('item_type', ['resource', 'mapping']) +def test_get_newest(client: FlaskClient, item_type: str) -> None: + """ + Verify that + GET '/{item_type}/{item_identifier}.json' + returns proper definition that is also served at: + GET '/{item_type}/{item_identifier}/{item_version}' + """ + response = client.get(f'/{item_type}/helloapple.json') + assert response.status_code == 200 + definition = json.loads(response.data.decode()) + assert definition['type'] == item_type + assert definition['identifier'] == 'helloapple' + + response = client.get(f'/{item_type}/helloapple/2021.11.10') + assert response.status_code == 200 + assert definition == json.loads(response.data.decode()) + + hydrilla_util.validator_for(f'api_{item_type}_description-1.schema.json')\ + .validate(definition) + +@pytest.mark.parametrize('item_type', ['resource', 'mapping']) +def test_get_nonexistent(client: FlaskClient, item_type: str) -> None: + """ + Verify that attempts to GET a JSON definition of a nonexistent item or item + version result in 404. + """ + response = client.get(f'/{item_type}/nonexistentapple.json') + assert response.status_code == 404 + response = client.get(f'/{item_type}/helloapple/1.2.3.999') + assert response.status_code == 404 + +@pytest.mark.parametrize('item_type', ['resource', 'mapping']) +def test_file_refs(client: FlaskClient, item_type: str) -> None: + """ + Verify that files referenced by definitions are accessible under their + proper URLs and that their hashes match. 
+ """ + response = client.get(f'/{item_type}/helloapple/2021.11.10') + assert response.status_code == 200 + definition = json.loads(response.data.decode()) + + for file_ref in [*definition.get('scripts', []), + *definition['source_copyright']]: + hash_sum = file_ref["sha256"] + response = client.get(f'/file/sha256-{hash_sum}') + + assert response.status_code == 200 + assert sha256(response.data).digest().hex() == hash_sum + +def test_empty_query(client: FlaskClient) -> None: + """ + Verify that querying mappings for URL gives an empty list when there are no + matches. + """ + response = client.get(f'/query?url=https://nonexiste.nt/example') + assert response.status_code == 200 + + response_object = json.loads(response.data.decode()) + + assert response_object['mappings'] == [] + + hydrilla_util.validator_for('api_query_result-1.schema.json')\ + .validate(response_object) + +def test_query(client: FlaskClient) -> None: + """ + Verify that querying mappings for URL gives a list with reference(s) to the + matching mapping(s).
+ """ + response = client.get(f'/query?url=https://hydrillabugs.koszko.org/') + assert response.status_code == 200 + + response_object = json.loads(response.data.decode()) + + assert response_object['mappings'] == [{ + 'identifier': 'helloapple', + 'long_name': 'Hello Apple', + 'version': [2021, 11, 10] + }] + + hydrilla_util.validator_for('api_query_result-1.schema.json')\ + .validate(response_object) + +def test_source(client: FlaskClient) -> None: + """Verify source descriptions are properly served.""" + response = client.get(f'/source/hello.json') + assert response.status_code == 200 + + description = json.loads(response.data.decode()) + assert description['source_name'] == 'hello' + + assert sorted([d['identifier'] for d in description['definitions']]) == \ + ['hello-message', 'helloapple', 'helloapple'] + + zipfile_hash = description['source_archives']['zip']['sha256'] + response = client.get(f'/source/hello.zip') + assert sha256(response.data).digest().hex() == zipfile_hash + + hydrilla_util.validator_for('api_source_description-1.schema.json')\ + .validate(description) + +def test_missing_source(client: FlaskClient) -> None: + """Verify requests for nonexistent sources result in 404.""" + response = client.get(f'/source/nonexistent.json') + assert response.status_code == 404 + + response = client.get(f'/source/nonexistent.zip') + assert response.status_code == 404 + +def test_normalize_version(): + assert hydrilla_util.normalize_version([4, 5, 3, 0, 0]) == [4, 5, 3] + assert hydrilla_util.normalize_version([1, 0, 5, 0]) == [1, 0, 5] + assert hydrilla_util.normalize_version([3, 3]) == [3, 3] |