aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorWojtek Kosior <koszko@koszko.org>2022-02-08 15:29:49 +0100
committerWojtek Kosior <koszko@koszko.org>2022-02-09 15:34:46 +0100
commit6676b4ed90e19e2fd6ee5f4242cf85f64db145d8 (patch)
tree42b45c6ed731abeab85e160b020bc57cab638fff /src
parent67631e6c5db6739f7a57958d222e5af7ebc364b0 (diff)
downloadhaketilo-hydrilla-6676b4ed90e19e2fd6ee5f4242cf85f64db145d8.tar.gz
haketilo-hydrilla-6676b4ed90e19e2fd6ee5f4242cf85f64db145d8.zip
rework Hydrilla to use a separate tool for building its source packages
* Hydrilla now depends on "Hydrilla builder" developed at: https://git.koszko.org/hydrilla-builder/ * Hydrilla repository is now REUSE-compliant * The debian packaging is temporarily not tested and likely to be broken * JSON schemas are now in use (through 'jsonschema' Python library) * This is not yet a release and some minor changes to the API on-disk format are going to occur before that
Diffstat (limited to 'src')
-rw-r--r--src/hydrilla/__init__.py7
-rw-r--r--src/hydrilla/server/__init__.py7
-rw-r--r--src/hydrilla/server/config.json24
-rw-r--r--src/hydrilla/server/locales/en/LC_MESSAGES/hydrilla.po (renamed from src/pydrilla/locales/en/LC_MESSAGES/pydrilla.po)0
-rw-r--r--src/hydrilla/server/serve.py604
-rw-r--r--src/hydrilla/server/templates/base.html (renamed from src/pydrilla/templates/base.html)4
-rw-r--r--src/hydrilla/server/templates/index.html (renamed from src/pydrilla/templates/index.html)2
-rw-r--r--src/hydrilla_dev_helper.py (renamed from src/pydrilla_dev_helper.py)49
-rw-r--r--src/pydrilla/__init__.py1
-rw-r--r--src/pydrilla/config.json14
-rw-r--r--src/pydrilla/pydrilla.py755
-rw-r--r--src/test/__init__.py5
-rw-r--r--src/test/development_config.json17
-rw-r--r--src/test/example_content/hello/bye.js7
-rw-r--r--src/test/example_content/hello/cc0.txt121
-rw-r--r--src/test/example_content/hello/hello.js7
-rw-r--r--src/test/example_content/hello/index.json302
-rw-r--r--src/test/example_content/hello/message.js8
m---------src/test/source-package-example0
-rw-r--r--src/test/test_pydrilla.py153
-rw-r--r--src/test/test_server.py199
21 files changed, 888 insertions, 1398 deletions
diff --git a/src/hydrilla/__init__.py b/src/hydrilla/__init__.py
new file mode 100644
index 0000000..6aeb276
--- /dev/null
+++ b/src/hydrilla/__init__.py
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: 0BSD
+
+# Copyright (C) 2013-2020, PyPA
+
+# https://packaging.python.org/en/latest/guides/packaging-namespace-packages/#pkgutil-style-namespace-packages
+
+__path__ = __import__('pkgutil').extend_path(__path__, __name__)
diff --git a/src/hydrilla/server/__init__.py b/src/hydrilla/server/__init__.py
new file mode 100644
index 0000000..f5a799e
--- /dev/null
+++ b/src/hydrilla/server/__init__.py
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: CC0-1.0
+
+# Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org>
+#
+# Available under the terms of Creative Commons Zero v1.0 Universal.
+
+from .serve import create_app
diff --git a/src/hydrilla/server/config.json b/src/hydrilla/server/config.json
new file mode 100644
index 0000000..7c9f22b
--- /dev/null
+++ b/src/hydrilla/server/config.json
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: CC0-1.0
+
+// Default Hydrilla config file.
+//
+// Copyright (C) 2021, 2022 Wojtek Kosior
+//
+// Available under the terms of Creative Commons Zero v1.0 Universal.
+
+{
+ // Path to the directory from which Hydrilla will load packages metadata
+ // and serve files. A relative path is resolved against the directory of
+ // the main config file.
+ // Deliberately avoiding word "content", see:
+ // http://www.gnu.org/philosophy/words-to-avoid.en.html#Content
+ "malcontent_dir": "/var/lib/hydrilla/malcontent",
+
+ // Hydrilla will display this link to users as a place where they can
+ // obtain sources for its software. This config option is meant to ease
+ // compliance with the AGPL.
+ "hydrilla_project_url": "https://hydrillabugs.koszko.org/projects/hydrilla/wiki",
+
+ // Tell Hydrilla to look for additional configuration in those files, in
+ // this order.
+ "try_configs": ["/etc/hydrilla/config.json"]
+}
diff --git a/src/pydrilla/locales/en/LC_MESSAGES/pydrilla.po b/src/hydrilla/server/locales/en/LC_MESSAGES/hydrilla.po
index f9e6a82..f9e6a82 100644
--- a/src/pydrilla/locales/en/LC_MESSAGES/pydrilla.po
+++ b/src/hydrilla/server/locales/en/LC_MESSAGES/hydrilla.po
diff --git a/src/hydrilla/server/serve.py b/src/hydrilla/server/serve.py
new file mode 100644
index 0000000..815ac63
--- /dev/null
+++ b/src/hydrilla/server/serve.py
@@ -0,0 +1,604 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+# Main repository logic.
+#
+# This file is part of Hydrilla
+#
+# Copyright (C) 2021, 2022 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+import re
+import os
+import pathlib
+import json
+import gettext
+import logging
+
+from pathlib import Path
+from hashlib import sha256
+from abc import ABC, abstractmethod
+from typing import Optional, Union, Iterable
+
+from flask import Flask, Blueprint, current_app, url_for, abort, request, \
+ redirect, send_file
+from jinja2 import Environment, PackageLoader
+from werkzeug import Response
+
+from .. import util
+
+here = pathlib.Path(__file__).resolve().parent
+
+def load_config(config_path: Path) -> dict:
+    """
+    Load Hydrilla configuration starting from the JSON file at config_path.
+
+    Files may contain '//' comments (stripped with
+    util.strip_json_comments()). Each loaded file may name further files
+    under 'try_configs' (silently skipped when they cannot be loaded) and
+    'use_configs' (a failure to load them is an error). Values from files
+    loaded later override values loaded earlier.
+
+    The returned dict has 'try_configs' and 'use_configs' removed and
+    'malcontent_dir' converted to an absolute, resolved path string (a
+    relative value is interpreted relative to config_path's directory).
+
+    Raises ValueError if 'malcontent_dir' or 'hydrilla_project_url' is absent
+    and re-raises any error met while loading a file that must not fail.
+    """
+    config = {}
+    # Two parallel stacks: files still to be read and, for each of them,
+    # whether a failure to read it may be ignored.
+    to_load = [config_path]
+    failures_ok = [False]
+
+    while to_load:
+        # Entries queued from 'try_configs'/'use_configs' are plain strings.
+        path = Path(to_load.pop())
+        can_fail = failures_ok.pop()
+
+        try:
+            # Read the file that was just popped, not the initial one again.
+            json_text = util.strip_json_comments(path.read_text())
+            new_config = json.loads(json_text)
+        except Exception as e:
+            if can_fail:
+                continue
+            raise e from None
+
+        config.update(new_config)
+
+        for key, failure_ok in [('try_configs', True), ('use_configs', False)]:
+            # Reversed, so that pop() yields the paths in their listed order.
+            paths = list(reversed(new_config.get(key, [])))
+            to_load.extend(paths)
+            failures_ok.extend([failure_ok] * len(paths))
+
+    for key in ('try_configs', 'use_configs'):
+        config.pop(key, None)
+
+    for key in ('malcontent_dir', 'hydrilla_project_url'):
+        if key not in config:
+            # NOTE(review): _() reads current_app, so producing this message
+            # presumably requires a Flask application context - confirm.
+            raise ValueError(_('config_key_absent_{}').format(key))
+
+    malcontent_path = Path(config['malcontent_dir'])
+    if not malcontent_path.is_absolute():
+        malcontent_path = config_path.parent / malcontent_path
+
+    config['malcontent_dir'] = str(malcontent_path.resolve())
+
+    return config
+
+class ItemInfo(ABC):
+    """Abbreviated description of one version of a resource or a mapping."""
+    def __init__(self, item_obj: dict):
+        """Fill in the info from an item definition parsed from JSON."""
+        normalized_version = util.normalize_version(item_obj['version'])
+
+        self.version = normalized_version
+        self.identifier = item_obj['identifier']
+        self.uuid = item_obj['uuid']
+        self.long_name = item_obj['long_name']
+
+    def path(self) -> str:
+        """
+        Return the path of this item's JSON definition, relative to the
+        directory that holds all items of this item's type.
+        """
+        version_part = util.version_string(self.version)
+
+        return f'{self.identifier}/{version_part}'
+
+class ResourceInfo(ItemInfo):
+    """Abbreviated description of one version of a resource."""
+    def __init__(self, resource_obj: dict):
+        """Fill in the info from a resource definition parsed from JSON."""
+        super().__init__(resource_obj)
+
+        # The 'dependencies' key is optional in the definition.
+        declared_dependencies = resource_obj.get('dependencies', [])
+        self.dependencies = declared_dependencies
+
+class MappingInfo(ItemInfo):
+    """Shortened data of a mapping."""
+    def __init__(self, mapping_obj: dict):
+        """
+        Initialize MappingInfo using mapping definition read from JSON.
+
+        self.payloads maps each URL pattern of the mapping to the identifier
+        of the resource referenced for that pattern.
+        """
+        super().__init__(mapping_obj)
+
+        self.payloads = {
+            pattern: res_ref['identifier']
+            for pattern, res_ref in mapping_obj.get('payloads', {}).items()
+        }
+
+    def as_query_result(self) -> dict:
+        """
+        Produce a json.dump()-able object describing this mapping as one of a
+        collection of query results.
+        """
+        return {
+            'version': self.version,
+            'identifier': self.identifier,
+            'long_name': self.long_name
+        }
+
+class VersionedItemInfo:
+    """Holds the infos of all known versions of one resource/mapping."""
+    def __init__(self):
+        self.uuid = None
+        self.identifier = None
+        self.by_version = {}
+        self.known_versions = []
+
+    def register(self, item_info: ItemInfo) -> None:
+        """
+        Add item_info so that it can later be looked up by its version.
+
+        The first registered info determines the identifier and uuid. A
+        later info with a different uuid, or with a version that is already
+        registered, causes ValueError.
+        """
+        nothing_registered_yet = self.identifier is None
+
+        if nothing_registered_yet:
+            self.identifier = item_info.identifier
+            self.uuid = item_info.uuid
+        elif self.uuid != item_info.uuid:
+            template = _('uuid_mismatch_{identifier}')
+            raise ValueError(template.format(identifier=self.identifier))
+
+        version = item_info.version
+        version_key = util.version_string(version)
+
+        if version_key in self.by_version:
+            template = _('version_clash_{identifier}_{version}')
+            raise ValueError(template.format(identifier=self.identifier,
+                                             version=version_key))
+
+        self.by_version[version_key] = item_info
+        self.known_versions.append(version)
+
+    def get_by_ver(self, ver: Optional[list[int]]=None) -> Optional[ItemInfo]:
+        """
+        Return the info of version ver of the item, or None if that version
+        is not registered.
+
+        When ver is not given, the last entry of known_versions is used.
+        """
+        wanted_version = ver or self.known_versions[-1]
+        version_key = util.version_string(wanted_version)
+
+        return self.by_version.get(version_key)
+
+    def get_all(self) -> list[ItemInfo]:
+        """
+        Return the infos of all registered versions, in the order in which the
+        versions appear in known_versions.
+        """
+        all_infos = []
+        for version in self.known_versions:
+            all_infos.append(self.by_version[util.version_string(version)])
+
+        return all_infos
+
+class PatternTreeNode:
+ """
+ "Pattern Tree" is how we refer to the data structure used for querying
+ Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal
+ is to make it possible for given URL to quickly retrieve all known patterns
+ that match it.
+
+ Each node keeps an item for the path that ends exactly at it
+ (literal_match), items for paths that continue from it with a final '*',
+ '**' or '***' segment (wildcard_matches) and child nodes keyed by segment.
+ """
+ def __init__(self):
+ # Slots for '*', '**' and '***' respectively; add() indexes them with
+ # len(segment) - 1.
+ self.wildcard_matches = [None, None, None]
+ self.literal_match = None
+ self.children = {}
+
+ def search(self, segments):
+ """
+ Yields all matches of this segments sequence against the tree that
+ starts at this node. Results are produced in order from greatest to
+ lowest pattern specificity.
+ """
+ # nodes[i] is the node reached after consuming the first i segments.
+ nodes = [self]
+
+ for segment in segments:
+ next_node = nodes[-1].children.get(segment)
+ if next_node is None:
+ break
+
+ nodes.append(next_node)
+
+ # The conditions below are evaluated after nodes.pop(), so len(nodes)
+ # then equals the number of segments consumed to reach the popped node.
+ nsegments = len(segments)
+ cond_literal = lambda: len(nodes) == nsegments
+ cond_wildcard = [
+ lambda: len(nodes) + 1 == nsegments and segments[-1] != '*',
+ lambda: len(nodes) + 1 < nsegments,
+ lambda: len(nodes) + 1 != nsegments or segments[-1] != '***'
+ ]
+
+ # Visit the deepest reached node first.
+ while nodes:
+ node = nodes.pop()
+
+ for item, condition in [(node.literal_match, cond_literal),
+ *zip(node.wildcard_matches, cond_wildcard)]:
+ if item is not None and condition():
+ yield item
+
+ def add(self, segments, item_instantiator):
+ """
+ Make item queryable through (this branch of) the Pattern Tree. If there
+ was not yet any item associated with the tree path designated by
+ segments, create a new one using item_instantiator() function. Return
+ all items matching this path (both the ones that existed and the ones
+ just created).
+ """
+ node = self
+ segment = None
+
+ for segment in segments:
+ # Remember wildcard slots of the parent of the node we step into.
+ wildcards = node.wildcard_matches
+
+ child = node.children.get(segment) or PatternTreeNode()
+ node.children[segment] = child
+ node = child
+
+ if node.literal_match is None:
+ node.literal_match = item_instantiator()
+
+ # Also covers empty segments, where segment is still None.
+ if segment not in ('*', '**', '***'):
+ return [node.literal_match]
+
+ # The path ends with a wildcard: register it in the parent's slot too.
+ if wildcards[len(segment) - 1] is None:
+ wildcards[len(segment) - 1] = item_instantiator()
+
+ return [node.literal_match, wildcards[len(segment) - 1]]
+
+# Regexes used by DeconstructedUrl: proto_regex splits a URL into its protocol
+# and the rest; http_regex or ftp_regex then extracts the 'domain' and 'path'
+# groups from that rest. User and query parts are matched but not captured.
+proto_regex = re.compile(r'^(?P<proto>\w+)://(?P<rest>.*)$')
+user_re = r'[^/?#@]+@' # r'(?P<user>[^/?#@]+)@' # discarded for now
+query_re = r'\??[^#]*' # r'\??(?P<query>[^#]*)' # discarded for now
+domain_re = r'(?P<domain>[^/?#]+)'
+path_re = r'(?P<path>[^?#]*)'
+http_regex = re.compile(f'{domain_re}{path_re}{query_re}.*')
+ftp_regex = re.compile(f'(?:{user_re})?{domain_re}{path_re}.*')
+
+class UrlError(ValueError):
+    """Raised for a URL or URL pattern that is invalid or not supported."""
+
+class DeconstructedUrl:
+    """
+    Represents a deconstructed URL or URL pattern.
+
+    Attributes set by the constructor:
+      url    - the original string,
+      proto  - the protocol ('http', 'https' or 'ftp'),
+      domain - list of domain labels in reversed order (TLD first),
+      path   - list of non-empty path segments.
+    """
+    def __init__(self, url):
+        """
+        Deconstruct url. Raises UrlError if url cannot be parsed or uses a
+        protocol other than http, https or ftp.
+        """
+        self.url = url
+
+        match = proto_regex.match(url)
+        if not match:
+            raise UrlError(_('invalid_URL_{}').format(url))
+
+        self.proto = match.group('proto')
+        if self.proto not in ('http', 'https', 'ftp'):
+            # Report the offending protocol; a bare `proto` is not defined
+            # here and would raise NameError instead of UrlError.
+            raise UrlError(_('disallowed_protocol_{}').format(self.proto))
+
+        # Only ftp URLs are parsed with an optional 'user@' part.
+        rest_regex = ftp_regex if self.proto == 'ftp' else http_regex
+        match = rest_regex.match(match.group('rest'))
+
+        if not match:
+            raise UrlError(_('invalid_URL_{}').format(url))
+
+        self.domain = match.group('domain').split('.')
+        self.domain.reverse()
+        # filter(None, ...) drops empty segments caused by repeated,
+        # leading or trailing slashes.
+        self.path = [*filter(None, match.group('path').split('/'))]
+
+class PatternMapping:
+    """
+    Pairs a mapping info with one of that mapping's URL patterns. Objects of
+    this class are what gets stored in the Pattern Tree.
+    """
+    def __init__(self, pattern: str, mapping_info: MappingInfo):
+        self.pattern = pattern
+        self.mapping_info = mapping_info
+
+    def register(self, pattern_tree: dict):
+        """
+        Insert self into pattern_tree (a dict of per-protocol domain trees)
+        so that it can be found by queries.
+        """
+        deco = DeconstructedUrl(self.pattern)
+
+        domain_tree = pattern_tree.get(deco.proto)
+        if domain_tree is None:
+            domain_tree = PatternTreeNode()
+        pattern_tree[deco.proto] = domain_tree
+
+        # Domain tree items are path trees; path tree items are lists of
+        # PatternMapping objects.
+        path_trees = domain_tree.add(deco.domain, PatternTreeNode)
+        for path_tree in path_trees:
+            for match_list in path_tree.add(deco.path, list):
+                match_list.append(self)
+
+class Malcontent:
+    """
+    Instance of this class represents a directory with files that can be loaded
+    and served by Hydrilla.
+
+    self.infos maps item type ('resource' or 'mapping') to a dict that maps
+    item identifiers to VersionedItemInfo objects. self.pattern_tree maps
+    protocol names to Pattern Trees used to answer URL queries.
+    """
+    def __init__(self, malcontent_dir_path: Union[Path, str]):
+        """
+        When an instance of Malcontent is constructed, it searches
+        malcontent_dir_path for serveable site-modifying packages and loads
+        them into its data structures.
+
+        Item definitions are expected at
+        <malcontent_dir_path>/<item_type>/<identifier>/<version>. An item that
+        fails to load is logged and skipped unless the current app has its
+        'werror' setting enabled, in which case the error propagates.
+
+        Raises ValueError if malcontent_dir_path is not a directory.
+        """
+        self.infos = {'resource': {}, 'mapping': {}}
+        self.pattern_tree = {}
+
+        self.malcontent_dir_path = pathlib.Path(malcontent_dir_path).resolve()
+
+        if not self.malcontent_dir_path.is_dir():
+            raise ValueError(_('malcontent_dir_path_not_dir'))
+
+        for item_type in ('mapping', 'resource'):
+            type_path = self.malcontent_dir_path / item_type
+            if not type_path.is_dir():
+                continue
+
+            for subpath in type_path.iterdir():
+                if not subpath.is_dir():
+                    continue
+
+                for ver_file in subpath.iterdir():
+                    try:
+                        self._load_item(item_type, ver_file)
+                    except Exception as e:
+                        if current_app._hydrilla_werror:
+                            raise e from None
+
+                        msg = _('couldnt_load_item_from_{}').format(ver_file)
+                        logging.error(msg, exc_info=True)
+
+        self._report_missing()
+        self._finalize()
+
+    def _load_item(self, item_type: str, ver_file: Path) -> None:
+        """
+        Reads, validates and autocompletes serveable mapping/resource
+        definition, then registers information from it in data structures.
+
+        Raises ValueError when the identifier or version stated inside the
+        definition disagrees with the file's location; errors from parsing,
+        schema validation and registration propagate as well.
+        """
+        version = util.parse_version(ver_file.name)
+        identifier = ver_file.parent.name
+
+        with open(ver_file, 'rt') as file_handle:
+            item_json = json.load(file_handle)
+
+        util.validator_for(f'api_{item_type}_description-1.schema.json')\
+            .validate(item_json)
+
+        if item_type == 'resource':
+            item_info = ResourceInfo(item_json)
+        else:
+            item_info = MappingInfo(item_json)
+
+        # The placeholders below are named, so values have to be passed to
+        # format() as keyword arguments (a positional dict raises KeyError).
+        if item_info.identifier != identifier:
+            msg = _('item_{item}_in_file_{file}')\
+                .format(item=item_info.identifier, file=ver_file)
+            raise ValueError(msg)
+
+        if item_info.version != version:
+            ver_str = util.version_string(item_info.version)
+            msg = _('item_version_{ver}_in_file_{file}')\
+                .format(ver=ver_str, file=ver_file)
+            raise ValueError(msg)
+
+        versioned_info = self.infos[item_type].get(identifier)
+        if versioned_info is None:
+            versioned_info = VersionedItemInfo()
+            self.infos[item_type][identifier] = versioned_info
+
+        versioned_info.register(item_info)
+
+    def _all_of_type(self, item_type: str) -> Iterable[ItemInfo]:
+        """Iterator over all registered versions of all mappings/resources."""
+        for versioned_info in self.infos[item_type].values():
+            yield from versioned_info.by_version.values()
+
+    def _report_missing(self) -> None:
+        """
+        Use logger to print information about items that are referenced but
+        were not loaded.
+        """
+        def report_missing_dependency(info: ResourceInfo, dep: str) -> None:
+            # NOTE(review): this message key uses %(name)s-style placeholders
+            # while the value is filled in with str.format(); verify against
+            # the message catalogue which placeholder style is intended.
+            msg = _('no_dep_%(resource)s_%(ver)s_%(dep)s')\
+                .format(dep=dep, resource=info.identifier,
+                        ver=util.version_string(info.version))
+            logging.error(msg)
+
+        for resource_info in self._all_of_type('resource'):
+            for dep in resource_info.dependencies:
+                if dep not in self.infos['resource']:
+                    report_missing_dependency(resource_info, dep)
+
+        def report_missing_payload(info: MappingInfo, payload: str) -> None:
+            msg = _('no_payload_{mapping}_{ver}_{payload}')\
+                .format(mapping=info.identifier, payload=payload,
+                        ver=util.version_string(info.version))
+            logging.error(msg)
+
+        for mapping_info in self._all_of_type('mapping'):
+            for payload in mapping_info.payloads.values():
+                if payload not in self.infos['resource']:
+                    report_missing_payload(mapping_info, payload)
+
+    def _finalize(self) -> None:
+        """
+        Initialize structures needed to serve queries. Called once after all
+        data gets loaded.
+
+        Sorts the known versions of every item and registers every pattern of
+        every mapping in the Pattern Tree. A pattern that fails to register
+        is logged and skipped unless the current app has 'werror' enabled.
+        """
+        for infos_dict in self.infos.values():
+            for versioned_info in infos_dict.values():
+                versioned_info.known_versions.sort()
+
+        for info in self._all_of_type('mapping'):
+            for pattern in info.payloads:
+                try:
+                    PatternMapping(pattern, info).register(self.pattern_tree)
+                except Exception as e:
+                    if current_app._hydrilla_werror:
+                        raise e from None
+                    msg = _('couldnt_register_{mapping}_{ver}_{pattern}')\
+                        .format(mapping=info.identifier, pattern=pattern,
+                                ver=util.version_string(info.version))
+                    logging.error(msg)
+
+    def query(self, url: str) -> list[MappingInfo]:
+        """
+        Return a list of registered mappings that match url.
+
+        If multiple versions of a mapping are applicable, only the most recent
+        is included in the result.
+
+        Raises UrlError (from DeconstructedUrl) if url cannot be deconstructed.
+        """
+        deco = DeconstructedUrl(url)
+
+        # Newest applicable MappingInfo found so far, keyed by identifier.
+        collected = {}
+
+        domain_tree = self.pattern_tree.get(deco.proto) or PatternTreeNode()
+
+        def process_mapping(pattern_mapping: PatternMapping) -> None:
+            # A pattern with a trailing slash only applies to URLs that have
+            # a trailing slash as well.
+            if url[-1] != '/' and pattern_mapping.pattern[-1] == '/':
+                return
+
+            info = pattern_mapping.mapping_info
+
+            if info.identifier not in collected or \
+               info.version > collected[info.identifier].version:
+                collected[info.identifier] = info
+
+        for path_tree in domain_tree.search(deco.domain):
+            for matches_list in path_tree.search(deco.path):
+                for pattern_mapping in matches_list:
+                    process_mapping(pattern_mapping)
+
+        return list(collected.values())
+
+# Blueprint holding the routes defined in this module; create_app() registers
+# it on the Flask instance.
+bp = Blueprint('bp', __package__)
+
+def create_app(config_path: Path=(here / 'config.json'),
+               flask_config: Optional[dict]=None):
+    """
+    Create the Flask instance.
+
+    config_path is the Hydrilla config file to load (see load_config()).
+    flask_config, if given, is applied to the Flask app's config; its 'lang'
+    key (default 'en') selects the gettext translation to use.
+
+    In debug mode, when not running in Werkzeug's reloader child process,
+    the app is returned before the malcontent directory is loaded and before
+    the routes are registered.
+
+    Raises NotImplementedError if the loaded config contains the unsupported
+    'hydrilla_parent' option.
+    """
+    # A fresh dict per call instead of a shared mutable default argument.
+    if flask_config is None:
+        flask_config = {}
+
+    config = load_config(config_path)
+
+    app = Flask(__package__, static_url_path='/',
+                static_folder=config['malcontent_dir'])
+    app.config.update(flask_config)
+
+    language = flask_config.get('lang', 'en')
+    translation = gettext.translation('hydrilla', localedir=(here / 'locales'),
+                                      languages=[language])
+
+    app._hydrilla_gettext = translation.gettext
+
+    # https://stackoverflow.com/questions/9449101/how-to-stop-flask-from-initialising-twice-in-debug-mode
+    if app.debug and os.environ.get('WERKZEUG_RUN_MAIN') != 'true':
+        return app
+
+    app._hydrilla_project_url = config['hydrilla_project_url']
+    app._hydrilla_werror = config.get('werror', False)
+    if 'hydrilla_parent' in config:
+        # The exception class named here previously was not defined anywhere
+        # in this module, so reaching this line raised NameError.
+        raise NotImplementedError(
+            "config option 'hydrilla_parent' (loaded starting from "
+            f"{config_path.name}) is not supported"
+        )
+
+    malcontent_dir = pathlib.Path(config['malcontent_dir'])
+    if not malcontent_dir.is_absolute():
+        malcontent_dir = config_path.parent / malcontent_dir
+    # Malcontent and the _() helper read current_app, hence the app context.
+    with app.app_context():
+        app._hydrilla_malcontent = Malcontent(malcontent_dir.resolve())
+
+    app.register_blueprint(bp)
+
+    return app
+
+def _(text_key):
+    """Translate text_key with the gettext function kept on the current app."""
+    translate = current_app._hydrilla_gettext
+
+    return translate(text_key)
+
+def malcontent():
+    """Return the Malcontent object kept on the current app."""
+    loaded_malcontent = current_app._hydrilla_malcontent
+
+    return loaded_malcontent
+
+# TODO: override create_jinja_environment() method of Flask instead of wrapping
+# Jinja environment
+class MyEnvironment(Environment):
+    """
+    jinja2.Environment subclass whose templates always receive the GNU
+    gettext function (as '_' and '__'), the url_for function and the
+    'hydrilla_project_url' config option when their render() is called.
+    """
+    def get_template(self, *args, **kwargs):
+        """
+        Fetch a template the way jinja2.Environment does, then replace its
+        render() with a wrapper that supplies the default variables. Keyword
+        arguments given to render() take precedence over those defaults.
+        """
+        template = super().get_template(*args, **kwargs)
+        plain_render = template.render
+
+        def render_with_defaults(*render_args, **render_kwargs):
+            gettext_fn = current_app._hydrilla_gettext
+            project_url = current_app._hydrilla_project_url
+
+            def escaping_gettext(text_key):
+                from markupsafe import escape
+
+                translated = gettext_fn(text_key)
+                return str(escape(translated))
+
+            defaults = {
+                '_': escaping_gettext,
+                '__': escaping_gettext,
+                'url_for': url_for,
+                'hydrilla_project_url': project_url
+            }
+
+            return plain_render(*render_args, **{**defaults, **render_kwargs})
+
+        template.render = render_with_defaults
+
+        return template
+
+# Module-level Jinja environment loading templates from this package.
+# NOTE(review): autoescape is disabled; templates presumably escape values
+# explicitly (the gettext wrapper passed to them does) - confirm when adding
+# new templates.
+j2env = MyEnvironment(loader=PackageLoader(__package__), autoescape=False)
+
+# The index template is fetched once, when this module gets imported.
+indexpage = j2env.get_template('index.html')
+@bp.route('/')
+def index():
+ """Serve the rendered index page."""
+ return indexpage.render()
+
+identifier_json_re = re.compile(r'^([-0-9a-z.]+)\.json$')
+
+def get_resource_or_mapping(item_type: str, identifier: str) -> Response:
+    """
+    Serve the JSON description of the newest version of an item.
+
+    identifier is expected to carry a '.json' suffix, which gets stripped
+    before the lookup. Responds with 404 when identifier is malformed or no
+    such item is known.
+    """
+    json_name_match = identifier_json_re.match(identifier)
+    if json_name_match is None:
+        abort(404)
+
+    bare_identifier = json_name_match.group(1)
+    versioned_info = malcontent().infos[item_type].get(bare_identifier)
+
+    if versioned_info is None:
+        info = None
+    else:
+        info = versioned_info.get_by_ver()
+
+    if info is None:
+        abort(404)
+
+    # no need for send_from_directory(); path is safe, constructed by us
+    definition_path = \
+        malcontent().malcontent_dir_path / item_type / info.path()
+
+    return send_file(definition_path)
+
+@bp.route('/mapping/<string:identifier_dot_json>')
+def get_newest_mapping(identifier_dot_json: str) -> Response:
+    """Serve the JSON description of the newest version of a mapping."""
+    return get_resource_or_mapping(item_type='mapping',
+                                   identifier=identifier_dot_json)
+
+@bp.route('/resource/<string:identifier_dot_json>')
+def get_newest_resource(identifier_dot_json: str) -> Response:
+    """Serve the JSON description of the newest version of a resource."""
+    return get_resource_or_mapping(item_type='resource',
+                                   identifier=identifier_dot_json)
+
+@bp.route('/query')
+def query():
+    """
+    Answer a query for mappings applicable to the URL given in the 'url'
+    request argument.
+
+    Responds with a JSON object listing the matching mappings. A URL that
+    cannot be deconstructed (malformed or of unsupported protocol) results in
+    a 400 response instead of an unhandled UrlError.
+    """
+    url = request.args['url']
+
+    try:
+        matched_mappings = malcontent().query(url)
+    except UrlError:
+        abort(400)
+
+    mapping_refs = [info.as_query_result() for info in matched_mappings]
+    result = {
+        'api_schema_version': [1],
+        'generated_by': {
+            'name': 'hydrilla'
+        },
+        'mappings': mapping_refs
+    }
+
+    # Declare the payload as JSON rather than relying on the default mimetype.
+    return Response(json.dumps(result), mimetype='application/json')
diff --git a/src/pydrilla/templates/base.html b/src/hydrilla/server/templates/base.html
index 7b26b64..f95ce54 100644
--- a/src/pydrilla/templates/base.html
+++ b/src/hydrilla/server/templates/base.html
@@ -1,4 +1,4 @@
-{# SPDX-License-Identifier: CC-BY-NC-SA-4.0
+{# SPDX-License-Identifier: CC-BY-SA-4.0 OR AGPL-3.0-or-later
Base HTML page template.
@@ -115,7 +115,7 @@ in a proprietary program, I am not going to enforce this in court.
Copyright &copy; Wojtek Kosior.
<br>
This page was generated by Hydrilla which is free/libre software.
- You can get a copy <a href="{{ hydrilla_sources_uri|e }}">here</a>.
+ You can get a copy <a href="{{ hydrilla_project_url|e }}">here</a>.
</div>
{% endblock %}
</body>
diff --git a/src/pydrilla/templates/index.html b/src/hydrilla/server/templates/index.html
index 2555df0..3063239 100644
--- a/src/pydrilla/templates/index.html
+++ b/src/hydrilla/server/templates/index.html
@@ -1,4 +1,4 @@
-{# SPDX-License-Identifier: CC-BY-NC-SA-4.0
+{# SPDX-License-Identifier: CC-BY-SA-4.0 OR AGPL-3.0-or-later
HTML index page template.
diff --git a/src/pydrilla_dev_helper.py b/src/hydrilla_dev_helper.py
index 88dc63e..925f414 100644
--- a/src/pydrilla_dev_helper.py
+++ b/src/hydrilla_dev_helper.py
@@ -37,24 +37,12 @@ import importlib
def mypath(path_or_string):
return Path(path_or_string).resolve()
-debrel_regex = re.compile(r'^[^(]*\([^-]*-([^)]*)\)')
-
-def extract_debrel(debian_dir):
- changelog_path = mypath(debian_dir) / 'changelog'
- with open(changelog_path) as changelog_file:
- try:
- return debrel_regex.match(changelog_file.readline())[1]
- except TypeError:
- raise RuntimeException('Cannot extract debrel from %s.' %
- changelog_path)
-
class Helper:
- def __init__(self, project_root, app_package_name, version, locales_dir,
+ def __init__(self, project_root, app_package_name, locales_dir,
locales=['en', 'pl'], default_locale='en', locale_domain=None,
packages_root=None, debian_dir=None, config_path=None):
self.project_root = mypath(project_root)
self.app_package_name = app_package_name
- self.version = version
self.locales_dir = mypath(locales_dir)
self.locales = locales
self.default_locale = default_locale
@@ -102,7 +90,7 @@ class Helper:
command = ['msgfmt', po_path, '-o', mo_path]
self.run_command(command, verbose=verbose, check=True)
- self.locale_files_list.extend([po_path, mo_path])
+ self.locale_files_list.extend([po_path, mo_path])
def locale_files(self):
if self.locale_files_list is None:
@@ -161,10 +149,37 @@ class Helper:
# we exclude these from the source archive we produce
bad_file_regex = re.compile(r'^\..*|build|debian|dist')
+ changelog_line_regex = re.compile(r'''
+ ^ # match from the beginning of each line
+ \s* # skip initial whitespace (if any)
+ (?P<source_name> # capture name
+ [^\s(]+
+ )
+ \s* # again skip whitespace (if any)
+ \(
+ (?P<version> # capture version which is enclosed in parantheses
+ [^)]+
+ )
+ -
+ (?P<debrel> # capture debrel part of version separately
+ [0-9]+
+ )
+ \)
+ ''', re.VERBOSE)
+
def make_tarballs(self, verbose=False):
- name=self.app_package_name
- ver=self.version
- debrel=extract_debrel(self.debian_dir)
+ changelog_path = self.project_root / 'debian' / 'changelog'
+ with open(changelog_path, 'rt') as file_handle:
+ for line in file_handle.readlines():
+ match = changelog_line_regex.match(line)
+ if match:
+ break
+
+ if not match:
+ raise ValueError("Couldn't extract version from debian/changelog.")
+
+ name, ver, debrel = \
+ [match.group(gn) for gn in ('source_name', 'version', 'debrel')]
source_dirname = f'{name}-{ver}'
source_tarball_name = f'{name}_{ver}.orig.tar.gz'
diff --git a/src/pydrilla/__init__.py b/src/pydrilla/__init__.py
deleted file mode 100644
index 8d1565b..0000000
--- a/src/pydrilla/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .pydrilla import create_app
diff --git a/src/pydrilla/config.json b/src/pydrilla/config.json
deleted file mode 100644
index 6bb5440..0000000
--- a/src/pydrilla/config.json
+++ /dev/null
@@ -1,14 +0,0 @@
-// SPDX-License-Identifier: CC0-1.0
-
-// Example Hydrilla config file.
-//
-// Copyright (C) 2021 Wojtek Kosior
-//
-// Available under the terms of Creative Commons Zero v1.0 Universal.
-
-{
- "content_dir": "/var/lib/hydrilla/content",
- "static_resource_uri": "http://localhost:8000/",
- "hydrilla_sources_uri": "https://git.koszko.org/pydrilla/",
- "try_configs": ["/etc/pydrilla/config.json"]
-}
diff --git a/src/pydrilla/pydrilla.py b/src/pydrilla/pydrilla.py
deleted file mode 100644
index d7aef76..0000000
--- a/src/pydrilla/pydrilla.py
+++ /dev/null
@@ -1,755 +0,0 @@
-# SPDX-License-Identifier: AGPL-3.0-or-later
-
-# Main repository logic.
-#
-# This file is part of Hydrilla
-#
-# Copyright (C) 2021 Wojtek Kosior
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
-#
-#
-# I, Wojtek Kosior, thereby promise not to sue for violation of this
-# file's license. Although I request that you do not make use this code
-# in a proprietary program, I am not going to enforce this in court.
-
-from flask import Flask, Blueprint, current_app, url_for, abort, request, \
- redirect
-from jinja2 import Environment, PackageLoader
-import re
-from hashlib import sha256
-import os
-import pathlib
-import json
-import gettext
-import logging
-
-SCHEMA_VERSION = [0, 2]
-
-strip_comment_re = re.compile(r'''
-^ # match from the beginning of each line
-( # catch the part before '//' comment
- (?: # this group matches either a string or a single out-of-string character
- [^"/] |
- "
- (?: # this group matches any in-a-string character
- [^"\\] | # match any normal character
- \\[^u] | # match any escaped character like '\f' or '\n'
- \\u[a-fA-F0-9]{4} # match an escape
- )*
- "
- )*
-)
-# expect either end-of-line or a comment:
-# * unterminated strings will cause matching to fail
-# * bad comment (with '/' instead of '//') will be indicated by second group
-# having length 1 instead of 2 or 0
-(//?|$)
-''', re.VERBOSE)
-
-def strip_json_comments(text):
- processed = 0
- stripped_text = []
- for line in text.split('\n'):
- match = strip_comment_re.match(line)
-
- if match is None: # unterminated string
- # ignore this error, let json module report it
- stripped = line
- elif len(match[2]) == 1:
- raise json.JSONDecodeError('bad comment', text,
- processed + len(match[1]))
- else:
- stripped = match[1]
-
- stripped_text.append(stripped)
- processed += len(line) + 1
-
- return '\n'.join(stripped_text)
-
-here = pathlib.Path(__file__).resolve().parent
-
-bp = Blueprint('bp', __package__)
-
-def load_config(config_path):
- config = {}
- to_load = [config_path]
- failures_ok = [False]
-
- while to_load:
- path = to_load.pop()
- can_fail = failures_ok.pop()
-
- try:
- with open(config_path) as config_file:
- new_config = json.loads(strip_json_comments(config_file.read()))
- except Exception as e:
- if can_fail:
- continue
- raise e from None
-
- config.update(new_config)
-
- for key, failure_ok in [('try_configs', True), ('use_configs', False)]:
- paths = new_config.get(key, [])
- paths.reverse()
- to_load.extend(paths)
- failures_ok.extend([failure_ok] * len(paths))
-
- for key in ['try_configs', 'use_configs']:
- if key in config:
- config.pop(key)
-
- return config
-
-def get_content_file_path(path):
- if os.path.sep != '/':
- path.replace('/', os.path.sep)
-
- path = pathlib.Path(path)
- if path.is_absolute():
- raise ValueError(_('path_is_absolute_{}').format(path))
-
- return path
-
-class MyNotImplError(NotImplementedError):
- '''Raised when a planned but not-yet-completed feature is used.'''
- def __init__(self, what, where):
- super().__init__(_('not_implemented_{what}_{where}')
- .format(what=what, where=where))
-
-def normalize_version(ver):
- '''
- ver is an array of integers. Strip right-most zeroes from ver.
-
- Returns a *new* array. Doesn't modify its argument.
- '''
- new_len = 0
- for i, num in enumerate(ver):
- if num != 0:
- new_len = i + 1
-
- return ver[:new_len]
-
-def parse_version(ver_str):
- '''
- Convert ver_str into an array representation, e.g. for ver_str="4.6.13.0"
- return [4, 6, 13, 0].
- '''
- return [int(num) for num in ver_str.split('.')]
-
-def version_string(ver, rev=None):
- '''
- ver is an array of integers. rev is an optional integer. Produce string
- representation of version (optionally with revision number), like:
- 1.2.3-5
- No version normalization is performed.
- '''
- return '.'.join([str(n) for n in ver]) + ('' if rev is None else f'-{rev}')
-
-class VersionedContentItem:
- '''Stores definitions of multiple versions of website content item.'''
- def __init__(self):
- self.uuid = None
- self.identifier = None
- self.by_version = {}
- self.known_versions = []
-
- def register_item(self, item):
- '''Make item queryable by version. Perform sanity checks for uuid.'''
- if self.identifier is None:
- self.identifier = item['identifier']
- self.uuid = item['uuid']
- elif self.uuid != item['uuid']:
- raise ValueError(_('uuid_mismatch_{identifier}')
- .format(identifier=self.identifier))
-
- ver = item['version']
- ver_str = version_string(ver)
-
- if ver_str in self.by_version:
- raise ValueError(_('version_clash_{identifier}_{version}')
- .format(identifier=self.identifier,
- version=ver_str))
-
- self.by_version[ver_str] = item
- self.known_versions.append(ver)
-
- def get_by_ver(self, ver=None):
- '''
- Find and return definition of the newest version of item.
-
- If ver is specified, instead find and return definition of that version
- of the item (or None is absent).
- '''
- ver = version_string(ver or self.known_versions[-1])
-
- return self.by_version.get(ver)
-
- def get_all(self):
- '''Return a list of all definitions of item, ordered by version.'''
- return [self.by_version[version_string(ver)]
- for ver in self.known_versions]
-
-class PatternTreeNode:
- '''
- "Pattern Tree" is how we refer to the data structure used for querying
- Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal
- is to make it possible for given URL to quickly retrieve all known patterns
- that match it.
- '''
- def __init__(self):
- self.wildcard_matches = [None, None, None]
- self.literal_match = None
- self.children = {}
-
- def search(self, segments):
- '''
- Yields all matches of this segments sequence against the tree that
- starts at this node. Results are produces in order from greatest to
- lowest pattern specificity.
- '''
- nodes = [self]
-
- for segment in segments:
- next_node = nodes[-1].children.get(segment)
- if next_node is None:
- break
-
- nodes.append(next_node)
-
- nsegments = len(segments)
- cond_literal = lambda: len(nodes) == nsegments
- cond_wildcard = [
- lambda: len(nodes) + 1 == nsegments and segments[-1] != '*',
- lambda: len(nodes) + 1 < nsegments,
- lambda: len(nodes) + 1 != nsegments or segments[-1] != '***'
- ]
-
- while nodes:
- node = nodes.pop()
-
- for item, condition in [(node.literal_match, cond_literal),
- *zip(node.wildcard_matches, cond_wildcard)]:
- if item is not None and condition():
- yield item
-
- def add(self, segments, item_instantiator):
- '''
- Make item queryable through (this branch of) the Pattern Tree. If there
- was not yet any item associated with the tree path designated by
- segments, create a new one using item_instantiator() function. Return
- all items matching this path (both the ones that existed and the ones
- just created).
- '''
- node = self
- segment = None
-
- for segment in segments:
- wildcards = node.wildcard_matches
-
- child = node.children.get(segment) or PatternTreeNode()
- node.children[segment] = child
- node = child
-
- if node.literal_match is None:
- node.literal_match = item_instantiator()
-
- if segment not in ('*', '**', '***'):
- return [node.literal_match]
-
- if wildcards[len(segment) - 1] is None:
- wildcards[len(segment) - 1] = item_instantiator()
-
- return [node.literal_match, wildcards[len(segment) - 1]]
-
-proto_regex = re.compile(r'^(?P<proto>\w+)://(?P<rest>.*)$')
-user_re = r'[^/?#@]+@' # r'(?P<user>[^/?#@]+)@' # discarded for now
-query_re = r'\??[^#]*' # r'\??(?P<query>[^#]*)' # discarded for now
-domain_re = r'(?P<domain>[^/?#]+)'
-path_re = r'(?P<path>[^?#]*)'
-http_regex = re.compile(f'{domain_re}{path_re}{query_re}.*')
-ftp_regex = re.compile(f'(?:{user_re})?{domain_re}{path_re}.*')
-
-class UrlError(ValueError):
- pass
-
-class DeconstructedUrl:
- '''Represents a deconstructed URL or URL pattern'''
- def __init__(self, url):
- self.url = url
-
- match = proto_regex.match(url)
- if not match:
- raise UrlError(_('invalid_URL_{}').format(url))
-
- self.proto = match.group('proto')
- if self.proto not in ('http', 'https', 'ftp'):
- raise UrlError(_('disallowed_protocol_{}').format(proto))
-
- if self.proto == 'ftp':
- match = ftp_regex.match(match.group('rest'))
- elif self.proto in ('http', 'https'):
- match = http_regex.match(match.group('rest'))
-
- if not match:
- raise UrlError(_('invalid_URL_{}').format(url))
-
- self.domain = match.group('domain').split('.')
- self.domain.reverse()
- self.path = [*filter(None, match.group('path').split('/'))]
-
-class MappingItem:
- '''
- A mapping, together with one of its patterns, as stored in Pattern Tree.
- '''
- def __init__(self, pattern, mapping):
- self.pattern = pattern
- self.mapping = mapping
-
- def register(self, patterns_by_proto):
- '''
- Make self queryable through the Pattern Tree that starts with the
- protocols dictionary passed in the argument.
- '''
- deco = DeconstructedUrl(self.pattern)
-
- domain_tree = patterns_by_proto.get(deco.proto) or PatternTreeNode()
- patterns_by_proto[deco.proto] = domain_tree
-
- for path_tree in domain_tree.add(deco.domain, PatternTreeNode):
- for match_list in path_tree.add(deco.path, list):
- match_list.append(self)
-
-class Content:
- '''Stores serveable website content.'''
- def __init__(self, content_dir_path):
- '''
- When an instance of Content is constructed, it searches
- content_dir_path for custom serveable site content and loads it.
- '''
- self.resources = {}
- self.mappings = {}
- self.licenses = {}
- self.indexes = {}
- self.definition_processors = {
- 'resource': self._process_resource_or_mapping,
- 'mapping': self._process_resource_or_mapping,
- 'license': self._process_license
- }
- self.patterns_by_proto = {}
- self.file_sha256sums = {}
-
- self.content_dir_path = pathlib.Path(content_dir_path).resolve()
-
- if not self.content_dir_path.is_dir():
- raise ValueError(_('content_dir_path_not_dir'))
-
- for subdir_path in self.content_dir_path.iterdir():
- if not subdir_path.is_dir():
- continue
- try:
- self._load_content_from_subdir(subdir_path, subdir_path.name)
- except Exception as e:
- if current_app._pydrilla_werror:
- raise e from None
- logging.error(_('couldnt_load_content_from_%s'), subdir_path,
- exc_info=True)
-
- self._report_missing()
- self._finalize()
-
- def _load_content_from_subdir(self, subdir_path, source_name):
- '''
- Helper function used to load definitions from index.json of a
- subdirectory of the content direcotory.
- '''
- index_path = subdir_path / 'index.json'
- with open(index_path) as index_file:
- index = json.loads(strip_json_comments(index_file.read()))
-
- self._process_index(index, source_name)
-
- @staticmethod
- def register_item(dict, item):
- '''
- Helper function used to add a versioned item definition to content
- data structures.
- '''
- identifier = item['identifier']
- versioned_item = dict.get(identifier)
- if versioned_item is None:
- versioned_item = VersionedContentItem()
- dict[identifier] = versioned_item
-
- versioned_item.register_item(item)
-
- @staticmethod
- def _process_copyright_and_license(definition):
- '''Helper function used by other _process_*() methods.'''
- for field in ['copyright', 'licenses']:
- if definition[field] == 'auto':
- raise MyNotImplError(f'"{{field}}": "auto"',
- definition['source_name'])
-
- def _get_file_sha256sum(self, path):
- '''
- Compute sha256 of the file at path. Cache results on this Content
- object.
- '''
- path = path.resolve()
- sha256sum = self.file_sha256sums.get(path)
-
- if sha256sum is None:
- with open(path, mode='rb') as hashed_file:
- sha256sum = sha256(hashed_file.read()).digest().hex()
- self.file_sha256sums[path] = sha256sum
-
- return sha256sum
-
- def _add_file_sha256sum(self, source_name, file_object):
- '''
- Expect file_object to be a dict with field "file" holding a file path
- relative to content directory's subdirectory source_name. Compute or
- fetch from cache the sha256 sum of that file and put it in file_object's
- "sha256" field.
- '''
- file_path = self.content_dir_path / source_name / file_object['file']
- file_object['sha256'] = self._get_file_sha256sum(file_path)
-
- def _process_resource_or_mapping(self, definition, index):
- '''
- Sanitizes, autocompletes and registers serveable mapping/resource
- definition.
- '''
- definition['version'] = normalize_version(definition['version'])
-
- if definition['type'] == 'resource':
- self._process_copyright_and_license(definition)
- definition['dependencies'] = definition.get('dependencies', [])
- self.register_item(self.resources, definition)
- source_name = definition['source_name']
- for script in definition['scripts']:
- self._add_file_sha256sum(source_name, script)
- else:
- self.register_item(self.mappings, definition)
-
- def _process_license(self, license, index):
- '''Sanitizes and registers serveable license definition.'''
- identifier = license['identifier']
- if identifier in self.licenses:
- raise ValueError(_('license_clash_{}').format(identifier))
-
- self.licenses[identifier] = license
-
- source_name = license['source_name']
- for legal_text in license['legal_text']:
- self._add_file_sha256sum(source_name, legal_text)
-
- notice = license.get('notice')
- if notice is not None:
- self._add_file_sha256sum(source_name, notice)
-
- def _process_index(self, index, source_name):
- '''
- Sanitizes, autocompletes and registers data from a loaded index.json
- file.
- '''
- schema_ver = normalize_version(index['schema_version'])
- index['schema_version'] = schema_ver
- if schema_ver != SCHEMA_VERSION:
- raise ValueError('index_json_schema_mismatch_{found}_{required}'
- .format(found=version_string(schema_ver),
- required=version_string(SCHEMA_VERSION)))
-
- if source_name in self.indexes:
- raise ValueError(_('source_name_clash_{}').format(source_name))
-
- index['source_name'] = source_name
-
- self._process_copyright_and_license(index)
-
- self.indexes[source_name] = index
-
- for definition in index['definitions']:
- try:
- definition['source_name'] = source_name
- definition['source_copyright'] = index['copyright']
- definition['source_licenses'] = index['licenses']
- processor = self.definition_processors[definition['type']]
- processor(definition, index)
- except Exception as e:
- if current_app._pydrilla_werror:
- raise e from None
- logging.error(_('couldnt_load_definition_from_%s'), subdir_path,
- exc_info=True)
- @staticmethod
- def all_items(versioned_items_dict):
- '''Iterator over all registered versions of all items.'''
- for versioned_item in versioned_items_dict.values():
- for item in versioned_item.by_version.values():
- yield item
-
- def _report_missing(self):
- '''
- Use logger to print information about items that are referenced but
- were not loaded.
- '''
- def report_missing_license(object, object_type, lic):
- if object_type == 'index':
- logging.error(_('no_index_license_%(source)s_%(lic)s'),
- source=object['source_name'], lic=lic)
- return
-
- ver_str = version_string(object['version'])
- kwargs = {object_type: object['identifier'], ver: ver_str, lic: lic}
- if object_type == 'resource':
- fmt = _('no_resource_license_%(resource)s_%(ver)s_%(lic)s')
- else:
- fmt = _('no_mapping_license_%(mapping)s_%(ver)s_%(lic)s')
-
- logging.error(fmt, **kwargs)
-
- for object_type, iterable in [
- ('index', self.indexes.values()),
- ('resource', self.all_items(self.resources))
- ]:
- for object in iterable:
- to_process = [object['licenses']]
- licenses = []
- while to_process:
- term = to_process.pop()
-
- if type(term) is str:
- if term not in ['or', 'and'] and \
- term not in self.licenses:
- report_missing_license(object, object_type, lic)
- continue
-
- to_process.extend(term)
-
- def report_missing_dependency(resource, dep):
- logging.error(_('no_dep_%(resource)s_%(ver)s_%(dep)s'),
- dep=dep, resource=resource['identifier'],
- ver=version_string(resource['version']))
-
- for resource in self.all_items(self.resources):
- for dep in resource['dependencies']:
- if dep not in self.resources:
- report_missing_dependency(resource, dep)
-
- def report_missing_payload(mapping, payload):
- logging.error(_('no_payload_%(mapping)s_%(ver)s_%(payload)s'),
- mapping=mapping['identifier'], payload=payload,
- ver=version_string(mapping['version']))
-
- for mapping in self.all_items(self.mappings):
- for payload in mapping['payloads']:
- payload = payload['payload']
- if payload not in self.resources:
- report_missing_payload(mapping, payload)
-
- def _finalize(self):
- '''
- Initialize structures needed to serve queries. Called once after all
- data gets loaded.
- '''
- for dict in [self.resources, self.mappings]:
- for versioned_item in dict.values():
- versioned_item.known_versions.sort()
-
- for mapping in self.all_items(self.mappings):
- for payload in mapping['payloads']:
- pattern = payload['pattern']
- try:
- MappingItem(pattern, mapping)\
- .register(self.patterns_by_proto)
- except Exception as e:
- if current_app._pydrilla_werror:
- raise e from None
- logging.error(
- _('couldnt_register_%(mapping)s_%(ver)s_%(pattern)s'),
- mapping=mapping['identifier'], pattern=pattern,
- ver=version_string(mapping['version'])
- )
-
- def query(self, url):
- '''
- Return a list of registered mappings that match url.
-
- If multiple versions of a mapping are applicable, only the most recent
- is included in the result.
- '''
- deco = DeconstructedUrl(url)
-
- mappings = {}
-
- domain_tree = self.patterns_by_proto.get(deco.proto) \
- or PatternTreeNode()
-
- def process_item(item):
- if url[-1] != '/' and item.pattern[-1] == '/':
- return
-
- identifier = item.mapping['identifier']
-
- if identifier not in mappings or \
- item.mapping['version'] > mappings[identifier]['version']:
- mappings[identifier] = item.mapping
-
- for path_tree in domain_tree.search(deco.domain):
- for item_list in path_tree.search(deco.path):
- for item in item_list:
- process_item(item)
-
- return list(mappings.values())
-
-def create_app(config_path=(here / 'config.json'), flask_config={}):
- app = Flask(__package__)
- app.config.update(flask_config)
-
- language = flask_config.get('lang', 'en')
- translation = gettext.translation('pydrilla', localedir=(here / 'locales'),
- languages=[language])
-
- app._pydrilla_gettext = translation.gettext
-
- # https://stackoverflow.com/questions/9449101/how-to-stop-flask-from-initialising-twice-in-debug-mode
- if app.debug and os.environ.get('WERKZEUG_RUN_MAIN') != 'true':
- return app
-
- config = load_config(config_path)
- for key in ['static_resource_uri', 'content_dir', 'hydrilla_sources_uri']:
- if key not in config:
- raise ValueError(_('config_key_absent_{}').format(key))
-
- app._pydrilla_static_resource_uri = config['static_resource_uri']
- if app._pydrilla_static_resource_uri[-1] != '/':
- app._pydrilla_static_resource_uri += '/'
- app._pydrilla_hydrilla_sources_uri = config['hydrilla_sources_uri']
- app._pydrilla_werror = config.get('werror', False)
- if 'hydrilla_parent' in config:
- raise MyNotImplError('hydrilla_parent', config_path.name)
-
- content_dir = pathlib.Path(config['content_dir'])
- if not content_dir.is_absolute():
- content_dir = config_path.parent / content_dir
- with app.app_context():
- app._pydrilla_content = Content(content_dir.resolve())
-
- app.register_blueprint(bp)
-
- return app
-
-def _(text_key):
- return current_app._pydrilla_gettext(text_key)
-
-def content():
- return current_app._pydrilla_content
-
-class MyEnvironment(Environment):
- '''
- A wrapper class around jinja2.Environment that causes GNU gettext function
- (as '_' and '__'), url_for function and 'hydrilla_sources_uri' config option
- to be passed to every call of each template's render() method.
- '''
-
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
-
- def get_template(self, *args, **kwargs):
- template = super().get_template(*args, **kwargs)
- old_render = template.render
-
- def new_render(*args, **kwargs):
- _ = current_app._pydrilla_gettext
- sources_uri = current_app._pydrilla_hydrilla_sources_uri
-
- def escaping_gettext(text_key):
- from markupsafe import escape
-
- return str(escape(_(text_key)))
-
- final_kwargs = {
- '_': escaping_gettext,
- '__': escaping_gettext,
- 'url_for': url_for,
- 'hydrilla_sources_uri' : sources_uri
- }
- final_kwargs.update(kwargs)
-
- return old_render(*args, **final_kwargs)
-
- template.render = new_render
-
- return template
-
-j2env = MyEnvironment(loader=PackageLoader(__package__), autoescape=False)
-
-indexpage = j2env.get_template('index.html')
-@bp.route('/')
-def index():
- return indexpage.render()
-
-def get_resource_or_mapping(identifier, get_dict):
- ver = request.args.get('ver')
- versioned_item = get_dict().get(identifier)
-
- if ver == 'all':
- definition = versioned_item.get_all() if versioned_item else []
- else:
- if ver is not None:
- try:
- ver = normalize_version(parse_version(ver))
- except:
- abort(400)
-
- definition = versioned_item and versioned_item.get_by_ver(ver)
- if definition is None:
- abort(404)
-
- return json.dumps(definition)
-
-def get_license_or_source(identifier, get_dict):
- definition = get_dict().get(identifier)
- if definition is None:
- abort(404)
-
- return json.dumps(definition)
-
-for item_type, get_dict, get_item in [
- ('resource', lambda: content().resources, get_resource_or_mapping),
- ('mapping', lambda: content().mappings, get_resource_or_mapping),
- ('license', lambda: content().licenses, get_license_or_source),
- ('source', lambda: content().indexes, get_license_or_source)
-]:
- def _get_item(identifier, get_dict=get_dict, get_item=get_item):
- return get_item(identifier, get_dict)
-
- bp.add_url_rule(f'/{item_type}s/<string:identifier>', item_type, _get_item)
-
-@bp.route('/query')
-def query():
- url = request.args['url']
-
- return json.dumps(content().query(url))
-
-@bp.route('/sources/<string:identifier>/<path:path>')
-def get_file(identifier, path):
- if identifier not in content().indexes:
- abort(404)
-
- new_uri = f'{current_app._pydrilla_static_resource_uri}{identifier}/{path}'
-
- return redirect(new_uri, code=301)
diff --git a/src/test/__init__.py b/src/test/__init__.py
index e69de29..d382ead 100644
--- a/src/test/__init__.py
+++ b/src/test/__init__.py
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: CC0-1.0
+
+# Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org>
+#
+# Available under the terms of Creative Commons Zero v1.0 Universal.
diff --git a/src/test/development_config.json b/src/test/development_config.json
index 30cf10d..c2382f7 100644
--- a/src/test/development_config.json
+++ b/src/test/development_config.json
@@ -2,7 +2,7 @@
// Hydrilla development config file.
//
-// Copyright (C) 2021 Wojtek Kosior
+// Copyright (C) 2021, 2022 Wojtek Kosior
//
// Available under the terms of Creative Commons Zero v1.0 Universal.
@@ -10,21 +10,18 @@
// unlike config.json, it shall not be included in distribution
{
// Relative paths now get resolved from config's containing direcotry.
- "content_dir": "./example_content",
-
- // Except files from content_dir to be served there (used to redirect
- // clients).
- "static_resource_uri": "http://localhost:8000/",
+ "malcontent_dir": "./sample_malcontent",
// Hydrilla will display this link to users as a place where they can
// obtain sources for its software. This config option is meant to ease
// compliance with the AGPL.
- "hydrilla_sources_uri": "https://git.koszko.org/pydrilla/",
+ "hydrilla_project_url": "https://hydrillabugs.koszko.org/projects/hydrilla/wiki",
- // Make Pydrilla error out on any warning
+ // Make Hydrilla error out on any warning
"werror": true
- // With the below we can make Pydrilla look for missing content items in
+ // With the below we can make Hydrilla look for missing content items in
// another instance instead of just erroring/warning.
- // ,"hydrilla_parent": "https://api.hachette-hydrilla.org/0.2/"
+ // TODO: feature not implemented
+ // ,"hydrilla_parent": "https://api.hydrilla.koszko.org/1.0/"
}
diff --git a/src/test/example_content/hello/bye.js b/src/test/example_content/hello/bye.js
deleted file mode 100644
index e6fd70c..0000000
--- a/src/test/example_content/hello/bye.js
+++ /dev/null
@@ -1,7 +0,0 @@
-// SPDX-License-Identifier: CC0-1.0
-
-// Copyright (C) 2021 Wojtek Kosior
-//
-// Available under the terms of Creative Commons Zero v1.0 Universal.
-
-console.log(bye_message + "apple!");
diff --git a/src/test/example_content/hello/cc0.txt b/src/test/example_content/hello/cc0.txt
deleted file mode 100644
index 0e259d4..0000000
--- a/src/test/example_content/hello/cc0.txt
+++ /dev/null
@@ -1,121 +0,0 @@
-Creative Commons Legal Code
-
-CC0 1.0 Universal
-
- CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
- LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
- ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
- INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
- REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
- PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
- THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
- HEREUNDER.
-
-Statement of Purpose
-
-The laws of most jurisdictions throughout the world automatically confer
-exclusive Copyright and Related Rights (defined below) upon the creator
-and subsequent owner(s) (each and all, an "owner") of an original work of
-authorship and/or a database (each, a "Work").
-
-Certain owners wish to permanently relinquish those rights to a Work for
-the purpose of contributing to a commons of creative, cultural and
-scientific works ("Commons") that the public can reliably and without fear
-of later claims of infringement build upon, modify, incorporate in other
-works, reuse and redistribute as freely as possible in any form whatsoever
-and for any purposes, including without limitation commercial purposes.
-These owners may contribute to the Commons to promote the ideal of a free
-culture and the further production of creative, cultural and scientific
-works, or to gain reputation or greater distribution for their Work in
-part through the use and efforts of others.
-
-For these and/or other purposes and motivations, and without any
-expectation of additional consideration or compensation, the person
-associating CC0 with a Work (the "Affirmer"), to the extent that he or she
-is an owner of Copyright and Related Rights in the Work, voluntarily
-elects to apply CC0 to the Work and publicly distribute the Work under its
-terms, with knowledge of his or her Copyright and Related Rights in the
-Work and the meaning and intended legal effect of CC0 on those rights.
-
-1. Copyright and Related Rights. A Work made available under CC0 may be
-protected by copyright and related or neighboring rights ("Copyright and
-Related Rights"). Copyright and Related Rights include, but are not
-limited to, the following:
-
- i. the right to reproduce, adapt, distribute, perform, display,
- communicate, and translate a Work;
- ii. moral rights retained by the original author(s) and/or performer(s);
-iii. publicity and privacy rights pertaining to a person's image or
- likeness depicted in a Work;
- iv. rights protecting against unfair competition in regards to a Work,
- subject to the limitations in paragraph 4(a), below;
- v. rights protecting the extraction, dissemination, use and reuse of data
- in a Work;
- vi. database rights (such as those arising under Directive 96/9/EC of the
- European Parliament and of the Council of 11 March 1996 on the legal
- protection of databases, and under any national implementation
- thereof, including any amended or successor version of such
- directive); and
-vii. other similar, equivalent or corresponding rights throughout the
- world based on applicable law or treaty, and any national
- implementations thereof.
-
-2. Waiver. To the greatest extent permitted by, but not in contravention
-of, applicable law, Affirmer hereby overtly, fully, permanently,
-irrevocably and unconditionally waives, abandons, and surrenders all of
-Affirmer's Copyright and Related Rights and associated claims and causes
-of action, whether now known or unknown (including existing as well as
-future claims and causes of action), in the Work (i) in all territories
-worldwide, (ii) for the maximum duration provided by applicable law or
-treaty (including future time extensions), (iii) in any current or future
-medium and for any number of copies, and (iv) for any purpose whatsoever,
-including without limitation commercial, advertising or promotional
-purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
-member of the public at large and to the detriment of Affirmer's heirs and
-successors, fully intending that such Waiver shall not be subject to
-revocation, rescission, cancellation, termination, or any other legal or
-equitable action to disrupt the quiet enjoyment of the Work by the public
-as contemplated by Affirmer's express Statement of Purpose.
-
-3. Public License Fallback. Should any part of the Waiver for any reason
-be judged legally invalid or ineffective under applicable law, then the
-Waiver shall be preserved to the maximum extent permitted taking into
-account Affirmer's express Statement of Purpose. In addition, to the
-extent the Waiver is so judged Affirmer hereby grants to each affected
-person a royalty-free, non transferable, non sublicensable, non exclusive,
-irrevocable and unconditional license to exercise Affirmer's Copyright and
-Related Rights in the Work (i) in all territories worldwide, (ii) for the
-maximum duration provided by applicable law or treaty (including future
-time extensions), (iii) in any current or future medium and for any number
-of copies, and (iv) for any purpose whatsoever, including without
-limitation commercial, advertising or promotional purposes (the
-"License"). The License shall be deemed effective as of the date CC0 was
-applied by Affirmer to the Work. Should any part of the License for any
-reason be judged legally invalid or ineffective under applicable law, such
-partial invalidity or ineffectiveness shall not invalidate the remainder
-of the License, and in such case Affirmer hereby affirms that he or she
-will not (i) exercise any of his or her remaining Copyright and Related
-Rights in the Work or (ii) assert any associated claims and causes of
-action with respect to the Work, in either case contrary to Affirmer's
-express Statement of Purpose.
-
-4. Limitations and Disclaimers.
-
- a. No trademark or patent rights held by Affirmer are waived, abandoned,
- surrendered, licensed or otherwise affected by this document.
- b. Affirmer offers the Work as-is and makes no representations or
- warranties of any kind concerning the Work, express, implied,
- statutory or otherwise, including without limitation warranties of
- title, merchantability, fitness for a particular purpose, non
- infringement, or the absence of latent or other defects, accuracy, or
- the present or absence of errors, whether or not discoverable, all to
- the greatest extent permissible under applicable law.
- c. Affirmer disclaims responsibility for clearing rights of other persons
- that may apply to the Work or any use thereof, including without
- limitation any person's Copyright and Related Rights in the Work.
- Further, Affirmer disclaims responsibility for obtaining any necessary
- consents, permissions or other rights required for any use of the
- Work.
- d. Affirmer understands and acknowledges that Creative Commons is not a
- party to this document and has no duty or obligation with respect to
- this CC0 or use of the Work.
diff --git a/src/test/example_content/hello/hello.js b/src/test/example_content/hello/hello.js
deleted file mode 100644
index d87ea7f..0000000
--- a/src/test/example_content/hello/hello.js
+++ /dev/null
@@ -1,7 +0,0 @@
-// SPDX-License-Identifier: CC0-1.0
-
-// Copyright (C) 2021 Wojtek Kosior
-//
-// Available under the terms of Creative Commons Zero v1.0 Universal.
-
-console.log(hello_message + "apple!");
diff --git a/src/test/example_content/hello/index.json b/src/test/example_content/hello/index.json
deleted file mode 100644
index 16843cb..0000000
--- a/src/test/example_content/hello/index.json
+++ /dev/null
@@ -1,302 +0,0 @@
-// SPDX-License-Identifier: CC0-1.0
-
-// Copyright (C) 2021 Wojtek Kosior
-// Available under the terms of Creative Commons Zero v1.0 Universal.
-
-// This is an example index.json file describing Hydrilla site content. As you
-// can see, for storing site content information Hydrilla utilizes JSON with an
-// additional extension in the form of '//' comments support. Hydrilla shall
-// look into each direct subdirectory of the content directory passed to it
-// (via a cofig file option). If such subsirectory contains an index.json file,
-// Hydrilla shall process it.
-
-// An index.json file conveys definitions of site resources, pattern->payload
-// mappings and licenses thereof. The definitions may reference files under
-// index.json's containing directory, using relative paths. This is how scripts,
-// license texts, etc. are included. Unix paths (using '/' as separator) are
-// assumed. It is not allowed for an index.json file to reference files outside
-// its directory.
-
-// Certain objects are allowed to contain a "comment" field. Although '//'
-// comments can be used in index.json files, they will be stripped when the file
-// is processed. If a comment should be included in the JSON definitions served
-// by Hydrilla API, it should be put in a "comment" field of the proper object.
-
-// Various kinds of objects contain version information. Version is always an
-// array of integers, with major version number being the first array item. When
-// applicable, a version is accompanied by a revision field which contains a
-// positive integer. If versions specified by arrays of different length need to
-// be compared, the shorter array gets padded with zeroes on the right. This
-// means that for example version 1.3 could be given as both [1, 3] and
-// [1, 3, 0, 0] (aka 1.3.0.0) and either would mean the same.
-
-{
- // Once our json schema changes, this version will change. Our software will
- // be able to handle both current and older formats thanks to this
- // information present in every index.json file. Different schema versions
- // are always incompatible (e.g. a Hydrilla instance that understands schema
- // version 0.2.0.0 will not understand version 0.2.0.1). Schemas that are
- // backwards-compatible will be denoted by a different revision.
- // We will try to make schema version match the version of Hydrilla software
- // that introduced it.
- "schema_version": [0, 2],
- "schema_revision": 1,
-
- // Copyright of this json file. It's a list of copyright holder information
- // objects. Alternatively, "auto" can be used to make Hydrilla attempt to
- // extract copyright info from the comment at the beginning of the file.
- "copyright": [
- // There can be multiple entries, one for each co-holder of the
- // copyright.
- {
- // There can also be multiple years, like ["2021","2023-2024"].
- "years": ["2021"],
- // Name of the copyright holder. Depending on the situation it can
- // be just the first name, name+surname, a company name, a
- // pseudonym, etc.
- "holder": "Wojtek Kosior"
- }
- ],
-
- // License of this json file. Identifier has to be known to Hydrilla. Can
- // be defined either in the same or another index.json file as a "license"
- // item. It is possible to specify license combinations, like:
- // [["Expat", "and", "Apache-2.0"], "or", "GPL-3.0-only"]
- // Alternatively, "auto" can be used to make Hydrilla attempt to extract
- // copyright info from this file's SPDX license identifier.
- "licenses": "CC0-1.0",
-
- // Where this software/work initially comes from. In some cases (i.e. when
- // the developer of content is also the one who packages it for Hydrilla)
- // this might be the same as "package_url".
- "upstream_url": "https://git.koszko.org/pydrilla/tree/src/test/example_content/hello",
-
- // Where sources for the packaging of this content can be found.
- "package_url": "https://git.koszko.org/pydrilla/tree/src/test/example_content/hello",
-
- // Additional "comment" field can be used if needed.
- // "comment": ""
-
- // List of actual site resources, pattern->payload mappings and licenses.
- // Each of them is represented by an object. Meta-sites and replacement site
- // interfaces will also belong here once they get implemented.
- "definitions": [
- {
- // Value of "type" can currently be one of: "resource", "license"
- // and "mapping". The one we have here, "resource", defines a list
- // of injectable scripts that can be used as a payload or as a
- // dependency of another "resource". In the future CSS style sheets
- // and WASM modules will also be composite parts of a "resource" as
- // scripts are now.
- "type": "resource",
-
- // Used when referring to this resource in "dependencies" list of
- // another resource or in "payload" field of a mapping. Should
- // be consize and can only use a restricted set of characters. It
- // has to match: [-0-9a-zA-Z]
- "identifier": "helloapple",
-
- // "long_name" should be used to specify a user-friendly alternative
- // to an identifier. It should generally not collide with a long
- // name of some resource with a different uuid and also shouldn't
- // change in-between versions of the same resource, although
- // exceptions to both rules might be considered. Long name is
- // allowed to contain arbitrary unicode characters (within reason!).
- "long_name": "Hello Apple",
-
- // Different versions (e.g. 1.0 and 1.3) of the same resource can be
- // defined in separate index.json files. This makes it easy to
- // accidently cause an identifier clash. To help detect it, we
- // require that each resource has a uuid associated with it. Attempt
- // to define multiple resources with the same identifier and
- // different uuids will result in an error being reported. Defining
- // multiple resources with different identifiers and the same uuid
- // is disallowed for now (it may be later permitted if we consider
- // it good for some use-case).
- "uuid": "a6754dcb-58d8-4b7a-a245-24fd7ad4cd68",
-
- // Version should match the upstream version of the resource (e.g. a
- // version of javascript library). Revision number starts as 1 for
- // each new resource version and gets incremented by 1 each time a
- // modification to the packaging of this version is done. Hydrilla
- // will allow multiple definitions of the same resource to load, as
- // long as their versions differ. Thanks to the "version" and
- // "revision" fields, clients will know they have to update certain
- // resource after it has been updated. If multiple definitions of
- // the same version of given resource are provided, an error is
- // generated (even if those definitions differ by revision number).
- "version": [2021, 11, 10],
- "revision": 1,
-
- // A short, meaningful description of what the resource is and/or
- // what it does.
- "description": "greets an apple",
-
- // If needed, a "comment" field can be added to provide some
- // additional information.
- // "comment": "this resource something something",
-
- // One should specify the copyright and licensing terms of the
- // entire package. The format is the same as when specifying these
- // for the index.json file, except "auto" cannot be used.
- "copyright": [{"years": ["2021"], "holder": "Wojtek Kosior"}],
- "licenses": "CC0-1.0",
-
- // Resource's "dependencies" array shall contain names of other
- // resources that (in case of scripts at least) should get evaluated
- // on a page before this resource's own scripts.
- "dependencies": ["hello-message"],
-
- // Array of javascript files that belong to this resource.
- "scripts": [
- {
- // Script name. It should also be a valid file path relative
- // to index.json's containing directory.
- "file": "hello.js",
- // Copyright and license info of a script file can be
- // specified using the same format as in the case of the
- // index.json file itself. If "copyright" or "license" is
- // not provided, Hydrilla assumes it to be the same as the
- // value specified for the resource itself.
- "copyright": "auto",
- "licenses": "auto"
- }, {
- "file": "bye.js"
- }
- ]
- }, {
- "type": "resource",
- "identifier": "hello-message",
- "long_name": "Hello Message",
- "uuid": "1ec36229-298c-4b35-8105-c4f2e1b9811e",
- "version": [2021, 11, 10],
- "revision": 2,
- "description": "define messages for saying hello and bye",
- "copyright": [{"years": ["2021"], "holder": "Wojtek Kosior"}],
- "licenses": "CC0-1.0",
- // If "dependencies" is empty, it can also be omitted.
- // "dependencies": [],
- "scripts": [{"file": "message.js"}]
- }, {
- "type": "mapping",
-
- // Has similar function to resource's identifier. Should be consize
- // and can only use a restricted set of characters. It has to match:
- // [-0-9a-zA-Z]
- // It can be the same as some resource identifier (those are
- // different entities and are treated separately).
- "identifier": "helloapple",
-
- // "long name" and "uuid" have the same meaning as in the case of
- // resources. Uuids of a resource and a mapping can technically be
- // the same, but it is recommended to avoid even this kind of
- // repetition.
- "long_name": "Hello Apple",
- "uuid": "54d23bba-472e-42f5-9194-eaa24c0e3ee7",
-
- // "version" differs from its counterpart in resource in that it has
- // no accompanying revision number.
- "version": [2021, 11, 10],
-
- // A short, meaningful description of what the mapping does.
- "description": "causes apple to get greeted on Hydrillabugs issue tracker",
-
- // A comment, if necessary.
- // "comment": "blah blah because bleh"
-
- // The "payloads" array specifies, which payloads are to be
- // applied to which URLs.
- "payloads": [
- {
- // Should be a valid Haketilo URL pattern.
- "pattern": "https://hydrillabugs.koszko.org/***",
- // Should be the name of an existing resource. The resource
- // may, but doesn't have to, be defined in the same
- // index.json file.
- "payload": "helloapple"
- },
- // More associations may follow.
- {
- "pattern": "https://hachettebugs.koszko.org/***",
- "payload": "helloapple"
- }
- ]
- }, {
- "type": "license",
-
- // Will be used to refer to this license in other places. Should
- // match the SPDX identifier if possible (despite that, please use
- // "Expat" instead of "MIT" where possible). Unlike other definition
- // types, "license" does not allow uuids to be used to avoid license
- // id clashes. Any attempt to define multiple licenses with the same
- // id will result in an error being reported.
- "identifier": "CC0-1.0",
-
- // This long name must also be unique among all license definitions.
- "long_name": "Creative Commons Zero v1.0 Universal",
-
- // We don't use "version" in license definitions. We do, however,
- // use "revision" to indicate changes to the packaging of a license.
- // Revision should be increased by 1 at each such change.
- "revision": 2,
-
- "legal_text": [
- // Legal text can be available in multiple forms. Usually just
- // plain .txt file is enough, though.
- {
- // "format" should match an agreed-upon MIME type if
- // possible.
- "format": "text/plain",
- // Value of "file" should be a path relative to the
- // directory of index.json file.
- "file": "cc0.txt"
- }
- // If a markdown version of CC0 was provided, we could add this:
- // {
- // "format": "text/markdown",
- // "file": "cc0.md"
- // }
- ]
-
- // If needed, a "comment" field can be added to clarify something.
- // For example, when definind "Expat" license we could add:
- //
- // "comment": "Expat license is the most common form of the license often called \"MIT\". Many other forms of \"MIT\" license exist. Here the name \"Expat\" is used to avoid ambiguity."
-
- // If applicable, a "notice" can be included. It shall then be an
- // object with "file" field containing a path (relative to
- // index.json's directory) to a plain text file with that notice.
- //
- // "notice": {
- // "file": "license-notice.txt"
- // }
- //
- // This is needed for example in case of GNU licenses (both with and
- // without exceptions). For instance,
- // "GPL-3.0-or-later-with-html-exception" could have the following
- // in its notice file:
- //
- // This program is free software: you can redistribute it and/or
- // modify it under the terms of the GNU General Public License as
- // published by the Free Software Foundation, either version 3 of
- // the License, or (at your option) any later version.
- //
- // This program is distributed in the hope that it will be useful,
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- // GNU General Public License for more details.
- //
- // As a special exception to the GPL, any HTML file which merely
- // makes function calls to this code, and for that purpose
- // includes it by reference shall be deemed a separate work for
- // copyright law purposes. If you modify this code, you may extend
- // this exception to your version of the code, but you are not
- // obligated to do so. If you do not wish to do so, delete this
- // exception statement from your version.
- //
- // You should have received a copy of the GNU General Public License
- // along with this program. If not, see
- // <https://www.gnu.org/licenses/>.
- }
- ]
-}
diff --git a/src/test/example_content/hello/message.js b/src/test/example_content/hello/message.js
deleted file mode 100644
index da5966d..0000000
--- a/src/test/example_content/hello/message.js
+++ /dev/null
@@ -1,8 +0,0 @@
-// SPDX-License-Identifier: CC0-1.0
-
-// Copyright (C) 2021 Wojtek Kosior
-//
-// Available under the terms of Creative Commons Zero v1.0 Universal.
-
-var hello_message = "hello, "
-var bye_message = "bye, "
diff --git a/src/test/source-package-example b/src/test/source-package-example
new file mode 160000
+Subproject e571b3911f198e3feccc8d06390c79131f9cf09
diff --git a/src/test/test_pydrilla.py b/src/test/test_pydrilla.py
deleted file mode 100644
index 50757a7..0000000
--- a/src/test/test_pydrilla.py
+++ /dev/null
@@ -1,153 +0,0 @@
-# SPDX-License-Identifier: AGPL-3.0-or-later
-
-# Repository tests
-#
-# This file is part of Hydrilla
-#
-# Copyright (C) 2021 Wojtek Kosior
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
-#
-#
-# I, Wojtek Kosior, thereby promise not to sue for violation of this
-# file's license. Although I request that you do not make use this code
-# in a proprietary program, I am not going to enforce this in court.
-
-import pytest
-import sys
-import shutil
-from pathlib import Path
-from hashlib import sha256
-from os import mkdir, unlink, environ
-import json
-from markupsafe import escape
-
-from pydrilla import pydrilla, create_app
-
-test_dir = Path(__file__).resolve().parent
-packages_dir = test_dir.parent
-development_config_path = test_dir / 'development_config.json'
-example_content_dir = test_dir / 'example_content'
-
-@pytest.fixture
-def client():
- app = create_app(development_config_path, flask_config={'TESTING': True})
-
- with app.test_client() as client:
- yield client
-
-@pytest.fixture
-def development_config():
- with open(development_config_path) as config_file:
- yield json.loads(pydrilla.strip_json_comments(config_file.read()))
-
-def test_api_basic(client, development_config):
- def verify_sha256sum(source_name, file_object):
- with open(example_content_dir / source_name / file_object['file'],
- mode='rb') as file:
- assert sha256(file.read()).digest().hex() == file_object['sha256']
-
- response = client.get('/')
- assert b'html' in response.data
- sources_uri = development_config['hydrilla_sources_uri']
- assert escape(sources_uri).encode() in response.data
-
- for item_type in ['mapping', 'resource']:
- response = client.get(f'/{item_type}s/helloapple')
- assert response.status_code == 200
- definition = json.loads(response.data.decode())
- assert definition['type'] == item_type
- assert definition['source_name'] == 'hello'
- assert definition['version'] == [2021, 11, 10]
- if item_type == 'resource':
- assert type(definition['scripts']) is list
- assert len(definition['scripts']) > 0
- for script_file in definition['scripts']:
- verify_sha256sum(definition['source_name'], script_file)
-
- response = client.get(f'/{item_type}s/helloapple?ver=2021.11.10.0')
- assert response.status_code == 200
- assert definition == json.loads(response.data.decode())
-
- response = client.get(f'/{item_type}s/helloapple?ver=2021.11.10.999')
- assert response.status_code == 404
-
- response = client.get(f'/{item_type}s/helloapple?ver=random_bad_input')
- assert response.status_code == 400
-
- response = client.get(f'/{item_type}s/random-bad-identifier')
- assert response.status_code == 404
-
- response = client.get(f'/{item_type}s/helloapple?ver=all')
- assert response.status_code == 200
- definitions = json.loads(response.data.decode())
- assert type(definitions) is list
- assert all([d['type'] == item_type for d in definitions])
- assert any([d['version'] == [2021, 11, 10] for d in definitions])
-
- response = client.get('/licenses/CC0-1.0')
- assert response.status_code == 200
- definition = json.loads(response.data.decode())
- assert definition['type'] == 'license'
- assert definition['long_name'] == 'Creative Commons Zero v1.0 Universal'
- assert definition['source_name'] == 'hello'
-
- assert type(definition['legal_text']) is list
- assert len(definition['legal_text']) > 0
- for license_file in definition['legal_text']:
- verify_sha256sum(definition['source_name'], license_file)
-
- response = client.get('/licenses/random-bad-identifier')
- assert response.status_code == 404
-
- response = client.get('/sources/hello')
- assert response.status_code == 200
- definition = json.loads(response.data.decode())
-
- assert definition['source_name'] == 'hello'
- assert type(definition['schema_version']) is list
-
- response = client.get('/sources/random-bad-identifier')
- assert response.status_code == 404
-
- response = client.get('/query?url=https://hachettebugs.koszko.org')
- assert response.status_code == 200
- definitions = json.loads(response.data.decode())
- assert type(definitions) is list
- assert all([d['type'] == 'mapping' for d in definitions])
- assert any([p['pattern'] == 'https://hachettebugs.koszko.org/***'
- for d in definitions for p in d['payloads']])
-
- response = client.get('/query?url=https://random_bad_domain.org/something')
- assert response.status_code == 200
- definitions = json.loads(response.data.decode())
- assert definitions == []
-
- resource_uri = development_config['static_resource_uri']
- response = client.get('/sources/hello/hello.js')
- assert response.status_code == 301
- assert response.location == resource_uri + 'hello/hello.js'
- response = client.get('/sources/random-bad-identifier/hello.js')
- assert response.status_code == 404
- response = client.get('/sources/hello/random/bad/path')
- assert response.status_code == 301
- assert response.location == resource_uri + 'hello/random/bad/path'
-
-def test_normalize_version():
- assert pydrilla.normalize_version([4, 5, 3, 0, 0]) == [4, 5, 3]
- assert pydrilla.normalize_version([1, 0, 5, 0]) == [1, 0, 5]
- assert pydrilla.normalize_version([3, 3]) == [3, 3]
-
-def test_strip_json_comments(development_config):
- assert development_config['static_resource_uri'] == 'http://localhost:8000/'
diff --git a/src/test/test_server.py b/src/test/test_server.py
new file mode 100644
index 0000000..def48dc
--- /dev/null
+++ b/src/test/test_server.py
@@ -0,0 +1,199 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+# Repository tests
+#
+# This file is part of Hydrilla
+#
+# Copyright (C) 2021, 2022 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+import pytest
+import sys
+import shutil
+import json
+
+from pathlib import Path
+from hashlib import sha256
+from tempfile import TemporaryDirectory
+from typing import Iterable, Callable
+
+from flask.testing import FlaskClient
+from markupsafe import escape
+
+from hydrilla import util as hydrilla_util
+from hydrilla.builder import Build
+from hydrilla.server import create_app
+
+here = Path(__file__).resolve().parent
+config_path = here / 'development_config.json'
+source_path = here / 'source-package-example'
+
+@pytest.fixture(scope="session")
+def default_setup() -> Iterable[dict[str, Path]]:
+ with TemporaryDirectory() as tmpdir:
+ setup = {
+ 'malcontent_dir': Path(tmpdir) / 'sample_malcontent',
+ 'config_path': Path(tmpdir) / 'config.json',
+ 'containing_dir': Path(tmpdir)
+ }
+
+ setup['config_path'].symlink_to(config_path)
+
+ build = Build(source_path, Path('index.json'))
+ build.write_package_files(setup['malcontent_dir'])
+
+ yield setup
+
+@pytest.fixture(scope="session")
+def client(default_setup: dict[str, Path]) -> Iterable[FlaskClient]:
+ """Provide app client that serves the object from built sample package."""
+ app = create_app(default_setup['config_path'],
+ flask_config={'TESTING': True})
+
+ with app.test_client() as client:
+ yield client
+
+@pytest.fixture(scope="session")
+def development_config(default_setup) -> Iterable[dict]:
+ """Provide the contents of JSON config file fed to the client."""
+ contents = default_setup['config_path'].read_text()
+ yield json.loads(hydrilla_util.strip_json_comments(contents))
+
+def test_project_url(client: FlaskClient, development_config: dict) -> None:
+ """Fetch index.html and verify project URL from config is present there."""
+ response = client.get('/')
+ assert b'html' in response.data
+ project_url = development_config['hydrilla_project_url']
+ assert escape(project_url).encode() in response.data
+
+@pytest.mark.parametrize('item_type', ['resource', 'mapping'])
+def test_get_newest(client: FlaskClient, item_type: str) -> None:
+ """
+ Verify that
+ GET '/{item_type}/{item_identifier}.json'
+ returns proper definition that is also served at:
+ GET '/{item_type}/{item_identifier}/{item_version}'
+ """
+ response = client.get(f'/{item_type}/helloapple.json')
+ assert response.status_code == 200
+ definition = json.loads(response.data.decode())
+ assert definition['type'] == item_type
+ assert definition['identifier'] == 'helloapple'
+
+ response = client.get(f'/{item_type}/helloapple/2021.11.10')
+ assert response.status_code == 200
+ assert definition == json.loads(response.data.decode())
+
+ hydrilla_util.validator_for(f'api_{item_type}_description-1.schema.json')\
+ .validate(definition)
+
+@pytest.mark.parametrize('item_type', ['resource', 'mapping'])
+def test_get_nonexistent(client: FlaskClient, item_type: str) -> None:
+ """
+ Verify that attempts to GET a JSON definition of a nonexistent item or item
+ version result in 404.
+ """
+ response = client.get(f'/{item_type}/nonexistentapple.json')
+ assert response.status_code == 404
+ response = client.get(f'/{item_type}/helloapple/1.2.3.999')
+ assert response.status_code == 404
+
+@pytest.mark.parametrize('item_type', ['resource', 'mapping'])
+def test_file_refs(client: FlaskClient, item_type: str) -> None:
+ """
+ Verify that files referenced by definitions are accessible under their
+ proper URLs and that their hashes match.
+ """
+ response = client.get(f'/{item_type}/helloapple/2021.11.10')
+ assert response.status_code == 200
+ definition = json.loads(response.data.decode())
+
+ for file_ref in [*definition.get('scripts', []),
+ *definition['source_copyright']]:
+ hash_sum = file_ref["sha256"]
+ response = client.get(f'/file/sha256-{hash_sum}')
+
+ assert response.status_code == 200
+ assert sha256(response.data).digest().hex() == hash_sum
+
+def test_empty_query(client: FlaskClient) -> None:
+ """
+ Verify that querying mappings for URL gives an empty list when there're no
+ matches.
+ """
+ response = client.get(f'/query?url=https://nonexiste.nt/example')
+ assert response.status_code == 200
+
+ response_object = json.loads(response.data.decode())
+
+ assert response_object['mappings'] == []
+
+ hydrilla_util.validator_for('api_query_result-1.schema.json')\
+ .validate(response_object)
+
+def test_query(client: FlaskClient) -> None:
+ """
+ Verify that querying mappings for URL gives a list with reference(s) to the
+ matching mapping(s).
+ """
+ response = client.get(f'/query?url=https://hydrillabugs.koszko.org/')
+ assert response.status_code == 200
+
+ response_object = json.loads(response.data.decode())
+
+ assert response_object['mappings'] == [{
+ 'identifier': 'helloapple',
+ 'long_name': 'Hello Apple',
+ 'version': [2021, 11, 10]
+ }]
+
+ hydrilla_util.validator_for('api_query_result-1.schema.json')\
+ .validate(response_object)
+
+def test_source(client: FlaskClient) -> None:
+ """Verify source descriptions are properly served."""
+ response = client.get(f'/source/hello.json')
+ assert response.status_code == 200
+
+ description = json.loads(response.data.decode())
+ assert description['source_name'] == 'hello'
+
+ assert sorted([d['identifier'] for d in description['definitions']]) == \
+ ['hello-message', 'helloapple', 'helloapple']
+
+ zipfile_hash = description['source_archives']['zip']['sha256']
+ response = client.get(f'/source/hello.zip')
+ assert sha256(response.data).digest().hex() == zipfile_hash
+
+ hydrilla_util.validator_for('api_source_description-1.schema.json')\
+ .validate(description)
+
+def test_missing_source(client: FlaskClient) -> None:
+ """Verify requests for nonexistent sources result in 404."""
+ response = client.get(f'/source/nonexistent.json')
+ assert response.status_code == 404
+
+ response = client.get(f'/source/nonexistent.zip')
+ assert response.status_code == 404
+
+def test_normalize_version():
+ assert hydrilla_util.normalize_version([4, 5, 3, 0, 0]) == [4, 5, 3]
+ assert hydrilla_util.normalize_version([1, 0, 5, 0]) == [1, 0, 5]
+ assert hydrilla_util.normalize_version([3, 3]) == [3, 3]