diff options
author | Wojtek Kosior <koszko@koszko.org> | 2021-11-13 20:33:57 +0100 |
---|---|---|
committer | Wojtek Kosior <koszko@koszko.org> | 2021-11-13 20:33:57 +0100 |
commit | a14ab0a7601ff5c197fe43d42410d8ed6bfd26a8 (patch) | |
tree | befa6fc0b1de552bae1e2a832a25cb0dd8f58412 /src | |
download | haketilo-hydrilla-a14ab0a7601ff5c197fe43d42410d8ed6bfd26a8.tar.gz haketilo-hydrilla-a14ab0a7601ff5c197fe43d42410d8ed6bfd26a8.zip |
initial commit
Diffstat (limited to 'src')
-rw-r--r-- | src/conftest.py | 0 | ||||
-rw-r--r-- | src/pydrilla/__init__.py | 1 | ||||
-rw-r--r-- | src/pydrilla/config.json | 13 | ||||
-rw-r--r-- | src/pydrilla/development_config.json | 24 | ||||
-rw-r--r-- | src/pydrilla/locales/en/LC_MESSAGES/pydrilla.po | 127 | ||||
-rw-r--r-- | src/pydrilla/pydrilla.py | 700 | ||||
-rw-r--r-- | src/pydrilla/templates/base.html | 94 | ||||
-rw-r--r-- | src/pydrilla/templates/index.html | 32 | ||||
-rw-r--r-- | src/pydrilla_dev_helper.py | 293 | ||||
-rw-r--r-- | src/test/__init__.py | 0 | ||||
-rw-r--r-- | src/test/test_pydrilla.py | 90 |
11 files changed, 1374 insertions, 0 deletions
diff --git a/src/conftest.py b/src/conftest.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/conftest.py diff --git a/src/pydrilla/__init__.py b/src/pydrilla/__init__.py new file mode 100644 index 0000000..8d1565b --- /dev/null +++ b/src/pydrilla/__init__.py @@ -0,0 +1 @@ +from .pydrilla import create_app diff --git a/src/pydrilla/config.json b/src/pydrilla/config.json new file mode 100644 index 0000000..a6c4bf0 --- /dev/null +++ b/src/pydrilla/config.json @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: CC0-1.0 + +// Example Hydrilla config file. +// +// Copyright (C) 2021 Wojtek Kosior +// +// Available under the terms of Creative Commons Zero v1.0 Universal. + +{ + "content_dir": "/var/lib/hydrilla/content", + "static_resource_uri": "http://localhost:8000/", + "try_configs": ["/etc/pydrilla/config.json"] +} diff --git a/src/pydrilla/development_config.json b/src/pydrilla/development_config.json new file mode 100644 index 0000000..1660edb --- /dev/null +++ b/src/pydrilla/development_config.json @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: CC0-1.0 + +// Hydrilla development config file. +// +// Copyright (C) 2021 Wojtek Kosior +// +// Available under the terms of Creative Commons Zero v1.0 Universal. + +// this config is meant to be used in development environment; +// unlike config.json, it shall not be included in distribution +{ + "content_dir": "./example_content", + + // Expect files from content_dir to be served there (used to redirect + // clients). + "static_resource_uri": "http://localhost:8000/", + + // Make Pydrilla error out on any warning + "werror": true + + // With the below we can make Pydrilla look for missing content items in + // another instance instead of just erroring/warning. 
+ // ,"hydrilla_parent": "https://api.hachette-hydrilla.org/0.2/" +} diff --git a/src/pydrilla/locales/en/LC_MESSAGES/pydrilla.po b/src/pydrilla/locales/en/LC_MESSAGES/pydrilla.po new file mode 100644 index 0000000..f9e6a82 --- /dev/null +++ b/src/pydrilla/locales/en/LC_MESSAGES/pydrilla.po @@ -0,0 +1,127 @@ +# SPDX-License-Identifier: CC0-1.0 + +# English localization +# +# This file is part of Hydrilla +# +# Copyright (C) 2021 Wojtek Kosior +# +# This file is free cultural work: you can redistribute it with or +# without modification under the terms of the CC0 1.0 Universal License +# as published by the Creative Commons Corporation. +# +# This file is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# CC0 1.0 Universal License for more details. + +msgid "" +msgstr "" +"Project-Id-Version: Hydrilla 0.2\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2021-11-13 19:03+0100\n" +"PO-Revision-Date: 2021-11-06 08:42+0100\n" +"Last-Translator: Wojtek Kosior <koszko@koszko.org>\n" +"Language-Team: English\n" +"Language: en\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +#: pydrilla.py:97 +msgid "path_is_absolute_{}" +msgstr "Provided path '{}' is absolute." + +#: pydrilla.py:104 +#, python-brace-format +msgid "not_implemented_{what}_{where}" +msgstr "" +"Attempt to use '{what}' in '{where}' but this feature is not yet implemented." + +#: pydrilla.py:194 +#, python-brace-format +msgid "uuid_mismatch_{identifier}" +msgstr "Two different uuids were specified for item '{identifier}'." + +#: pydrilla.py:201 +#, python-brace-format +msgid "version_clash_{identifier}_{version}" +msgstr "Version '{version}' specified more than once for item '{identifier}'." + +#: pydrilla.py:297 pydrilla.py:309 +msgid "invalid_URL_{}" +msgstr "Invalid URL/pattern: '{}'." 
+ +#: pydrilla.py:301 +msgid "disallowed_protocol_{}" +msgstr "Disallowed protocol: '{}'." + +#: pydrilla.py:391 +msgid "license_clash_{}" +msgstr "License '{}' defined more than once." + +#: pydrilla.py:408 +msgid "source_name_clash_{}" +msgstr "Source name '{}' used more than once." + +#: pydrilla.py:426 +#, python-format +msgid "couldnt_load_definition_from_%s" +msgstr "Couldn't load definition from '%s'." + +#: pydrilla.py:442 +#, python-format +msgid "no_index_license_%(source)s_%(lic)s" +msgstr "Unknown license '%(lic)s' used by index.json of '%(source)s'." + +#: pydrilla.py:449 +#, python-format +msgid "no_resource_license_%(resource)s_%(ver)s_%(lic)s" +msgstr "" +"Unknown license '%(lic)s' used by resource '%(resource)s', version '%(ver)s'." + +#: pydrilla.py:451 +#, python-format +msgid "no_mapping_license_%(mapping)s_%(ver)s_%(lic)s" +msgstr "" +"Unknown license '%(lic)s' used by mapping '%(mapping)s', version '%(ver)s'." + +#: pydrilla.py:474 +#, python-format +msgid "no_dep_%(resource)s_%(ver)s_%(dep)s" +msgstr "" +"Unknown dependency '%(dep)s' of resource '%(resource)s', version '%(ver)s'." + +#: pydrilla.py:484 +#, python-format +msgid "no_payload_%(mapping)s_%(ver)s_%(payload)s" +msgstr "" +"Unknown payload '%(payload)s' of mapping '%(mapping)s', version '%(ver)s'." + +#: pydrilla.py:512 +#, python-format +msgid "couldnt_register_%(mapping)s_%(ver)s_%(pattern)s" +msgstr "" +"Couldn't register mapping '%(mapping)s', version '%(ver)s' (pattern " +"'%(pattern)s')." + +#: pydrilla.py:566 +msgid "content_dir_path_not_dir" +msgstr "Provided \"content_dir\" path does not name a directory." + +#: pydrilla.py:578 +#, python-format +msgid "couldnt_load_content_from_%s" +msgstr "Couldn't load content from '%s'." + +#: pydrilla.py:603 +msgid "config_key_absent_{}" +msgstr "Config key \"{}\" not provided." + +#: templates/index.html:4 +msgid "hydrilla_welcome" +msgstr "Welcome to Hydrilla!" 
+ +#: templates/base.html:55 templates/base.html:61 +msgid "hydrilla" +msgstr "Hydrilla" diff --git a/src/pydrilla/pydrilla.py b/src/pydrilla/pydrilla.py new file mode 100644 index 0000000..caf05a2 --- /dev/null +++ b/src/pydrilla/pydrilla.py @@ -0,0 +1,700 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + +# Main repository logic. +# +# This file is part of Hydrilla +# +# Copyright (C) 2021 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. 
from flask import Flask, Blueprint, current_app, url_for, abort, request
from jinja2 import Environment, PackageLoader
import re
#from hashlib import sha256
import os
import pathlib
import json
import gettext
import logging

# Schema version that process_index() requires of every loaded index.json.
SCHEMA_VERSION = [0, 2]

# Matches one line of JSON-with-comments. Group 1 is the part of the line that
# precedes a comment, group 2 is the comment introducer (or empty at
# end-of-line).
strip_comment_re = re.compile(r'''
^ # match from the beginning of each line
( # catch the part before '//' comment
  (?: # this group matches either a string or a single out-of-string character
    [^"/] |
    "
    (?: # this group matches any in-a-string character
      [^"\\] |          # match any normal character
      \\[^u] |          # match any escaped character like '\f' or '\n'
      \\u[a-fA-F0-9]{4} # match an escape
    )*
    "
  )*
)
# expect either end-of-line or a comment:
# * unterminated strings will cause matching to fail
# * bad comment (with '/' instead of '//') will be indicated by second group
#   having length 1 instead of 2 or 0
(//?|$)
''', re.VERBOSE)

def strip_json_comments(text):
    '''
    Return text with '//' comments removed from every line.

    Lines are processed independently, so the number of lines is preserved.
    Slashes inside double-quoted strings are left alone. A line containing an
    unterminated string is passed through unchanged so that the json module
    can report the error itself.

    Raises json.JSONDecodeError if a single '/' (instead of '//') appears
    outside of a string.
    '''
    processed = 0 # offset of the current line within text
    stripped_text = []
    for line in text.split('\n'):
        match = strip_comment_re.match(line)

        if match is None: # unterminated string
            # ignore this error, let json module report it
            stripped = line
        elif len(match[2]) == 1:
            raise json.JSONDecodeError('bad comment', text,
                                       processed + len(match[1]))
        else:
            stripped = match[1]

        stripped_text.append(stripped)
        processed += len(line) + 1 # + 1 accounts for the newline character

    return '\n'.join(stripped_text)

here = pathlib.Path(__file__).resolve().parent

bp = Blueprint('bp', __package__)

def load_config(config_path):
    '''
    Load the JSON config file at config_path together with all config files
    it references and return the merged settings as a dict.

    A config may list further files under "try_configs" (a file that fails to
    load is silently skipped) and "use_configs" (a failure to load is an
    error). Values from files loaded later override earlier ones. Both keys
    are removed from the returned dict. No cycle detection is performed.

    Any exception raised while reading or parsing a required file propagates
    to the caller.
    '''
    config = {}
    # Two parallel stacks: paths still to be loaded and, for each of them,
    # whether failing to load it is acceptable.
    to_load = [config_path]
    failures_ok = [False]

    while to_load:
        path = to_load.pop()
        can_fail = failures_ok.pop()

        try:
            # Open the file popped from the stack. Opening config_path here
            # would reload the top-level config on every iteration.
            with open(path) as config_file:
                new_config = json.loads(strip_json_comments(config_file.read()))
        except Exception as e:
            if can_fail:
                continue
            raise e from None

        config.update(new_config)

        for key, failure_ok in [('try_configs', True), ('use_configs', False)]:
            paths = new_config.get(key, [])
            # Push in reverse so that the first listed path is popped first.
            to_load.extend(reversed(paths))
            failures_ok.extend([failure_ok] * len(paths))

    for key in ['try_configs', 'use_configs']:
        config.pop(key, None)

    return config

def get_content_file_path(path):
    '''
    Convert a '/'-separated relative path string into a pathlib.Path that
    uses the platform's native separator.

    Raises ValueError if the resulting path is absolute.
    '''
    if os.path.sep != '/':
        # str.replace() returns a new string, so the result has to be kept.
        path = path.replace('/', os.path.sep)

    path = pathlib.Path(path)
    if path.is_absolute():
        raise ValueError(_('path_is_absolute_{}').format(path))

    return path

class MyNotImplError(NotImplementedError):
    '''Raised when a planned but not-yet-completed feature is used.'''
    def __init__(self, what, where):
        super().__init__(_('not_implemented_{what}_{where}')
                         .format(what=what, where=where))

def normalize_version(ver):
    '''
    ver is an array of integers. Strip right-most zeroes from ver.

    Returns a *new* array. Doesn't modify its argument.
    '''
    new_len = 0
    for i, num in enumerate(ver):
        if num != 0:
            new_len = i + 1 # remember position just past the last non-zero

    return ver[:new_len]

def parse_version(ver_str):
    '''
    Convert ver_str into an array representation, e.g. for ver_str="4.6.13.0"
    return [4, 6, 13, 0].
    '''
    return [int(num) for num in ver_str.split('.')]

def version_string(ver, rev=None):
    '''
    ver is an array of integers. rev is an optional integer. Produce string
    representation of version (optionally with revision number), like:
        1.2.3-5
    No version normalization is performed.
    '''
    return '.'.join([str(n) for n in ver]) + ('' if rev is None else f'-{rev}')

### pad_versions() and compare_versions() likely won't be needed

# def pad_versions(ver1, ver2):
#     '''
#     Each of the arguments is an array of integers. If one of the arrays is
#     shorter than the other, right-pad it with zeroes to make it the same
#     length as the other one.

#     Returns a tuple of *new* arrays. Doesn't modify its arguments.
+# ''' +# if len(ver1) < len(ver2): +# ver2, ver1 = pad_versions(ver2, ver1) +# else: +# ver2 = [*ver2, *([0] * (len(ver1) - len(ver2)))] +# ver1 = [*ver1] + +# return ver1, ver2 + +# def compare_versions(ver1, ver2, rev1=1, rev2=1): +# ''' +# ver1 and ver2 are arrays of integers, with major version number being the +# first array item. If versions specified by arrays of different length need +# to be compared, the shorter array gets padded with zeroes on the right. +# This means that for example version 1.3 could be given as both [1, 3] and +# [1, 3, 0, 0] (aka 1.3.0.0) and either would mean the same. + +# rev1 and rev2 are revision numbers. They are appended to padded ver1 and +# ver2 arrays respectively before comparison. + +# This function returns -1, 0 or 1 when the first ver1 designates +# respectively a version lower than, equal to or greater than the one in +# ver2. +# ''' +# ver1, ver2 = pad_versions(ver1, ver2) +# ver1.append(rev1) +# ver2.append(rev2) + +# for n1, n2 in zip(ver1, ver2): +# if n1 < n2: +# return -1 +# if n1 > n2: +# return 1 + +# return 0 + +class VersionedContentItem: + '''Stores definitions of multiple versions of website content item.''' + def __init__(self): + self.uuid = None + self.identifier = None + self.by_version = {} + self.known_versions = [] + + def register_item(self, item): + if self.identifier is None: + self.identifier = item['identifier'] + self.uuid = item['uuid'] + elif self.uuid != item['uuid']: + raise ValueError(_('uuid_mismatch_{identifier}') + .format(identifier=self.identifier)) + + ver = item['version'] + ver_str = version_string(ver) + + if ver_str in self.by_version: + raise ValueError(_('version_clash_{identifier}_{version}') + .format(identifier=self.identifier, + version=ver_str)) + + self.by_version[ver_str] = item + self.known_versions.append(ver) + +class PatternTreeNode: + ''' + "Pattern Tree" is how we refer to the data structure used for querying + Haketilo patterns. 
Those look like 'https://*.example.com/ab/***'. The goal + is to make it possible for given URL to quickly retrieve all known patterns + that match it. + ''' + def __init__(self): + self.wildcard_matches = [None, None, None] + self.literal_match = None + self.children = {} + + def search(self, segments): + ''' + Yields all matches of this segments sequence against the tree that + starts at this node. Results are produces in order from greatest to + lowest pattern specificity. + ''' + nodes = [self] + + for segment in segments: + next_node = nodes[-1].children.get(segment) + if next_node is None: + break + + nodes.append(next_node) + + nsegments = len(segments) + cond_literal = lambda: len(nodes) == nsegments + cond_wildcard = [ + lambda: len(nodes) + 1 == nsegments and segments[-1] != '*', + lambda: len(nodes) + 1 < nsegments, + lambda: len(nodes) + 1 != nsegments or segments[-1] != '***' + ] + + while nodes: + node = nodes.pop() + + for item, condition in [(node.literal_match, cond_literal), + *zip(node.wildcard_matches, cond_wildcard)]: + if item is not None and condition(): + yield item + + def add(self, segments, item_instantiator): + ''' + Make item queryable through (this branch of) the Pattern Tree. If there + was not yet any item associated with the tree path designated by + segments, create a new one using item_instantiator() function. Return + all items matching this path (both the ones that existed and the ones + just created). 
+ ''' + node = self + + for i, segment in enumerate(segments): + wildcards = node.wildcard_matches + + child = node.children.get(segment) or PatternTreeNode() + node.children[segment] = child + node = child + + if node.literal_match is None: + node.literal_match = item_instantiator() + + if segment not in ('*', '**', '***'): + return [node.literal_match] + + if wildcards[len(segment) - 1] is None: + wildcards[len(segment) - 1] = item_instantiator() + + return [node.literal_match, wildcards[len(segment) - 1]] + +proto_regex = re.compile(r'^(?P<proto>\w+)://(?P<rest>.*)$') +user_re = r'[^/?#@]+@' # r'(?P<user>[^/?#@]+)@' # discarded for now +query_re = r'\??[^#]*' # r'\??(?P<query>[^#]*)' # discarded for now +domain_re = r'(?P<domain>[^/?#]+)' +path_re = r'(?P<path>[^?#]*)' +http_regex = re.compile(f'{domain_re}{path_re}{query_re}.*') +ftp_regex = re.compile(f'(?:{user_re})?{domain_re}{path_re}.*') + +class UrlError(ValueError): + pass + +class DeconstructedUrl: + '''Represents a deconstructed URL or URL pattern''' + def __init__(self, url): + self.url = url + + match = proto_regex.match(url) + if not match: + raise UrlError(_('invalid_URL_{}').format(url)) + + self.proto = match.group('proto') + if self.proto not in ('http', 'https', 'ftp'): + raise UrlError(_('disallowed_protocol_{}').format(proto)) + + if self.proto == 'ftp': + match = ftp_regex.match(match.group('rest')) + elif self.proto in ('http', 'https'): + match = http_regex.match(match.group('rest')) + + if not match: + raise UrlError(_('invalid_URL_{}').format(url)) + + self.domain = match.group('domain').split('.') + self.domain.reverse() + self.path = [*filter(None, match.group('path').split('/'))] + +class MappingItem: + ''' + A mapping, together with one of its patterns, as stored in Pattern Tree. 
+ ''' + def __init__(self, pattern, mapping): + self.pattern = pattern + self.mapping = mapping + + def register(self, patterns_by_proto): + ''' + Make self queryable through the Pattern Tree that starts with the + protocols dictionary passed in the argument. + ''' + deco = DeconstructedUrl(self.pattern) + + domain_tree = patterns_by_proto.get(deco.proto) or PatternTreeNode() + patterns_by_proto[deco.proto] = domain_tree + + for path_tree in domain_tree.add(deco.domain, PatternTreeNode): + for match_list in path_tree.add(deco.path, list): + match_list.append(self) + +class Content: + '''Stores serveable website content.''' + def __init__(self): + self.resources = {} + self.mappings = {} + self.licenses = {} + self.indexes = {} + self.definition_processors = { + 'resource': self.process_resource_or_mapping, + 'mapping': self.process_resource_or_mapping, + 'license': self.process_license + } + self.patterns_by_proto = {} + + @staticmethod + def register_item(dict, item): + ''' + Helper function used to add a versioned item definition to content + data structures. + ''' + identifier = item['identifier'] + versioned_item = dict.get(identifier) + if versioned_item is None: + versioned_item = VersionedContentItem() + dict[identifier] = versioned_item + + versioned_item.register_item(item) + + @staticmethod + def _process_copyright_and_license(definition): + '''Helper function used by other process_*() methods.''' + for field in ['copyright', 'licenses']: + if definition[field] == 'auto': + raise MyNotImplError(f'"{{field}}": "auto"', + definition['source_name']) + + def process_resource_or_mapping(self, definition, index): + ''' + Sanitizes, autocompletes and registers serveable mapping/resource + definition. 
+ ''' + definition['version'] = normalize_version(definition['version']) + + if definition['type'] == 'resource': + self._process_copyright_and_license(definition) + definition['dependencies'] = definition.get('dependencies', []) + self.register_item(self.resources, definition) + else: + self.register_item(self.mappings, definition) + + def process_license(self, license, index): + '''Sanitizes and registers serveable license definition.''' + identifier = license['identifier'] + if identifier in self.licenses: + raise ValueError(_('license_clash_{}').format(identifier)) + + self.licenses[identifier] = license + + def process_index(self, index, source_name): + ''' + Sanitizes, autocompletes and registers data from a loaded index.json + file. + ''' + schema_ver = normalize_version(index['schema_version']) + index['schema_version'] = schema_ver + if schema_ver != SCHEMA_VERSION: + raise ValueError('index_json_schema_mismatch_{found}_{required}' + .format(found=version_string(schema_ver), + required=version_string(SCHEMA_VERSION))) + + if source_name in self.indexes: + raise ValueError(_('source_name_clash_{}').format(source_name)) + + index['source_name'] = source_name + + self._process_copyright_and_license(index) + + self.indexes[source_name] = index + + for definition in index['definitions']: + try: + definition['source_name'] = source_name + definition['source_copyright'] = index['copyright'] + definition['source_licenses'] = index['licenses'] + processor = self.definition_processors[definition['type']] + processor(definition, index) + except Exception as e: + if current_app._pydrilla_werror: + raise e from None + logging.error(_('couldnt_load_definition_from_%s'), subdir_path, + exc_info=True) + @staticmethod + def all_items(versioned_items_dict): + '''Iterator over all registered versions of all items.''' + for versioned_item in versioned_items_dict.values(): + for item in versioned_item.by_version.values(): + yield item + + def report_missing(self): + ''' + Use 
logger to print information about items that are referenced but + were not loaded. + ''' + def report_missing_license(object, object_type, lic): + if object_type == 'index': + logging.error(_('no_index_license_%(source)s_%(lic)s'), + source=object['source_name'], lic=lic) + return + + ver_str = version_string(object['version']) + kwargs = {object_type: object['identifier'], ver: ver_str, lic: lic} + if object_type == 'resource': + fmt = _('no_resource_license_%(resource)s_%(ver)s_%(lic)s') + else: + fmt = _('no_mapping_license_%(mapping)s_%(ver)s_%(lic)s') + + logging.error(fmt, **kwargs) + + for object_type, iterable in [ + ('index', self.indexes.values()), + ('resource', self.all_items(self.resources)) + ]: + for object in iterable: + to_process = [object['licenses']] + licenses = [] + while to_process: + term = to_process.pop() + + if type(term) is str: + if term not in ['or', 'and'] and \ + term not in self.licenses: + report_missing_license(object, object_type, lic) + continue + + to_process.extend(term) + + def report_missing_dependency(resource, dep): + logging.error(_('no_dep_%(resource)s_%(ver)s_%(dep)s'), + dep=dep, resource=resource['identifier'], + ver=version_string(resource['version'])) + + for resource in self.all_items(self.resources): + for dep in resource['dependencies']: + if dep not in self.resources: + report_missing_dependency(resource, dep) + + def report_missing_payload(mapping, payload): + logging.error(_('no_payload_%(mapping)s_%(ver)s_%(payload)s'), + mapping=mapping['identifier'], payload=payload, + ver=version_string(mapping['version'])) + + for mapping in self.all_items(self.mappings): + for payload in mapping['payloads']: + payload = payload['payload'] + if payload not in self.resources: + report_missing_payload(mapping, payload) + + def finalize(self): + ''' + Initialize structures needed to serve queries. Called once after all + data gets loaded. 
+ ''' + for dict in [self.resources, self.mappings]: + for versioned_item in dict.values(): + versioned_item.known_versions.sort() + + for mapping in self.all_items(self.mappings): + for payload in mapping['payloads']: + try: + MappingItem(pattern, mapping)\ + .register(self.patterns_by_proto) + except Exception as e: + if current_app._pydrilla_werror: + raise e from None + logging.error( + _('couldnt_register_%(mapping)s_%(ver)s_%(pattern)s'), + mapping=mapping['identifier'], pattern=pattern, + ver=version_string(mapping['version']) + ) + + def find_item(self, type, identifier, ver=None): + ''' + Find and return definition of the newest version of resource/mapping + named by identifier. If no such resource/mapping exists, return None. + + If ver is specified, instead find and return definition of that version + of the item (or None is absent). + ''' + dict = self.resources if type == 'resource' else self.mappings + versioned_item = dict.get(identifier) + if not versioned_item: + return None + + ver = version_string(ver or versioned_item.known_versions[-1]) + + return versioned_item.by_version.get(ver) + + def query(self, url, max=0): + ''' + Return return registered patterns and mappings (available as + MappingItems) that match url. The maximum number of items yielded may be + limited by using the optional max argument. Its default value, 0, causes + no limit to be imposed. + + If multiple versions of a mapping are applicable, only the most recent + is included in the result. 
+ ''' + deco = DeconstructedUrl(url) + + domain_tree = self.patterns_by_proto.get(deco.proto) \ + or PatternTreeNode() + for path_tree in domain_tree.search(deco.domain): + for item in path_tree.search(deco.path): + if url[-1] == '/' or item.pattern[-1] != '/': + yield item + max -= 1 + if max == 0: + return + +def load_content_from_subdir(subdir_path, source_name, content): + index_path = subdir_path / 'index.json' + with open(index_path) as index_file: + index = json.loads(strip_json_comments(index_file.read())) + + content.process_index(index, source_name) + +def load_content(path): + path = pathlib.Path(path) + if not path.is_dir(): + raise ValueError(_('content_dir_path_not_dir')) + + content = Content() + + for subdir_path in path.iterdir(): + if not subdir_path.is_dir(): + continue + try: + load_content_from_subdir(subdir_path, subdir_path.name, content) + except Exception as e: + if current_app._pydrilla_werror: + raise e from None + logging.error(_('couldnt_load_content_from_%s'), subdir_path, + exc_info=True) + + content.report_missing() + content.finalize() + + return content + +def create_app(config_path=(here / 'config.json'), flask_config={}): + app = Flask(__package__) + app.config.update(flask_config) + + language = flask_config.get('lang', 'en') + translation = gettext.translation('pydrilla', localedir=(here / 'locales'), + languages=[language]) + + app._pydrilla_gettext = translation.gettext + + # https://stackoverflow.com/questions/9449101/how-to-stop-flask-from-initialising-twice-in-debug-mode + if app.debug and os.environ.get('WERKZEUG_RUN_MAIN') != 'true': + return app + + config = load_config(config_path) + for key in ['static_resource_uri', 'content_dir']: + if key not in config: + raise ValueError(_('config_key_absent_{}').format(key)) + + app._pydrilla_static_resource_uri = config['static_resource_uri'] + app._pydrilla_werror = config.get('werror', False) + if 'hydrilla_parent' in config: + raise MyNotImplError('hydrilla_parent', 
config_path.name) + with app.app_context(): + app._pydrilla_content = load_content(config['content_dir']) + + app.register_blueprint(bp) + + return app + +def _(text_key): + return current_app._pydrilla_gettext(text_key) + +def escaping_gettext(text_key): + from markupsafe import escape + + return str(escape(_(text_key))) + +class MyEnvironment(Environment): + ''' + A wrapper class around jinja2.Environment that causes GNU gettext function + (as '_' and '__') and url_for function to be passed to every call of each + template's render() method. + ''' + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def get_template(self, *args, **kwargs): + template = super().get_template(*args, **kwargs) + old_render = template.render + + def new_render(*args, **kwargs): + final_kwargs = { + '_': escaping_gettext, + '__': escaping_gettext, + 'url_for': url_for + } + final_kwargs.update(kwargs) + + return old_render(*args, **final_kwargs) + + template.render = new_render + + return template + +j2env = MyEnvironment(loader=PackageLoader(__package__), autoescape=False) + +indexpage = j2env.get_template('index.html') +@bp.route('/') +def index(): + return indexpage.render(content=current_app._pydrilla_resources_map) + +for item_type in ['resource', 'mapping']: + def item(identifier): + ver = request.args.get('ver') + if ver is not None: + try: + ver = normalize_version(parse_version(ver)) + except: + abort(400) + + item = current_app._pydrilla_content\ + .find_item(item_type, identifier, ver) + if item is None: + abort(404) + + return json.dumps(item) + + item.__name__ = item_type + 's' + bp.route(f'/{item_type}s/<string:identifier>')(item) diff --git a/src/pydrilla/templates/base.html b/src/pydrilla/templates/base.html new file mode 100644 index 0000000..6e7887e --- /dev/null +++ b/src/pydrilla/templates/base.html @@ -0,0 +1,94 @@ +{# SPDX-License-Identifier: CC-BY-NC-SA-4.0 + +Base HTML page template. 
+ +This file is part of Hydrilla + +Copyright (C) 2021 Wojtek Kosior + +This file is free cultural work: you can redistribute it with or +without modification under the terms of the Creative Commons +Attribution Share Alike 4.0 International as published by the +Creative Commons Corporation. + +This file is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +Creative Commons Attribution Share Alike 4.0 International +License for more details. + + +I, Wojtek Kosior, thereby promise not to sue for violation of this +file's license. Although I request that you do not make use this code +in a proprietary program, I am not going to enforce this in court. +#} + +{% macro link_for(endpoint, text) -%} + <a href="{{ url_for(endpoint, **kwargs) }}" + {{ caller() if caller is defined }}> + {{ text }} + </a> +{%- endmacro %} + +<!DOCTYPE html> +<html> + <head> + {% block head %} + <meta http-equiv="Content-Security-Policy" content="script-src 'none';"> + <style> + {% block styles %} + html, body, div, h1, h2, h3, h4, h5, h6 { + margin: 0; + padding: 0; + } + + * { + color: #444; + } + + aside { + display: inline-block; + border-left: 0.2em solid #e44; + background-color: #edc; + padding: 0.2em; + } + + .nav { + background-color: #ddd; + } + + .nav>*:hover { + background-color: #999; + } + + .nav>* { + display: inline-block; + padding: 1em; + } + + .nav a { + text-decoration: none; + } + + .home_link { + font-weight: bold; + font-size: 1.5em; + padding: 0.5em; + } + {% endblock %} + </style> + <title>{% block title %}{{ _('hydrilla') }}{% endblock %}</title> + {% endblock %} + </head> + <body> + {% block body %} + <div class="nav"> + {% call link_for('bp.index', _('hydrilla')) %} + class="home_link" + {% endcall %} + </div> + {% block content %} + {% endblock %} + {% endblock %} + </body> +</html> diff --git a/src/pydrilla/templates/index.html 
b/src/pydrilla/templates/index.html new file mode 100644 index 0000000..71de8ba --- /dev/null +++ b/src/pydrilla/templates/index.html @@ -0,0 +1,32 @@ +{# SPDX-License-Identifier: CC-BY-NC-SA-4.0 + +HTML index page template. + +This file is part of Hydrilla + +Copyright (C) 2021 Wojtek Kosior + +This file is free cultural work: you can redistribute it with or +without modification under the terms of the Creative Commons +Attribution Share Alike 4.0 International as published by the +Creative Commons Corporation. + +This file is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +Creative Commons Attribution Share Alike 4.0 International +License for more details. + + +I, Wojtek Kosior, thereby promise not to sue for violation of this +file's license. Although I request that you do not make use this code +in a proprietary program, I am not going to enforce this in court. +#} + +{% extends 'base.html' %} +{% block body %} + {{ super() }} + <h2>{{ _('hydrilla_welcome') }}</h2> + <h4>content</h4> + {{ content }} +{% endblock %} diff --git a/src/pydrilla_dev_helper.py b/src/pydrilla_dev_helper.py new file mode 100644 index 0000000..88dc63e --- /dev/null +++ b/src/pydrilla_dev_helper.py @@ -0,0 +1,293 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Definitions of helper commands to use with setuptools +# +# This file is part of Hydrilla +# +# Copyright (C) 2021 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
# See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
#
# I, Wojtek Kosior, thereby promise not to sue for violation of this file's
# license. Although I request that you do not make use this code in a
# proprietary program, I am not going to enforce this in court.

from setuptools import Command
from setuptools.command.build_py import build_py
import sys
from pathlib import Path
import subprocess
import re
import os
import json
import importlib


def mypath(path_or_string):
    '''Return `path_or_string` as an absolute, symlink-resolved Path.'''
    return Path(path_or_string).resolve()


# Captures the Debian revision from a changelog's first line, i.e. the text
# between the first '-' inside the parentheses and the closing ')', e.g. the
# '1' in 'pydrilla (0.1-1) unstable; urgency=low'.
debrel_regex = re.compile(r'^[^(]*\([^-]*-([^)]*)\)')


def extract_debrel(debian_dir):
    '''
    Return the Debian revision found in the first line of
    `debian_dir`/changelog.

    Raises RuntimeError when that line does not contain a parenthesized
    '<version>-<revision>' and OSError when the file cannot be read.
    '''
    changelog_path = mypath(debian_dir) / 'changelog'
    with open(changelog_path) as changelog_file:
        first_line = changelog_file.readline()

    match = debrel_regex.match(first_line)
    if match is None:
        raise RuntimeError('Cannot extract debrel from %s.' % changelog_path)

    return match[1]


class Helper:
    '''
    Project paths and settings together with the development actions built
    on them: compiling and updating gettext catalogs, running the Flask app
    from the source tree and producing Debian tarballs. commands() wraps
    these actions in setuptools command classes.
    '''

    def __init__(self, project_root, app_package_name, version, locales_dir,
                 locales=['en', 'pl'], default_locale='en', locale_domain=None,
                 packages_root=None, debian_dir=None, config_path=None):
        '''
        All path arguments may be given as str or Path; they get resolved
        to absolute paths. `packages_root` defaults to `project_root`/src,
        `debian_dir` to `project_root`/debian and `locale_domain` to
        `app_package_name`.
        '''
        self.project_root = mypath(project_root)
        self.app_package_name = app_package_name
        self.version = version
        self.locales_dir = mypath(locales_dir)
        # Copy, so that the list shared as the default argument (or the
        # caller's own sequence) is never aliased by this instance.
        self.locales = list(locales)
        self.default_locale = default_locale
        self.locale_domain = locale_domain or app_package_name
        # The defaults are derived from the resolved root, so that a plain
        # string `project_root` works as well.
        self.packages_root = mypath(packages_root or self.project_root / 'src')
        self.app_package_dir = self.packages_root / app_package_name
        self.debian_dir = mypath(debian_dir or self.project_root / 'debian')
        self.config_path = config_path and mypath(config_path)
        # Filled lazily by create_mo_files() / locale_files().
        self.locale_files_list = None

    def run_command(self, command, verbose, runner=subprocess.run, **kwargs):
        '''
        Execute `command` (a sequence of str and Path items) with `runner`.

        Path arguments located below the working directory (the `cwd`
        keyword argument if given, the process' current directory
        otherwise) are passed relative to it; other paths are passed as
        they are. With `verbose` set the command line is printed first.
        All `kwargs` are forwarded to `runner` unchanged.
        '''
        cwd = kwargs.get('cwd')
        if cwd:
            cwd = mypath(cwd)
            where = f'from {cwd} '
        else:
            cwd = Path.cwd().resolve()
            where = ''

        str_command = [str(command[0])]

        for arg in command[1:]:
            if isinstance(arg, Path):
                try:
                    arg = str(arg.relative_to(cwd))
                except ValueError:
                    # not below cwd - keep the path as given
                    arg = str(arg)

            str_command.append(arg)

        if verbose:
            print(f'{where}executing {" ".join(str_command)}')
        runner(str_command, **kwargs)

    def create_mo_files(self, dry_run=False, verbose=False):
        '''
        Compile every .po file of every configured locale to a .mo file
        with msgfmt and record both paths in `locale_files_list`.

        With `dry_run` set msgfmt is not executed; only the list of
        existing .po and expected .mo paths is rebuilt.
        '''
        self.locale_files_list = []

        for locale in self.locales:
            messages_dir = self.locales_dir / locale / 'LC_MESSAGES'

            # sorted() merely makes the order of the list reproducible
            for po_path in sorted(messages_dir.glob('*.po')):
                mo_path = po_path.with_suffix('.mo')

                if not dry_run:
                    command = ['msgfmt', po_path, '-o', mo_path]
                    self.run_command(command, verbose=verbose, check=True)

                self.locale_files_list.extend([po_path, mo_path])

    def locale_files(self):
        '''
        Return the list of .po/.mo paths, computing it with a dry run of
        create_mo_files() if it has not been computed yet.
        '''
        if self.locale_files_list is None:
            self.create_mo_files(dry_run=True)

        return self.locale_files_list

    def locale_files_relative(self, to=None):
        '''
        Return locale_files() as paths relative to `to` (the application
        package directory by default).
        '''
        if to is None:
            to = self.app_package_dir

        return [file.relative_to(to) for file in self.locale_files()]

    def flask_run(self, locale=None):
        '''
        Import the application package from `packages_root` and run the
        Flask app returned by its create_app() in the current process.

        FLASK_ENV and FLASK_DEBUG are only given development values when
        the environment does not define them already.
        '''
        for var, val in (('ENV', 'development'), ('DEBUG', 'True')):
            os.environ.setdefault(f'FLASK_{var}', val)

        config = {'lang': locale or self.default_locale}

        sys.path.insert(0, str(self.packages_root))
        package = importlib.import_module(self.app_package_name)

        # make relative paths in json config resolve from project's directory
        os.chdir(self.project_root)

        kwargs = {'config_path': self.config_path} if self.config_path else {}
        package.create_app(flask_config=config, **kwargs).run()

    def update_po_files(self, verbose=False):
        '''
        Regenerate the message template with xgettext and bring each
        locale's `<locale_domain>.po` up to date with it.

        An existing catalog is updated with msgmerge; a missing one is
        created as a copy of the template. The temporary .pot file is
        removed afterwards, also when one of the commands fails.
        '''
        pot_path = self.locales_dir / f'{self.locale_domain}.pot'
        rglob = self.app_package_dir.rglob
        command = ['xgettext', '-d', self.locale_domain, '--language=Python',
                   '-o', pot_path, *rglob('*.py'), *rglob('*.html')]

        self.run_command(command, verbose=verbose, check=True,
                         cwd=self.app_package_dir)

        try:
            for locale in self.locales:
                messages_dir = self.locales_dir / locale / 'LC_MESSAGES'
                # The catalog is named after the domain the template was
                # generated for.
                po_path = messages_dir / f'{self.locale_domain}.po'

                if po_path.exists():
                    command = ['msgmerge', '--update', po_path, pot_path]
                else:
                    messages_dir.mkdir(parents=True, exist_ok=True)
                    command = ['cp', pot_path, po_path]

                self.run_command(command, verbose=verbose, check=True)
        finally:
            if verbose:
                print('removing generated .pot file')
            pot_path.unlink()

    # we exclude these from the source archive we produce: hidden files and
    # the entries named exactly 'build', 'debian' or 'dist'
    bad_file_regex = re.compile(r'^(?:\..*|build|debian|dist)$')

    def make_tarballs(self, verbose=False):
        '''
        Create `<name>_<version>.orig.tar.gz` and
        `<name>_<version>-<debrel>.debian.tar.gz` next to the project
        directory by running `git archive` on HEAD from the project root.
        '''
        name = self.app_package_name
        ver = self.version
        debrel = extract_debrel(self.debian_dir)

        source_dirname = f'{name}-{ver}'
        source_tarball_name = f'{name}_{ver}.orig.tar.gz'
        debian_tarball_name = f'{name}_{ver}-{debrel}.debian.tar.gz'

        source_args = [f'--prefix={source_dirname}/', '-o',
                       self.project_root.parent / source_tarball_name, 'HEAD']

        for filepath in sorted(self.project_root.iterdir()):
            if not self.bad_file_regex.search(filepath.name):
                source_args.append(filepath)

        debian_args = ['-o', self.project_root.parent / debian_tarball_name,
                       'HEAD', self.debian_dir]

        for args in [source_args, debian_args]:
            command = ['git', 'archive', '--format=tar.gz', *args]
            # git must run inside the repository, whatever the caller's
            # current directory is
            self.run_command(command, verbose=verbose, check=True,
                             cwd=self.project_root)

    def commands(self):
        '''
        Return a dict mapping setuptools command names ('msgfmt', 'run',
        'msgmerge', 'tarballs', 'build_py') to command classes bound to
        this helper.
        '''
        helper = self

        class MsgfmtCommand(Command):
            '''A custom command to run msgfmt on all .po files below '{}'.'''

            description = 'use msgfmt to generate .mo files from .po files'
            user_options = []

            def initialize_options(self):
                pass

            def finalize_options(self):
                pass

            def run(self):
                helper.create_mo_files(verbose=self.verbose)

        MsgfmtCommand.__doc__ = MsgfmtCommand.__doc__.format(helper.locales_dir)

        class RunCommand(Command):
            '''
            A custom command to run the app using flask.

            This is similar in effect to:
                PYTHONPATH='{packages_root}' FLASK_APP={app_package_name} \\
                    FLASK_ENV=development flask run
            '''

            description = 'run the Flask app from source directory'

            user_options = [
                ('locale=', 'l',
                 "app locale (one of: %s; default: '%s')" %
                 (', '.join([f"'{name}'" for name in helper.locales]),
                  helper.default_locale))
            ]

            def initialize_options(self):
                self.locale = helper.default_locale

            def finalize_options(self):
                if self.locale not in helper.locales:
                    raise ValueError("Locale '%s' not supported" % self.locale)

            def run(self):
                helper.flask_run(locale=self.locale)

        RunCommand.__doc__ = RunCommand.__doc__.format(
            packages_root=self.packages_root,
            app_package_name=self.app_package_name
        )

        class MsgmergeCommand(Command):
            '''
            A custom command to run xgettext and msgmerge to update project's
            .po files below '{}'.
            '''

            description = 'use xgettext and msgmerge to update (or generate) .po files for this project'
            user_options = []

            def initialize_options(self):
                pass

            def finalize_options(self):
                pass

            def run(self):
                helper.update_po_files(verbose=self.verbose)

        MsgmergeCommand.__doc__ = \
            MsgmergeCommand.__doc__.format(helper.locales_dir)

        class TarballsCommand(Command):
            '''
            A custom command to run git archive to create debian tarballs of
            this project.
            '''

            description = 'use git archive to create .orig.tar.gz and .debian.tar.gz files for this project'
            user_options = []

            def initialize_options(self):
                pass

            def finalize_options(self):
                pass

            def run(self):
                helper.make_tarballs(verbose=self.verbose)

        class BuildCommand(build_py):
            '''
            The build command but runs the custom msgfmt command before build.
            '''
            def run(self, *args, **kwargs):
                self.run_command('msgfmt')
                super().run(*args, **kwargs)

        return {
            'msgfmt': MsgfmtCommand,
            'run': RunCommand,
            'msgmerge': MsgmergeCommand,
            'tarballs': TarballsCommand,
            'build_py': BuildCommand
        }

# ---------------------------------------------------------------------------
# Remainder of the commit page that shared the last line with the code above
# (kept as comments so that nothing of the dump is lost):
#
# diff --git a/src/test/__init__.py b/src/test/__init__.py
# new file mode 100644
# index 0000000..e69de29
# --- /dev/null
# +++ b/src/test/__init__.py
# diff --git a/src/test/test_pydrilla.py b/src/test/test_pydrilla.py
# new file mode 100644
# index 0000000..0ed5fa9
# --- /dev/null
# +++ b/src/test/test_pydrilla.py
# @@ -0,0 +1,90 @@
# ---------------------------------------------------------------------------
# SPDX-License-Identifier: AGPL-3.0-or-later

# Repository tests
#
# This file is part of Hydrilla
#
# Copyright (C) 2021 Wojtek Kosior
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
#
# I, Wojtek Kosior, thereby promise not to sue for violation of this
# file's license. Although I request that you do not make use this code
# in a proprietary program, I am not going to enforce this in court.

import pytest
import sys
import shutil
from pathlib import Path
from os import mkdir, unlink, environ
import json

import pydrilla

# Directory of this test module; the test configuration lives next to it.
test_dir = Path(__file__).resolve().parent
# Directory of the installed/imported pydrilla package.
pydrilla_dir = Path(pydrilla.__file__).resolve().parent
test_config_path = test_dir / 'config.json'


@pytest.fixture
def client():
    '''Yield a Flask test client of an app created from the test config.'''
    app = pydrilla.create_app(test_config_path, flask_config={'TESTING': True})

    with app.test_client() as test_client:
        yield test_client


def test_basic(client):
    '''The index page is served and contains HTML.'''
    response = client.get('/')
    assert b'html' in response.data


def test_normalize_version():
    '''Trailing zeros are stripped from version number lists.'''
    # The package's __init__ only re-exports create_app(), hence the helper
    # is reached through the pydrilla.pydrilla submodule.
    normalize_version = pydrilla.pydrilla.normalize_version

    assert normalize_version([4, 5, 3, 0, 0]) == [4, 5, 3]
    assert normalize_version([1, 0, 5, 0]) == [1, 0, 5]
    assert normalize_version([3, 3]) == [3, 3]

### pad_versions() and compare_versions() likely won't be needed

# def test_compare_versions():
#     compare_versions = pydrilla.pydrilla.compare_versions
#     # without revision
#     assert compare_versions([43], [43]) == 0
#     assert compare_versions([54], [34]) == 1
#     assert compare_versions([1], [3]) == -1
#     assert compare_versions([10, 2], [10, 2]) == 0
#     assert compare_versions([11, 6], [11, 2]) == 1
#     assert compare_versions([3, 0], [3, 8]) == -1
#     assert compare_versions([1, 2, 3], [1, 2]) == 1
#     assert compare_versions([1, 2], [1, 2, 3]) == -1
#     assert compare_versions([1], [1, 0, 0]) == 0

#     # with revision
#     assert compare_versions([43], [43], rev2=3) == -1
#     assert compare_versions([54], [34], rev2=41) == 1
#     assert compare_versions([1], [3], rev1=6) == -1
#     assert compare_versions([10, 2], [10, 2], rev1=8, rev2=5) == 1
#     assert compare_versions([11, 6], [11, 2], rev2=19) == 1
#     assert compare_versions([3, 0], [3, 8], rev2=5) == -1
#     assert compare_versions([1, 2, 3], [1, 2], rev1=4) == 1
#     assert compare_versions([1, 2], [1, 2, 3], rev2=7) == -1
#     assert compare_versions([1], [1, 0, 0], rev2=9, rev1=9) == 0

#     from functools import cmp_to_key

#     versions = [[43], [54], [3, 0], [34], [3], [1], [4, 5, 3], [1, 0, 5],
#                 [3, 3], [10, 2], [11, 2], [11, 6], [3, 8], [1, 2], [1, 2, 3],
#                 [1, 0, 0]]
#     versions.sort(key=cmp_to_key(compare_versions))
#     assert versions == [[1], [1, 0, 0], [1, 0, 5], [1, 2], [1, 2, 3], [3, 0],
#                         [3], [3, 3], [3, 8], [4, 5, 3], [10, 2], [11, 2],
#                         [11, 6], [34], [43], [54]]