aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorWojtek Kosior <koszko@koszko.org>2021-11-13 20:33:57 +0100
committerWojtek Kosior <koszko@koszko.org>2021-11-13 20:33:57 +0100
commita14ab0a7601ff5c197fe43d42410d8ed6bfd26a8 (patch)
treebefa6fc0b1de552bae1e2a832a25cb0dd8f58412 /src
downloadhaketilo-hydrilla-a14ab0a7601ff5c197fe43d42410d8ed6bfd26a8.tar.gz
haketilo-hydrilla-a14ab0a7601ff5c197fe43d42410d8ed6bfd26a8.zip
initial commit
Diffstat (limited to 'src')
-rw-r--r--src/conftest.py0
-rw-r--r--src/pydrilla/__init__.py1
-rw-r--r--src/pydrilla/config.json13
-rw-r--r--src/pydrilla/development_config.json24
-rw-r--r--src/pydrilla/locales/en/LC_MESSAGES/pydrilla.po127
-rw-r--r--src/pydrilla/pydrilla.py700
-rw-r--r--src/pydrilla/templates/base.html94
-rw-r--r--src/pydrilla/templates/index.html32
-rw-r--r--src/pydrilla_dev_helper.py293
-rw-r--r--src/test/__init__.py0
-rw-r--r--src/test/test_pydrilla.py90
11 files changed, 1374 insertions, 0 deletions
diff --git a/src/conftest.py b/src/conftest.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/conftest.py
diff --git a/src/pydrilla/__init__.py b/src/pydrilla/__init__.py
new file mode 100644
index 0000000..8d1565b
--- /dev/null
+++ b/src/pydrilla/__init__.py
@@ -0,0 +1 @@
+from .pydrilla import create_app
diff --git a/src/pydrilla/config.json b/src/pydrilla/config.json
new file mode 100644
index 0000000..a6c4bf0
--- /dev/null
+++ b/src/pydrilla/config.json
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: CC0-1.0
+
+// Example Hydrilla config file.
+//
+// Copyright (C) 2021 Wojtek Kosior
+//
+// Available under the terms of Creative Commons Zero v1.0 Universal.
+
+{
+ "content_dir": "/var/lib/hydrilla/content",
+ "static_resource_uri": "http://localhost:8000/",
+ "try_configs": ["/etc/pydrilla/config.json"]
+}
diff --git a/src/pydrilla/development_config.json b/src/pydrilla/development_config.json
new file mode 100644
index 0000000..1660edb
--- /dev/null
+++ b/src/pydrilla/development_config.json
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: CC0-1.0
+
+// Hydrilla development config file.
+//
+// Copyright (C) 2021 Wojtek Kosior
+//
+// Available under the terms of Creative Commons Zero v1.0 Universal.
+
+// this config is meant to be used in development environment;
+// unlike config.json, it shall not be included in distribution
+{
+ "content_dir": "./example_content",
+
+ // Expect files from content_dir to be served there (used to redirect
+ // clients).
+ "static_resource_uri": "http://localhost:8000/",
+
+ // Make Pydrilla error out on any warning
+ "werror": true
+
+ // With the below we can make Pydrilla look for missing content items in
+ // another instance instead of just erroring/warning.
+ // ,"hydrilla_parent": "https://api.hachette-hydrilla.org/0.2/"
+}
diff --git a/src/pydrilla/locales/en/LC_MESSAGES/pydrilla.po b/src/pydrilla/locales/en/LC_MESSAGES/pydrilla.po
new file mode 100644
index 0000000..f9e6a82
--- /dev/null
+++ b/src/pydrilla/locales/en/LC_MESSAGES/pydrilla.po
@@ -0,0 +1,127 @@
+# SPDX-License-Identifier: CC0-1.0
+
+# English localization
+#
+# This file is part of Hydrilla
+#
+# Copyright (C) 2021 Wojtek Kosior
+#
+# This file is free cultural work: you can redistribute it with or
+# without modification under the terms of the CC0 1.0 Universal License
+# as published by the Creative Commons Corporation.
+#
+# This file is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# CC0 1.0 Universal License for more details.
+
+msgid ""
+msgstr ""
+"Project-Id-Version: Hydrilla 0.2\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2021-11-13 19:03+0100\n"
+"PO-Revision-Date: 2021-11-06 08:42+0100\n"
+"Last-Translator: Wojtek Kosior <koszko@koszko.org>\n"
+"Language-Team: English\n"
+"Language: en\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#: pydrilla.py:97
+msgid "path_is_absolute_{}"
+msgstr "Provided path '{}' is absolute."
+
+#: pydrilla.py:104
+#, python-brace-format
+msgid "not_implemented_{what}_{where}"
+msgstr ""
+"Attempt to use '{what}' in '{where}' but this feature is not yet implemented."
+
+#: pydrilla.py:194
+#, python-brace-format
+msgid "uuid_mismatch_{identifier}"
+msgstr "Two different uuids were specified for item '{identifier}'."
+
+#: pydrilla.py:201
+#, python-brace-format
+msgid "version_clash_{identifier}_{version}"
+msgstr "Version '{version}' specified more than once for item '{identifier}'."
+
+#: pydrilla.py:297 pydrilla.py:309
+msgid "invalid_URL_{}"
+msgstr "Invalid URL/pattern: '{}'."
+
+#: pydrilla.py:301
+msgid "disallowed_protocol_{}"
+msgstr "Disallowed protocol: '{}'."
+
+#: pydrilla.py:391
+msgid "license_clash_{}"
+msgstr "License '{}' defined more than once."
+
+#: pydrilla.py:408
+msgid "source_name_clash_{}"
+msgstr "Source name '{}' used more than once."
+
+#: pydrilla.py:426
+#, python-format
+msgid "couldnt_load_definition_from_%s"
+msgstr "Couldn't load definition from '%s'."
+
+#: pydrilla.py:442
+#, python-format
+msgid "no_index_license_%(source)s_%(lic)s"
+msgstr "Unknown license '%(lic)s' used by index.json of '%(source)s'."
+
+#: pydrilla.py:449
+#, python-format
+msgid "no_resource_license_%(resource)s_%(ver)s_%(lic)s"
+msgstr ""
+"Unknown license '%(lic)s' used by resource '%(resource)s', version '%(ver)s'."
+
+#: pydrilla.py:451
+#, python-format
+msgid "no_mapping_license_%(mapping)s_%(ver)s_%(lic)s"
+msgstr ""
+"Unknown license '%(lic)s' used by mapping '%(mapping)s', version '%(ver)s'."
+
+#: pydrilla.py:474
+#, python-format
+msgid "no_dep_%(resource)s_%(ver)s_%(dep)s"
+msgstr ""
+"Unknown dependency '%(dep)s' of resource '%(resource)s', version '%(ver)s'."
+
+#: pydrilla.py:484
+#, python-format
+msgid "no_payload_%(mapping)s_%(ver)s_%(payload)s"
+msgstr ""
+"Unknown payload '%(payload)s' of mapping '%(mapping)s', version '%(ver)s'."
+
+#: pydrilla.py:512
+#, python-format
+msgid "couldnt_register_%(mapping)s_%(ver)s_%(pattern)s"
+msgstr ""
+"Couldn't register mapping '%(mapping)s', version '%(ver)s' (pattern "
+"'%(pattern)s')."
+
+#: pydrilla.py:566
+msgid "content_dir_path_not_dir"
+msgstr "Provided \"content_dir\" path does not name a directory."
+
+#: pydrilla.py:578
+#, python-format
+msgid "couldnt_load_content_from_%s"
+msgstr "Couldn't load content from '%s'."
+
+#: pydrilla.py:603
+msgid "config_key_absent_{}"
+msgstr "Config key \"{}\" not provided."
+
+#: templates/index.html:4
+msgid "hydrilla_welcome"
+msgstr "Welcome to Hydrilla!"
+
+#: templates/base.html:55 templates/base.html:61
+msgid "hydrilla"
+msgstr "Hydrilla"
diff --git a/src/pydrilla/pydrilla.py b/src/pydrilla/pydrilla.py
new file mode 100644
index 0000000..caf05a2
--- /dev/null
+++ b/src/pydrilla/pydrilla.py
@@ -0,0 +1,700 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+# Main repository logic.
+#
+# This file is part of Hydrilla
+#
+# Copyright (C) 2021 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+from flask import Flask, Blueprint, current_app, url_for, abort, request
+from jinja2 import Environment, PackageLoader
+import re
+#from hashlib import sha256
+import os
+import pathlib
+import json
+import gettext
+import logging
+
SCHEMA_VERSION = [0, 2]

# Splits a single line of JSON text into the part that precedes a '//'
# comment (group 1) and the comment opener or end-of-line (group 2).
strip_comment_re = re.compile(r'''
^ # match from the beginning of each line
( # catch the part before '//' comment
  (?: # this group matches either a string or a single out-of-string character
    [^"/] |
    "
    (?: # this group matches any in-a-string character
      [^"\\] |          # match any normal character
      \\[^u] |          # match any escaped character like '\f' or '\n'
      \\u[a-fA-F0-9]{4} # match an escape
    )*
    "
  )*
)
# expect either end-of-line or a comment:
# * unterminated strings will cause matching to fail
# * bad comment (with '/' instead of '//') will be indicated by second group
#   having length 1 instead of 2 or 0
(//?|$)
''', re.VERBOSE)

def strip_json_comments(text):
    '''
    Return text with every '//' line comment removed.

    The text is processed line by line (split on '\\n'). Lines on which the
    regex cannot match (unterminated string) are passed through unchanged so
    that the json module reports the problem. A lone '/' outside of a string
    raises json.JSONDecodeError pointing at its offset within text.
    '''
    offset = 0
    kept_lines = []

    for raw_line in text.split('\n'):
        found = strip_comment_re.match(raw_line)

        if found is None:
            # unterminated string; json.loads() will complain about it later
            kept = raw_line
        else:
            before, opener = found.group(1), found.group(2)
            if len(opener) == 1:
                raise json.JSONDecodeError('bad comment', text,
                                           offset + len(before))
            kept = before

        kept_lines.append(kept)
        # + 1 accounts for the newline character consumed by split()
        offset += len(raw_line) + 1

    return '\n'.join(kept_lines)
+
+here = pathlib.Path(__file__).resolve().parent
+
+bp = Blueprint('bp', __package__)
+
def load_config(config_path):
    '''
    Load a JSON config file (with '//' comments allowed) and every config
    file it references, merging them into a single dictionary.

    A loaded config may name further files under "use_configs" (loading
    failure is an error) and "try_configs" (loading failure is ignored).
    Files loaded later override keys of those loaded earlier. The
    "use_configs" and "try_configs" keys themselves are removed from the
    result.

    Raises whatever open()/json raise for the root config or for a file
    named in "use_configs" that cannot be loaded.
    '''
    config = {}
    # Stack of (path, failure_allowed) pairs that still have to be loaded.
    pending = [(config_path, False)]

    while pending:
        path, can_fail = pending.pop()

        try:
            # Open the file popped from the stack, not the root config.
            with open(path) as config_file:
                new_config = json.loads(strip_json_comments(config_file.read()))
        except Exception as e:
            if can_fail:
                continue
            raise e from None

        config.update(new_config)

        for key, failure_ok in [('try_configs', True), ('use_configs', False)]:
            # Push in reverse order so the first listed path gets popped
            # first; reversed() leaves the parsed list itself untouched.
            for extra_path in reversed(new_config.get(key, [])):
                pending.append((extra_path, failure_ok))

    for key in ['try_configs', 'use_configs']:
        config.pop(key, None)

    return config
+
def get_content_file_path(path):
    '''
    Convert a '/'-separated relative path string into a pathlib.Path that
    uses the platform's separator.

    Raises ValueError if the resulting path is absolute.
    '''
    if os.path.sep != '/':
        # str.replace() returns a new string, so the result has to be kept.
        path = path.replace('/', os.path.sep)

    path = pathlib.Path(path)
    if path.is_absolute():
        raise ValueError(_('path_is_absolute_{}').format(path))

    return path
+
class MyNotImplError(NotImplementedError):
    '''Signals use of a feature that is planned but not written yet.'''
    def __init__(self, what, where):
        # what: the unsupported feature; where: the place it was used in
        template = _('not_implemented_{what}_{where}')
        message = template.format(what=what, where=where)
        super().__init__(message)
+
def normalize_version(ver):
    '''
    Drop trailing zeroes from ver, which is a sequence of integers.

    The argument is left untouched; a fresh slice is returned.
    '''
    end = len(ver)
    # walk back from the right until a non-zero component is found
    while end > 0 and ver[end - 1] == 0:
        end -= 1

    return ver[:end]
+
def parse_version(ver_str):
    '''
    Turn a dotted version string into a list of integers, e.g. "4.6.13.0"
    becomes [4, 6, 13, 0]. Raises ValueError if a component is not an integer.
    '''
    components = ver_str.split('.')
    return list(map(int, components))
+
def version_string(ver, rev=None):
    '''
    Render a version (sequence of integers) as a dotted string. When the
    optional integer rev is given it is appended after a dash, like:
        1.2.3-5
    The version is used as given; it is not normalized.
    '''
    dotted = '.'.join(map(str, ver))
    if rev is None:
        return dotted

    return f'{dotted}-{rev}'
+
+### pad_versions() and compare_versions() likely won't be needed
+
+# def pad_versions(ver1, ver2):
+# '''
+# Each of the arguments is an array of integers. If one of the arrays is
+# shorter than the other, right-pad it with zeroes to make it the same
+# length as the other one.
+
+# Returns a tuple of *new* arrays. Doesn't modify its arguments.
+# '''
+# if len(ver1) < len(ver2):
+# ver2, ver1 = pad_versions(ver2, ver1)
+# else:
+# ver2 = [*ver2, *([0] * (len(ver1) - len(ver2)))]
+# ver1 = [*ver1]
+
+# return ver1, ver2
+
+# def compare_versions(ver1, ver2, rev1=1, rev2=1):
+# '''
+# ver1 and ver2 are arrays of integers, with major version number being the
+# first array item. If versions specified by arrays of different length need
+# to be compared, the shorter array gets padded with zeroes on the right.
+# This means that for example version 1.3 could be given as both [1, 3] and
+# [1, 3, 0, 0] (aka 1.3.0.0) and either would mean the same.
+
+# rev1 and rev2 are revision numbers. They are appended to padded ver1 and
+# ver2 arrays respectively before comparison.
+
+# This function returns -1, 0 or 1 when the first ver1 designates
+# respectively a version lower than, equal to or greater than the one in
+# ver2.
+# '''
+# ver1, ver2 = pad_versions(ver1, ver2)
+# ver1.append(rev1)
+# ver2.append(rev2)
+
+# for n1, n2 in zip(ver1, ver2):
+# if n1 < n2:
+# return -1
+# if n1 > n2:
+# return 1
+
+# return 0
+
class VersionedContentItem:
    '''Holds the definitions of all known versions of one content item.'''
    def __init__(self):
        self.uuid = None
        self.identifier = None
        # version string -> item definition
        self.by_version = {}
        # versions (as integer sequences) in registration order
        self.known_versions = []

    def register_item(self, item):
        '''
        Record one more version of this item.

        The first registered definition fixes the identifier and uuid.
        Raises ValueError when a later definition carries a different uuid
        or when its version has already been registered.
        '''
        first_registration = self.identifier is None
        if first_registration:
            self.identifier = item['identifier']
            self.uuid = item['uuid']
        elif self.uuid != item['uuid']:
            message = _('uuid_mismatch_{identifier}')
            raise ValueError(message.format(identifier=self.identifier))

        version = item['version']
        key = version_string(version)

        if key in self.by_version:
            message = _('version_clash_{identifier}_{version}')
            raise ValueError(message.format(identifier=self.identifier,
                                            version=key))

        self.by_version[key] = item
        self.known_versions.append(version)
+
class PatternTreeNode:
    '''
    "Pattern Tree" is how we refer to the data structure used for querying
    Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal
    is to make it possible for given URL to quickly retrieve all known patterns
    that match it.
    '''
    def __init__(self):
        # Items registered for the '*', '**' and '***' children of this node
        # (index is the number of asterisks minus one).
        self.wildcard_matches = [None, None, None]
        # Item registered for the path that ends exactly at this node.
        self.literal_match = None
        # segment -> child PatternTreeNode
        self.children = {}

    def search(self, segments):
        '''
        Yields all matches of this segments sequence against the tree that
        starts at this node. Results are produced in order from greatest to
        lowest pattern specificity.
        '''
        # Follow literal children as deep as the segments allow.
        nodes = [self]

        for segment in segments:
            next_node = nodes[-1].children.get(segment)
            if next_node is None:
                break

            nodes.append(next_node)

        nsegments = len(segments)

        # Walk back from the deepest node reached towards the root.
        while nodes:
            node = nodes.pop()
            # After the pop, len(nodes) is the depth of node, i.e. the number
            # of segments consumed on the way to it.
            remaining = nsegments - len(nodes)

            if node.literal_match is not None and remaining == 0:
                yield node.literal_match

            one, two_plus, any_number = node.wildcard_matches

            # '*' stands for exactly one segment (but not a literal '*').
            if one is not None and remaining == 1 and segments[-1] != '*':
                yield one

            # '**' stands for two or more segments.
            if two_plus is not None and remaining > 1:
                yield two_plus

            # '***' stands for any number of segments (but not a literal
            # '***' as the only remaining one).
            if any_number is not None and \
               (remaining != 1 or segments[-1] != '***'):
                yield any_number

    def add(self, segments, item_instantiator):
        '''
        Make item queryable through (this branch of) the Pattern Tree. If there
        was not yet any item associated with the tree path designated by
        segments, create a new one using item_instantiator() function. Return
        all items matching this path (both the ones that existed and the ones
        just created).

        An empty segments sequence designates this node itself.
        '''
        node = self
        parent = None
        last_segment = None

        for segment in segments:
            parent = node

            child = node.children.get(segment)
            if child is None:
                child = PatternTreeNode()
                node.children[segment] = child

            node = child
            last_segment = segment

        if node.literal_match is None:
            node.literal_match = item_instantiator()

        # With no segments there is no last segment and no parent whose
        # wildcard slots could be filled; only the literal item applies.
        if parent is None or last_segment not in ('*', '**', '***'):
            return [node.literal_match]

        slot = len(last_segment) - 1
        if parent.wildcard_matches[slot] is None:
            parent.wildcard_matches[slot] = item_instantiator()

        return [node.literal_match, parent.wildcard_matches[slot]]
+
# Splits 'proto://rest'.
proto_regex = re.compile(r'^(?P<proto>\w+)://(?P<rest>.*)$')
user_re = r'[^/?#@]+@' # r'(?P<user>[^/?#@]+)@' # discarded for now
query_re = r'\??[^#]*' # r'\??(?P<query>[^#]*)' # discarded for now
domain_re = r'(?P<domain>[^/?#]+)'
path_re = r'(?P<path>[^?#]*)'
# Both regexes are applied to the part of the URL that follows '://'.
http_regex = re.compile(f'{domain_re}{path_re}{query_re}.*')
ftp_regex = re.compile(f'(?:{user_re})?{domain_re}{path_re}.*')

class UrlError(ValueError):
    '''Raised for URLs/patterns that cannot be deconstructed.'''
    pass

class DeconstructedUrl:
    '''
    Represents a deconstructed URL or URL pattern.

    Attributes set by the constructor:
        url     the original string
        proto   'http', 'https' or 'ftp'
        domain  list of domain labels in reversed order (TLD first)
        path    list of non-empty path segments

    Raises UrlError when url is malformed or uses another protocol.
    '''
    def __init__(self, url):
        self.url = url

        proto_match = proto_regex.match(url)
        if not proto_match:
            raise UrlError(_('invalid_URL_{}').format(url))

        self.proto = proto_match.group('proto')
        if self.proto not in ('http', 'https', 'ftp'):
            # Report the offending protocol stored on the instance.
            raise UrlError(_('disallowed_protocol_{}').format(self.proto))

        rest_regex = ftp_regex if self.proto == 'ftp' else http_regex
        match = rest_regex.match(proto_match.group('rest'))

        if not match:
            raise UrlError(_('invalid_URL_{}').format(url))

        self.domain = match.group('domain').split('.')
        self.domain.reverse()
        # filter(None, ...) drops the empty strings produced by leading,
        # trailing and doubled slashes
        self.path = [*filter(None, match.group('path').split('/'))]
+
class MappingItem:
    '''
    Pairs a mapping with one of its URL patterns; this is the object kept in
    the Pattern Tree.
    '''
    def __init__(self, pattern, mapping):
        self.pattern = pattern
        self.mapping = mapping

    def register(self, patterns_by_proto):
        '''
        Insert self into the Pattern Tree rooted in patterns_by_proto, a
        dictionary that maps protocol names to domain trees.
        '''
        parsed = DeconstructedUrl(self.pattern)

        domains = patterns_by_proto.get(parsed.proto)
        if not domains:
            domains = PatternTreeNode()
        patterns_by_proto[parsed.proto] = domains

        # Each domain-tree item is itself a tree of paths whose items are
        # plain lists of MappingItems.
        for paths in domains.add(parsed.domain, PatternTreeNode):
            for bucket in paths.add(parsed.path, list):
                bucket.append(self)
+
class Content:
    '''Stores serveable website content.'''
    def __init__(self):
        # identifier -> VersionedContentItem
        self.resources = {}
        self.mappings = {}
        # identifier -> license definition
        self.licenses = {}
        # source name -> contents of that source's index.json
        self.indexes = {}
        # definition 'type' -> method that registers such definition
        self.definition_processors = {
            'resource': self.process_resource_or_mapping,
            'mapping': self.process_resource_or_mapping,
            'license': self.process_license
        }
        # protocol -> root PatternTreeNode of the domain tree
        self.patterns_by_proto = {}

    @staticmethod
    def register_item(dict, item):
        '''
        Helper function used to add a versioned item definition to content
        data structures.
        '''
        identifier = item['identifier']
        versioned_item = dict.get(identifier)
        if versioned_item is None:
            versioned_item = VersionedContentItem()
            dict[identifier] = versioned_item

        versioned_item.register_item(item)

    @staticmethod
    def _process_copyright_and_license(definition):
        '''
        Helper function used by other process_*() methods. Raises
        MyNotImplError when "copyright" or "licenses" is set to "auto".
        '''
        for field in ['copyright', 'licenses']:
            if definition[field] == 'auto':
                # Single braces so the actual field name gets interpolated.
                raise MyNotImplError(f'"{field}": "auto"',
                                     definition['source_name'])

    def process_resource_or_mapping(self, definition, index):
        '''
        Sanitizes, autocompletes and registers serveable mapping/resource
        definition.
        '''
        definition['version'] = normalize_version(definition['version'])

        if definition['type'] == 'resource':
            self._process_copyright_and_license(definition)
            definition.setdefault('dependencies', [])
            self.register_item(self.resources, definition)
        else:
            self.register_item(self.mappings, definition)

    def process_license(self, license, index):
        '''
        Sanitizes and registers serveable license definition. Raises
        ValueError if a license with the same identifier is already known.
        '''
        identifier = license['identifier']
        if identifier in self.licenses:
            raise ValueError(_('license_clash_{}').format(identifier))

        self.licenses[identifier] = license

    def process_index(self, index, source_name):
        '''
        Sanitizes, autocompletes and registers data from a loaded index.json
        file.

        Raises ValueError on schema version mismatch or when source_name was
        already used. A failure to process a single definition is logged,
        unless the "werror" setting is on, in which case it is re-raised.
        '''
        schema_ver = normalize_version(index['schema_version'])
        index['schema_version'] = schema_ver
        if schema_ver != SCHEMA_VERSION:
            # Passed through _() like every other message in this class; the
            # key itself is what gets shown when no translation exists.
            raise ValueError(_('index_json_schema_mismatch_{found}_{required}')
                             .format(found=version_string(schema_ver),
                                     required=version_string(SCHEMA_VERSION)))

        if source_name in self.indexes:
            raise ValueError(_('source_name_clash_{}').format(source_name))

        index['source_name'] = source_name

        self._process_copyright_and_license(index)

        self.indexes[source_name] = index

        for definition in index['definitions']:
            try:
                definition['source_name'] = source_name
                definition['source_copyright'] = index['copyright']
                definition['source_licenses'] = index['licenses']
                processor = self.definition_processors[definition['type']]
                processor(definition, index)
            except Exception as e:
                if current_app._pydrilla_werror:
                    raise e from None
                # The source name is the only origin information available
                # in this method.
                logging.error(_('couldnt_load_definition_from_%s'),
                              source_name, exc_info=True)

    @staticmethod
    def all_items(versioned_items_dict):
        '''Iterator over all registered versions of all items.'''
        for versioned_item in versioned_items_dict.values():
            yield from versioned_item.by_version.values()

    def report_missing(self):
        '''
        Use logger to print information about items that are referenced but
        were not loaded.
        '''
        # Note: logging functions take values for %(name)s placeholders as
        # one dictionary argument, not as keyword arguments.
        def report_missing_license(obj, object_type, lic):
            if object_type == 'index':
                logging.error(_('no_index_license_%(source)s_%(lic)s'),
                              {'source': obj['source_name'], 'lic': lic})
                return

            ver_str = version_string(obj['version'])
            fmt_args = {object_type: obj['identifier'],
                        'ver': ver_str, 'lic': lic}
            if object_type == 'resource':
                fmt = _('no_resource_license_%(resource)s_%(ver)s_%(lic)s')
            else:
                fmt = _('no_mapping_license_%(mapping)s_%(ver)s_%(lic)s')

            logging.error(fmt, fmt_args)

        for object_type, iterable in [
                ('index', self.indexes.values()),
                ('resource', self.all_items(self.resources))
        ]:
            for obj in iterable:
                # License expressions may be nested sequences of license
                # identifiers and 'or'/'and' operators; flatten them.
                to_process = [obj['licenses']]
                while to_process:
                    term = to_process.pop()

                    if isinstance(term, str):
                        if term not in ['or', 'and'] and \
                           term not in self.licenses:
                            report_missing_license(obj, object_type, term)
                        continue

                    to_process.extend(term)

        def report_missing_dependency(resource, dep):
            logging.error(_('no_dep_%(resource)s_%(ver)s_%(dep)s'),
                          {'dep': dep, 'resource': resource['identifier'],
                           'ver': version_string(resource['version'])})

        for resource in self.all_items(self.resources):
            for dep in resource['dependencies']:
                if dep not in self.resources:
                    report_missing_dependency(resource, dep)

        def report_missing_payload(mapping, payload):
            logging.error(_('no_payload_%(mapping)s_%(ver)s_%(payload)s'),
                          {'mapping': mapping['identifier'],
                           'payload': payload,
                           'ver': version_string(mapping['version'])})

        for mapping in self.all_items(self.mappings):
            for payload in mapping['payloads']:
                payload = payload['payload']
                if payload not in self.resources:
                    report_missing_payload(mapping, payload)

    def finalize(self):
        '''
        Initialize structures needed to serve queries. Called once after all
        data gets loaded.
        '''
        for versioned_items in [self.resources, self.mappings]:
            for versioned_item in versioned_items.values():
                versioned_item.known_versions.sort()

        for mapping in self.all_items(self.mappings):
            for payload in mapping['payloads']:
                # NOTE(review): assumes each payload entry carries its URL
                # pattern under the 'pattern' key - confirm against the
                # index.json schema. A missing pattern yields None, which is
                # then reported through the error path below.
                pattern = payload.get('pattern')
                try:
                    MappingItem(pattern, mapping)\
                        .register(self.patterns_by_proto)
                except Exception as e:
                    if current_app._pydrilla_werror:
                        raise e from None
                    logging.error(
                        _('couldnt_register_%(mapping)s_%(ver)s_%(pattern)s'),
                        {'mapping': mapping['identifier'],
                         'pattern': pattern,
                         'ver': version_string(mapping['version'])}
                    )

    def find_item(self, type, identifier, ver=None):
        '''
        Find and return definition of the newest version of resource/mapping
        named by identifier. If no such resource/mapping exists, return None.

        If ver is specified, instead find and return definition of that version
        of the item (or None if absent).
        '''
        items = self.resources if type == 'resource' else self.mappings
        versioned_item = items.get(identifier)
        if not versioned_item:
            return None

        # Compare against None explicitly: a normalized version can be an
        # empty (falsy) list and must not be mistaken for "not specified".
        if ver is None:
            ver = versioned_item.known_versions[-1]

        return versioned_item.by_version.get(version_string(ver))

    def query(self, url, max=0):
        '''
        Yield registered patterns and mappings (available as MappingItems)
        that match url. The maximum number of items yielded may be limited by
        using the optional max argument. Its default value, 0, causes no limit
        to be imposed.

        NOTE(review): restricting the result to the most recent version of
        each mapping is not implemented here; every registered version that
        matches is yielded.
        '''
        deco = DeconstructedUrl(url)

        domain_tree = self.patterns_by_proto.get(deco.proto) \
            or PatternTreeNode()
        for path_tree in domain_tree.search(deco.domain):
            # Path trees store lists of MappingItems (see
            # MappingItem.register()), so every match has to be unpacked.
            for match_list in path_tree.search(deco.path):
                for item in match_list:
                    # Patterns ending with '/' only match URLs that also end
                    # with '/'.
                    if url[-1] == '/' or item.pattern[-1] != '/':
                        yield item
                        max -= 1
                        if max == 0:
                            return
+
def load_content_from_subdir(subdir_path, source_name, content):
    '''
    Read index.json (comments allowed) from the directory subdir_path and
    register its data in content under the name source_name.
    '''
    raw_text = (subdir_path / 'index.json').read_text()
    parsed_index = json.loads(strip_json_comments(raw_text))

    content.process_index(parsed_index, source_name)
+
def load_content(path):
    '''
    Build a Content object from every subdirectory of the directory path.

    Raises ValueError if path is not a directory. A subdirectory that fails
    to load is logged and skipped, unless the "werror" setting is on, in
    which case the error is re-raised.
    '''
    content_dir = pathlib.Path(path)
    if not content_dir.is_dir():
        raise ValueError(_('content_dir_path_not_dir'))

    content = Content()

    for entry in content_dir.iterdir():
        if entry.is_dir():
            try:
                load_content_from_subdir(entry, entry.name, content)
            except Exception as e:
                if current_app._pydrilla_werror:
                    raise e from None
                logging.error(_('couldnt_load_content_from_%s'), entry,
                              exc_info=True)

    content.report_missing()
    content.finalize()

    return content
+
def create_app(config_path=(here / 'config.json'), flask_config={}):
    '''
    Create and configure the Flask application.

    config_path names the Hydrilla JSON config file (str or path object).
    flask_config is only read: its items are copied into app.config and its
    optional 'lang' item selects the message catalog (default 'en').

    Raises ValueError when a required config key is absent and
    MyNotImplError when the unsupported 'hydrilla_parent' key is present.
    '''
    app = Flask(__package__)
    app.config.update(flask_config)

    language = flask_config.get('lang', 'en')
    translation = gettext.translation('pydrilla', localedir=(here / 'locales'),
                                      languages=[language])

    app._pydrilla_gettext = translation.gettext

    # https://stackoverflow.com/questions/9449101/how-to-stop-flask-from-initialising-twice-in-debug-mode
    if app.debug and os.environ.get('WERKZEUG_RUN_MAIN') != 'true':
        return app

    config = load_config(config_path)

    # _() looks the translation up through current_app, so every message has
    # to be produced with an application context pushed.
    with app.app_context():
        for key in ['static_resource_uri', 'content_dir']:
            if key not in config:
                raise ValueError(_('config_key_absent_{}').format(key))

        app._pydrilla_static_resource_uri = config['static_resource_uri']
        app._pydrilla_werror = config.get('werror', False)
        if 'hydrilla_parent' in config:
            # pathlib.Path() makes this work for plain string paths too
            raise MyNotImplError('hydrilla_parent',
                                 pathlib.Path(config_path).name)

        app._pydrilla_content = load_content(config['content_dir'])

    app.register_blueprint(bp)

    return app
+
def _(text_key):
    '''Translate text_key with the gettext function stored on current_app.'''
    translate = current_app._pydrilla_gettext
    return translate(text_key)
+
def escaping_gettext(text_key):
    '''Translate text_key and return the result HTML-escaped, as a str.'''
    import markupsafe

    translated = _(text_key)
    return str(markupsafe.escape(translated))
+
class MyEnvironment(Environment):
    '''
    jinja2.Environment variant whose templates always receive the escaping
    gettext function (under the names '_' and '__') and url_for when their
    render() method is called.
    '''

    def get_template(self, *args, **kwargs):
        '''
        Fetch a template as jinja2.Environment does, then replace its
        render() with a wrapper that supplies the default names. Names passed
        explicitly to render() take precedence over the defaults.
        '''
        template = super().get_template(*args, **kwargs)
        original_render = template.render

        def render_with_helpers(*render_args, **render_kwargs):
            defaults = {
                '_': escaping_gettext,
                '__': escaping_gettext,
                'url_for': url_for
            }
            return original_render(*render_args,
                                   **{**defaults, **render_kwargs})

        template.render = render_with_helpers

        return template
+
# Escaping is done by escaping_gettext(), hence autoescape stays off.
j2env = MyEnvironment(loader=PackageLoader(__package__), autoescape=False)

indexpage = j2env.get_template('index.html')

@bp.route('/')
def index():
    '''Serve the HTML landing page.'''
    # create_app() stores the loaded content as '_pydrilla_content'; nothing
    # in this module sets the previously used '_pydrilla_resources_map'.
    return indexpage.render(content=current_app._pydrilla_content)
+
def _make_item_view(item_type):
    '''
    Build the view function that serves JSON definitions of items of the
    given type ('resource' or 'mapping').

    A factory is used so that each view keeps its own item_type; a function
    defined directly inside the loop would see only the loop variable's
    final value.
    '''
    def item(identifier):
        ver = request.args.get('ver')
        if ver is not None:
            try:
                ver = normalize_version(parse_version(ver))
            except ValueError:
                # a version component was not an integer
                abort(400)

        found = current_app._pydrilla_content\
                           .find_item(item_type, identifier, ver)
        if found is None:
            abort(404)

        return json.dumps(found)

    # Flask derives the endpoint name from the function's __name__.
    item.__name__ = item_type + 's'

    return item

for item_type in ['resource', 'mapping']:
    bp.route(f'/{item_type}s/<string:identifier>')(_make_item_view(item_type))
diff --git a/src/pydrilla/templates/base.html b/src/pydrilla/templates/base.html
new file mode 100644
index 0000000..6e7887e
--- /dev/null
+++ b/src/pydrilla/templates/base.html
@@ -0,0 +1,94 @@
+{# SPDX-License-Identifier: CC-BY-NC-SA-4.0
+
+Base HTML page template.
+
+This file is part of Hydrilla
+
+Copyright (C) 2021 Wojtek Kosior
+
+This file is free cultural work: you can redistribute it with or
+without modification under the terms of the Creative Commons
+Attribution Share Alike 4.0 International as published by the
+Creative Commons Corporation.
+
+This file is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+Creative Commons Attribution Share Alike 4.0 International
+License for more details.
+
+
+I, Wojtek Kosior, thereby promise not to sue for violation of this
+file's license. Although I request that you do not make use this code
+in a proprietary program, I am not going to enforce this in court.
+#}
+
+{% macro link_for(endpoint, text) -%}
+ <a href="{{ url_for(endpoint, **kwargs) }}"
+ {{ caller() if caller is defined }}>
+ {{ text }}
+ </a>
+{%- endmacro %}
+
+<!DOCTYPE html>
+<html>
+ <head>
+ {% block head %}
+ <meta http-equiv="Content-Security-Policy" content="script-src 'none';">
+ <style>
+ {% block styles %}
+ html, body, div, h1, h2, h3, h4, h5, h6 {
+ margin: 0;
+ padding: 0;
+ }
+
+ * {
+ color: #444;
+ }
+
+ aside {
+ display: inline-block;
+ border-left: 0.2em solid #e44;
+ background-color: #edc;
+ padding: 0.2em;
+ }
+
+ .nav {
+ background-color: #ddd;
+ }
+
+ .nav>*:hover {
+ background-color: #999;
+ }
+
+ .nav>* {
+ display: inline-block;
+ padding: 1em;
+ }
+
+ .nav a {
+ text-decoration: none;
+ }
+
+ .home_link {
+ font-weight: bold;
+ font-size: 1.5em;
+ padding: 0.5em;
+ }
+ {% endblock %}
+ </style>
+ <title>{% block title %}{{ _('hydrilla') }}{% endblock %}</title>
+ {% endblock %}
+ </head>
+ <body>
+ {% block body %}
+ <div class="nav">
+ {% call link_for('bp.index', _('hydrilla')) %}
+ class="home_link"
+ {% endcall %}
+ </div>
+ {% block content %}
+ {% endblock %}
+ {% endblock %}
+ </body>
+</html>
diff --git a/src/pydrilla/templates/index.html b/src/pydrilla/templates/index.html
new file mode 100644
index 0000000..71de8ba
--- /dev/null
+++ b/src/pydrilla/templates/index.html
@@ -0,0 +1,32 @@
+{# SPDX-License-Identifier: CC-BY-NC-SA-4.0
+
+HTML index page template.
+
+This file is part of Hydrilla
+
+Copyright (C) 2021 Wojtek Kosior
+
+This file is free cultural work: you can redistribute it with or
+without modification under the terms of the Creative Commons
+Attribution Share Alike 4.0 International as published by the
+Creative Commons Corporation.
+
+This file is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+Creative Commons Attribution Share Alike 4.0 International
+License for more details.
+
+
+I, Wojtek Kosior, thereby promise not to sue for violation of this
+file's license. Although I request that you do not make use this code
+in a proprietary program, I am not going to enforce this in court.
+#}
+
+{% extends 'base.html' %}
+{% block body %}
+ {{ super() }}
+ <h2>{{ _('hydrilla_welcome') }}</h2>
+ <h4>content</h4>
+ {{ content }}
+{% endblock %}
diff --git a/src/pydrilla_dev_helper.py b/src/pydrilla_dev_helper.py
new file mode 100644
index 0000000..88dc63e
--- /dev/null
+++ b/src/pydrilla_dev_helper.py
@@ -0,0 +1,293 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Definitions of helper commands to use with setuptools
+#
+# This file is part of Hydrilla
+#
+# Copyright (C) 2021 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this file's
+# license. Although I request that you do not make use of this code in a
+# proprietary program, I am not going to enforce this in court.
+
+from setuptools import Command
+from setuptools.command.build_py import build_py
+import sys
+from pathlib import Path
+import subprocess
+import re
+import os
+import json
+import importlib
+
+def mypath(path_or_string):
+    '''Return the given path (a str or a Path) as a resolved Path.'''
+    as_path = Path(path_or_string)
+    return as_path.resolve()
+
+# Matches the first line of a debian/changelog file, e.g.
+#     pydrilla (0.1-2) unstable; urgency=medium
+# and captures the Debian revision, i.e. the part of the parenthesized
+# version that follows its *last* hyphen (the upstream version may contain
+# hyphens of its own).
+debrel_regex = re.compile(r'^[^(]*\([^)]*-([^-)]*)\)')
+
+def extract_debrel(debian_dir):
+    '''
+    Return the Debian revision found in the first line of the 'changelog'
+    file located inside debian_dir.
+
+    Raises RuntimeError if that line does not contain a parenthesized
+    version with a hyphen-separated revision.
+    '''
+    changelog_path = mypath(debian_dir) / 'changelog'
+    with open(changelog_path) as changelog_file:
+        match = debrel_regex.match(changelog_file.readline())
+
+    if match is None:
+        raise RuntimeError('Cannot extract debrel from %s.' % changelog_path)
+
+    return match[1]
+
+class Helper:
+    '''
+    Development helper actions for a project (message catalog handling,
+    running the Flask app from the source tree, making Debian tarballs)
+    together with the setuptools commands that expose them.
+    '''
+
+    def __init__(self, project_root, app_package_name, version, locales_dir,
+                 locales=('en', 'pl'), default_locale='en', locale_domain=None,
+                 packages_root=None, debian_dir=None, config_path=None):
+        '''
+        Remember the project's layout. Path arguments may be given as
+        strings or Path objects; they get resolved.
+
+        locale_domain defaults to app_package_name, packages_root to
+        '<project_root>/src' and debian_dir to '<project_root>/debian'.
+        '''
+        self.project_root = mypath(project_root)
+        self.app_package_name = app_package_name
+        self.version = version
+        self.locales_dir = mypath(locales_dir)
+        # Copy, so that the instance never shares the caller's (or the
+        # default) sequence.
+        self.locales = list(locales)
+        self.default_locale = default_locale
+        self.locale_domain = locale_domain or app_package_name
+        # Defaults are derived from the resolved root because project_root
+        # itself may be a plain string, which has no '/' operator.
+        self.packages_root = mypath(packages_root or
+                                    self.project_root / 'src')
+        self.app_package_dir = self.packages_root / app_package_name
+        self.debian_dir = mypath(debian_dir or self.project_root / 'debian')
+        self.config_path = config_path and mypath(config_path)
+        # Filled in by create_mo_files().
+        self.locale_files_list = None
+
+    def run_command(self, command, verbose, runner=subprocess.run, **kwargs):
+        '''
+        Run command (a sequence made of a program and its arguments) by
+        calling runner with it and with kwargs. Return runner's result.
+
+        Path arguments located below the working directory (kwargs['cwd']
+        if given, the current directory otherwise) are passed in relative
+        form. If verbose is true, the command gets printed first.
+        '''
+        cwd = kwargs.get('cwd')
+        if cwd:
+            cwd = mypath(cwd)
+            where = f'from {cwd} '
+        else:
+            cwd = Path.cwd().resolve()
+            where = ''
+
+        str_command = [str(command[0])]
+
+        for arg in command[1:]:
+            if isinstance(arg, Path):
+                try:
+                    arg = arg.relative_to(cwd)
+                except ValueError:
+                    # Not below cwd - pass the path unchanged.
+                    pass
+
+            str_command.append(str(arg))
+
+        if verbose:
+            print(f'{where}executing {" ".join(str_command)}')
+
+        return runner(str_command, **kwargs)
+
+    def create_mo_files(self, dry_run=False, verbose=False):
+        '''
+        Compile each .po file of each configured locale to a .mo file with
+        msgfmt and record the paths of both in self.locale_files_list.
+
+        With dry_run set, msgfmt is not executed; only the list is built.
+        '''
+        self.locale_files_list = []
+
+        for locale in self.locales:
+            messages_dir = self.locales_dir / locale / 'LC_MESSAGES'
+
+            for po_path in messages_dir.glob('*.po'):
+                mo_path = po_path.with_suffix('.mo')
+
+                if not dry_run:
+                    command = ['msgfmt', po_path, '-o', mo_path]
+                    self.run_command(command, verbose=verbose, check=True)
+
+                self.locale_files_list.extend([po_path, mo_path])
+
+    def locale_files(self):
+        '''
+        Return the list of .po and .mo paths, computing it with a dry run
+        of create_mo_files() if it has not been built yet.
+        '''
+        if self.locale_files_list is None:
+            self.create_mo_files(dry_run=True)
+
+        return self.locale_files_list
+
+    def locale_files_relative(self, to=None):
+        '''
+        Return locale_files() expressed relative to directory 'to' (the
+        app package directory by default).
+        '''
+        if to is None:
+            to = self.app_package_dir
+
+        return [file.relative_to(to) for file in self.locale_files()]
+
+    def flask_run(self, locale=None):
+        '''
+        Import the app package from packages_root and run the Flask app
+        it creates, using the given locale (default_locale if None).
+
+        FLASK_ENV and FLASK_DEBUG are set to development values unless
+        they are already present in the environment.
+        '''
+        for var, val in (('ENV', 'development'), ('DEBUG', 'True')):
+            os.environ.setdefault(f'FLASK_{var}', val)
+
+        config = {'lang': locale or self.default_locale}
+
+        sys.path.insert(0, str(self.packages_root))
+        package = importlib.import_module(self.app_package_name)
+
+        # make relative paths in json config resolve from project's directory
+        os.chdir(self.project_root)
+
+        kwargs = {'config_path': self.config_path} if self.config_path else {}
+        package.create_app(flask_config=config, **kwargs).run()
+
+    def update_po_files(self, verbose=False):
+        '''
+        Generate a .pot template from the app package's .py and .html
+        files with xgettext and bring '<locale_domain>.po' of every
+        configured locale up to date with it.
+
+        An existing .po file gets merged with the template by msgmerge; a
+        missing one is created as a copy of the template. The generated
+        .pot file is removed afterwards, also when a command fails.
+        '''
+        pot_path = self.locales_dir / f'{self.locale_domain}.pot'
+        rglob = self.app_package_dir.rglob
+        command = ['xgettext', '-d', self.locale_domain, '--language=Python',
+                   '-o', pot_path, *rglob('*.py'), *rglob('*.html')]
+
+        try:
+            self.run_command(command, verbose=verbose, check=True,
+                             cwd=self.app_package_dir)
+
+            for locale in self.locales:
+                messages_dir = self.locales_dir / locale / 'LC_MESSAGES'
+                po_path = messages_dir / f'{self.locale_domain}.po'
+
+                if po_path.exists():
+                    command = ['msgmerge', '--update', po_path, pot_path]
+                else:
+                    # First run for this locale: start from the template.
+                    messages_dir.mkdir(parents=True, exist_ok=True)
+                    command = ['cp', pot_path, po_path]
+
+                self.run_command(command, verbose=verbose, check=True)
+        finally:
+            # The template might be missing if xgettext failed.
+            if pot_path.exists():
+                if verbose:
+                    print('removing generated .pot file')
+                pot_path.unlink()
+
+    # We exclude these from the source archive we produce. The alternatives
+    # are grouped so that both anchors apply to each of them; otherwise any
+    # name merely containing 'build', 'debian' or 'dist' would be excluded.
+    bad_file_regex = re.compile(r'^(?:\..*|build|debian|dist)$')
+
+    def make_tarballs(self, verbose=False):
+        '''
+        Use 'git archive' on HEAD to write '<name>_<ver>.orig.tar.gz' and
+        '<name>_<ver>-<debrel>.debian.tar.gz' to the parent directory of
+        the project root.
+
+        The source tarball contains the project root's entries that do
+        not match bad_file_regex; the debian tarball contains debian_dir.
+        '''
+        name = self.app_package_name
+        ver = self.version
+        debrel = extract_debrel(self.debian_dir)
+
+        source_dirname = f'{name}-{ver}'
+        source_tarball_name = f'{name}_{ver}.orig.tar.gz'
+        debian_tarball_name = f'{name}_{ver}-{debrel}.debian.tar.gz'
+
+        source_args = [f'--prefix={source_dirname}/', '-o',
+                       self.project_root.parent / source_tarball_name, 'HEAD']
+        source_args.extend(
+            filepath for filepath in self.project_root.iterdir()
+            if not self.bad_file_regex.search(filepath.name)
+        )
+
+        debian_args = ['-o', self.project_root.parent / debian_tarball_name,
+                       'HEAD', self.debian_dir]
+
+        for args in [source_args, debian_args]:
+            command = ['git', 'archive', '--format=tar.gz', *args]
+            self.run_command(command, verbose=verbose, check=True)
+
+    def commands(self):
+        '''
+        Return a dict that maps command names to setuptools command
+        classes bound to this helper ('msgfmt', 'run', 'msgmerge',
+        'tarballs' and a 'build_py' that runs 'msgfmt' first).
+        '''
+        helper = self
+
+        class MsgfmtCommand(Command):
+            '''A custom command to run msgfmt on all .po files below '{}'.'''
+
+            description = 'use msgfmt to generate .mo files from .po files'
+            user_options = []
+
+            def initialize_options(self):
+                pass
+
+            def finalize_options(self):
+                pass
+
+            def run(self):
+                helper.create_mo_files(verbose=self.verbose)
+
+        MsgfmtCommand.__doc__ = MsgfmtCommand.__doc__.format(helper.locales_dir)
+
+        class RunCommand(Command):
+            '''
+            A custom command to run the app using flask.
+
+            This is similar in effect to:
+                PYTHONPATH='{packages_root}' FLASK_APP={app_package_name} \\
+                FLASK_ENV=development flask run
+            '''
+
+            description = 'run the Flask app from source directory'
+
+            user_options = [
+                ('locale=', 'l',
+                 "app locale (one of: %s; default: '%s')" %
+                 (', '.join([f"'{loc}'" for loc in helper.locales]),
+                  helper.default_locale))
+            ]
+
+            def initialize_options(self):
+                self.locale = helper.default_locale
+
+            def finalize_options(self):
+                if self.locale not in helper.locales:
+                    raise ValueError("Locale '%s' not supported" % self.locale)
+
+            def run(self):
+                helper.flask_run(locale=self.locale)
+
+        RunCommand.__doc__ = RunCommand.__doc__.format(
+            packages_root=self.packages_root,
+            app_package_name=self.app_package_name
+        )
+
+        class MsgmergeCommand(Command):
+            '''
+            A custom command to run xgettext and msgmerge to update project's
+            .po files below '{}'.
+            '''
+
+            description = 'use xgettext and msgmerge to update (or generate) .po files for this project'
+            user_options = []
+
+            def initialize_options(self):
+                pass
+
+            def finalize_options(self):
+                pass
+
+            def run(self):
+                helper.update_po_files(verbose=self.verbose)
+
+        MsgmergeCommand.__doc__ = \
+            MsgmergeCommand.__doc__.format(helper.locales_dir)
+
+        class TarballsCommand(Command):
+            '''
+            A custom command to run git archive to create debian tarballs of
+            this project.
+            '''
+
+            description = 'use git archive to create .orig.tar.gz and .debian.tar.gz files for this project'
+            user_options = []
+
+            def initialize_options(self):
+                pass
+
+            def finalize_options(self):
+                pass
+
+            def run(self):
+                helper.make_tarballs(verbose=self.verbose)
+
+        class BuildCommand(build_py):
+            '''
+            The build command but runs the custom msgfmt command before build.
+            '''
+            def run(self, *args, **kwargs):
+                self.run_command('msgfmt')
+                super().run(*args, **kwargs)
+
+        return {
+            'msgfmt': MsgfmtCommand,
+            'run': RunCommand,
+            'msgmerge': MsgmergeCommand,
+            'tarballs': TarballsCommand,
+            'build_py': BuildCommand
+        }
diff --git a/src/test/__init__.py b/src/test/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/test/__init__.py
diff --git a/src/test/test_pydrilla.py b/src/test/test_pydrilla.py
new file mode 100644
index 0000000..0ed5fa9
--- /dev/null
+++ b/src/test/test_pydrilla.py
@@ -0,0 +1,90 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+# Repository tests
+#
+# This file is part of Hydrilla
+#
+# Copyright (C) 2021 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use of this code
+# in a proprietary program, I am not going to enforce this in court.
+
+import pytest
+import sys
+import shutil
+from pathlib import Path
+from os import mkdir, unlink, environ
+import json
+
+import pydrilla
+
+# Locations of this test package, of the tested package (imported above
+# under the name 'pydrilla') and of the config file used by the tests.
+test_dir = Path(__file__).resolve().parent
+pydrilla_dir = Path(pydrilla.__file__).resolve().parent
+test_config_path = test_dir / 'config.json'
+
+@pytest.fixture
+def client():
+    '''Yield a test client of an app created from the test config file.'''
+    flask_app = pydrilla.create_app(test_config_path,
+                                    flask_config={'TESTING': True})
+
+    with flask_app.test_client() as test_client:
+        yield test_client
+
+def test_basic(client):
+    '''Fetching '/' should give a response with b'html' in its data.'''
+    index_response = client.get('/')
+    assert b'html' in index_response.data
+
+def test_normalize_version():
+    '''Check normalize_version() results for a few version lists.'''
+    # NOTE(review): the package's __init__.py only imports create_app -
+    # confirm that normalize_version is reachable as an attribute of the
+    # 'pydrilla' package.
+    cases = (
+        ([4, 5, 3, 0, 0], [4, 5, 3]),
+        ([1, 0, 5, 0], [1, 0, 5]),
+        ([3, 3], [3, 3])
+    )
+
+    for version, expected in cases:
+        assert pydrilla.normalize_version(version) == expected
+
+### pad_versions() and compare_versions() likely won't be needed
+
+# def test_compare_versions():
+# compare_versions = pydrilla.compare_versions
+# # without revision
+# assert compare_versions([43], [43]) == 0
+# assert compare_versions([54], [34]) == 1
+# assert compare_versions([1], [3]) == -1
+# assert compare_versions([10, 2], [10, 2]) == 0
+# assert compare_versions([11, 6], [11, 2]) == 1
+# assert compare_versions([3, 0], [3, 8]) == -1
+# assert compare_versions([1, 2, 3], [1, 2]) == 1
+# assert compare_versions([1, 2], [1, 2, 3]) == -1
+# assert compare_versions([1], [1, 0, 0]) == 0
+
+# # with revision
+# assert compare_versions([43], [43], rev2=3) == -1
+# assert compare_versions([54], [34], rev2=41) == 1
+# assert compare_versions([1], [3], rev1=6) == -1
+# assert compare_versions([10, 2], [10, 2], rev1=8, rev2=5) == 1
+# assert compare_versions([11, 6], [11, 2], rev2=19) == 1
+# assert compare_versions([3, 0], [3, 8], rev2=5) == -1
+# assert compare_versions([1, 2, 3], [1, 2], rev1=4) == 1
+# assert compare_versions([1, 2], [1, 2, 3], rev2=7) == -1
+# assert compare_versions([1], [1, 0, 0], rev2=9, rev1=9) == 0
+
+# from functools import cmp_to_key
+
+# versions = [[43], [54], [3, 0], [34], [3], [1], [4, 5, 3], [1, 0, 5],
+# [3, 3], [10, 2], [11, 2], [11, 6], [3, 8], [1, 2], [1, 2, 3],
+# [1, 0, 0]]
+# versions.sort(key=cmp_to_key(compare_versions))
+# assert versions == [[1], [1, 0, 0], [1, 0, 5], [1, 2], [1, 2, 3], [3, 0],
+# [3], [3, 3], [3, 8], [4, 5, 3], [10, 2], [11, 2],
+# [11, 6], [34], [43], [54]]