aboutsummaryrefslogtreecommitdiff
path: root/src/hydrilla/server/serve.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/hydrilla/server/serve.py')
-rw-r--r--src/hydrilla/server/serve.py560
1 files changed, 98 insertions, 462 deletions
diff --git a/src/hydrilla/server/serve.py b/src/hydrilla/server/serve.py
index a6a1204..68dde7a 100644
--- a/src/hydrilla/server/serve.py
+++ b/src/hydrilla/server/serve.py
@@ -21,429 +21,35 @@
#
#
# I, Wojtek Kosior, thereby promise not to sue for violation of this
-# file's license. Although I request that you do not make use this code
-# in a proprietary program, I am not going to enforce this in court.
-
-# Enable using with Python 3.7.
-from __future__ import annotations
+# file's license. Although I request that you do not make use of this
+# code in a proprietary program, I am not going to enforce this in
+# court.
import re
import os
-import pathlib
import json
-import logging
+import typing as t
from pathlib import Path
-from hashlib import sha256
-from abc import ABC, abstractmethod
-from typing import Optional, Union, Iterable
import click
import flask
+import werkzeug
-from werkzeug import Response
-
-from .. import util
+from ..exceptions import HaketiloException
+from .. import _version
+from ..translations import smart_gettext as _, translation as make_translation
+from .. import versions
+from .. import item_infos
from . import config
-from . import _version
+from . import malcontent
-here = Path(__file__).resolve().parent
generated_by = {
'name': 'hydrilla.server',
'version': _version.version
}
-class ItemInfo(ABC):
- """Shortened data of a resource/mapping."""
- def __init__(self, item_obj: dict):
- """Initialize ItemInfo using item definition read from JSON."""
- self.version = util.normalize_version(item_obj['version'])
- self.identifier = item_obj['identifier']
- self.uuid = item_obj.get('uuid')
- self.long_name = item_obj['long_name']
-
- def path(self) -> str:
- """
- Get a relative path to this item's JSON definition with respect to
- directory containing items of this type.
- """
- return f'{self.identifier}/{util.version_string(self.version)}'
-
-class ResourceInfo(ItemInfo):
- """Shortened data of a resource."""
- def __init__(self, resource_obj: dict):
- """Initialize ResourceInfo using resource definition read from JSON."""
- super().__init__(resource_obj)
-
- dependencies = resource_obj.get('dependencies', [])
- self.dependencies = [res_ref['identifier'] for res_ref in dependencies]
-
-class MappingInfo(ItemInfo):
- """Shortened data of a mapping."""
- def __init__(self, mapping_obj: dict):
- """Initialize MappingInfo using mapping definition read from JSON."""
- super().__init__(mapping_obj)
-
- self.payloads = {}
- for pattern, res_ref in mapping_obj.get('payloads', {}).items():
- self.payloads[pattern] = res_ref['identifier']
-
- def as_query_result(self) -> str:
- """
- Produce a json.dump()-able object describing this mapping as one of a
- collection of query results.
- """
- return {
- 'version': self.version,
- 'identifier': self.identifier,
- 'long_name': self.long_name
- }
-
-class VersionedItemInfo:
- """Stores data of multiple versions of given resource/mapping."""
- def __init__(self):
- self.uuid = None
- self.identifier = None
- self.by_version = {}
- self.known_versions = []
-
- def register(self, item_info: ItemInfo) -> None:
- """
- Make item info queryable by version. Perform sanity checks for uuid.
- """
- if self.identifier is None:
- self.identifier = item_info.identifier
-
- if self.uuid is None:
- self.uuid = item_info.uuid
-
- if self.uuid is not None and self.uuid != item_info.uuid:
- raise ValueError(f_('uuid_mismatch_{identifier}')
- .format(identifier=self.identifier))
-
- ver = item_info.version
- ver_str = util.version_string(ver)
-
- if ver_str in self.by_version:
- raise ValueError(f_('version_clash_{identifier}_{version}')
- .format(identifier=self.identifier,
- version=ver_str))
-
- self.by_version[ver_str] = item_info
- self.known_versions.append(ver)
-
- def get_by_ver(self, ver: Optional[list[int]]=None) -> Optional[ItemInfo]:
- """
- Find and return info of the newest version of item.
-
- If ver is specified, instead find and return info of that version of the
- item (or None if absent).
- """
- ver = util.version_string(ver or self.known_versions[-1])
-
- return self.by_version.get(ver)
-
- def get_all(self) -> list[ItemInfo]:
- """
- Return a list of item info for all its versions, from oldest ot newest.
- """
- return [self.by_version[util.version_string(ver)]
- for ver in self.known_versions]
-
-class PatternTreeNode:
- """
- "Pattern Tree" is how we refer to the data structure used for querying
- Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal
- is to make it possible for given URL to quickly retrieve all known patterns
- that match it.
- """
- def __init__(self):
- self.wildcard_matches = [None, None, None]
- self.literal_match = None
- self.children = {}
-
- def search(self, segments):
- """
- Yields all matches of this segments sequence against the tree that
- starts at this node. Results are produces in order from greatest to
- lowest pattern specificity.
- """
- nodes = [self]
-
- for segment in segments:
- next_node = nodes[-1].children.get(segment)
- if next_node is None:
- break
-
- nodes.append(next_node)
-
- nsegments = len(segments)
- cond_literal = lambda: len(nodes) == nsegments
- cond_wildcard = [
- lambda: len(nodes) + 1 == nsegments and segments[-1] != '*',
- lambda: len(nodes) + 1 < nsegments,
- lambda: len(nodes) + 1 != nsegments or segments[-1] != '***'
- ]
-
- while nodes:
- node = nodes.pop()
-
- for item, condition in [(node.literal_match, cond_literal),
- *zip(node.wildcard_matches, cond_wildcard)]:
- if item is not None and condition():
- yield item
-
- def add(self, segments, item_instantiator):
- """
- Make item queryable through (this branch of) the Pattern Tree. If there
- was not yet any item associated with the tree path designated by
- segments, create a new one using item_instantiator() function. Return
- all items matching this path (both the ones that existed and the ones
- just created).
- """
- node = self
- segment = None
-
- for segment in segments:
- wildcards = node.wildcard_matches
-
- child = node.children.get(segment) or PatternTreeNode()
- node.children[segment] = child
- node = child
-
- if node.literal_match is None:
- node.literal_match = item_instantiator()
-
- if segment not in ('*', '**', '***'):
- return [node.literal_match]
-
- if wildcards[len(segment) - 1] is None:
- wildcards[len(segment) - 1] = item_instantiator()
-
- return [node.literal_match, wildcards[len(segment) - 1]]
-
-proto_regex = re.compile(r'^(?P<proto>\w+)://(?P<rest>.*)$')
-user_re = r'[^/?#@]+@' # r'(?P<user>[^/?#@]+)@' # discarded for now
-query_re = r'\??[^#]*' # r'\??(?P<query>[^#]*)' # discarded for now
-domain_re = r'(?P<domain>[^/?#]+)'
-path_re = r'(?P<path>[^?#]*)'
-http_regex = re.compile(f'{domain_re}{path_re}{query_re}.*')
-ftp_regex = re.compile(f'(?:{user_re})?{domain_re}{path_re}.*')
-
-class UrlError(ValueError):
- """Used to report a URL or URL pattern that is invalid or unsupported."""
- pass
-
-class DeconstructedUrl:
- """Represents a deconstructed URL or URL pattern"""
- def __init__(self, url):
- self.url = url
-
- match = proto_regex.match(url)
- if not match:
- raise UrlError(f_('invalid_URL_{}').format(url))
-
- self.proto = match.group('proto')
- if self.proto not in ('http', 'https', 'ftp'):
- raise UrlError(f_('disallowed_protocol_{}').format(proto))
-
- if self.proto == 'ftp':
- match = ftp_regex.match(match.group('rest'))
- elif self.proto in ('http', 'https'):
- match = http_regex.match(match.group('rest'))
-
- if not match:
- raise UrlError(f_('invalid_URL_{}').format(url))
-
- self.domain = match.group('domain').split('.')
- self.domain.reverse()
- self.path = [*filter(None, match.group('path').split('/'))]
-
-class PatternMapping:
- """
- A mapping info, together with one of its patterns, as stored in Pattern
- Tree.
- """
- def __init__(self, pattern: str, mapping_info: MappingInfo):
- self.pattern = pattern
- self.mapping_info = mapping_info
-
- def register(self, pattern_tree: dict):
- """
- Make self queryable through the Pattern Tree passed in the argument.
- """
- deco = DeconstructedUrl(self.pattern)
-
- domain_tree = pattern_tree.get(deco.proto) or PatternTreeNode()
- pattern_tree[deco.proto] = domain_tree
-
- for path_tree in domain_tree.add(deco.domain, PatternTreeNode):
- for match_list in path_tree.add(deco.path, list):
- match_list.append(self)
-
-class Malcontent:
- """
- Instance of this class represents a directory with files that can be loaded
- and served by Hydrilla.
- """
- def __init__(self, malcontent_dir_path: Path):
- """
- When an instance of Malcontent is constructed, it searches
- malcontent_dir_path for serveable site-modifying packages and loads
- them into its data structures.
- """
- self.infos = {'resource': {}, 'mapping': {}}
- self.pattern_tree = {}
-
- self.malcontent_dir_path = malcontent_dir_path
-
- if not self.malcontent_dir_path.is_dir():
- raise ValueError(f_('malcontent_dir_path_not_dir_{}')
- .format(malcontent_dir_path))
-
- for item_type in ('mapping', 'resource'):
- type_path = self.malcontent_dir_path / item_type
- if not type_path.is_dir():
- continue
-
- for subpath in type_path.iterdir():
- if not subpath.is_dir():
- continue
-
- for ver_file in subpath.iterdir():
- try:
- self._load_item(item_type, ver_file)
- except Exception as e:
- if flask.current_app._hydrilla_werror:
- raise e from None
-
- msg = f_('couldnt_load_item_from_{}').format(ver_file)
- logging.error(msg, exc_info=True)
-
- self._report_missing()
- self._finalize()
-
- def _load_item(self, item_type: str, ver_file: Path) -> None:
- """
- Reads, validates and autocompletes serveable mapping/resource
- definition, then registers information from it in data structures.
- """
- version = util.parse_version(ver_file.name)
- identifier = ver_file.parent.name
-
- with open(ver_file, 'rt') as file_handle:
- item_json = json.load(file_handle)
-
- util.validator_for(f'api_{item_type}_description-1.0.1.schema.json')\
- .validate(item_json)
-
- if item_type == 'resource':
- item_info = ResourceInfo(item_json)
- else:
- item_info = MappingInfo(item_json)
-
- if item_info.identifier != identifier:
- msg = f_('item_{item}_in_file_{file}')\
- .format({'item': item_info.identifier, 'file': ver_file})
- raise ValueError(msg)
-
- if item_info.version != version:
- ver_str = util.version_string(item_info.version)
- msg = f_('item_version_{ver}_in_file_{file}')\
- .format({'ver': ver_str, 'file': ver_file})
- raise ValueError(msg)
-
- versioned_info = self.infos[item_type].get(identifier)
- if versioned_info is None:
- versioned_info = VersionedItemInfo()
- self.infos[item_type][identifier] = versioned_info
-
- versioned_info.register(item_info)
-
- def _all_of_type(self, item_type: str) -> Iterable[ItemInfo]:
- """Iterator over all registered versions of all mappings/resources."""
- for versioned_info in self.infos[item_type].values():
- for item_info in versioned_info.by_version.values():
- yield item_info
-
- def _report_missing(self) -> None:
- """
- Use logger to print information about items that are referenced but
- were not loaded.
- """
- def report_missing_dependency(info: ResourceInfo, dep: str) -> None:
- msg = f_('no_dep_{resource}_{ver}_{dep}')\
- .format(dep=dep, resource=info.identifier,
- ver=util.version_string(info.version))
- logging.error(msg)
-
- for resource_info in self._all_of_type('resource'):
- for dep in resource_info.dependencies:
- if dep not in self.infos['resource']:
- report_missing_dependency(resource_info, dep)
-
- def report_missing_payload(info: MappingInfo, payload: str) -> None:
- msg = f_('no_payload_{mapping}_{ver}_{payload}')\
- .format(mapping=info.identifier, payload=payload,
- ver=util.version_string(info.version))
- logging.error(msg)
-
- for mapping_info in self._all_of_type('mapping'):
- for payload in mapping_info.payloads.values():
- if payload not in self.infos['resource']:
- report_missing_payload(mapping_info, payload)
-
- def _finalize(self):
- """
- Initialize structures needed to serve queries. Called once after all
- data gets loaded.
- """
- for infos_dict in self.infos.values():
- for versioned_info in infos_dict.values():
- versioned_info.known_versions.sort()
-
- for info in self._all_of_type('mapping'):
- for pattern in info.payloads:
- try:
- PatternMapping(pattern, info).register(self.pattern_tree)
- except Exception as e:
- if flask.current_app._hydrilla_werror:
- raise e from None
- msg = f_('couldnt_register_{mapping}_{ver}_{pattern}')\
- .format(mapping=info.identifier, pattern=pattern,
- ver=util.version_string(info.version))
- logging.error(msg)
-
- def query(self, url: str) -> list[MappingInfo]:
- """
- Return a list of registered mappings that match url.
-
- If multiple versions of a mapping are applicable, only the most recent
- is included in the result.
- """
- deco = DeconstructedUrl(url)
-
- collected = {}
-
- domain_tree = self.pattern_tree.get(deco.proto) or PatternTreeNode()
-
- def process_mapping(pattern_mapping: PatternMapping) -> None:
- if url[-1] != '/' and pattern_mapping.pattern[-1] == '/':
- return
-
- info = pattern_mapping.mapping_info
-
- if info.identifier not in collected or \
- info.version > collected[info.identifier].version:
- collected[info.identifier] = info
-
- for path_tree in domain_tree.search(deco.domain):
- for matches_list in path_tree.search(deco.path):
- for pattern_mapping in matches_list:
- process_mapping(pattern_mapping)
-
- return list(collected.values())
bp = flask.Blueprint('bp', __package__)
@@ -467,46 +73,36 @@ class HydrillaApp(flask.Flask):
]
}
- self._hydrilla_translation = \
- util.translation(here / 'locales', hydrilla_config['language'])
- self._hydrilla_project_url = hydrilla_config['hydrilla_project_url']
self._hydrilla_port = hydrilla_config['port']
self._hydrilla_werror = hydrilla_config.get('werror', False)
+ verify_files = hydrilla_config.get('verify_files', True)
if 'hydrilla_parent' in hydrilla_config:
- raise ValueError("Option 'hydrilla_parent' is not implemented.")
+ raise HaketiloException(_('err.server.opt_hydrilla_parent_not_implemented'))
- malcontent_dir = Path(hydrilla_config['malcontent_dir']).resolve()
- with self.app_context():
- self._hydrilla_malcontent = Malcontent(malcontent_dir)
+ malcontent_dir_path = Path(hydrilla_config['malcontent_dir']).resolve()
+ self._hydrilla_malcontent = malcontent.Malcontent(
+ malcontent_dir_path = malcontent_dir_path,
+ werror = self._hydrilla_werror,
+ verify_files = verify_files
+ )
- self.register_blueprint(bp)
+ self.jinja_env.install_gettext_translations(make_translation())
- def create_jinja_environment(self, *args, **kwargs) \
- -> flask.templating.Environment:
- """
- Flask's create_jinja_environment(), but tweaked to always include the
- 'hydrilla_project_url' global variable and to install proper
- translations.
- """
- env = super().create_jinja_environment(*args, **kwargs)
- env.install_gettext_translations(self._hydrilla_translation)
- env.globals['hydrilla_project_url'] = self._hydrilla_project_url
+ self.jinja_env.globals['hydrilla_project_url'] = \
+ hydrilla_config['hydrilla_project_url']
- return env
+ self.register_blueprint(bp)
def run(self, *args, **kwargs):
"""
- Flask's run(), but tweaked to use the port from hydrilla configuration
- by default.
+ Flask's run() but tweaked to use the port from hydrilla configuration by
+ default.
"""
return super().run(*args, port=self._hydrilla_port, **kwargs)
-def f_(text_key):
- return flask.current_app._hydrilla_translation.gettext(text_key)
-
-def malcontent():
- return flask.current_app._hydrilla_malcontent
+def get_malcontent() -> malcontent.Malcontent:
+ return t.cast(HydrillaApp, flask.current_app)._hydrilla_malcontent
@bp.route('/')
def index():
@@ -514,7 +110,8 @@ def index():
identifier_json_re = re.compile(r'^([-0-9a-z.]+)\.json$')
-def get_resource_or_mapping(item_type: str, identifier: str) -> Response:
+def get_resource_or_mapping(item_type: str, identifier: str) \
+ -> werkzeug.Response:
"""
Strip '.json' from 'identifier', look the item up and send its JSON
description.
@@ -525,36 +122,84 @@ def get_resource_or_mapping(item_type: str, identifier: str) -> Response:
identifier = match.group(1)
- versioned_info = malcontent().infos[item_type].get(identifier)
+ infos: t.Mapping[str, item_infos.VersionedItemInfo]
+ if item_type == 'resource':
+ infos = get_malcontent().resource_infos
+ else:
+ infos = get_malcontent().mapping_infos
- info = versioned_info and versioned_info.get_by_ver()
- if info is None:
+ versioned_info = infos.get(identifier)
+
+ if versioned_info is None:
flask.abort(404)
+ info = versioned_info.newest_info
+
# no need for send_from_directory(); path is safe, constructed by us
- file_path = malcontent().malcontent_dir_path / item_type / info.path()
- return flask.send_file(open(file_path, 'rb'), mimetype='application/json')
+ info_path = f'{info.identifier}/{versions.version_string(info.version)}'
+ file_path = get_malcontent().malcontent_dir_path / item_type / info_path
+
+ if flask.__version__[0:2] in ('0.', '1.'):
+ caching_args = {'add_etags': False, 'cache_timeout': 0}
+ else:
+ caching_args = {'etag': False}
+
+ return flask.send_file(
+ str(file_path),
+ mimetype = 'application/json',
+ conditional = False,
+ **caching_args # type: ignore
+ )
@bp.route('/mapping/<string:identifier_dot_json>')
-def get_newest_mapping(identifier_dot_json: str) -> Response:
+def get_newest_mapping(identifier_dot_json: str) -> werkzeug.Response:
return get_resource_or_mapping('mapping', identifier_dot_json)
@bp.route('/resource/<string:identifier_dot_json>')
-def get_newest_resource(identifier_dot_json: str) -> Response:
+def get_newest_resource(identifier_dot_json: str) -> werkzeug.Response:
return get_resource_or_mapping('resource', identifier_dot_json)
+def make_ref(info: item_infos.AnyInfo) -> t.Dict[str, t.Any]:
+ ref: t.Dict[str, t.Any] = {
+ 'version': info.version,
+ 'identifier': info.identifier,
+ 'long_name': info.long_name
+ }
+
+ if isinstance(info, item_infos.ResourceInfo):
+ ref['revision'] = info.revision
+
+ return ref
+
@bp.route('/query')
def query():
url = flask.request.args['url']
- mapping_refs = [i.as_query_result() for i in malcontent().query(url)]
+ mapping_refs = [make_ref(info) for info in get_malcontent().query(url)]
+
result = {
'$schema': 'https://hydrilla.koszko.org/schemas/api_query_result-1.schema.json',
'mappings': mapping_refs,
'generated_by': generated_by
}
- return Response(json.dumps(result), mimetype='application/json')
+ return werkzeug.Response(json.dumps(result), mimetype='application/json')
+
+@bp.route('/list_all')
+def list_all_packages():
+ malcontent = get_malcontent()
+
+ resource_refs = [make_ref(info) for info in malcontent.get_all_resources()]
+ mapping_refs = [make_ref(info) for info in malcontent.get_all_mappings()]
+
+ result = {
+ '$schema': 'https://hydrilla.koszko.org/schemas/api_package_list-2.schema.json',
+ 'resources': resource_refs,
+ 'mappings': mapping_refs,
+ 'generated_by': generated_by
+ }
+
+ return werkzeug.Response(json.dumps(result), mimetype='application/json')
@bp.route('/--help')
def mm_help():
@@ -569,9 +214,6 @@ default_config_path = Path('/etc/hydrilla/config.json')
default_malcontent_dir = '/var/lib/hydrilla/malcontent'
default_project_url = 'https://hydrillabugs.koszko.org/projects/hydrilla/wiki'
-console_gettext = util.translation(here / 'locales').gettext
-_ = console_gettext
-
@click.command(help=_('serve_hydrilla_packages_explain_wsgi_considerations'))
@click.option('-m', '--malcontent-dir',
type=click.Path(exists=True, file_okay=False),
@@ -583,24 +225,25 @@ _ = console_gettext
@click.option('-c', '--config', 'config_path',
type=click.Path(exists=True, dir_okay=False, resolve_path=True),
help=_('path_to_config_file_explain_default'))
-@click.option('-l', '--language', type=click.STRING,
- help=_('language_to_use_overrides_config'))
@click.version_option(version=_version.version, prog_name='Hydrilla',
message=_('%(prog)s_%(version)s_license'),
help=_('version_printing'))
-def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str],
- port: Optional[int], config_path: Optional[str],
- language: Optional[str]) -> None:
+def start(
+ malcontent_dir: t.Optional[str],
+ hydrilla_project_url: t.Optional[str],
+ port: t.Optional[int],
+ config_path: t.Optional[str]
+) -> None:
"""
Run a development Hydrilla server.
This command is meant to be the entry point of hydrilla command exported by
this package.
"""
- config_load_opts = {} if config_path is None \
- else {'config_path': [Path(config_path)]}
-
- hydrilla_config = config.load(**config_load_opts)
+ if config_path is None:
+ hydrilla_config = config.load()
+ else:
+ hydrilla_config = config.load(config_paths=[Path(config_path)])
if malcontent_dir is not None:
hydrilla_config['malcontent_dir'] = str(Path(malcontent_dir).resolve())
@@ -611,14 +254,7 @@ def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str],
if port is not None:
hydrilla_config['port'] = port
- if language is not None:
- hydrilla_config['language'] = language
-
- lang = hydrilla_config.get('language')
- _ = console_gettext if lang is None else \
- util.translation(here / 'locales', lang).gettext
-
- for opt in ('malcontent_dir', 'hydrilla_project_url', 'port', 'language'):
+ for opt in ('malcontent_dir', 'hydrilla_project_url', 'port'):
if opt not in hydrilla_config:
raise ValueError(_('config_option_{}_not_supplied').format(opt))
@@ -632,7 +268,7 @@ def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str],
@click.version_option(version=_version.version, prog_name='Hydrilla',
message=_('%(prog)s_%(version)s_license'),
help=_('version_printing'))
-def start_wsgi() -> None:
+def start_wsgi() -> flask.Flask:
"""
Create application object for use in WSGI deployment.