aboutsummaryrefslogtreecommitdiff
path: root/src/hydrilla/server/serve.py
diff options
context:
space:
mode:
authorWojtek Kosior <koszko@koszko.org>2022-06-13 11:06:49 +0200
committerWojtek Kosior <koszko@koszko.org>2022-07-16 16:31:44 +0200
commit52d12a4fa124daa1595529e3e7008276a7986d95 (patch)
tree9b56fe2d28ff0242f8511aca570be455112ad3df /src/hydrilla/server/serve.py
parent9dcbfdfe8620cc417438d1727aa1e0c89846e9bf (diff)
downloadhaketilo-hydrilla-52d12a4fa124daa1595529e3e7008276a7986d95.tar.gz
haketilo-hydrilla-52d12a4fa124daa1595529e3e7008276a7986d95.zip
unfinished partial work
Diffstat (limited to 'src/hydrilla/server/serve.py')
-rw-r--r--src/hydrilla/server/serve.py406
1 files changed, 88 insertions, 318 deletions
diff --git a/src/hydrilla/server/serve.py b/src/hydrilla/server/serve.py
index 779f3d2..8f0d557 100644
--- a/src/hydrilla/server/serve.py
+++ b/src/hydrilla/server/serve.py
@@ -36,16 +36,18 @@ import logging
from pathlib import Path
from hashlib import sha256
from abc import ABC, abstractmethod
-from typing import Optional, Union, Iterable
+from typing import Optional, Union, Iterable, TypeVar, Generic
import click
import flask
from werkzeug import Response
-from .. import util
+from .. import _version, versions, json_instances
+from ..item_infos import ResourceInfo, MappingInfo, VersionedItemInfo
+from ..translations import smart_gettext as _, translation as make_translation
+#from ..url_patterns import PatternTree # TODO: re-enable, Malcontent.__init__ below uses PatternTree
from . import config
-from . import _version
here = Path(__file__).resolve().parent
@@ -54,243 +56,20 @@ generated_by = {
'version': _version.version
}
-class ItemInfo(ABC):
- """Shortened data of a resource/mapping."""
- def __init__(self, item_obj: dict, major_schema_version: int):
- """Initialize ItemInfo using item definition read from JSON."""
- self.version = util.normalize_version(item_obj['version'])
- self.identifier = item_obj['identifier']
- self.uuid = item_obj.get('uuid')
- self.long_name = item_obj['long_name']
-
- self.required_mappings = []
- if major_schema_version >= 2:
- self.required_mappings = [map_ref['identifier'] for map_ref in
- item_obj.get('required_mappings', [])]
-
- def path(self) -> str:
- """
- Get a relative path to this item's JSON definition with respect to
- directory containing items of this type.
- """
- return f'{self.identifier}/{util.version_string(self.version)}'
-
-class ResourceInfo(ItemInfo):
- """Shortened data of a resource."""
- def __init__(self, resource_obj: dict, major_schema_version: int):
- """Initialize ResourceInfo using resource definition read from JSON."""
- super().__init__(resource_obj, major_schema_version)
-
- dependencies = resource_obj.get('dependencies', [])
- self.dependencies = [res_ref['identifier'] for res_ref in dependencies]
-
-class MappingInfo(ItemInfo):
- """Shortened data of a mapping."""
- def __init__(self, mapping_obj: dict, major_schema_version: int):
- """Initialize MappingInfo using mapping definition read from JSON."""
- super().__init__(mapping_obj, major_schema_version)
-
- self.payloads = {}
- for pattern, res_ref in mapping_obj.get('payloads', {}).items():
- self.payloads[pattern] = res_ref['identifier']
-
- def as_query_result(self) -> str:
- """
- Produce a json.dump()-able object describing this mapping as one of a
- collection of query results.
- """
- return {
- 'version': self.version,
- 'identifier': self.identifier,
- 'long_name': self.long_name
- }
-
-class VersionedItemInfo:
- """Stores data of multiple versions of given resource/mapping."""
- def __init__(self):
- self.uuid = None
- self.identifier = None
- self.by_version = {}
- self.known_versions = []
-
- def register(self, item_info: ItemInfo) -> None:
- """
- Make item info queryable by version. Perform sanity checks for uuid.
- """
- if self.identifier is None:
- self.identifier = item_info.identifier
-
- if self.uuid is None:
- self.uuid = item_info.uuid
-
- if self.uuid is not None and self.uuid != item_info.uuid:
- raise ValueError(f_('uuid_mismatch_{identifier}')
- .format(identifier=self.identifier))
-
- ver = item_info.version
- ver_str = util.version_string(ver)
-
- if ver_str in self.by_version:
- raise ValueError(f_('version_clash_{identifier}_{version}')
- .format(identifier=self.identifier,
- version=ver_str))
-
- self.by_version[ver_str] = item_info
- self.known_versions.append(ver)
-
- def get_by_ver(self, ver: Optional[list[int]]=None) -> Optional[ItemInfo]:
- """
- Find and return info of the newest version of item.
-
- If ver is specified, instead find and return info of that version of the
- item (or None if absent).
- """
- ver = util.version_string(ver or self.known_versions[-1])
-
- return self.by_version.get(ver)
-
- def get_all(self) -> list[ItemInfo]:
- """
- Return a list of item info for all its versions, from oldest ot newest.
- """
- return [self.by_version[util.version_string(ver)]
- for ver in self.known_versions]
-
-class PatternTreeNode:
- """
- "Pattern Tree" is how we refer to the data structure used for querying
- Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal
- is to make it possible for given URL to quickly retrieve all known patterns
- that match it.
- """
- def __init__(self):
- self.wildcard_matches = [None, None, None]
- self.literal_match = None
- self.children = {}
-
- def search(self, segments):
- """
- Yields all matches of this segments sequence against the tree that
- starts at this node. Results are produces in order from greatest to
- lowest pattern specificity.
- """
- nodes = [self]
-
- for segment in segments:
- next_node = nodes[-1].children.get(segment)
- if next_node is None:
- break
-
- nodes.append(next_node)
-
- nsegments = len(segments)
- cond_literal = lambda: len(nodes) == nsegments
- cond_wildcard = [
- lambda: len(nodes) + 1 == nsegments and segments[-1] != '*',
- lambda: len(nodes) + 1 < nsegments,
- lambda: len(nodes) + 1 != nsegments or segments[-1] != '***'
- ]
-
- while nodes:
- node = nodes.pop()
-
- for item, condition in [(node.literal_match, cond_literal),
- *zip(node.wildcard_matches, cond_wildcard)]:
- if item is not None and condition():
- yield item
-
- def add(self, segments, item_instantiator):
- """
- Make item queryable through (this branch of) the Pattern Tree. If there
- was not yet any item associated with the tree path designated by
- segments, create a new one using item_instantiator() function. Return
- all items matching this path (both the ones that existed and the ones
- just created).
- """
- node = self
- segment = None
-
- for segment in segments:
- wildcards = node.wildcard_matches
-
- child = node.children.get(segment) or PatternTreeNode()
- node.children[segment] = child
- node = child
-
- if node.literal_match is None:
- node.literal_match = item_instantiator()
-
- if segment not in ('*', '**', '***'):
- return [node.literal_match]
-
- if wildcards[len(segment) - 1] is None:
- wildcards[len(segment) - 1] = item_instantiator()
-
- return [node.literal_match, wildcards[len(segment) - 1]]
-
-proto_regex = re.compile(r'^(?P<proto>\w+)://(?P<rest>.*)$')
-user_re = r'[^/?#@]+@' # r'(?P<user>[^/?#@]+)@' # discarded for now
-query_re = r'\??[^#]*' # r'\??(?P<query>[^#]*)' # discarded for now
-domain_re = r'(?P<domain>[^/?#]+)'
-path_re = r'(?P<path>[^?#]*)'
-http_regex = re.compile(f'{domain_re}{path_re}{query_re}.*')
-ftp_regex = re.compile(f'(?:{user_re})?{domain_re}{path_re}.*')
-
-class UrlError(ValueError):
- """Used to report a URL or URL pattern that is invalid or unsupported."""
- pass
-
-class DeconstructedUrl:
- """Represents a deconstructed URL or URL pattern"""
- def __init__(self, url):
- self.url = url
-
- match = proto_regex.match(url)
- if not match:
- raise UrlError(f_('invalid_URL_{}').format(url))
-
- self.proto = match.group('proto')
- if self.proto not in ('http', 'https', 'ftp'):
- raise UrlError(f_('disallowed_protocol_{}').format(proto))
-
- if self.proto == 'ftp':
- match = ftp_regex.match(match.group('rest'))
- elif self.proto in ('http', 'https'):
- match = http_regex.match(match.group('rest'))
-
- if not match:
- raise UrlError(f_('invalid_URL_{}').format(url))
-
- self.domain = match.group('domain').split('.')
- self.domain.reverse()
- self.path = [*filter(None, match.group('path').split('/'))]
-
-class PatternMapping:
- """
- A mapping info, together with one of its patterns, as stored in Pattern
- Tree.
- """
- def __init__(self, pattern: str, mapping_info: MappingInfo):
- self.pattern = pattern
- self.mapping_info = mapping_info
-
- def register(self, pattern_tree: dict):
- """
- Make self queryable through the Pattern Tree passed in the argument.
- """
- deco = DeconstructedUrl(self.pattern)
-
- domain_tree = pattern_tree.get(deco.proto) or PatternTreeNode()
- pattern_tree[deco.proto] = domain_tree
-
- for path_tree in domain_tree.add(deco.domain, PatternTreeNode):
- for match_list in path_tree.add(deco.path, list):
- match_list.append(self)
+ # def as_query_result(self) -> dict[str, Union[str, list[int]]]:
+ # """
+ # Produce a json.dump()-able object describing this mapping as one of a
+ # collection of query results.
+ # """
+ # return {
+ # 'version': self.version,
+ # 'identifier': self.identifier,
+ # 'long_name': self.long_name
+ # }
class Malcontent:
"""
- Instance of this class represents a directory with files that can be loaded
- and served by Hydrilla.
+ Represent a directory with files that can be loaded and served by Hydrilla.
"""
def __init__(self, malcontent_dir_path: Path):
"""
@@ -298,13 +77,15 @@ class Malcontent:
malcontent_dir_path for serveable site-modifying packages and loads
them into its data structures.
"""
- self.infos = {'resource': {}, 'mapping': {}}
- self.pattern_tree = {}
+ self.resource_infos: dict[str, VersionedItemInfo[ResourceInfo]] = {}
+ self.mapping_infos: dict[str, VersionedItemInfo[MappingInfo]] = {}
+
+ self.pattern_tree: PatternTree[MappingInfo] = PatternTree()
self.malcontent_dir_path = malcontent_dir_path
if not self.malcontent_dir_path.is_dir():
- raise ValueError(f_('malcontent_dir_path_not_dir_{}')
+ raise ValueError(_('malcontent_dir_path_not_dir_{}')
.format(malcontent_dir_path))
for item_type in ('mapping', 'resource'):
@@ -323,18 +104,27 @@ class Malcontent:
if flask.current_app._hydrilla_werror:
raise e from None
- msg = f_('couldnt_load_item_from_{}').format(ver_file)
+ msg = _('couldnt_load_item_from_{}').format(ver_file)
logging.error(msg, exc_info=True)
self._report_missing()
self._finalize()
+ @staticmethod
+ def _register_info(infos: dict[str, VersionedItemInfo[VersionedType]],
+ identifier: str, item_info: VersionedType) -> None:
+ """
+ Register item_info in infos under identifier, adding a new VersionedItemInfo entry if none exists yet.
+ """
+ infos.setdefault(identifier, VersionedItemInfo())\
+ .register(item_info)
+
def _load_item(self, item_type: str, ver_file: Path) -> None:
"""
Reads, validates and autocompletes serveable mapping/resource
definition, then registers information from it in data structures.
"""
- version = util.parse_version(ver_file.name)
+ version = versions.parse_version(ver_file.name)
identifier = ver_file.parent.name
item_json, major = util.load_instance_from_file(ver_file)
@@ -342,32 +132,35 @@ class Malcontent:
util.validator_for(f'api_{item_type}_description-{major}.schema.json')\
.validate(item_json)
- if item_type == 'resource':
- item_info = ResourceInfo(item_json, major)
- else:
- item_info = MappingInfo(item_json, major)
+ # Assertion needed for mypy. If validation passed, this should not fail.
+ assert major is not None
+
+ item_info: ItemInfo = ResourceInfo(item_json, major) \
+ if item_type == 'resource' else MappingInfo(item_json, major)
if item_info.identifier != identifier:
- msg = f_('item_{item}_in_file_{file}')\
+ msg = _('item_{item}_in_file_{file}')\
.format({'item': item_info.identifier, 'file': ver_file})
raise ValueError(msg)
if item_info.version != version:
ver_str = util.version_string(item_info.version)
- msg = f_('item_version_{ver}_in_file_{file}')\
+ msg = _('item_version_{ver}_in_file_{file}')\
.format({'ver': ver_str, 'file': ver_file})
raise ValueError(msg)
- versioned_info = self.infos[item_type].get(identifier)
- if versioned_info is None:
- versioned_info = VersionedItemInfo()
- self.infos[item_type][identifier] = versioned_info
+ if isinstance(item_info, ResourceInfo):
+ self._register_info(self.resource_infos, identifier, item_info)
+ elif isinstance(item_info, MappingInfo):
+ self._register_info(self.mapping_infos, identifier, item_info)
- versioned_info.register(item_info)
-
- def _all_of_type(self, item_type: str) -> Iterable[ItemInfo]:
- """Iterator over all registered versions of all mappings/resources."""
- for versioned_info in self.infos[item_type].values():
+ @staticmethod
+ def _all_infos(infos: dict[str, VersionedItemInfo[VersionedType]]) \
+ -> Iterable[VersionedType]:
+ """
+ Iterate over all registered versions of all items stored in infos.
+ """
+ for versioned_info in infos.values():
for item_info in versioned_info.by_version.values():
yield item_info
@@ -377,38 +170,38 @@ class Malcontent:
were not loaded.
"""
def report_missing_dependency(info: ResourceInfo, dep: str) -> None:
- msg = f_('no_dep_{resource}_{ver}_{dep}')\
+ msg = _('no_dep_{resource}_{ver}_{dep}')\
.format(dep=dep, resource=info.identifier,
ver=util.version_string(info.version))
logging.error(msg)
- for resource_info in self._all_of_type('resource'):
+ for resource_info in self._all_infos(self.resource_infos):
for dep in resource_info.dependencies:
- if dep not in self.infos['resource']:
+ if dep not in self.resource_infos:
report_missing_dependency(resource_info, dep)
def report_missing_payload(info: MappingInfo, payload: str) -> None:
- msg = f_('no_payload_{mapping}_{ver}_{payload}')\
+ msg = _('no_payload_{mapping}_{ver}_{payload}')\
.format(mapping=info.identifier, payload=payload,
ver=util.version_string(info.version))
logging.error(msg)
- for mapping_info in self._all_of_type('mapping'):
+ for mapping_info in self._all_infos(self.mapping_infos):
for payload in mapping_info.payloads.values():
- if payload not in self.infos['resource']:
+ if payload not in self.resource_infos:
report_missing_payload(mapping_info, payload)
- def report_missing_mapping(info: Union[MappingInfo, ResourceInfo],
+ def report_missing_mapping(info: ItemInfo,
required_mapping: str) -> None:
msg = _('no_mapping_{required_by}_{ver}_{required}')\
.format(required_by=info.identifier, required=required_mapping,
ver=util.version_string(info.version))
logging.error(msg)
- for item_info in (*self._all_of_type('mapping'),
- *self._all_of_type('resource')):
+ for item_info in (*self._all_infos(self.mapping_infos),
+ *self._all_infos(self.resource_infos)):
for required in item_info.required_mappings:
- if required not in self.infos['mapping']:
+ if required not in self.mapping_infos:
report_missing_mapping(item_info, required)
def _finalize(self):
@@ -416,18 +209,19 @@ class Malcontent:
Initialize structures needed to serve queries. Called once after all
data gets loaded.
"""
- for infos_dict in self.infos.values():
- for versioned_info in infos_dict.values():
+ for versioned_info in (*self.mapping_infos.values(),
+ *self.resource_infos.values()):
versioned_info.known_versions.sort()
- for info in self._all_of_type('mapping'):
+ for info in self._all_infos(self.mapping_infos):
for pattern in info.payloads:
try:
- PatternMapping(pattern, info).register(self.pattern_tree)
+ self.pattern_tree = \
+ self.pattern_tree.register(pattern, info)
except Exception as e:
if flask.current_app._hydrilla_werror:
raise e from None
- msg = f_('couldnt_register_{mapping}_{ver}_{pattern}')\
+ msg = _('couldnt_register_{mapping}_{ver}_{pattern}')\
.format(mapping=info.identifier, pattern=pattern,
ver=util.version_string(info.version))
logging.error(msg)
@@ -439,27 +233,16 @@ class Malcontent:
If multiple versions of a mapping are applicable, only the most recent
is included in the result.
"""
- deco = DeconstructedUrl(url)
-
- collected = {}
-
- domain_tree = self.pattern_tree.get(deco.proto) or PatternTreeNode()
-
- def process_mapping(pattern_mapping: PatternMapping) -> None:
- if url[-1] != '/' and pattern_mapping.pattern[-1] == '/':
- return
-
- info = pattern_mapping.mapping_info
+ collected: dict[str, MappingInfo] = {}
+ for result_set in self.pattern_tree.search(url):
+ for wrapped_mapping_info in result_set:
+ info = wrapped_mapping_info.item
+ previous = collected.get(info.identifier)
+ if previous and previous.version > info.version:
+ continue
- if info.identifier not in collected or \
- info.version > collected[info.identifier].version:
collected[info.identifier] = info
- for path_tree in domain_tree.search(deco.domain):
- for matches_list in path_tree.search(deco.path):
- for pattern_mapping in matches_list:
- process_mapping(pattern_mapping)
-
return list(collected.values())
bp = flask.Blueprint('bp', __package__)
@@ -484,8 +267,6 @@ class HydrillaApp(flask.Flask):
]
}
- self._hydrilla_translation = \
- util.translation(here / 'locales', hydrilla_config['language'])
self._hydrilla_project_url = hydrilla_config['hydrilla_project_url']
self._hydrilla_port = hydrilla_config['port']
self._hydrilla_werror = hydrilla_config.get('werror', False)
@@ -506,8 +287,8 @@ class HydrillaApp(flask.Flask):
'hydrilla_project_url' global variable and to install proper
translations.
"""
- env = super().create_jinja_environment(*args, **kwargs)
- env.install_gettext_translations(self._hydrilla_translation)
+ env = super().create_jinja_environment(*args, **kwargs) # type: ignore
+ env.install_gettext_translations(make_translation())
env.globals['hydrilla_project_url'] = self._hydrilla_project_url
return env
@@ -519,9 +300,6 @@ class HydrillaApp(flask.Flask):
"""
return super().run(*args, port=self._hydrilla_port, **kwargs)
-def f_(text_key):
- return flask.current_app._hydrilla_translation.gettext(text_key)
-
def malcontent():
return flask.current_app._hydrilla_malcontent
@@ -542,7 +320,12 @@ def get_resource_or_mapping(item_type: str, identifier: str) -> Response:
identifier = match.group(1)
- versioned_info = malcontent().infos[item_type].get(identifier)
+ if item_type == 'resource':
+ infos = malcontent().resource_infos
+ else:
+ infos = malcontent().mapping_infos
+
+ versioned_info = infos.get(identifier)
info = versioned_info and versioned_info.get_by_ver()
if info is None:
@@ -586,9 +369,6 @@ default_config_path = Path('/etc/hydrilla/config.json')
default_malcontent_dir = '/var/lib/hydrilla/malcontent'
default_project_url = 'https://hydrillabugs.koszko.org/projects/hydrilla/wiki'
-console_gettext = util.translation(here / 'locales').gettext
-_ = console_gettext
-
@click.command(help=_('serve_hydrilla_packages_explain_wsgi_considerations'))
@click.option('-m', '--malcontent-dir',
type=click.Path(exists=True, file_okay=False),
@@ -600,24 +380,21 @@ _ = console_gettext
@click.option('-c', '--config', 'config_path',
type=click.Path(exists=True, dir_okay=False, resolve_path=True),
help=_('path_to_config_file_explain_default'))
-@click.option('-l', '--language', type=click.STRING,
- help=_('language_to_use_overrides_config'))
@click.version_option(version=_version.version, prog_name='Hydrilla',
message=_('%(prog)s_%(version)s_license'),
help=_('version_printing'))
def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str],
- port: Optional[int], config_path: Optional[str],
- language: Optional[str]) -> None:
+ port: Optional[int], config_path: Optional[str]) -> None:
"""
Run a development Hydrilla server.
This command is meant to be the entry point of hydrilla command exported by
this package.
"""
- config_load_opts = {} if config_path is None \
- else {'config_path': [Path(config_path)]}
-
- hydrilla_config = config.load(**config_load_opts)
+ if config_path is None:
+ hydrilla_config = config.load()
+ else:
+ hydrilla_config = config.load(config_paths=[Path(config_path)])
if malcontent_dir is not None:
hydrilla_config['malcontent_dir'] = str(Path(malcontent_dir).resolve())
@@ -628,14 +405,7 @@ def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str],
if port is not None:
hydrilla_config['port'] = port
- if language is not None:
- hydrilla_config['language'] = language
-
- lang = hydrilla_config.get('language')
- _ = console_gettext if lang is None else \
- util.translation(here / 'locales', lang).gettext
-
- for opt in ('malcontent_dir', 'hydrilla_project_url', 'port', 'language'):
+ for opt in ('malcontent_dir', 'hydrilla_project_url', 'port'):
if opt not in hydrilla_config:
raise ValueError(_('config_option_{}_not_supplied').format(opt))
@@ -649,7 +419,7 @@ def start(malcontent_dir: Optional[str], hydrilla_project_url: Optional[str],
@click.version_option(version=_version.version, prog_name='Hydrilla',
message=_('%(prog)s_%(version)s_license'),
help=_('version_printing'))
-def start_wsgi() -> None:
+def start_wsgi() -> flask.Flask:
"""
Create application object for use in WSGI deployment.