From 8238435825d01ad2ec1a11b6bcaf6d9a9aad5ab5 Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Mon, 22 Aug 2022 12:52:59 +0200 Subject: allow pulling packages from remote repository --- .../proxy/state_impl/_operations/__init__.py | 9 + .../proxy/state_impl/_operations/load_packages.py | 378 +++++++++++++++++++++ .../proxy/state_impl/_operations/prune_packages.py | 169 +++++++++ .../_operations/recompute_dependencies.py | 223 ++++++++++++ src/hydrilla/proxy/state_impl/base.py | 219 +++++++++++- src/hydrilla/proxy/state_impl/concrete_state.py | 322 ++---------------- src/hydrilla/proxy/state_impl/load_packages.py | 344 ------------------- src/hydrilla/proxy/state_impl/mappings.py | 4 +- src/hydrilla/proxy/state_impl/prune_packages.py | 152 --------- src/hydrilla/proxy/state_impl/repos.py | 135 +++++++- 10 files changed, 1138 insertions(+), 817 deletions(-) create mode 100644 src/hydrilla/proxy/state_impl/_operations/__init__.py create mode 100644 src/hydrilla/proxy/state_impl/_operations/load_packages.py create mode 100644 src/hydrilla/proxy/state_impl/_operations/prune_packages.py create mode 100644 src/hydrilla/proxy/state_impl/_operations/recompute_dependencies.py delete mode 100644 src/hydrilla/proxy/state_impl/load_packages.py delete mode 100644 src/hydrilla/proxy/state_impl/prune_packages.py (limited to 'src/hydrilla/proxy/state_impl') diff --git a/src/hydrilla/proxy/state_impl/_operations/__init__.py b/src/hydrilla/proxy/state_impl/_operations/__init__.py new file mode 100644 index 0000000..c147be4 --- /dev/null +++ b/src/hydrilla/proxy/state_impl/_operations/__init__.py @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: CC0-1.0 + +# Copyright (C) 2022 Wojtek Kosior +# +# Available under the terms of Creative Commons Zero v1.0 Universal. + +from .load_packages import load_packages, FileResolver +from .prune_packages import prune_packages +from .recompute_dependencies import _recompute_dependencies_no_state_update diff --git a/src/hydrilla/proxy/state_impl/_operations/load_packages.py b/src/hydrilla/proxy/state_impl/_operations/load_packages.py new file mode 100644 index 0000000..c294ef0 --- /dev/null +++ b/src/hydrilla/proxy/state_impl/_operations/load_packages.py @@ -0,0 +1,378 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Haketilo proxy data and configuration (import of packages from disk files). +# +# This file is part of Hydrilla&Haketilo. +# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +""" +.... +""" + +# Enable using with Python 3.7. +from __future__ import annotations + +import io +import hashlib +import dataclasses as dc +import typing as t + +from pathlib import Path +from abc import ABC, abstractmethod + +import sqlite3 + +from ....exceptions import HaketiloException +from ....translations import smart_gettext as _ +from .... import versions +from .... import item_infos + + +def make_repo_iteration(cursor: sqlite3.Cursor, repo_id: int) -> int: + cursor.execute( + ''' + SELECT + next_iteration + FROM + repos + WHERE + repo_id = ?; + ''', + (repo_id,) + ) + + (next_iteration,), = cursor.fetchall() + + cursor.execute( + ''' + UPDATE + repos + SET + next_iteration = ? + WHERE + repo_id = ?; + ''', + (next_iteration + 1, repo_id) + ) + + cursor.execute( + ''' + INSERT INTO repo_iterations(repo_id, iteration) + VALUES(?, ?); + ''', + (repo_id, next_iteration) + ) + + cursor.execute( + ''' + SELECT + repo_iteration_id + FROM + repo_iterations + WHERE + repo_id = ? AND iteration = ?; + ''', + (repo_id, next_iteration) + ) + + (repo_iteration_id,), = cursor.fetchall() + + return repo_iteration_id + +def get_or_make_item(cursor: sqlite3.Cursor, type: str, identifier: str) -> int: + type_letter = {'resource': 'R', 'mapping': 'M'}[type] + + cursor.execute( + ''' + INSERT OR IGNORE INTO items(type, identifier) + VALUES(?, ?); + ''', + (type_letter, identifier) + ) + + cursor.execute( + ''' + SELECT + item_id + FROM + items + WHERE + type = ? AND identifier = ?; + ''', + (type_letter, identifier) + ) + + (item_id,), = cursor.fetchall() + + return item_id + +def get_or_make_item_version( + cursor: sqlite3.Cursor, + item_id: int, + repo_iteration_id: int, + version: versions.VerTuple, + definition: bytes +) -> int: + ver_str = versions.version_string(version) + + cursor.execute( + ''' + INSERT OR IGNORE INTO item_versions( + item_id, + version, + repo_iteration_id, + definition + ) + VALUES(?, ?, ?, ?); + ''', + (item_id, ver_str, repo_iteration_id, definition) + ) + + cursor.execute( + ''' + SELECT + item_version_id + FROM + item_versions + WHERE + item_id = ? AND version = ? AND repo_iteration_id = ?; + ''', + (item_id, ver_str, repo_iteration_id) + ) + + (item_version_id,), = cursor.fetchall() + + return item_version_id + +def make_mapping_status(cursor: sqlite3.Cursor, item_id: int) -> None: + cursor.execute( + ''' + INSERT OR IGNORE INTO mapping_statuses(item_id, enabled, required) + VALUES(?, 'N', FALSE); + ''', + (item_id,) + ) + +def get_or_make_file(cursor: sqlite3.Cursor, sha256: str, file_bytes: bytes) \ + -> int: + cursor.execute( + ''' + INSERT OR IGNORE INTO files(sha256, data) + VALUES(?, ?) + ''', + (sha256, file_bytes) + ) + + cursor.execute( + ''' + SELECT + file_id + FROM + files + WHERE + sha256 = ?; + ''', + (sha256,) + ) + + (file_id,), = cursor.fetchall() + + return file_id + +def make_file_use( + cursor: sqlite3.Cursor, + item_version_id: int, + file_id: int, + name: str, + type: str, + mime_type: str, + idx: int +) -> None: + cursor.execute( + ''' + INSERT OR IGNORE INTO file_uses( + item_version_id, + file_id, + name, + type, + mime_type, + idx + ) + VALUES(?, ?, ?, ?, ?, ?); + ''', + (item_version_id, file_id, name, type, mime_type, idx) + ) + +@dc.dataclass(frozen=True) +class _FileInfo: + id: int + is_ascii: bool + +class FileResolver(ABC): + @abstractmethod + def by_sha256(self, sha256: str) -> bytes: + ... + +def _add_item( + cursor: sqlite3.Cursor, + package_file_resolver: FileResolver, + info: item_infos.AnyInfo, + definition: bytes, + repo_iteration_id: int +) -> None: + item_id = get_or_make_item(cursor, info.type_name, info.identifier) + + item_version_id = get_or_make_item_version( + cursor, + item_id, + repo_iteration_id, + info.version, + definition + ) + + if isinstance(info, item_infos.MappingInfo): + make_mapping_status(cursor, item_id) + + file_infos = {} + + file_specifiers = [*info.source_copyright] + if isinstance(info, item_infos.ResourceInfo): + file_specifiers.extend(info.scripts) + + for file_spec in file_specifiers: + file_bytes = package_file_resolver.by_sha256(file_spec.sha256) + + sha256 = hashlib.sha256(file_bytes).digest().hex() + if sha256 != file_spec.sha256: + fmt = _('err.proxy.file_hash_mismatched_{item_identifier}_{file_name}_{expected_sha256}_{actual_sha256}') + msg = fmt.format( + item_identifier = info.identifier, + file_name = file_spec.name, + expected_sha256 = file_spec.sha256, + actual_sha256 = sha256 + ) + raise HaketiloException(msg) + + file_id = get_or_make_file(cursor, sha256, file_bytes) + + file_infos[sha256] = _FileInfo(file_id, file_bytes.isascii()) + + for idx, file_spec in enumerate(info.source_copyright): + file_info = file_infos[file_spec.sha256] + if file_info.is_ascii: + mime = 'text/plain' + else: + mime = 'application/octet-stream' + + make_file_use( + cursor, + item_version_id = item_version_id, + file_id = file_info.id, + name = file_spec.name, + type = 'L', + mime_type = mime, + idx = idx + ) + + if isinstance(info, item_infos.MappingInfo): + return + + for idx, file_spec in enumerate(info.scripts): + file_info = file_infos[file_spec.sha256] + make_file_use( + cursor, + item_version_id = item_version_id, + file_id = file_info.id, + name = file_spec.name, + type = 'W', + mime_type = 'application/javascript', + idx = idx + ) + +AnyInfoVar = t.TypeVar( + 'AnyInfoVar', + item_infos.ResourceInfo, + item_infos.MappingInfo +) + +def _read_items(malcontent_path: Path, item_class: t.Type[AnyInfoVar]) \ + -> t.Iterator[tuple[AnyInfoVar, bytes]]: + item_type_path = malcontent_path / item_class.type_name + if not item_type_path.is_dir(): + return + + for item_path in item_type_path.iterdir(): + if not item_path.is_dir(): + continue + + for item_version_path in item_path.iterdir(): + definition = item_version_path.read_bytes() + item_info = item_class.load(definition) + + assert item_info.identifier == item_path.name + assert versions.version_string(item_info.version) == \ + item_version_path.name + + yield item_info, definition + +@dc.dataclass(frozen=True) +class MalcontentFileResolver(FileResolver): + malcontent_dir_path: Path + + def by_sha256(self, sha256: str) -> bytes: + file_path = self.malcontent_dir_path / 'file' / 'sha256' / sha256 + if not file_path.is_file(): + fmt = _('err.proxy.file_missing_{sha256}') + raise HaketiloException(fmt.format(sha256=sha256)) + + return file_path.read_bytes() + +def load_packages( + cursor: sqlite3.Cursor, + malcontent_path: Path, + repo_id: int, + package_file_resolver: t.Optional[FileResolver] = None +) -> int: + if package_file_resolver is None: + package_file_resolver = MalcontentFileResolver(malcontent_path) + + repo_iteration_id = make_repo_iteration(cursor, repo_id) + + types: t.Iterable[t.Type[item_infos.AnyInfo]] = \ + [item_infos.ResourceInfo, item_infos.MappingInfo] + + for info_type in types: + info: item_infos.AnyInfo + + for info, definition in _read_items( # type: ignore + malcontent_path, + info_type + ): + _add_item( + cursor, + package_file_resolver, + info, + definition, + repo_iteration_id + ) + + return repo_iteration_id diff --git a/src/hydrilla/proxy/state_impl/_operations/prune_packages.py b/src/hydrilla/proxy/state_impl/_operations/prune_packages.py new file mode 100644 index 0000000..9c2b1d7 --- /dev/null +++ b/src/hydrilla/proxy/state_impl/_operations/prune_packages.py @@ -0,0 +1,169 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Haketilo proxy data and configuration (removal of packages that are not used). +# +# This file is part of Hydrilla&Haketilo. +# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +""" +.... +""" + +# Enable using with Python 3.7. +from __future__ import annotations + +import sqlite3 + +from pathlib import Path + + +_remove_mapping_versions_sqls = [ + ''' + CREATE TEMPORARY TABLE removed_mappings( + item_version_id INTEGER PRIMARY KEY + ); + ''', ''' + INSERT INTO + removed_mappings + SELECT + iv.item_version_id + FROM + item_versions AS iv + JOIN items AS i USING (item_id) + JOIN mapping_statuses AS ms USING (item_id) + JOIN orphan_iterations AS oi USING (repo_iteration_id) + WHERE + NOT ms.required; + ''', ''' + UPDATE + mapping_statuses + SET + active_version_id = NULL + WHERE + active_version_id IN removed_mappings; + ''', ''' + DELETE FROM + item_versions + WHERE + item_version_id IN removed_mappings; + ''', ''' + DROP TABLE removed_mappings; + ''' +] + +_remove_resource_versions_sql = ''' +WITH removed_resources AS ( + SELECT + iv.item_version_id + FROM + item_versions AS iv + JOIN items AS i + USING (item_id) + JOIN orphan_iterations AS oi + USING (repo_iteration_id) + LEFT JOIN resolved_depended_resources AS rdr + ON rdr.resource_item_id = iv.item_version_id + WHERE + rdr.payload_id IS NULL +) +DELETE FROM + item_versions +WHERE + item_version_id IN removed_resources; +''' + +_remove_items_sql = ''' +WITH removed_items AS ( + SELECT + i.item_id + FROM + items AS i + LEFT JOIN item_versions AS iv USING (item_id) + LEFT JOIN mapping_statuses AS ms USING (item_id) + WHERE + iv.item_version_id IS NULL AND + i.type = 'R' OR ms.enabled = 'N' +) +DELETE FROM + items +WHERE + item_id IN removed_items; +''' + +_remove_files_sql = ''' +WITH removed_files AS ( + SELECT + f.file_id + FROM + files AS f + LEFT JOIN file_uses AS fu USING (file_id) + WHERE + fu.file_use_id IS NULL +) +DELETE FROM + files +WHERE + file_id IN removed_files; +''' + +_remove_repo_iterations_sql = ''' +WITH removed_iterations AS ( + SELECT + oi.repo_iteration_id + FROM + orphan_iterations AS oi + LEFT JOIN item_versions AS iv USING (repo_iteration_id) + WHERE + iv.item_version_id IS NULL +) +DELETE FROM + repo_iterations +WHERE + repo_iteration_id IN removed_iterations; +''' + +_remove_repos_sql = ''' +WITH removed_repos AS ( + SELECT + r.repo_id + FROM + repos AS r + LEFT JOIN repo_iterations AS ri USING (repo_id) + WHERE + r.deleted AND ri.repo_iteration_id IS NULL AND r.repo_id != 1 +) +DELETE FROM + repos +WHERE + repo_id IN removed_repos; +''' + +def prune_packages(cursor: sqlite3.Cursor) -> None: + assert cursor.connection.in_transaction + + for sql in _remove_mapping_versions_sqls: + cursor.execute(sql) + cursor.execute(_remove_resource_versions_sql) + cursor.execute(_remove_items_sql) + cursor.execute(_remove_files_sql) + cursor.execute(_remove_repo_iterations_sql) + cursor.execute(_remove_repos_sql) diff --git a/src/hydrilla/proxy/state_impl/_operations/recompute_dependencies.py b/src/hydrilla/proxy/state_impl/_operations/recompute_dependencies.py new file mode 100644 index 0000000..4093f12 --- /dev/null +++ b/src/hydrilla/proxy/state_impl/_operations/recompute_dependencies.py @@ -0,0 +1,223 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Haketilo proxy data and configuration (update of dependency tree in the db). +# +# This file is part of Hydrilla&Haketilo. +# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +""" +.... +""" + +# Enable using with Python 3.7. +from __future__ import annotations + +import typing as t + +import sqlite3 + +from .... import item_infos +from ... import simple_dependency_satisfying as sds + + +AnyInfoVar = t.TypeVar( + 'AnyInfoVar', + item_infos.ResourceInfo, + item_infos.MappingInfo +) + +def get_infos_of_type(cursor: sqlite3.Cursor, info_type: t.Type[AnyInfoVar],) \ + -> t.Mapping[int, AnyInfoVar]: + cursor.execute( + ''' + SELECT + i.item_id, iv.definition, r.name, ri.iteration + FROM + item_versions AS iv + JOIN items AS i USING (item_id) + JOIN repo_iterations AS ri USING (repo_iteration_id) + JOIN repos AS r USING (repo_id) + WHERE + i.type = ?; + ''', + (info_type.type_name[0].upper(),) + ) + + result: dict[int, AnyInfoVar] = {} + + for item_id, definition, repo_name, repo_iteration in cursor.fetchall(): + info = info_type.load(definition, repo_name, repo_iteration) + result[item_id] = info + + return result + +def _recompute_dependencies_no_state_update( + cursor: sqlite3.Cursor, + extra_requirements: t.Iterable[sds.MappingRequirement] +) -> None: + cursor.execute('DELETE FROM payloads;') + + ids_to_resources = get_infos_of_type(cursor, item_infos.ResourceInfo) + ids_to_mappings = get_infos_of_type(cursor, item_infos.MappingInfo) + + resources = ids_to_resources.items() + resources_to_ids = dict((info.identifier, id) for id, info in resources) + + mappings = ids_to_mappings.items() + mappings_to_ids = dict((info.identifier, id) for id, info in mappings) + + requirements = [*extra_requirements] + + cursor.execute( + ''' + SELECT + i.identifier + FROM + mapping_statuses AS ms + JOIN items AS i USING(item_id) + WHERE + ms.enabled = 'E' AND ms.frozen = 'N'; + ''' + ) + + for mapping_identifier, in cursor.fetchall(): + requirements.append(sds.MappingRequirement(mapping_identifier)) + + cursor.execute( + ''' + SELECT + active_version_id, frozen + FROM + mapping_statuses + WHERE + enabled = 'E' AND frozen IN ('R', 'E'); + ''' + ) + + for active_version_id, frozen in cursor.fetchall(): + info = ids_to_mappings[active_version_id] + + requirement: sds.MappingRequirement + + if frozen == 'R': + requirement = sds.MappingRepoRequirement(info.identifier, info.repo) + else: + requirement = sds.MappingVersionRequirement(info.identifier, info) + + requirements.append(requirement) + + mapping_choices = sds.compute_payloads( + ids_to_resources.values(), + ids_to_mappings.values(), + requirements + ) + + cursor.execute( + ''' + UPDATE + mapping_statuses + SET + required = FALSE, + active_version_id = NULL + WHERE + enabled != 'E'; + ''' + ) + + cursor.execute('DELETE FROM payloads;') + + for choice in mapping_choices.values(): + mapping_ver_id = mappings_to_ids[choice.info.identifier] + + cursor.execute( + ''' + SELECT + item_id + FROM + item_versions + WHERE + item_version_id = ?; + ''', + (mapping_ver_id,) + ) + + (mapping_item_id,), = cursor.fetchall() + + cursor.execute( + ''' + UPDATE + mapping_statuses + SET + required = ?, + active_version_id = ? + WHERE + item_id = ?; + ''', + (choice.required, mapping_ver_id, mapping_item_id) + ) + + for num, (pattern, payload) in enumerate(choice.payloads.items()): + cursor.execute( + ''' + INSERT INTO payloads( + mapping_item_id, + pattern, + eval_allowed, + cors_bypass_allowed + ) + VALUES (?, ?, ?, ?); + ''', + ( + mapping_ver_id, + pattern.orig_url, + payload.allows_eval, + payload.allows_cors_bypass + ) + ) + + cursor.execute( + ''' + SELECT + payload_id + FROM + payloads + WHERE + mapping_item_id = ? AND pattern = ?; + ''', + (mapping_ver_id, pattern.orig_url) + ) + + (payload_id,), = cursor.fetchall() + + for res_num, resource_info in enumerate(payload.resources): + resource_ver_id = resources_to_ids[resource_info.identifier] + cursor.execute( + ''' + INSERT INTO resolved_depended_resources( + payload_id, + resource_item_id, + idx + ) + VALUES(?, ?, ?); + ''', + (payload_id, resource_ver_id, res_num) + ) diff --git a/src/hydrilla/proxy/state_impl/base.py b/src/hydrilla/proxy/state_impl/base.py index 1ae08cb..92833dd 100644 --- a/src/hydrilla/proxy/state_impl/base.py +++ b/src/hydrilla/proxy/state_impl/base.py @@ -34,34 +34,160 @@ subtype. from __future__ import annotations import sqlite3 +import secrets import threading import dataclasses as dc import typing as t from pathlib import Path from contextlib import contextmanager - -import sqlite3 +from abc import abstractmethod from immutables import Map +from ... import url_patterns from ... import pattern_tree -from .. import state +from .. import simple_dependency_satisfying as sds +from .. import state as st from .. import policies PolicyTree = pattern_tree.PatternTree[policies.PolicyFactory] -PayloadsData = t.Mapping[state.PayloadRef, state.PayloadData] +PayloadsData = t.Mapping[st.PayloadRef, st.PayloadData] + +@dc.dataclass(frozen=True, unsafe_hash=True) +class ConcretePayloadRef(st.PayloadRef): + state: 'HaketiloStateWithFields' = dc.field(hash=False, compare=False) + + def get_data(self) -> st.PayloadData: + try: + return self.state.payloads_data[self] + except KeyError: + raise st.MissingItemError() + + def get_mapping(self) -> st.MappingVersionRef: + raise NotImplementedError() + + def get_script_paths(self) \ + -> t.Iterable[t.Sequence[str]]: + with self.state.cursor() as cursor: + cursor.execute( + ''' + SELECT + i.identifier, fu.name + FROM + payloads AS p + LEFT JOIN resolved_depended_resources AS rdd + USING (payload_id) + LEFT JOIN item_versions AS iv + ON rdd.resource_item_id = iv.item_version_id + LEFT JOIN items AS i + USING (item_id) + LEFT JOIN file_uses AS fu + USING (item_version_id) + WHERE + fu.type = 'W' AND + p.payload_id = ? AND + (fu.idx IS NOT NULL OR rdd.idx IS NULL) + ORDER BY + rdd.idx, fu.idx; + ''', + (self.id,) + ) + + paths: list[t.Sequence[str]] = [] + for resource_identifier, file_name in cursor.fetchall(): + if resource_identifier is None: + # payload found but it had no script files + return () + + paths.append((resource_identifier, *file_name.split('/'))) + + if paths == []: + # payload not found + raise st.MissingItemError() + + return paths + + def get_file_data(self, path: t.Sequence[str]) \ + -> t.Optional[st.FileData]: + if len(path) == 0: + raise st.MissingItemError() + + resource_identifier, *file_name_segments = path + + file_name = '/'.join(file_name_segments) + + with self.state.cursor() as cursor: + cursor.execute( + ''' + SELECT + f.data, fu.mime_type + FROM + payloads AS p + JOIN resolved_depended_resources AS rdd + USING (payload_id) + JOIN item_versions AS iv + ON rdd.resource_item_id = iv.item_version_id + JOIN items AS i + USING (item_id) + JOIN file_uses AS fu + USING (item_version_id) + JOIN files AS f + USING (file_id) + WHERE + p.payload_id = ? AND + i.identifier = ? AND + fu.name = ? AND + fu.type = 'W'; + ''', + (self.id, resource_identifier, file_name) + ) + + result = cursor.fetchall() + + if result == []: + return None + + (data, mime_type), = result + + return st.FileData(type=mime_type, name=file_name, contents=data) + +def register_payload( + policy_tree: PolicyTree, + pattern: url_patterns.ParsedPattern, + payload_key: st.PayloadKey, + token: str +) -> PolicyTree: + """....""" + payload_policy_factory = policies.PayloadPolicyFactory( + builtin = False, + payload_key = payload_key + ) + + policy_tree = policy_tree.register(pattern, payload_policy_factory) + + resource_policy_factory = policies.PayloadResourcePolicyFactory( + builtin = False, + payload_key = payload_key + ) + + policy_tree = policy_tree.register( + pattern.path_append(token, '***'), + resource_policy_factory + ) + + return policy_tree # mypy needs to be corrected: # https://stackoverflow.com/questions/70999513/conflict-between-mix-ins-for-abstract-dataclasses/70999704#70999704 @dc.dataclass # type: ignore[misc] -class HaketiloStateWithFields(state.HaketiloState): +class HaketiloStateWithFields(st.HaketiloState): """....""" store_dir: Path connection: sqlite3.Connection current_cursor: t.Optional[sqlite3.Cursor] = None - #settings: state.HaketiloGlobalSettings + #settings: st.HaketiloGlobalSettings policy_tree: PolicyTree = PolicyTree() payloads_data: PayloadsData = dc.field(default_factory=dict) @@ -98,6 +224,85 @@ class HaketiloStateWithFields(state.HaketiloState): finally: self.current_cursor = None - def recompute_payloads(self, cursor: sqlite3.Cursor) -> None: + def rebuild_structures(self) -> None: + """ + Recreation of data structures as done after every recomputation of + dependencies as well as at startup. + """ + with self.cursor(transaction=True) as cursor: + cursor.execute( + ''' + SELECT + p.payload_id, p.pattern, p.eval_allowed, + p.cors_bypass_allowed, + ms.enabled, + i.identifier + FROM + payloads AS p + JOIN item_versions AS iv + ON p.mapping_item_id = iv.item_version_id + JOIN items AS i USING (item_id) + JOIN mapping_statuses AS ms USING (item_id); + ''' + ) + + rows = cursor.fetchall() + + new_policy_tree = PolicyTree() + + ui_factory = policies.WebUIPolicyFactory(builtin=True) + web_ui_pattern = 'http*://hkt.mitm.it/***' + for parsed_pattern in url_patterns.parse_pattern(web_ui_pattern): + new_policy_tree = new_policy_tree.register( + parsed_pattern, + ui_factory + ) + + new_payloads_data: dict[st.PayloadRef, st.PayloadData] = {} + + for row in rows: + (payload_id_int, pattern, eval_allowed, cors_bypass_allowed, + enabled_status, + identifier) = row + + payload_ref = ConcretePayloadRef(str(payload_id_int), self) + + previous_data = self.payloads_data.get(payload_ref) + if previous_data is not None: + token = previous_data.unique_token + else: + token = secrets.token_urlsafe(8) + + payload_key = st.PayloadKey(payload_ref, identifier) + + for parsed_pattern in url_patterns.parse_pattern(pattern): + new_policy_tree = register_payload( + new_policy_tree, + parsed_pattern, + payload_key, + token + ) + + pattern_path_segments = parsed_pattern.path_segments + + payload_data = st.PayloadData( + payload_ref = payload_ref, + explicitly_enabled = enabled_status == 'E', + unique_token = token, + pattern_path_segments = pattern_path_segments, + eval_allowed = eval_allowed, + cors_bypass_allowed = cors_bypass_allowed + ) + + new_payloads_data[payload_ref] = payload_data + + self.policy_tree = new_policy_tree + self.payloads_data = new_payloads_data + + @abstractmethod + def recompute_dependencies( + self, + requirements: t.Iterable[sds.MappingRequirement] = [] + ) -> None: """....""" ... diff --git a/src/hydrilla/proxy/state_impl/concrete_state.py b/src/hydrilla/proxy/state_impl/concrete_state.py index 525a702..6a59a75 100644 --- a/src/hydrilla/proxy/state_impl/concrete_state.py +++ b/src/hydrilla/proxy/state_impl/concrete_state.py @@ -32,28 +32,23 @@ and resources. # Enable using with Python 3.7. from __future__ import annotations -import secrets -import io -import hashlib +import sqlite3 import typing as t import dataclasses as dc from pathlib import Path -import sqlite3 - from ...exceptions import HaketiloException from ...translations import smart_gettext as _ -from ... import pattern_tree from ... import url_patterns from ... import item_infos -from ..simple_dependency_satisfying import compute_payloads, ComputedPayload from .. import state as st from .. import policies +from .. import simple_dependency_satisfying as sds from . import base from . import mappings from . import repos -from .load_packages import load_packages +from . import _operations here = Path(__file__).resolve().parent @@ -73,169 +68,14 @@ class ConcreteResourceRef(st.ResourceRef): class ConcreteResourceVersionRef(st.ResourceVersionRef): pass - -@dc.dataclass(frozen=True, unsafe_hash=True) -class ConcretePayloadRef(st.PayloadRef): - state: base.HaketiloStateWithFields = dc.field(hash=False, compare=False) - - def get_data(self) -> st.PayloadData: - try: - return self.state.payloads_data[self] - except KeyError: - raise st.MissingItemError() - - def get_mapping(self) -> st.MappingVersionRef: - raise NotImplementedError() - - def get_script_paths(self) \ - -> t.Iterable[t.Sequence[str]]: - with self.state.cursor() as cursor: - cursor.execute( - ''' - SELECT - i.identifier, fu.name - FROM - payloads AS p - LEFT JOIN resolved_depended_resources AS rdd - USING (payload_id) - LEFT JOIN item_versions AS iv - ON rdd.resource_item_id = iv.item_version_id - LEFT JOIN items AS i - USING (item_id) - LEFT JOIN file_uses AS fu - USING (item_version_id) - WHERE - fu.type = 'W' AND - p.payload_id = ? AND - (fu.idx IS NOT NULL OR rdd.idx IS NULL) - ORDER BY - rdd.idx, fu.idx; - ''', - (self.id,) - ) - - paths: list[t.Sequence[str]] = [] - for resource_identifier, file_name in cursor.fetchall(): - if resource_identifier is None: - # payload found but it had no script files - return () - - paths.append((resource_identifier, *file_name.split('/'))) - - if paths == []: - # payload not found - raise st.MissingItemError() - - return paths - - def get_file_data(self, path: t.Sequence[str]) \ - -> t.Optional[st.FileData]: - if len(path) == 0: - raise st.MissingItemError() - - resource_identifier, *file_name_segments = path - - file_name = '/'.join(file_name_segments) - - with self.state.cursor() as cursor: - cursor.execute( - ''' - SELECT - f.data, fu.mime_type - FROM - payloads AS p - JOIN resolved_depended_resources AS rdd - USING (payload_id) - JOIN item_versions AS iv - ON rdd.resource_item_id = iv.item_version_id - JOIN items AS i - USING (item_id) - JOIN file_uses AS fu - USING (item_version_id) - JOIN files AS f - USING (file_id) - WHERE - p.payload_id = ? AND - i.identifier = ? AND - fu.name = ? AND - fu.type = 'W'; - ''', - (self.id, resource_identifier, file_name) - ) - - result = cursor.fetchall() - - if result == []: - return None - - (data, mime_type), = result - - return st.FileData(type=mime_type, name=file_name, contents=data) - -def register_payload( - policy_tree: base.PolicyTree, - pattern: url_patterns.ParsedPattern, - payload_key: st.PayloadKey, - token: str -) -> base.PolicyTree: - """....""" - payload_policy_factory = policies.PayloadPolicyFactory( - builtin = False, - payload_key = payload_key - ) - - policy_tree = policy_tree.register(pattern, payload_policy_factory) - - resource_policy_factory = policies.PayloadResourcePolicyFactory( - builtin = False, - payload_key = payload_key - ) - - policy_tree = policy_tree.register( - pattern.path_append(token, '***'), - resource_policy_factory - ) - - return policy_tree - -AnyInfoVar = t.TypeVar( - 'AnyInfoVar', - item_infos.ResourceInfo, - item_infos.MappingInfo -) - -def get_infos_of_type(cursor: sqlite3.Cursor, info_type: t.Type[AnyInfoVar],) \ - -> t.Mapping[AnyInfoVar, int]: - cursor.execute( - ''' - SELECT - i.item_id, iv.definition, r.name, ri.iteration - FROM - item_versions AS iv - JOIN items AS i USING (item_id) - JOIN repo_iterations AS ri USING (repo_iteration_id) - JOIN repos AS r USING (repo_id) - WHERE - i.type = ?; - ''', - (info_type.type_name[0].upper(),) - ) - - result: dict[AnyInfoVar, int] = {} - - for item_id, definition, repo_name, repo_iteration in cursor.fetchall(): - definition_io = io.StringIO(definition) - info = info_type.load(definition_io, repo_name, repo_iteration) - result[info] = item_id - - return result - @dc.dataclass class ConcreteHaketiloState(base.HaketiloStateWithFields): def __post_init__(self) -> None: + sqlite3.enable_callback_tracebacks(True) + self._prepare_database() - self._rebuild_structures() + self.rebuild_structures() def _prepare_database(self) -> None: """....""" @@ -282,147 +122,25 @@ class ConcreteHaketiloState(base.HaketiloStateWithFields): def import_packages(self, malcontent_path: Path) -> None: with self.cursor(transaction=True) as cursor: - load_packages(self, cursor, malcontent_path) - self.recompute_payloads(cursor) - - def recompute_payloads(self, cursor: sqlite3.Cursor) -> None: - assert self.connection.in_transaction - - resources = get_infos_of_type(cursor, item_infos.ResourceInfo) - mappings = get_infos_of_type(cursor, item_infos.MappingInfo) - - payloads = compute_payloads(resources.keys(), mappings.keys()) - - payloads_data: dict[st.PayloadRef, st.PayloadData] = {} - - cursor.execute('DELETE FROM payloads;') - - for mapping_info, by_pattern in payloads.items(): - for num, (pattern, payload) in enumerate(by_pattern.items()): - cursor.execute( - ''' - INSERT INTO payloads( - mapping_item_id, - pattern, - eval_allowed, - cors_bypass_allowed - ) - VALUES (?, ?, ?, ?); - ''', - ( - mappings[mapping_info], - pattern.orig_url, - payload.allows_eval, - payload.allows_cors_bypass - ) - ) + _operations.load_packages(cursor, malcontent_path, 1) + raise NotImplementedError() + _operations.prune_packages(cursor) - cursor.execute( - ''' - SELECT - payload_id - FROM - payloads - WHERE - mapping_item_id = ? AND pattern = ?; - ''', - (mappings[mapping_info], pattern.orig_url) - ) + self.recompute_dependencies() - (payload_id_int,), = cursor.fetchall() - - for res_num, resource_info in enumerate(payload.resources): - cursor.execute( - ''' - INSERT INTO resolved_depended_resources( - payload_id, - resource_item_id, - idx - ) - VALUES(?, ?, ?); - ''', - (payload_id_int, resources[resource_info], res_num) - ) - - self._rebuild_structures(cursor) - - def _rebuild_structures(self, cursor: t.Optional[sqlite3.Cursor] = None) \ - -> None: - """ - Recreation of data structures as done after every recomputation of - dependencies as well as at startup. - """ - if cursor is None: - with self.cursor() as new_cursor: - return self._rebuild_structures(new_cursor) - - cursor.execute( - ''' - SELECT - p.payload_id, p.pattern, p.eval_allowed, - p.cors_bypass_allowed, - ms.enabled, - i.identifier - FROM - payloads AS p - JOIN item_versions AS iv - ON p.mapping_item_id = iv.item_version_id - JOIN items AS i USING (item_id) - JOIN mapping_statuses AS ms USING (item_id); - ''' - ) - - new_policy_tree = base.PolicyTree() - - ui_factory = policies.WebUIPolicyFactory(builtin=True) - web_ui_pattern = 'http*://hkt.mitm.it/***' - for parsed_pattern in url_patterns.parse_pattern(web_ui_pattern): - new_policy_tree = new_policy_tree.register( - parsed_pattern, - ui_factory - ) - - new_payloads_data: dict[st.PayloadRef, st.PayloadData] = {} - - for row in cursor.fetchall(): - (payload_id_int, pattern, eval_allowed, cors_bypass_allowed, - enabled_status, - identifier) = row - - payload_ref = ConcretePayloadRef(str(payload_id_int), self) - - previous_data = self.payloads_data.get(payload_ref) - if previous_data is not None: - token = previous_data.unique_token - else: - token = secrets.token_urlsafe(8) - - payload_key = st.PayloadKey(payload_ref, identifier) - - for parsed_pattern in url_patterns.parse_pattern(pattern): - new_policy_tree = register_payload( - new_policy_tree, - parsed_pattern, - payload_key, - token - ) - - pattern_path_segments = parsed_pattern.path_segments + def recompute_dependencies( + self, + extra_requirements: t.Iterable[sds.MappingRequirement] = [] + ) -> None: + with self.cursor() as cursor: + assert self.connection.in_transaction - payload_data = st.PayloadData( - payload_ref = payload_ref, - #explicitly_enabled = enabled_status == 'E', - explicitly_enabled = True, - unique_token = token, - pattern_path_segments = pattern_path_segments, - eval_allowed = eval_allowed, - cors_bypass_allowed = cors_bypass_allowed + _operations._recompute_dependencies_no_state_update( + cursor, + extra_requirements ) - new_payloads_data[payload_ref] = payload_data - - self.policy_tree = new_policy_tree - self.payloads_data = new_payloads_data + self.rebuild_structures() def repo_store(self) -> st.RepoStore: return repos.ConcreteRepoStore(self) diff --git a/src/hydrilla/proxy/state_impl/load_packages.py b/src/hydrilla/proxy/state_impl/load_packages.py deleted file mode 100644 index 6983c3e..0000000 --- a/src/hydrilla/proxy/state_impl/load_packages.py +++ /dev/null @@ -1,344 +0,0 @@ -# SPDX-License-Identifier: GPL-3.0-or-later - -# Haketilo proxy data and configuration (import of packages from disk files). -# -# This file is part of Hydrilla&Haketilo. -# -# Copyright (C) 2022 Wojtek Kosior -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -# -# -# I, Wojtek Kosior, thereby promise not to sue for violation of this -# file's license. Although I request that you do not make use this code -# in a proprietary program, I am not going to enforce this in court. - -""" -.... -""" - -# Enable using with Python 3.7. -from __future__ import annotations - -import io -import hashlib -import dataclasses as dc -import typing as t - -from pathlib import Path - -import sqlite3 - -from ...exceptions import HaketiloException -from ...translations import smart_gettext as _ -from ... import versions -from ... import item_infos -from . import base - - -def get_or_make_repo_iteration(cursor: sqlite3.Cursor, repo_name: str) -> int: - cursor.execute( - ''' - SELECT - repo_id, next_iteration - 1 - FROM - repos - WHERE - name = ?; - ''', - (repo_name,) - ) - - (repo_id, last_iteration), = cursor.fetchall() - - cursor.execute( - ''' - INSERT OR IGNORE INTO repo_iterations(repo_id, iteration) - VALUES(?, ?); - ''', - (repo_id, last_iteration) - ) - - cursor.execute( - ''' - SELECT - repo_iteration_id - FROM - repo_iterations - WHERE - repo_id = ? AND iteration = ?; - ''', - (repo_id, last_iteration) - ) - - (repo_iteration_id,), = cursor.fetchall() - - return repo_iteration_id - -def get_or_make_item(cursor: sqlite3.Cursor, type: str, identifier: str) -> int: - type_letter = {'resource': 'R', 'mapping': 'M'}[type] - - cursor.execute( - ''' - INSERT OR IGNORE INTO items(type, identifier) - VALUES(?, ?); - ''', - (type_letter, identifier) - ) - - cursor.execute( - ''' - SELECT - item_id - FROM - items - WHERE - type = ? AND identifier = ?; - ''', - (type_letter, identifier) - ) - - (item_id,), = cursor.fetchall() - - return item_id - -def get_or_make_item_version( - cursor: sqlite3.Cursor, - item_id: int, - repo_iteration_id: int, - version: versions.VerTuple, - definition: str -) -> int: - ver_str = versions.version_string(version) - - cursor.execute( - ''' - INSERT OR IGNORE INTO item_versions( - item_id, - version, - repo_iteration_id, - definition - ) - VALUES(?, ?, ?, ?); - ''', - (item_id, ver_str, repo_iteration_id, definition) - ) - - cursor.execute( - ''' - SELECT - item_version_id - FROM - item_versions - WHERE - item_id = ? AND version = ? AND repo_iteration_id = ?; - ''', - (item_id, ver_str, repo_iteration_id) - ) - - (item_version_id,), = cursor.fetchall() - - return item_version_id - -def make_mapping_status(cursor: sqlite3.Cursor, item_id: int) -> None: - cursor.execute( - ''' - INSERT OR IGNORE INTO mapping_statuses(item_id, enabled, frozen) - VALUES(?, 'E', 'R'); - ''', - (item_id,) - ) - -def get_or_make_file(cursor: sqlite3.Cursor, sha256: str, file_bytes: bytes) \ - -> int: - cursor.execute( - ''' - INSERT OR IGNORE INTO files(sha256, data) - VALUES(?, ?) - ''', - (sha256, file_bytes) - ) - - cursor.execute( - ''' - SELECT - file_id - FROM - files - WHERE - sha256 = ?; - ''', - (sha256,) - ) - - (file_id,), = cursor.fetchall() - - return file_id - -def make_file_use( - cursor: sqlite3.Cursor, - item_version_id: int, - file_id: int, - name: str, - type: str, - mime_type: str, - idx: int -) -> None: - cursor.execute( - ''' - INSERT OR IGNORE INTO file_uses( - item_version_id, - file_id, - name, - type, - mime_type, - idx - ) - VALUES(?, ?, ?, ?, ?, ?); - ''', - (item_version_id, file_id, name, type, mime_type, idx) - ) - -@dc.dataclass(frozen=True) -class _FileInfo: - id: int - is_ascii: bool - -def _add_item( - cursor: sqlite3.Cursor, - files_by_sha256_path: Path, - info: item_infos.AnyInfo, - definition: str -) -> None: - repo_iteration_id = get_or_make_repo_iteration(cursor, '') - - item_id = get_or_make_item(cursor, info.type_name, info.identifier) - - item_version_id = get_or_make_item_version( - cursor, - item_id, - repo_iteration_id, - info.version, - definition - ) - - if isinstance(info, item_infos.MappingInfo): - make_mapping_status(cursor, item_id) - - file_infos = {} - - file_specifiers = [*info.source_copyright] - if isinstance(info, item_infos.ResourceInfo): - file_specifiers.extend(info.scripts) - - for file_spec in file_specifiers: - file_path = files_by_sha256_path / file_spec.sha256 - if not file_path.is_file(): - fmt = _('err.proxy.file_missing_{item_identifier}_{file_name}_{sha256}') - msg = fmt.format( - item_identifier = info.identifier, - file_name = file_spec.name, - sha256 = file_spec.sha256 - ) - raise HaketiloException(msg) - - file_bytes = file_path.read_bytes() - - sha256 = hashlib.sha256(file_bytes).digest().hex() - if sha256 != file_spec.sha256: - fmt = _('err.proxy.file_hash_mismatched_{item_identifier}_{file_name}_{expected_sha256}_{actual_sha256}') - msg = fmt.format( - item_identifier = info.identifier, - file_name = file_spec.name, - expected_sha256 = file_spec.sha256, - actual_sha256 = sha256 - ) - raise HaketiloException(msg) - - file_id = get_or_make_file(cursor, sha256, file_bytes) - - file_infos[sha256] = _FileInfo(file_id, file_bytes.isascii()) - - for idx, file_spec in enumerate(info.source_copyright): - file_info = file_infos[file_spec.sha256] - if file_info.is_ascii: - mime = 'text/plain' - else: - mime = 'application/octet-stream' - - make_file_use( - cursor, - item_version_id = item_version_id, - file_id = file_info.id, - name = file_spec.name, - type = 'L', - mime_type = mime, - idx = idx - ) - - if isinstance(info, item_infos.MappingInfo): - return - - for idx, file_spec in enumerate(info.scripts): - file_info = file_infos[file_spec.sha256] - make_file_use( - cursor, - item_version_id = item_version_id, - file_id = file_info.id, - name = file_spec.name, - type = 'W', - mime_type = 'application/javascript', - idx = idx - ) - -AnyInfoVar = t.TypeVar( - 'AnyInfoVar', - item_infos.ResourceInfo, - item_infos.MappingInfo -) - -def _read_items(malcontent_path: Path, item_class: t.Type[AnyInfoVar]) \ - -> t.Iterator[tuple[AnyInfoVar, str]]: - item_type_path = malcontent_path / item_class.type_name - if not item_type_path.is_dir(): - return - - for item_path in item_type_path.iterdir(): - if not item_path.is_dir(): - continue - - for item_version_path in item_path.iterdir(): - definition = item_version_path.read_text() - item_info = item_class.load(io.StringIO(definition)) - - assert item_info.identifier == item_path.name - assert versions.version_string(item_info.version) == \ - item_version_path.name - - yield item_info, definition - -def load_packages( - state: base.HaketiloStateWithFields, - cursor: sqlite3.Cursor, - malcontent_path: Path -) -> None: - files_by_sha256_path = malcontent_path / 'file' / 'sha256' - - for info_type in [item_infos.ResourceInfo, item_infos.MappingInfo]: - info: item_infos.AnyInfo - for info, definition in _read_items( - malcontent_path, - info_type # type: ignore - ): - _add_item(cursor, files_by_sha256_path, info, definition) diff --git a/src/hydrilla/proxy/state_impl/mappings.py b/src/hydrilla/proxy/state_impl/mappings.py index 3668784..cce2a36 100644 --- a/src/hydrilla/proxy/state_impl/mappings.py +++ b/src/hydrilla/proxy/state_impl/mappings.py @@ -76,7 +76,7 @@ class ConcreteMappingVersionRef(st.MappingVersionRef): (status_letter, definition, repo, repo_iteration, is_orphan), = rows item_info = item_infos.MappingInfo.load( - io.StringIO(definition), + definition, repo, repo_iteration ) @@ -120,7 +120,7 @@ class ConcreteMappingVersionStore(st.MappingVersionStore): ref = ConcreteMappingVersionRef(str(item_version_id), self.state) item_info = item_infos.MappingInfo.load( - io.StringIO(definition), + definition, repo, repo_iteration ) diff --git a/src/hydrilla/proxy/state_impl/prune_packages.py b/src/hydrilla/proxy/state_impl/prune_packages.py deleted file mode 100644 index 1857188..0000000 --- a/src/hydrilla/proxy/state_impl/prune_packages.py +++ /dev/null @@ -1,152 +0,0 @@ -# SPDX-License-Identifier: GPL-3.0-or-later - -# Haketilo proxy data and configuration (removal of packages that are not used). -# -# This file is part of Hydrilla&Haketilo. -# -# Copyright (C) 2022 Wojtek Kosior -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -# -# -# I, Wojtek Kosior, thereby promise not to sue for violation of this -# file's license. Although I request that you do not make use this code -# in a proprietary program, I am not going to enforce this in court. - -""" -.... -""" - -# Enable using with Python 3.7. -from __future__ import annotations - -import sqlite3 - - -_remove_mapping_versions_sql = ''' -WITH removed_mappings AS ( - SELECT - iv.item_version_id - FROM - item_versions AS iv - JOIN items AS i - USING (item_id) - JOIN orphan_iterations AS oi - USING (repo_iteration_id) - LEFT JOIN payloads AS p - ON p.mapping_item_id = iv.item_version_id - WHERE - i.type = 'M' AND p.payload_id IS NULL -) -DELETE FROM - item_versions -WHERE - item_version_id IN removed_mappings; -''' - -_remove_resource_versions_sql = ''' -WITH removed_resources AS ( - SELECT - iv.item_version_id - FROM - item_versions AS iv - JOIN items AS i - USING (item_id) - JOIN orphan_iterations AS oi - USING (repo_iteration_id) - LEFT JOIN resolved_depended_resources AS rdr - ON rdr.resource_item_id = iv.item_version_id - WHERE - rdr.payload_id IS NULL -) -DELETE FROM - item_versions -WHERE - item_version_id IN removed_resources; -''' - -_remove_items_sql = ''' -WITH removed_items AS ( - SELECT - i.item_id - FROM - items AS i - LEFT JOIN item_versions AS iv USING (item_id) - LEFT JOIN mapping_statuses AS ms USING (item_id) - WHERE - iv.item_version_id IS NULL AND - i.type = 'R' OR ms.enabled = 'N' -) -DELETE FROM - items -WHERE - item_id IN removed_items; -''' - -_remove_files_sql = ''' -WITH removed_files AS ( - SELECT - f.file_id - FROM - files AS f - LEFT JOIN file_uses AS fu USING (file_id) - WHERE - fu.file_use_id IS NULL -) -DELETE FROM - files -WHERE - file_id IN removed_files; -''' - -_remove_repo_iterations_sql = ''' -WITH removed_iterations AS ( - SELECT - oi.repo_iteration_id - FROM - orphan_iterations AS oi - LEFT JOIN item_versions AS iv USING (repo_iteration_id) - WHERE - iv.item_version_id IS NULL -) -DELETE FROM - repo_iterations -WHERE - repo_iteration_id IN removed_iterations; -''' - -_remove_repos_sql = ''' -WITH removed_repos AS ( - SELECT - r.repo_id - FROM - repos AS r - LEFT JOIN repo_iterations AS ri USING (repo_id) - WHERE - r.deleted AND ri.repo_iteration_id IS NULL -) -DELETE FROM - repos -WHERE - repo_id IN removed_repos; -''' - -def prune(cursor: sqlite3.Cursor) -> None: - """....""" - cursor.execute(_remove_mapping_versions_sql) - cursor.execute(_remove_resource_versions_sql) - cursor.execute(_remove_items_sql) - cursor.execute(_remove_files_sql) - cursor.execute(_remove_repo_iterations_sql) - cursor.execute(_remove_repos_sql) diff --git a/src/hydrilla/proxy/state_impl/repos.py b/src/hydrilla/proxy/state_impl/repos.py index 5553ec2..f4c7c71 100644 --- a/src/hydrilla/proxy/state_impl/repos.py +++ b/src/hydrilla/proxy/state_impl/repos.py @@ -33,20 +33,27 @@ inside Haketilo. from __future__ import annotations import re +import json +import tempfile +import requests +import sqlite3 import typing as t import dataclasses as dc -from urllib.parse import urlparse +from urllib.parse import urlparse, urljoin from datetime import datetime +from pathlib import Path -import sqlite3 - +from ... import json_instances +from ... import item_infos +from ... import versions from .. import state as st +from .. import simple_dependency_satisfying as sds from . import base -from . import prune_packages +from . import _operations -def validate_repo_url(url: str) -> None: +def sanitize_repo_url(url: str) -> str: try: parsed = urlparse(url) except: @@ -55,6 +62,11 @@ def validate_repo_url(url: str) -> None: if parsed.scheme not in ('http', 'https'): raise st.RepoUrlInvalid() + if url[-1] != '/': + url = url + '/' + + return url + def ensure_repo_not_deleted(cursor: sqlite3.Cursor, repo_id: str) -> None: cursor.execute( @@ -73,6 +85,53 @@ def ensure_repo_not_deleted(cursor: sqlite3.Cursor, repo_id: str) -> None: raise st.MissingItemError() +def sync_remote_repo_definitions(repo_url: str, dest: Path) -> None: + try: + list_all_response = requests.get(urljoin(repo_url, 'list_all')) + assert list_all_response.ok + + list_instance = list_all_response.json() + except: + raise st.RepoCommunicationError() + + try: + json_instances.validate_instance( + list_instance, + 'api_package_list-{}.schema.json' + ) + except json_instances.UnknownSchemaError: + raise st.RepoApiVersionUnsupported() + except: + raise st.RepoCommunicationError() + + ref: dict[str, t.Any] + + for item_type_name in ('resource', 'mapping'): + for ref in list_instance[item_type_name + 's']: + ver = versions.version_string(versions.normalize(ref['version'])) + item_rel_path = f'{item_type_name}/{ref["identifier"]}/{ver}' + + try: + item_response = requests.get(urljoin(repo_url, item_rel_path)) + assert item_response.ok + except: + raise st.RepoCommunicationError() + + item_path = dest / item_rel_path + item_path.parent.mkdir(parents=True, exist_ok=True) + item_path.write_bytes(item_response.content) + + +@dc.dataclass(frozen=True) +class RemoteFileResolver(_operations.FileResolver): + repo_url: str + + def by_sha256(self, sha256: str) -> bytes: + response = requests.get(urljoin(self.repo_url, f'file/sha256/{sha256}')) + assert response.ok + return response.content + + def make_repo_display_info( ref: st.RepoRef, name: str, @@ -122,6 +181,37 @@ class ConcreteRepoRef(st.RepoRef): (self.id,) ) + _operations.prune_packages(cursor) + + # For mappings explicitly enabled by the user (+ all mappings they + # recursively depend on) let's make sure that their exact same + # versions will be enabled after the change. + cursor.execute( + ''' + SELECT + iv.definition, r.name, ri.iteration + FROM + mapping_statuses AS ms + JOIN item_versions AS iv + ON ms.active_version_id = iv.item_version_id + JOIN repo_iterations AS ri + USING (repo_iteration_id) + JOIN repos AS r + USING (repo_id) + WHERE + ms.required + ''' + ) + + requirements = [] + + for definition, repo, iteration in cursor.fetchall(): + info = item_infos.MappingInfo.load(definition, repo, iteration) + req = sds.MappingVersionRequirement(info.identifier, info) + requirements.append(req) + + self.state.recompute_dependencies(requirements) + def update( self, *, @@ -134,7 +224,7 @@ class ConcreteRepoRef(st.RepoRef): if url is None: return - validate_repo_url(url) + url = sanitize_repo_url(url) with self.state.cursor(transaction=True) as cursor: ensure_repo_not_deleted(cursor, self.id) @@ -144,10 +234,30 @@ class ConcreteRepoRef(st.RepoRef): (url, self.id) ) - prune_packages.prune(cursor) - def refresh(self) -> st.RepoIterationRef: - raise NotImplementedError() + with self.state.cursor(transaction=True) as cursor: + ensure_repo_not_deleted(cursor, self.id) + + cursor.execute( + 'SELECT url from repos where repo_id = ?;', + (self.id,) + ) + + (repo_url,), = cursor.fetchall() + + with tempfile.TemporaryDirectory() as tmpdir_str: + tmpdir = Path(tmpdir_str) + sync_remote_repo_definitions(repo_url, tmpdir) + new_iteration_id = _operations.load_packages( + cursor, + tmpdir, + int(self.id), + RemoteFileResolver(repo_url) + ) + + self.state.recompute_dependencies() + + return ConcreteRepoIterationRef(str(new_iteration_id), self.state) def get_display_info(self) -> st.RepoDisplayInfo: with self.state.cursor() as cursor: @@ -199,7 +309,7 @@ class ConcreteRepoStore(st.RepoStore): if repo_name_regex.match(name) is None: raise st.RepoNameInvalid() - validate_repo_url(url) + url = sanitize_repo_url(url) with self.state.cursor(transaction=True) as cursor: cursor.execute( @@ -272,3 +382,8 @@ class ConcreteRepoStore(st.RepoStore): result.append(make_repo_display_info(ref, *rest)) return result + + +@dc.dataclass(frozen=True, unsafe_hash=True) +class ConcreteRepoIterationRef(st.RepoIterationRef): + state: base.HaketiloStateWithFields -- cgit v1.2.3