From 8238435825d01ad2ec1a11b6bcaf6d9a9aad5ab5 Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Mon, 22 Aug 2022 12:52:59 +0200 Subject: allow pulling packages from remote repository --- .../proxy/state_impl/_operations/__init__.py | 9 + .../proxy/state_impl/_operations/load_packages.py | 378 +++++++++++++++++++++ .../proxy/state_impl/_operations/prune_packages.py | 169 +++++++++ .../_operations/recompute_dependencies.py | 223 ++++++++++++ 4 files changed, 779 insertions(+) create mode 100644 src/hydrilla/proxy/state_impl/_operations/__init__.py create mode 100644 src/hydrilla/proxy/state_impl/_operations/load_packages.py create mode 100644 src/hydrilla/proxy/state_impl/_operations/prune_packages.py create mode 100644 src/hydrilla/proxy/state_impl/_operations/recompute_dependencies.py (limited to 'src/hydrilla/proxy/state_impl/_operations') diff --git a/src/hydrilla/proxy/state_impl/_operations/__init__.py b/src/hydrilla/proxy/state_impl/_operations/__init__.py new file mode 100644 index 0000000..c147be4 --- /dev/null +++ b/src/hydrilla/proxy/state_impl/_operations/__init__.py @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: CC0-1.0 + +# Copyright (C) 2022 Wojtek Kosior +# +# Available under the terms of Creative Commons Zero v1.0 Universal. + +from .load_packages import load_packages, FileResolver +from .prune_packages import prune_packages +from .recompute_dependencies import _recompute_dependencies_no_state_update diff --git a/src/hydrilla/proxy/state_impl/_operations/load_packages.py b/src/hydrilla/proxy/state_impl/_operations/load_packages.py new file mode 100644 index 0000000..c294ef0 --- /dev/null +++ b/src/hydrilla/proxy/state_impl/_operations/load_packages.py @@ -0,0 +1,378 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Haketilo proxy data and configuration (import of packages from disk files). +# +# This file is part of Hydrilla&Haketilo. +# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +""" +.... +""" + +# Enable using with Python 3.7. +from __future__ import annotations + +import io +import hashlib +import dataclasses as dc +import typing as t + +from pathlib import Path +from abc import ABC, abstractmethod + +import sqlite3 + +from ....exceptions import HaketiloException +from ....translations import smart_gettext as _ +from .... import versions +from .... import item_infos + + +def make_repo_iteration(cursor: sqlite3.Cursor, repo_id: int) -> int: + cursor.execute( + ''' + SELECT + next_iteration + FROM + repos + WHERE + repo_id = ?; + ''', + (repo_id,) + ) + + (next_iteration,), = cursor.fetchall() + + cursor.execute( + ''' + UPDATE + repos + SET + next_iteration = ? + WHERE + repo_id = ?; + ''', + (next_iteration + 1, repo_id) + ) + + cursor.execute( + ''' + INSERT INTO repo_iterations(repo_id, iteration) + VALUES(?, ?); + ''', + (repo_id, next_iteration) + ) + + cursor.execute( + ''' + SELECT + repo_iteration_id + FROM + repo_iterations + WHERE + repo_id = ? AND iteration = ?; + ''', + (repo_id, next_iteration) + ) + + (repo_iteration_id,), = cursor.fetchall() + + return repo_iteration_id + +def get_or_make_item(cursor: sqlite3.Cursor, type: str, identifier: str) -> int: + type_letter = {'resource': 'R', 'mapping': 'M'}[type] + + cursor.execute( + ''' + INSERT OR IGNORE INTO items(type, identifier) + VALUES(?, ?); + ''', + (type_letter, identifier) + ) + + cursor.execute( + ''' + SELECT + item_id + FROM + items + WHERE + type = ? AND identifier = ?; + ''', + (type_letter, identifier) + ) + + (item_id,), = cursor.fetchall() + + return item_id + +def get_or_make_item_version( + cursor: sqlite3.Cursor, + item_id: int, + repo_iteration_id: int, + version: versions.VerTuple, + definition: bytes +) -> int: + ver_str = versions.version_string(version) + + cursor.execute( + ''' + INSERT OR IGNORE INTO item_versions( + item_id, + version, + repo_iteration_id, + definition + ) + VALUES(?, ?, ?, ?); + ''', + (item_id, ver_str, repo_iteration_id, definition) + ) + + cursor.execute( + ''' + SELECT + item_version_id + FROM + item_versions + WHERE + item_id = ? AND version = ? AND repo_iteration_id = ?; + ''', + (item_id, ver_str, repo_iteration_id) + ) + + (item_version_id,), = cursor.fetchall() + + return item_version_id + +def make_mapping_status(cursor: sqlite3.Cursor, item_id: int) -> None: + cursor.execute( + ''' + INSERT OR IGNORE INTO mapping_statuses(item_id, enabled, required) + VALUES(?, 'N', FALSE); + ''', + (item_id,) + ) + +def get_or_make_file(cursor: sqlite3.Cursor, sha256: str, file_bytes: bytes) \ + -> int: + cursor.execute( + ''' + INSERT OR IGNORE INTO files(sha256, data) + VALUES(?, ?) + ''', + (sha256, file_bytes) + ) + + cursor.execute( + ''' + SELECT + file_id + FROM + files + WHERE + sha256 = ?; + ''', + (sha256,) + ) + + (file_id,), = cursor.fetchall() + + return file_id + +def make_file_use( + cursor: sqlite3.Cursor, + item_version_id: int, + file_id: int, + name: str, + type: str, + mime_type: str, + idx: int +) -> None: + cursor.execute( + ''' + INSERT OR IGNORE INTO file_uses( + item_version_id, + file_id, + name, + type, + mime_type, + idx + ) + VALUES(?, ?, ?, ?, ?, ?); + ''', + (item_version_id, file_id, name, type, mime_type, idx) + ) + +@dc.dataclass(frozen=True) +class _FileInfo: + id: int + is_ascii: bool + +class FileResolver(ABC): + @abstractmethod + def by_sha256(self, sha256: str) -> bytes: + ... + +def _add_item( + cursor: sqlite3.Cursor, + package_file_resolver: FileResolver, + info: item_infos.AnyInfo, + definition: bytes, + repo_iteration_id: int +) -> None: + item_id = get_or_make_item(cursor, info.type_name, info.identifier) + + item_version_id = get_or_make_item_version( + cursor, + item_id, + repo_iteration_id, + info.version, + definition + ) + + if isinstance(info, item_infos.MappingInfo): + make_mapping_status(cursor, item_id) + + file_infos = {} + + file_specifiers = [*info.source_copyright] + if isinstance(info, item_infos.ResourceInfo): + file_specifiers.extend(info.scripts) + + for file_spec in file_specifiers: + file_bytes = package_file_resolver.by_sha256(file_spec.sha256) + + sha256 = hashlib.sha256(file_bytes).digest().hex() + if sha256 != file_spec.sha256: + fmt = _('err.proxy.file_hash_mismatched_{item_identifier}_{file_name}_{expected_sha256}_{actual_sha256}') + msg = fmt.format( + item_identifier = info.identifier, + file_name = file_spec.name, + expected_sha256 = file_spec.sha256, + actual_sha256 = sha256 + ) + raise HaketiloException(msg) + + file_id = get_or_make_file(cursor, sha256, file_bytes) + + file_infos[sha256] = _FileInfo(file_id, file_bytes.isascii()) + + for idx, file_spec in enumerate(info.source_copyright): + file_info = file_infos[file_spec.sha256] + if file_info.is_ascii: + mime = 'text/plain' + else: + mime = 'application/octet-stream' + + make_file_use( + cursor, + item_version_id = item_version_id, + file_id = file_info.id, + name = file_spec.name, + type = 'L', + mime_type = mime, + idx = idx + ) + + if isinstance(info, item_infos.MappingInfo): + return + + for idx, file_spec in enumerate(info.scripts): + file_info = file_infos[file_spec.sha256] + make_file_use( + cursor, + item_version_id = item_version_id, + file_id = file_info.id, + name = file_spec.name, + type = 'W', + mime_type = 'application/javascript', + idx = idx + ) + +AnyInfoVar = t.TypeVar( + 'AnyInfoVar', + item_infos.ResourceInfo, + item_infos.MappingInfo +) + +def _read_items(malcontent_path: Path, item_class: t.Type[AnyInfoVar]) \ + -> t.Iterator[tuple[AnyInfoVar, bytes]]: + item_type_path = malcontent_path / item_class.type_name + if not item_type_path.is_dir(): + return + + for item_path in item_type_path.iterdir(): + if not item_path.is_dir(): + continue + + for item_version_path in item_path.iterdir(): + definition = item_version_path.read_bytes() + item_info = item_class.load(definition) + + assert item_info.identifier == item_path.name + assert versions.version_string(item_info.version) == \ + item_version_path.name + + yield item_info, definition + +@dc.dataclass(frozen=True) +class MalcontentFileResolver(FileResolver): + malcontent_dir_path: Path + + def by_sha256(self, sha256: str) -> bytes: + file_path = self.malcontent_dir_path / 'file' / 'sha256' / sha256 + if not file_path.is_file(): + fmt = _('err.proxy.file_missing_{sha256}') + raise HaketiloException(fmt.format(sha256=sha256)) + + return file_path.read_bytes() + +def load_packages( + cursor: sqlite3.Cursor, + malcontent_path: Path, + repo_id: int, + package_file_resolver: t.Optional[FileResolver] = None +) -> int: + if package_file_resolver is None: + package_file_resolver = MalcontentFileResolver(malcontent_path) + + repo_iteration_id = make_repo_iteration(cursor, repo_id) + + types: t.Iterable[t.Type[item_infos.AnyInfo]] = \ + [item_infos.ResourceInfo, item_infos.MappingInfo] + + for info_type in types: + info: item_infos.AnyInfo + + for info, definition in _read_items( # type: ignore + malcontent_path, + info_type + ): + _add_item( + cursor, + package_file_resolver, + info, + definition, + repo_iteration_id + ) + + return repo_iteration_id diff --git a/src/hydrilla/proxy/state_impl/_operations/prune_packages.py b/src/hydrilla/proxy/state_impl/_operations/prune_packages.py new file mode 100644 index 0000000..9c2b1d7 --- /dev/null +++ b/src/hydrilla/proxy/state_impl/_operations/prune_packages.py @@ -0,0 +1,169 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Haketilo proxy data and configuration (removal of packages that are not used). +# +# This file is part of Hydrilla&Haketilo. +# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +""" +.... +""" + +# Enable using with Python 3.7. +from __future__ import annotations + +import sqlite3 + +from pathlib import Path + + +_remove_mapping_versions_sqls = [ + ''' + CREATE TEMPORARY TABLE removed_mappings( + item_version_id INTEGER PRIMARY KEY + ); + ''', ''' + INSERT INTO + removed_mappings + SELECT + iv.item_version_id + FROM + item_versions AS iv + JOIN items AS i USING (item_id) + JOIN mapping_statuses AS ms USING (item_id) + JOIN orphan_iterations AS oi USING (repo_iteration_id) + WHERE + NOT ms.required; + ''', ''' + UPDATE + mapping_statuses + SET + active_version_id = NULL + WHERE + active_version_id IN removed_mappings; + ''', ''' + DELETE FROM + item_versions + WHERE + item_version_id IN removed_mappings; + ''', ''' + DROP TABLE removed_mappings; + ''' +] + +_remove_resource_versions_sql = ''' +WITH removed_resources AS ( + SELECT + iv.item_version_id + FROM + item_versions AS iv + JOIN items AS i + USING (item_id) + JOIN orphan_iterations AS oi + USING (repo_iteration_id) + LEFT JOIN resolved_depended_resources AS rdr + ON rdr.resource_item_id = iv.item_version_id + WHERE + rdr.payload_id IS NULL +) +DELETE FROM + item_versions +WHERE + item_version_id IN removed_resources; +''' + +_remove_items_sql = ''' +WITH removed_items AS ( + SELECT + i.item_id + FROM + items AS i + LEFT JOIN item_versions AS iv USING (item_id) + LEFT JOIN mapping_statuses AS ms USING (item_id) + WHERE + iv.item_version_id IS NULL AND + i.type = 'R' OR ms.enabled = 'N' +) +DELETE FROM + items +WHERE + item_id IN removed_items; +''' + +_remove_files_sql = ''' +WITH removed_files AS ( + SELECT + f.file_id + FROM + files AS f + LEFT JOIN file_uses AS fu USING (file_id) + WHERE + fu.file_use_id IS NULL +) +DELETE FROM + files +WHERE + file_id IN removed_files; +''' + +_remove_repo_iterations_sql = ''' +WITH removed_iterations AS ( + SELECT + oi.repo_iteration_id + FROM + orphan_iterations AS oi + LEFT JOIN item_versions AS iv USING (repo_iteration_id) + WHERE + iv.item_version_id IS NULL +) +DELETE FROM + repo_iterations +WHERE + repo_iteration_id IN removed_iterations; +''' + +_remove_repos_sql = ''' +WITH removed_repos AS ( + SELECT + r.repo_id + FROM + repos AS r + LEFT JOIN repo_iterations AS ri USING (repo_id) + WHERE + r.deleted AND ri.repo_iteration_id IS NULL AND r.repo_id != 1 +) +DELETE FROM + repos +WHERE + repo_id IN removed_repos; +''' + +def prune_packages(cursor: sqlite3.Cursor) -> None: + assert cursor.connection.in_transaction + + for sql in _remove_mapping_versions_sqls: + cursor.execute(sql) + cursor.execute(_remove_resource_versions_sql) + cursor.execute(_remove_items_sql) + cursor.execute(_remove_files_sql) + cursor.execute(_remove_repo_iterations_sql) + cursor.execute(_remove_repos_sql) diff --git a/src/hydrilla/proxy/state_impl/_operations/recompute_dependencies.py b/src/hydrilla/proxy/state_impl/_operations/recompute_dependencies.py new file mode 100644 index 0000000..4093f12 --- /dev/null +++ b/src/hydrilla/proxy/state_impl/_operations/recompute_dependencies.py @@ -0,0 +1,223 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Haketilo proxy data and configuration (update of dependency tree in the db). +# +# This file is part of Hydrilla&Haketilo. +# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +""" +.... +""" + +# Enable using with Python 3.7. +from __future__ import annotations + +import typing as t + +import sqlite3 + +from .... import item_infos +from ... import simple_dependency_satisfying as sds + + +AnyInfoVar = t.TypeVar( + 'AnyInfoVar', + item_infos.ResourceInfo, + item_infos.MappingInfo +) + +def get_infos_of_type(cursor: sqlite3.Cursor, info_type: t.Type[AnyInfoVar],) \ + -> t.Mapping[int, AnyInfoVar]: + cursor.execute( + ''' + SELECT + i.item_id, iv.definition, r.name, ri.iteration + FROM + item_versions AS iv + JOIN items AS i USING (item_id) + JOIN repo_iterations AS ri USING (repo_iteration_id) + JOIN repos AS r USING (repo_id) + WHERE + i.type = ?; + ''', + (info_type.type_name[0].upper(),) + ) + + result: dict[int, AnyInfoVar] = {} + + for item_id, definition, repo_name, repo_iteration in cursor.fetchall(): + info = info_type.load(definition, repo_name, repo_iteration) + result[item_id] = info + + return result + +def _recompute_dependencies_no_state_update( + cursor: sqlite3.Cursor, + extra_requirements: t.Iterable[sds.MappingRequirement] +) -> None: + cursor.execute('DELETE FROM payloads;') + + ids_to_resources = get_infos_of_type(cursor, item_infos.ResourceInfo) + ids_to_mappings = get_infos_of_type(cursor, item_infos.MappingInfo) + + resources = ids_to_resources.items() + resources_to_ids = dict((info.identifier, id) for id, info in resources) + + mappings = ids_to_mappings.items() + mappings_to_ids = dict((info.identifier, id) for id, info in mappings) + + requirements = [*extra_requirements] + + cursor.execute( + ''' + SELECT + i.identifier + FROM + mapping_statuses AS ms + JOIN items AS i USING(item_id) + WHERE + ms.enabled = 'E' AND ms.frozen = 'N'; + ''' + ) + + for mapping_identifier, in cursor.fetchall(): + requirements.append(sds.MappingRequirement(mapping_identifier)) + + cursor.execute( + ''' + SELECT + active_version_id, frozen + FROM + mapping_statuses + WHERE + enabled = 'E' AND frozen IN ('R', 'E'); + ''' + ) + + for active_version_id, frozen in cursor.fetchall(): + info = ids_to_mappings[active_version_id] + + requirement: sds.MappingRequirement + + if frozen == 'R': + requirement = sds.MappingRepoRequirement(info.identifier, info.repo) + else: + requirement = sds.MappingVersionRequirement(info.identifier, info) + + requirements.append(requirement) + + mapping_choices = sds.compute_payloads( + ids_to_resources.values(), + ids_to_mappings.values(), + requirements + ) + + cursor.execute( + ''' + UPDATE + mapping_statuses + SET + required = FALSE, + active_version_id = NULL + WHERE + enabled != 'E'; + ''' + ) + + cursor.execute('DELETE FROM payloads;') + + for choice in mapping_choices.values(): + mapping_ver_id = mappings_to_ids[choice.info.identifier] + + cursor.execute( + ''' + SELECT + item_id + FROM + item_versions + WHERE + item_version_id = ?; + ''', + (mapping_ver_id,) + ) + + (mapping_item_id,), = cursor.fetchall() + + cursor.execute( + ''' + UPDATE + mapping_statuses + SET + required = ?, + active_version_id = ? + WHERE + item_id = ?; + ''', + (choice.required, mapping_ver_id, mapping_item_id) + ) + + for num, (pattern, payload) in enumerate(choice.payloads.items()): + cursor.execute( + ''' + INSERT INTO payloads( + mapping_item_id, + pattern, + eval_allowed, + cors_bypass_allowed + ) + VALUES (?, ?, ?, ?); + ''', + ( + mapping_ver_id, + pattern.orig_url, + payload.allows_eval, + payload.allows_cors_bypass + ) + ) + + cursor.execute( + ''' + SELECT + payload_id + FROM + payloads + WHERE + mapping_item_id = ? AND pattern = ?; + ''', + (mapping_ver_id, pattern.orig_url) + ) + + (payload_id,), = cursor.fetchall() + + for res_num, resource_info in enumerate(payload.resources): + resource_ver_id = resources_to_ids[resource_info.identifier] + cursor.execute( + ''' + INSERT INTO resolved_depended_resources( + payload_id, + resource_item_id, + idx + ) + VALUES(?, ?, ?); + ''', + (payload_id, resource_ver_id, res_num) + ) -- cgit v1.2.3