From d516b9decad07b940b3cd117fc4e353dd8bbe7d2 Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Wed, 24 Aug 2022 10:47:33 +0200 Subject: make repo packages (mappings) load as uninstalled; make them installable through the web UI --- .../proxy/state_impl/_operations/__init__.py | 3 +- .../proxy/state_impl/_operations/load_packages.py | 124 ++++++++------------- .../proxy/state_impl/_operations/prune_packages.py | 20 ---- .../state_impl/_operations/pull_missing_files.py | 113 +++++++++++++++++++ .../_operations/recompute_dependencies.py | 64 ++--------- 5 files changed, 176 insertions(+), 148 deletions(-) create mode 100644 src/hydrilla/proxy/state_impl/_operations/pull_missing_files.py (limited to 'src/hydrilla/proxy/state_impl/_operations') diff --git a/src/hydrilla/proxy/state_impl/_operations/__init__.py b/src/hydrilla/proxy/state_impl/_operations/__init__.py index c147be4..ff34b0b 100644 --- a/src/hydrilla/proxy/state_impl/_operations/__init__.py +++ b/src/hydrilla/proxy/state_impl/_operations/__init__.py @@ -4,6 +4,7 @@ # # Available under the terms of Creative Commons Zero v1.0 Universal. -from .load_packages import load_packages, FileResolver from .prune_packages import prune_packages +from .pull_missing_files import pull_missing_files +from .load_packages import _load_packages_no_state_update from .recompute_dependencies import _recompute_dependencies_no_state_update diff --git a/src/hydrilla/proxy/state_impl/_operations/load_packages.py b/src/hydrilla/proxy/state_impl/_operations/load_packages.py index 78e8024..16d1154 100644 --- a/src/hydrilla/proxy/state_impl/_operations/load_packages.py +++ b/src/hydrilla/proxy/state_impl/_operations/load_packages.py @@ -32,20 +32,19 @@ from __future__ import annotations import io -import hashlib +import mimetypes +import sqlite3 import dataclasses as dc import typing as t -from pathlib import Path -from abc import ABC, abstractmethod - -import sqlite3 +from pathlib import Path, PurePosixPath -from ....exceptions import HaketiloException -from ....translations import smart_gettext as _ from .... import versions from .... import item_infos - +from ... import state +from .recompute_dependencies import _recompute_dependencies_no_state_update, \ + FileResolver +from .prune_packages import prune_packages def make_repo_iteration(cursor: sqlite3.Cursor, repo_id: int) -> int: cursor.execute( @@ -128,8 +127,9 @@ def get_or_make_item(cursor: sqlite3.Cursor, type: str, identifier: str) -> int: def make_item_version( cursor: sqlite3.Cursor, item_id: int, - repo_iteration_id: int, version: versions.VerTuple, + installed: str, + repo_iteration_id: int, definition: bytes ) -> int: ver_str = versions.version_string(version) @@ -143,9 +143,9 @@ def make_item_version( repo_iteration_id, definition ) - VALUES(?, ?, 'I', ?, ?); + VALUES(?, ?, ?, ?, ?); ''', - (item_id, ver_str, repo_iteration_id, definition) + (item_id, ver_str, installed, repo_iteration_id, definition) ) cursor.execute( @@ -173,27 +173,10 @@ def make_mapping_status(cursor: sqlite3.Cursor, item_id: int) -> None: (item_id,) ) -def get_or_make_file(cursor: sqlite3.Cursor, sha256: str, file_bytes: bytes) \ - -> int: - cursor.execute( - ''' - INSERT OR IGNORE INTO files(sha256, data) - VALUES(?, ?) - ''', - (sha256, file_bytes) - ) +def get_or_make_file(cursor: sqlite3.Cursor, sha256: str) -> int: + cursor.execute('INSERT OR IGNORE INTO files(sha256) VALUES(?);', (sha256,)) - cursor.execute( - ''' - SELECT - file_id - FROM - files - WHERE - sha256 = ?; - ''', - (sha256,) - ) + cursor.execute('SELECT file_id FROM files WHERE sha256 = ?;', (sha256,)) (file_id,), = cursor.fetchall() @@ -225,20 +208,15 @@ def make_file_use( @dc.dataclass(frozen=True) class _FileInfo: - id: int - is_ascii: bool - -class FileResolver(ABC): - @abstractmethod - def by_sha256(self, sha256: str) -> bytes: - ... + id: int + extension: str def _add_item( cursor: sqlite3.Cursor, - package_file_resolver: FileResolver, info: item_infos.AnyInfo, definition: bytes, - repo_iteration_id: int + repo_iteration_id: int, + repo_id: int ) -> None: item_id = get_or_make_item(cursor, info.type_name, info.identifier) @@ -246,11 +224,12 @@ def _add_item( make_mapping_status(cursor, item_id) item_version_id = make_item_version( - cursor, - item_id, - repo_iteration_id, - info.version, - definition + cursor = cursor, + item_id = item_id, + version = info.version, + installed = 'I' if repo_id == 1 else 'N', + repo_iteration_id = repo_iteration_id, + definition = definition ) file_infos = {} @@ -260,29 +239,24 @@ def _add_item( file_specifiers.extend(info.scripts) for file_spec in file_specifiers: - file_bytes = package_file_resolver.by_sha256(file_spec.sha256) - - sha256 = hashlib.sha256(file_bytes).digest().hex() - if sha256 != file_spec.sha256: - fmt = _('err.proxy.file_hash_mismatched_{item_identifier}_{file_name}_{expected_sha256}_{actual_sha256}') - msg = fmt.format( - item_identifier = info.identifier, - file_name = file_spec.name, - expected_sha256 = file_spec.sha256, - actual_sha256 = sha256 - ) - raise HaketiloException(msg) + file_id = get_or_make_file(cursor, file_spec.sha256) - file_id = get_or_make_file(cursor, sha256, file_bytes) + suffix = PurePosixPath(file_spec.name).suffix - file_infos[sha256] = _FileInfo(file_id, file_bytes.isascii()) + file_infos[file_spec.sha256] = _FileInfo(file_id, suffix) for idx, file_spec in enumerate(info.source_copyright): file_info = file_infos[file_spec.sha256] - if file_info.is_ascii: - mime = 'text/plain' - else: + + mime = mimetypes.types_map.get(file_info.extension) + if mime is None: + mime = mimetypes.common_types.get(file_info.extension) + if mime is None: mime = 'application/octet-stream' + if mime is None and file_info.extension == '.spdx': + # We don't know of any estabilished mime type for tag-value SPDX + # reports. Let's use the following for now. + mime = 'text/spdx' make_file_use( cursor, @@ -342,22 +316,17 @@ class MalcontentFileResolver(FileResolver): def by_sha256(self, sha256: str) -> bytes: file_path = self.malcontent_dir_path / 'file' / 'sha256' / sha256 if not file_path.is_file(): - fmt = _('err.proxy.file_missing_{sha256}') - raise HaketiloException(fmt.format(sha256=sha256)) + raise state.FileMissingError(repo_id='1', sha256=sha256) return file_path.read_bytes() -def load_packages( +def _load_packages_no_state_update( cursor: sqlite3.Cursor, malcontent_path: Path, - repo_id: int, - package_file_resolver: t.Optional[FileResolver] = None + repo_id: int ) -> int: assert cursor.connection.in_transaction - if package_file_resolver is None: - package_file_resolver = MalcontentFileResolver(malcontent_path) - repo_iteration_id = make_repo_iteration(cursor, repo_id) types: t.Iterable[t.Type[item_infos.AnyInfo]] = \ @@ -371,11 +340,16 @@ def load_packages( info_type ): _add_item( - cursor, - package_file_resolver, - info, - definition, - repo_iteration_id + cursor = cursor, + info = info, + definition = definition, + repo_iteration_id = repo_iteration_id, + repo_id = repo_id ) + _recompute_dependencies_no_state_update( + cursor = cursor, + semirepo_file_resolver = MalcontentFileResolver(malcontent_path) + ) + return repo_iteration_id diff --git a/src/hydrilla/proxy/state_impl/_operations/prune_packages.py b/src/hydrilla/proxy/state_impl/_operations/prune_packages.py index eb0539c..6f4b3e7 100644 --- a/src/hydrilla/proxy/state_impl/_operations/prune_packages.py +++ b/src/hydrilla/proxy/state_impl/_operations/prune_packages.py @@ -137,28 +137,8 @@ WHERE def prune_packages(cursor: sqlite3.Cursor) -> None: assert cursor.connection.in_transaction - print('VERSIONS TO DELETE', cursor.execute(''' -SELECT - iv.item_version_id - FROM - item_versions AS iv - JOIN orphan_iterations AS oi USING (repo_iteration_id) - WHERE - iv.installed != 'I'; -''').fetchall()) for sql in _remove_item_versions_sqls: cursor.execute(sql) - print('ITEMS TO DELETE', cursor.execute(''' -SELECT - i.item_id - FROM - items AS i - LEFT JOIN item_versions AS iv USING (item_id) - LEFT JOIN mapping_statuses AS ms USING (item_id) - WHERE - iv.item_version_id IS NULL AND - (i.type = 'R' OR ms.enabled = 'N'); -''').fetchall()) cursor.execute(_remove_items_sql) cursor.execute(_remove_files_sql) cursor.execute(_remove_repo_iterations_sql) diff --git a/src/hydrilla/proxy/state_impl/_operations/pull_missing_files.py b/src/hydrilla/proxy/state_impl/_operations/pull_missing_files.py new file mode 100644 index 0000000..04a2910 --- /dev/null +++ b/src/hydrilla/proxy/state_impl/_operations/pull_missing_files.py @@ -0,0 +1,113 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Haketilo proxy data and configuration (download of package files). +# +# This file is part of Hydrilla&Haketilo. +# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +""" +.... +""" + +# Enable using with Python 3.7. +from __future__ import annotations + +import sqlite3 +import hashlib + +from abc import ABC, abstractmethod +from pathlib import Path +from urllib.parse import urljoin + +import requests + +from ... import state + + +class FileResolver(ABC): + @abstractmethod + def by_sha256(self, sha256: str) -> bytes: + ... + +class DummyFileResolver(FileResolver): + def by_sha256(self, sha256: str) -> bytes: + raise NotImplementedError() + +def pull_missing_files( + cursor: sqlite3.Cursor, + semirepo_file_resolver: FileResolver = DummyFileResolver() +) -> None: + cursor.execute( + ''' + SELECT DISTINCT + f.file_id, f.sha256, + r.repo_id, r.url + FROM + repos AS R + JOIN repo_iterations AS ri USING (repo_id) + JOIN item_versions AS iv USING (repo_iteration_id) + JOIN file_uses AS fu USING (item_version_id) + JOIN files AS f USING (file_id) + WHERE + iv.installed = 'I' AND f.data IS NULL; + ''' + ) + + rows = cursor.fetchall() + + for file_id, sha256, repo_id, repo_url in rows: + if repo_id == 1: + file_bytes = semirepo_file_resolver.by_sha256(sha256) + else: + try: + url = urljoin(repo_url, f'file/sha256/{sha256}') + response = requests.get(url) + except: + raise state.RepoCommunicationError() + + if not response.ok: + raise state.FileMissingError( + repo_id = str(repo_id), + sha256 = sha256 + ) + + file_bytes = response.content + + computed_sha256 = hashlib.sha256(file_bytes).digest().hex() + if computed_sha256 != sha256: + raise state.FileIntegrityError( + repo_id = str(repo_id), + sha256 = sha256, + invalid_sha256 = computed_sha256 + ) + + cursor.execute( + ''' + UPDATE + files + SET + data = ? + WHERE + file_id = ?; + ''', + (file_bytes, file_id) + ) diff --git a/src/hydrilla/proxy/state_impl/_operations/recompute_dependencies.py b/src/hydrilla/proxy/state_impl/_operations/recompute_dependencies.py index 2b18a51..494d130 100644 --- a/src/hydrilla/proxy/state_impl/_operations/recompute_dependencies.py +++ b/src/hydrilla/proxy/state_impl/_operations/recompute_dependencies.py @@ -34,13 +34,10 @@ from __future__ import annotations import sqlite3 import typing as t -from urllib.parse import urlparse, urljoin - -import requests - from .... import item_infos from ... import simple_dependency_satisfying as sds -from ... import state +from .pull_missing_files import pull_missing_files, FileResolver, \ + DummyFileResolver AnyInfoVar = t.TypeVar( @@ -165,7 +162,8 @@ def _recompute_dependencies_no_state_update_no_pull_files( for choice in mapping_choices.values(): mapping_ver_id = mappings_to_ids[choice.info.identifier] - _mark_version_installed(cursor, mapping_ver_id) + if choice.required: + _mark_version_installed(cursor, mapping_ver_id) cursor.execute( ''' @@ -200,7 +198,7 @@ def _recompute_dependencies_no_state_update_no_pull_files( INSERT INTO payloads( mapping_item_id, pattern, - eval_allowed, + eval_allowed, cors_bypass_allowed ) VALUES (?, ?, ?, ?); @@ -230,7 +228,8 @@ def _recompute_dependencies_no_state_update_no_pull_files( for res_num, resource_info in enumerate(payload.resources): resource_ver_id = resources_to_ids[resource_info.identifier] - _mark_version_installed(cursor, resource_ver_id) + if choice.required: + _mark_version_installed(cursor, resource_ver_id) cursor.execute( ''' @@ -244,53 +243,14 @@ def _recompute_dependencies_no_state_update_no_pull_files( (payload_id, resource_ver_id, res_num) ) -def _pull_missing_files(cursor: sqlite3.Cursor) -> None: - cursor.execute( - ''' - SELECT DISTINCT - f.file_id, f.sha256, - r.repo_id, r.url - FROM - repos AS R - JOIN repo_iterations AS ri USING (repo_id) - JOIN item_versions AS iv USING (repo_iteration_id) - JOIN file_uses AS fu USING (item_version_id) - JOIN files AS f USING (file_id) - WHERE - iv.installed = 'I' AND f.data IS NULL; - ''' - ) - - rows = cursor.fetchall() - - for file_id, sha256, repo_id, repo_url in rows: - try: - response = requests.get(urljoin(repo_url, f'file/sha256/{sha256}')) - assert response.ok - except: - raise state.FileInstallationError( - repo_id = str(repo_id), - sha256 = sha256 - ) - - cursor.execute( - ''' - UPDATE - files - SET - data = ? - WHERE - file_id = ?; - ''', - (response.content, file_id) - ) - def _recompute_dependencies_no_state_update( - cursor: sqlite3.Cursor, - extra_requirements: t.Iterable[sds.MappingRequirement] + cursor: sqlite3.Cursor, + extra_requirements: t.Iterable[sds.MappingRequirement] = (), + semirepo_file_resolver: FileResolver = DummyFileResolver() ) -> None: _recompute_dependencies_no_state_update_no_pull_files( cursor, extra_requirements ) - _pull_missing_files(cursor) + + pull_missing_files(cursor, semirepo_file_resolver) -- cgit v1.2.3