diff options
Diffstat (limited to 'src/hydrilla/proxy/state_impl/_operations/load_packages.py')
-rw-r--r-- | src/hydrilla/proxy/state_impl/_operations/load_packages.py | 410 |
1 files changed, 410 insertions, 0 deletions
# SPDX-License-Identifier: GPL-3.0-or-later

# Haketilo proxy data and configuration (import of packages from disk files).
#
# This file is part of Hydrilla&Haketilo.
#
# Copyright (C) 2022 Wojtek Kosior
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
#
# I, Wojtek Kosior, thereby promise not to sue for violation of this
# file's license. Although I request that you do not make use of this
# code in a proprietary program, I am not going to enforce this in
# court.

"""
Import of package definitions (resources and mappings) from files on disk into
the SQLite tables used by the proxy: `repos`, `repo_iterations`, `items`,
`item_versions`, `mapping_statuses`, `files` and `file_uses`.
"""

import io
import mimetypes
import sqlite3
import hashlib
import dataclasses as dc
import typing as t

from pathlib import Path, PurePosixPath

from .... import versions
from .... import item_infos
from ... import state
from .recompute_dependencies import _recompute_dependencies_no_state_update, \
    FileResolver
from .prune_orphans import prune_orphans

def make_repo_iteration(cursor: sqlite3.Cursor, repo_id: int) -> int:
    """
    Create a new row in `repo_iterations` for the given repo and return its
    `repo_iteration_id`.

    The iteration number is taken from the repo's `next_iteration` column,
    which is then incremented. For every repo except the one with
    `repo_id = 1` the new iteration also becomes the active one and
    `last_refreshed` is set to the current time; for `repo_id = 1` both of
    these columns are set to NULL.

    Raises ValueError (from tuple unpacking) if no row or more than one row is
    found for `repo_id`.
    """
    cursor.execute(
        '''
        SELECT
                next_iteration
        FROM
                repos
        WHERE
                repo_id = ?;
        ''',
        (repo_id,)
    )

    # Exactly one row with exactly one column is expected.
    (next_iteration,), = cursor.fetchall()

    cursor.execute(
        '''
        INSERT INTO repo_iterations(repo_id, iteration)
        VALUES(?, ?);
        ''',
        (repo_id, next_iteration)
    )

    cursor.execute(
        '''
        SELECT
                repo_iteration_id
        FROM
                repo_iterations
        WHERE
                repo_id = ? AND iteration = ?;
        ''',
        (repo_id, next_iteration)
    )

    (repo_iteration_id,), = cursor.fetchall()

    cursor.execute(
        '''
        UPDATE
                repos
        SET
                next_iteration = ?,
                active_iteration_id = (
                        CASE
                                WHEN repo_id = 1 THEN NULL
                                ELSE ?
                        END
                ),
                last_refreshed = (
                        CASE
                                WHEN repo_id = 1 THEN NULL
                                ELSE STRFTIME('%s', 'NOW')
                        END
                )
        WHERE
                repo_id = ?;
        ''',
        (next_iteration + 1, repo_iteration_id, repo_id)
    )

    return repo_iteration_id

def get_or_make_item(cursor: sqlite3.Cursor, type: str, identifier: str) -> int:
    """
    Return the `item_id` of the item with given type and identifier, inserting
    a row into `items` first if there is none yet.

    `type` must be either 'resource' or 'mapping'; any other value raises
    KeyError.
    """
    type_letter = {'resource': 'R', 'mapping': 'M'}[type]

    cursor.execute(
        '''
        INSERT OR IGNORE INTO items(type, identifier)
        VALUES(?, ?);
        ''',
        (type_letter, identifier)
    )

    cursor.execute(
        '''
        SELECT
                item_id
        FROM
                items
        WHERE
                type = ? AND identifier = ?;
        ''',
        (type_letter, identifier)
    )

    (item_id,), = cursor.fetchall()

    return item_id

def update_or_make_item_version(
        cursor:            sqlite3.Cursor,
        item_id:           int,
        version:           versions.VerTuple,
        installed:         str,
        repo_iteration_id: int,
        repo_id:           int,
        definition:        bytes
) -> int:
    """
    Record an item version in `item_versions` and return its
    `item_version_id`.

    If the repo already has an item version whose definition has the same
    SHA256 sum, that row is reused: it gets reassigned to `repo_iteration_id`
    and its `installed` column becomes 'I' if either its old value or the
    `installed` argument is 'I' (otherwise 'N'). If there is no such row, a
    new one is inserted.
    """
    ver_str = versions.version_string(version)

    definition_sha256 = hashlib.sha256(definition).hexdigest()

    cursor.execute(
        '''
        SELECT
                item_version_id
        FROM
                item_versions AS iv
                JOIN repo_iterations AS ri USING (repo_iteration_id)
                JOIN repos AS r USING (repo_id)
        WHERE
                r.repo_id = ? AND iv.definition_sha256 = ?;
        ''',
        (repo_id, definition_sha256)
    )

    rows = cursor.fetchall()

    if rows:
        # At most one matching row is expected; more would raise ValueError.
        (item_version_id,), = rows
        cursor.execute(
            '''
            UPDATE
                    item_versions
            SET
                    installed = (
                            CASE
                                    WHEN installed = 'I' OR ? = 'I' THEN 'I'
                                    ELSE 'N'
                            END
                    ),
                    repo_iteration_id = ?
            WHERE
                    item_version_id = ?;
            ''',
            (installed, repo_iteration_id, item_version_id)
        )

        return item_version_id

    cursor.execute(
        '''
        INSERT INTO item_versions(
                item_id,
                version,
                installed,
                repo_iteration_id,
                definition,
                definition_sha256
        )
        VALUES(?, ?, ?, ?, ?, ?);
        ''',
        (item_id, ver_str, installed, repo_iteration_id, definition,
         definition_sha256)
    )

    cursor.execute(
        '''
        SELECT
                item_version_id
        FROM
                item_versions
        WHERE
                item_id = ? AND version = ? AND repo_iteration_id = ?;
        ''',
        (item_id, ver_str, repo_iteration_id)
    )

    (item_version_id,), = cursor.fetchall()

    return item_version_id

def make_mapping_status(cursor: sqlite3.Cursor, item_id: int) -> None:
    """
    Insert a row for `item_id` into `mapping_statuses` unless the insert
    conflicts with an existing row.
    """
    cursor.execute(
        'INSERT OR IGNORE INTO mapping_statuses(item_id) VALUES(?);',
        (item_id,)
    )

def get_or_make_file(cursor: sqlite3.Cursor, sha256: str) -> int:
    """
    Return the `file_id` of the `files` row with given SHA256 sum, inserting
    the row first if it does not exist yet.
    """
    cursor.execute('INSERT OR IGNORE INTO files(sha256) VALUES(?);', (sha256,))

    cursor.execute('SELECT file_id FROM files WHERE sha256 = ?;', (sha256,))

    (file_id,), = cursor.fetchall()

    return file_id

def make_file_use(
        cursor:          sqlite3.Cursor,
        item_version_id: int,
        file_id:         int,
        name:            str,
        type:            str,
        mime_type:       str,
        idx:             int
) -> None:
    """
    Insert a row into `file_uses` that links a file to an item version. The
    insert is silently skipped if it conflicts with an existing row.

    Within this module `type` is 'L' for files listed under an item's
    `source_copyright` and 'W' for a resource's scripts.
    """
    cursor.execute(
        '''
        INSERT OR IGNORE INTO file_uses(
                item_version_id,
                file_id,
                name,
                type,
                mime_type,
                idx
        )
        VALUES(?, ?, ?, ?, ?, ?);
        ''',
        (item_version_id, file_id, name, type, mime_type, idx)
    )

@dc.dataclass(frozen=True)
class _FileInfo:
    """Database id of a file together with the suffix of its name."""
    id:        int
    extension: str

def _license_file_mime_type(extension: str) -> str:
    """
    Choose the MIME type to record for a license/copyright file with given
    name suffix (e.g. '.txt').

    Types known to the `mimetypes` module take precedence. '.spdx' files not
    known to it are reported as 'text/spdx' and anything else falls back to
    'application/octet-stream'.
    """
    mime = mimetypes.types_map.get(extension)
    if mime is None:
        mime = mimetypes.common_types.get(extension)
    # This check has to come before the generic fallback below. Otherwise
    # `mime` would never be None here and the SPDX case could not be reached.
    if mime is None and extension == '.spdx':
        # We don't know of any established mime type for tag-value SPDX
        # reports. Let's use the following for now.
        mime = 'text/spdx'
    if mime is None:
        mime = 'application/octet-stream'

    return mime

def _add_item(
        cursor:            sqlite3.Cursor,
        info:              item_infos.AnyInfo,
        definition:        bytes,
        repo_iteration_id: int,
        repo_id:           int
) -> None:
    """
    Store a single resource or mapping, together with the files it uses, in
    the database.

    `definition` holds the raw bytes `info` was loaded from. Items that belong
    to the repo with `repo_id = 1` are recorded with `installed` set to 'I',
    all other ones with 'N'.
    """
    item_id = get_or_make_item(cursor, info.type.value, info.identifier)

    if isinstance(info, item_infos.MappingInfo):
        make_mapping_status(cursor, item_id)

    item_version_id = update_or_make_item_version(
        cursor            = cursor,
        item_id           = item_id,
        version           = info.version,
        installed         = 'I' if repo_id == 1 else 'N',
        repo_iteration_id = repo_iteration_id,
        repo_id           = repo_id,
        definition        = definition
    )

    # Maps SHA256 sum of a file to its database id and name suffix.
    file_infos: t.Dict[str, _FileInfo] = {}

    file_specifiers = [*info.source_copyright]
    if isinstance(info, item_infos.ResourceInfo):
        file_specifiers.extend(info.scripts)

    for file_spec in file_specifiers:
        file_id = get_or_make_file(cursor, file_spec.sha256)

        suffix = PurePosixPath(file_spec.name).suffix

        file_infos[file_spec.sha256] = _FileInfo(file_id, suffix)

    for idx, file_spec in enumerate(info.source_copyright):
        file_info = file_infos[file_spec.sha256]

        make_file_use(
            cursor,
            item_version_id = item_version_id,
            file_id         = file_info.id,
            name            = file_spec.name,
            type            = 'L',
            mime_type       = _license_file_mime_type(file_info.extension),
            idx             = idx
        )

    # Only resources carry scripts.
    if isinstance(info, item_infos.MappingInfo):
        return

    for idx, file_spec in enumerate(info.scripts):
        file_info = file_infos[file_spec.sha256]
        make_file_use(
            cursor,
            item_version_id = item_version_id,
            file_id         = file_info.id,
            name            = file_spec.name,
            type            = 'W',
            mime_type       = 'application/javascript',
            idx             = idx
        )

AnyInfoVar = t.TypeVar(
    'AnyInfoVar',
    item_infos.ResourceInfo,
    item_infos.MappingInfo
)

def _read_items(malcontent_path: Path, info_class: t.Type[AnyInfoVar]) \
    -> t.Iterator[tuple[AnyInfoVar, bytes]]:
    """
    Iterate over item definitions of one type stored under `malcontent_path`.

    Definitions are looked up at `<malcontent_path>/<type>/<identifier>/
    <version>`, where `<type>` is `info_class.type.value`. For each of them
    the loaded info object is yielded together with the raw bytes of the
    definition file. Nothing is yielded if the `<type>` directory is missing.

    The identifier and version found in a definition are asserted to match
    the names of its directory and file.
    """
    item_type_path = malcontent_path / info_class.type.value
    if not item_type_path.is_dir():
        return

    for item_path in item_type_path.iterdir():
        if not item_path.is_dir():
            continue

        for item_version_path in item_path.iterdir():
            definition = item_version_path.read_bytes()
            item_info = info_class.load(definition)

            assert item_info.identifier == item_path.name
            assert versions.version_string(item_info.version) == \
                item_version_path.name

            yield item_info, definition

@dc.dataclass(frozen=True)
class MalcontentFileResolver(FileResolver):
    """
    File resolver that reads file contents from a directory on disk, looking
    them up at `<malcontent_dir_path>/file/sha256/<sha256>`.
    """
    malcontent_dir_path: Path

    def by_sha256(self, sha256: str) -> bytes:
        """
        Return the contents of the file with given SHA256 sum. Raise
        `state.FileMissingError` if there is no such regular file.
        """
        file_path = self.malcontent_dir_path / 'file' / 'sha256' / sha256
        if not file_path.is_file():
            raise state.FileMissingError(repo_id='1', sha256=sha256)

        return file_path.read_bytes()

def _load_packages_no_state_update(
        cursor:          sqlite3.Cursor,
        malcontent_path: Path,
        repo_id:         int
) -> int:
    """
    Load all resources and mappings found under `malcontent_path` into the
    database as a new iteration of the repo given by `repo_id`, then recompute
    dependencies. Return the id of the newly created repo iteration.

    Must be called with a transaction already open on the cursor's connection.
    """
    assert cursor.connection.in_transaction

    repo_iteration_id = make_repo_iteration(cursor, repo_id)

    for item_type in [item_infos.ItemType.RESOURCE, item_infos.ItemType.MAPPING]:
        info: item_infos.AnyInfo
        for info, definition in _read_items( # type: ignore
                malcontent_path,
                item_type.info_class
        ):
            _add_item(
                cursor            = cursor,
                info              = info,
                definition        = definition,
                repo_iteration_id = repo_iteration_id,
                repo_id           = repo_id
            )

    if repo_id != 1:
        # In case of local semirepo (repo_id = 1) all packages from previous
        # iteration are already orphans and can be assumed to be in a pruned
        # state no matter what.
        prune_orphans(cursor)

    _recompute_dependencies_no_state_update(
        cursor                     = cursor,
        unlocked_required_mappings = [],
        semirepo_file_resolver     = MalcontentFileResolver(malcontent_path)
    )

    return repo_iteration_id