summaryrefslogtreecommitdiff
path: root/src/hydrilla/proxy/state_impl/_operations
diff options
context:
space:
mode:
Diffstat (limited to 'src/hydrilla/proxy/state_impl/_operations')
-rw-r--r--src/hydrilla/proxy/state_impl/_operations/__init__.py9
-rw-r--r--src/hydrilla/proxy/state_impl/_operations/load_packages.py378
-rw-r--r--src/hydrilla/proxy/state_impl/_operations/prune_packages.py169
-rw-r--r--src/hydrilla/proxy/state_impl/_operations/recompute_dependencies.py223
4 files changed, 779 insertions, 0 deletions
diff --git a/src/hydrilla/proxy/state_impl/_operations/__init__.py b/src/hydrilla/proxy/state_impl/_operations/__init__.py
new file mode 100644
index 0000000..c147be4
--- /dev/null
+++ b/src/hydrilla/proxy/state_impl/_operations/__init__.py
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: CC0-1.0
+
+# Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org>
+#
+# Available under the terms of Creative Commons Zero v1.0 Universal.
+
+from .load_packages import load_packages, FileResolver
+from .prune_packages import prune_packages
+from .recompute_dependencies import _recompute_dependencies_no_state_update
diff --git a/src/hydrilla/proxy/state_impl/_operations/load_packages.py b/src/hydrilla/proxy/state_impl/_operations/load_packages.py
new file mode 100644
index 0000000..c294ef0
--- /dev/null
+++ b/src/hydrilla/proxy/state_impl/_operations/load_packages.py
@@ -0,0 +1,378 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Haketilo proxy data and configuration (import of packages from disk files).
+#
+# This file is part of Hydrilla&Haketilo.
+#
+# Copyright (C) 2022 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+"""
+....
+"""
+
+# Enable using with Python 3.7.
+from __future__ import annotations
+
+import io
+import hashlib
+import dataclasses as dc
+import typing as t
+
+from pathlib import Path
+from abc import ABC, abstractmethod
+
+import sqlite3
+
+from ....exceptions import HaketiloException
+from ....translations import smart_gettext as _
+from .... import versions
+from .... import item_infos
+
+
+def make_repo_iteration(cursor: sqlite3.Cursor, repo_id: int) -> int:
+    """
+    Register a new iteration of repository `repo_id` and return its id.
+
+    The repository's `next_iteration` counter is read and incremented by
+    one, a `repo_iterations` row carrying the counter's previous value is
+    inserted and the `repo_iteration_id` of that row is returned.
+    """
+    cursor.execute(
+        '''
+        SELECT
+            next_iteration
+        FROM
+            repos
+        WHERE
+            repo_id = ?;
+        ''',
+        (repo_id,)
+    )
+
+    # Exactly one row with one column is expected; anything else makes the
+    # unpacking fail.
+    [[iteration]] = cursor.fetchall()
+
+    cursor.execute(
+        '''
+        UPDATE
+            repos
+        SET
+            next_iteration = ?
+        WHERE
+            repo_id = ?;
+        ''',
+        (iteration + 1, repo_id)
+    )
+
+    iteration_key = (repo_id, iteration)
+
+    cursor.execute(
+        '''
+        INSERT INTO repo_iterations(repo_id, iteration)
+        VALUES(?, ?);
+        ''',
+        iteration_key
+    )
+
+    # Read back the id of the row that has just been inserted.
+    cursor.execute(
+        '''
+        SELECT
+            repo_iteration_id
+        FROM
+            repo_iterations
+        WHERE
+            repo_id = ? AND iteration = ?;
+        ''',
+        iteration_key
+    )
+
+    [[new_iteration_id]] = cursor.fetchall()
+
+    return new_iteration_id
+
+def get_or_make_item(cursor: sqlite3.Cursor, type: str, identifier: str) -> int:
+    """
+    Return the `item_id` of the item of given type and identifier.
+
+    `type` must be either 'resource' or 'mapping' (any other value raises
+    KeyError). The `items` row is inserted first if it is not there yet.
+    """
+    letters = {'resource': 'R', 'mapping': 'M'}
+    item_key = (letters[type], identifier)
+
+    # INSERT OR IGNORE leaves an already-present row untouched.
+    cursor.execute(
+        '''
+        INSERT OR IGNORE INTO items(type, identifier)
+        VALUES(?, ?);
+        ''',
+        item_key
+    )
+
+    cursor.execute(
+        '''
+        SELECT
+            item_id
+        FROM
+            items
+        WHERE
+            type = ? AND identifier = ?;
+        ''',
+        item_key
+    )
+
+    [[found_id]] = cursor.fetchall()
+
+    return found_id
+
+def get_or_make_item_version(
+    cursor: sqlite3.Cursor,
+    item_id: int,
+    repo_iteration_id: int,
+    version: versions.VerTuple,
+    definition: bytes
+) -> int:
+    """
+    Return the `item_version_id` of the given version of an item.
+
+    The version is looked up by item id, version string and repo iteration
+    id. An `item_versions` row holding `definition` is inserted first; the
+    insert is ignored if it conflicts with an existing row.
+    """
+    version_text = versions.version_string(version)
+
+    cursor.execute(
+        '''
+        INSERT OR IGNORE INTO item_versions(
+            item_id,
+            version,
+            repo_iteration_id,
+            definition
+        )
+        VALUES(?, ?, ?, ?);
+        ''',
+        (item_id, version_text, repo_iteration_id, definition)
+    )
+
+    cursor.execute(
+        '''
+        SELECT
+            item_version_id
+        FROM
+            item_versions
+        WHERE
+            item_id = ? AND version = ? AND repo_iteration_id = ?;
+        ''',
+        (item_id, version_text, repo_iteration_id)
+    )
+
+    [[found_id]] = cursor.fetchall()
+
+    return found_id
+
+def make_mapping_status(cursor: sqlite3.Cursor, item_id: int) -> None:
+    """
+    Make sure the mapping item `item_id` has a `mapping_statuses` row.
+
+    A newly inserted row has `enabled` set to 'N' and `required` set to
+    FALSE. An already existing row is left as it is.
+    """
+    insert_sql = '''
+        INSERT OR IGNORE INTO mapping_statuses(item_id, enabled, required)
+        VALUES(?, 'N', FALSE);
+        '''
+
+    cursor.execute(insert_sql, (item_id,))
+
+def get_or_make_file(cursor: sqlite3.Cursor, sha256: str, file_bytes: bytes) \
+    -> int:
+    """
+    Return the `file_id` of the file with the given SHA-256 hex digest.
+
+    A `files` row holding `file_bytes` is inserted first; the insert is
+    ignored if it conflicts with an existing row.
+    """
+    new_row = (sha256, file_bytes)
+
+    cursor.execute(
+        '''
+        INSERT OR IGNORE INTO files(sha256, data)
+        VALUES(?, ?)
+        ''',
+        new_row
+    )
+
+    cursor.execute(
+        '''
+        SELECT
+            file_id
+        FROM
+            files
+        WHERE
+            sha256 = ?;
+        ''',
+        (sha256,)
+    )
+
+    [[found_id]] = cursor.fetchall()
+
+    return found_id
+
+def make_file_use(
+    cursor: sqlite3.Cursor,
+    item_version_id: int,
+    file_id: int,
+    name: str,
+    type: str,
+    mime_type: str,
+    idx: int
+) -> None:
+    """
+    Record that an item version uses a stored file.
+
+    One `file_uses` row is inserted with the values given; the insert is
+    ignored if it conflicts with an existing row.
+    """
+    use_row = (item_version_id, file_id, name, type, mime_type, idx)
+
+    cursor.execute(
+        '''
+        INSERT OR IGNORE INTO file_uses(
+            item_version_id,
+            file_id,
+            name,
+            type,
+            mime_type,
+            idx
+        )
+        VALUES(?, ?, ?, ?, ?, ?);
+        ''',
+        use_row
+    )
+
+# Per-file record that _add_item() keeps while linking files to an item
+# version.
+@dc.dataclass(frozen=True)
+class _FileInfo:
+ # Value returned by get_or_make_file() for the file.
+ id: int
+ # Result of `bytes.isascii()` on the file's contents; _add_item() uses it
+ # to choose between the 'text/plain' and 'application/octet-stream' types.
+ is_ascii: bool
+
+# Interface of objects that supply the contents of files referenced by items.
+class FileResolver(ABC):
+ # Return the bytes of the file identified by the hex SHA-256 digest
+ # `sha256`. _add_item() re-hashes whatever is returned and raises
+ # HaketiloException when the digest does not match the one requested.
+ @abstractmethod
+ def by_sha256(self, sha256: str) -> bytes:
+ ...
+
+def _add_item(
+    cursor: sqlite3.Cursor,
+    package_file_resolver: FileResolver,
+    info: item_infos.AnyInfo,
+    definition: bytes,
+    repo_iteration_id: int
+) -> None:
+    """
+    Store one item version and the files it references in the database.
+
+    The item and item version rows are created if missing and mappings
+    additionally get a status row. Every file listed in the item's
+    `source_copyright` (and, for resources, `scripts`) is fetched through
+    `package_file_resolver`, verified against its declared SHA-256 digest,
+    stored and linked to the item version: type 'L' for copyright files,
+    type 'W' for scripts.
+
+    Raises HaketiloException when a fetched file's digest differs from the
+    declared one.
+    """
+    item_id = get_or_make_item(cursor, info.type_name, info.identifier)
+
+    item_version_id = get_or_make_item_version(
+        cursor,
+        item_id,
+        repo_iteration_id,
+        info.version,
+        definition
+    )
+
+    if isinstance(info, item_infos.MappingInfo):
+        make_mapping_status(cursor, item_id)
+
+    wanted_specs = list(info.source_copyright)
+    if isinstance(info, item_infos.ResourceInfo):
+        wanted_specs += info.scripts
+
+    # Maps SHA-256 hex digests to records of files already stored.
+    stored = {}
+
+    for spec in wanted_specs:
+        contents = package_file_resolver.by_sha256(spec.sha256)
+
+        actual_sha256 = hashlib.sha256(contents).hexdigest()
+        if actual_sha256 != spec.sha256:
+            fmt = _('err.proxy.file_hash_mismatched_{item_identifier}_{file_name}_{expected_sha256}_{actual_sha256}')
+            raise HaketiloException(fmt.format(
+                item_identifier = info.identifier,
+                file_name       = spec.name,
+                expected_sha256 = spec.sha256,
+                actual_sha256   = actual_sha256
+            ))
+
+        stored[actual_sha256] = _FileInfo(
+            get_or_make_file(cursor, actual_sha256, contents),
+            contents.isascii()
+        )
+
+    for position, spec in enumerate(info.source_copyright):
+        found = stored[spec.sha256]
+
+        make_file_use(
+            cursor,
+            item_version_id = item_version_id,
+            file_id         = found.id,
+            name            = spec.name,
+            type            = 'L',
+            mime_type       = ('text/plain' if found.is_ascii
+                               else 'application/octet-stream'),
+            idx             = position
+        )
+
+    # Mappings have no scripts of their own.
+    if isinstance(info, item_infos.MappingInfo):
+        return
+
+    for position, spec in enumerate(info.scripts):
+        make_file_use(
+            cursor,
+            item_version_id = item_version_id,
+            file_id         = stored[spec.sha256].id,
+            name            = spec.name,
+            type            = 'W',
+            mime_type       = 'application/javascript',
+            idx             = position
+        )
+
+# Type variable constrained to the two item info classes. It lets
+# _read_items() declare that the infos it yields are instances of the
+# `item_class` it was called with.
+AnyInfoVar = t.TypeVar(
+ 'AnyInfoVar',
+ item_infos.ResourceInfo,
+ item_infos.MappingInfo
+)
+
+def _read_items(malcontent_path: Path, item_class: t.Type[AnyInfoVar]) \
+    -> t.Iterator[tuple[AnyInfoVar, bytes]]:
+    """
+    Yield (info, definition bytes) for every item version of one type.
+
+    Definitions are read from
+    `<malcontent_path>/<item_class.type_name>/<identifier>/<version>`.
+    Nothing is yielded when the type directory does not exist; entries of
+    the type directory that are not directories are skipped.
+
+    The identifier and version stored in each definition are asserted to
+    match the names of the directory and file it was read from.
+    """
+    type_dir = malcontent_path / item_class.type_name
+    if not type_dir.is_dir():
+        return
+
+    item_dirs = (entry for entry in type_dir.iterdir() if entry.is_dir())
+
+    for item_dir in item_dirs:
+        for version_file in item_dir.iterdir():
+            raw_definition = version_file.read_bytes()
+            loaded = item_class.load(raw_definition)
+
+            assert loaded.identifier == item_dir.name
+            assert versions.version_string(loaded.version) == \
+                version_file.name
+
+            yield loaded, raw_definition
+
+@dc.dataclass(frozen=True)
+class MalcontentFileResolver(FileResolver):
+    """
+    File resolver that reads files from `<malcontent_dir_path>/file/sha256/`.
+    """
+    malcontent_dir_path: Path
+
+    def by_sha256(self, sha256: str) -> bytes:
+        """
+        Return the contents of the file named after the digest `sha256`.
+
+        Raises HaketiloException when there is no such regular file.
+        """
+        candidate = self.malcontent_dir_path / 'file' / 'sha256' / sha256
+        if candidate.is_file():
+            return candidate.read_bytes()
+
+        fmt = _('err.proxy.file_missing_{sha256}')
+        raise HaketiloException(fmt.format(sha256=sha256))
+
+def load_packages(
+    cursor: sqlite3.Cursor,
+    malcontent_path: Path,
+    repo_id: int,
+    package_file_resolver: t.Optional[FileResolver] = None
+) -> int:
+    """
+    Import all items found under `malcontent_path` as a new repo iteration.
+
+    A new iteration of repository `repo_id` is created, then every resource
+    and every mapping definition found on disk is added to it. Files are
+    fetched through `package_file_resolver`; when none is given, they are
+    read from `malcontent_path` itself.
+
+    Returns the id of the repo iteration that was created.
+    """
+    resolver = package_file_resolver
+    if resolver is None:
+        resolver = MalcontentFileResolver(malcontent_path)
+
+    repo_iteration_id = make_repo_iteration(cursor, repo_id)
+
+    # Resources are loaded before mappings.
+    info_types: t.Iterable[t.Type[item_infos.AnyInfo]] = \
+        (item_infos.ResourceInfo, item_infos.MappingInfo)
+
+    for info_type in info_types:
+        found_items = _read_items(malcontent_path, info_type) # type: ignore
+
+        for info, definition in found_items:
+            _add_item(cursor, resolver, info, definition, repo_iteration_id)
+
+    return repo_iteration_id
diff --git a/src/hydrilla/proxy/state_impl/_operations/prune_packages.py b/src/hydrilla/proxy/state_impl/_operations/prune_packages.py
new file mode 100644
index 0000000..9c2b1d7
--- /dev/null
+++ b/src/hydrilla/proxy/state_impl/_operations/prune_packages.py
@@ -0,0 +1,169 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Haketilo proxy data and configuration (removal of packages that are not used).
+#
+# This file is part of Hydrilla&Haketilo.
+#
+# Copyright (C) 2022 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+"""
+....
+"""
+
+# Enable using with Python 3.7.
+from __future__ import annotations
+
+import sqlite3
+
+from pathlib import Path
+
+
+# Statements, to be executed in order, that drop mapping versions which are
+# no longer needed:
+#  1. create a temporary table `removed_mappings`,
+#  2. fill it with the ids of item versions that belong to a row of
+#     `orphan_iterations` and whose item has a `mapping_statuses` row with
+#     `required` false (only items with a status row can match),
+#  3. clear every `mapping_statuses.active_version_id` that points at one of
+#     those versions,
+#  4. delete those versions from `item_versions`,
+#  5. drop the temporary table again.
+# NOTE(review): `orphan_iterations` is not defined in this file - presumably
+# a view of repo iterations that are no longer current; confirm against the
+# schema. The join with `items` (alias `i`) is not used by the query.
+_remove_mapping_versions_sqls = [
+ '''
+ CREATE TEMPORARY TABLE removed_mappings(
+ item_version_id INTEGER PRIMARY KEY
+ );
+ ''', '''
+ INSERT INTO
+ removed_mappings
+ SELECT
+ iv.item_version_id
+ FROM
+ item_versions AS iv
+ JOIN items AS i USING (item_id)
+ JOIN mapping_statuses AS ms USING (item_id)
+ JOIN orphan_iterations AS oi USING (repo_iteration_id)
+ WHERE
+ NOT ms.required;
+ ''', '''
+ UPDATE
+ mapping_statuses
+ SET
+ active_version_id = NULL
+ WHERE
+ active_version_id IN removed_mappings;
+ ''', '''
+ DELETE FROM
+ item_versions
+ WHERE
+ item_version_id IN removed_mappings;
+ ''', '''
+ DROP TABLE removed_mappings;
+ '''
+]
+
+# Deletes resource versions that belong to a row of `orphan_iterations` and
+# are not referenced by any row of `resolved_depended_resources`.
+#
+# The `i.type = 'R'` condition matters: mapping versions never appear in
+# `resolved_depended_resources`, so without it every mapping version left in
+# an orphan iteration (including those of required mappings that
+# `_remove_mapping_versions_sqls` kept on purpose) would be deleted as well.
+_remove_resource_versions_sql = '''
+WITH removed_resources AS (
+    SELECT
+        iv.item_version_id
+    FROM
+        item_versions AS iv
+        JOIN items AS i
+            USING (item_id)
+        JOIN orphan_iterations AS oi
+            USING (repo_iteration_id)
+        LEFT JOIN resolved_depended_resources AS rdr
+            ON rdr.resource_item_id = iv.item_version_id
+    WHERE
+        i.type = 'R' AND
+        rdr.payload_id IS NULL
+)
+DELETE FROM
+    item_versions
+WHERE
+    item_version_id IN removed_resources;
+'''
+
+# Deletes items that have no versions left, provided they are either
+# resources or mappings whose status row has `enabled = 'N'`.
+#
+# The parentheses are required: in SQL, AND binds tighter than OR, so
+# without them the condition would read
+# `(no versions AND resource) OR enabled = 'N'` and match every mapping with
+# `enabled = 'N'`, even one that still has versions.
+_remove_items_sql = '''
+WITH removed_items AS (
+    SELECT
+        i.item_id
+    FROM
+        items AS i
+        LEFT JOIN item_versions AS iv USING (item_id)
+        LEFT JOIN mapping_statuses AS ms USING (item_id)
+    WHERE
+        iv.item_version_id IS NULL AND
+        (i.type = 'R' OR ms.enabled = 'N')
+)
+DELETE FROM
+    items
+WHERE
+    item_id IN removed_items;
+'''
+
+# Deletes every row of `files` that no row of `file_uses` refers to (the
+# LEFT JOIN leaves `fu.file_use_id` NULL exactly for such files).
+_remove_files_sql = '''
+WITH removed_files AS (
+ SELECT
+ f.file_id
+ FROM
+ files AS f
+ LEFT JOIN file_uses AS fu USING (file_id)
+ WHERE
+ fu.file_use_id IS NULL
+)
+DELETE FROM
+ files
+WHERE
+ file_id IN removed_files;
+'''
+
+# Deletes the repo iterations listed in `orphan_iterations` that no longer
+# have any item version assigned to them.
+_remove_repo_iterations_sql = '''
+WITH removed_iterations AS (
+ SELECT
+ oi.repo_iteration_id
+ FROM
+ orphan_iterations AS oi
+ LEFT JOIN item_versions AS iv USING (repo_iteration_id)
+ WHERE
+ iv.item_version_id IS NULL
+)
+DELETE FROM
+ repo_iterations
+WHERE
+ repo_iteration_id IN removed_iterations;
+'''
+
+# Deletes repositories that are flagged `deleted` and have no iterations
+# left. The repository with `repo_id` 1 is never removed.
+# NOTE(review): repo 1 is presumably a built-in/special repository - confirm
+# against the schema.
+_remove_repos_sql = '''
+WITH removed_repos AS (
+ SELECT
+ r.repo_id
+ FROM
+ repos AS r
+ LEFT JOIN repo_iterations AS ri USING (repo_id)
+ WHERE
+ r.deleted AND ri.repo_iteration_id IS NULL AND r.repo_id != 1
+)
+DELETE FROM
+ repos
+WHERE
+ repo_id IN removed_repos;
+'''
+
+def prune_packages(cursor: sqlite3.Cursor) -> None:
+    """
+    Remove unused package data from the database.
+
+    The removal statements are executed in a fixed order: mapping versions,
+    resource versions, items, files, repo iterations and finally repos. The
+    cursor's connection is expected to be inside a transaction already.
+    """
+    assert cursor.connection.in_transaction
+
+    statements = [
+        *_remove_mapping_versions_sqls,
+        _remove_resource_versions_sql,
+        _remove_items_sql,
+        _remove_files_sql,
+        _remove_repo_iterations_sql,
+        _remove_repos_sql
+    ]
+
+    for statement in statements:
+        cursor.execute(statement)
diff --git a/src/hydrilla/proxy/state_impl/_operations/recompute_dependencies.py b/src/hydrilla/proxy/state_impl/_operations/recompute_dependencies.py
new file mode 100644
index 0000000..4093f12
--- /dev/null
+++ b/src/hydrilla/proxy/state_impl/_operations/recompute_dependencies.py
@@ -0,0 +1,223 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Haketilo proxy data and configuration (update of dependency tree in the db).
+#
+# This file is part of Hydrilla&Haketilo.
+#
+# Copyright (C) 2022 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use this code
+# in a proprietary program, I am not going to enforce this in court.
+
+"""
+....
+"""
+
+# Enable using with Python 3.7.
+from __future__ import annotations
+
+import typing as t
+
+import sqlite3
+
+from .... import item_infos
+from ... import simple_dependency_satisfying as sds
+
+
+# Type variable constrained to the two item info classes. It lets
+# get_infos_of_type() declare that the infos it returns are instances of the
+# `info_type` it was called with.
+AnyInfoVar = t.TypeVar(
+ 'AnyInfoVar',
+ item_infos.ResourceInfo,
+ item_infos.MappingInfo
+)
+
+def get_infos_of_type(cursor: sqlite3.Cursor, info_type: t.Type[AnyInfoVar],) \
+    -> t.Mapping[int, AnyInfoVar]:
+    """
+    Load every stored item version of the given kind.
+
+    `info_type` selects the kind: the upper-cased first letter of its
+    `type_name` is matched against `items.type`. Each definition is parsed
+    with `info_type.load()`, which also receives the name of the repository
+    and the number of the repo iteration the version belongs to.
+
+    Returns a mapping from `item_version_id` to the parsed info. Version ids
+    (not item ids) are used as keys because the results are matched against
+    `mapping_statuses.active_version_id` and
+    `item_versions.item_version_id`, and because an item may have several
+    versions which would overwrite one another under a shared item id.
+    """
+    cursor.execute(
+        '''
+        SELECT
+            iv.item_version_id, iv.definition, r.name, ri.iteration
+        FROM
+            item_versions AS iv
+            JOIN items AS i USING (item_id)
+            JOIN repo_iterations AS ri USING (repo_iteration_id)
+            JOIN repos AS r USING (repo_id)
+        WHERE
+            i.type = ?;
+        ''',
+        (info_type.type_name[0].upper(),)
+    )
+
+    return {
+        item_version_id: info_type.load(definition, repo_name, repo_iteration)
+        for item_version_id, definition, repo_name, repo_iteration
+        in cursor.fetchall()
+    }
+
+# Rebuild the dependency data kept in the database: the rows of `payloads`
+# and `resolved_depended_resources` as well as the `required` and
+# `active_version_id` columns of `mapping_statuses`.
+#
+# `extra_requirements` are requirements passed to the dependency solver in
+# addition to those derived from the enabled mappings' statuses.
+def _recompute_dependencies_no_state_update(
+ cursor: sqlite3.Cursor,
+ extra_requirements: t.Iterable[sds.MappingRequirement]
+) -> None:
+ # NOTE(review): the same DELETE is executed again further down, right
+ # before the new payloads get inserted.
+ cursor.execute('DELETE FROM payloads;')
+
+ # NOTE(review): the keys of both mappings are used below as item version
+ # ids (looked up by `active_version_id`, compared with
+ # `item_versions.item_version_id`) - confirm get_infos_of_type() really
+ # keys its result that way.
+ ids_to_resources = get_infos_of_type(cursor, item_infos.ResourceInfo)
+ ids_to_mappings = get_infos_of_type(cursor, item_infos.MappingInfo)
+
+ # Reverse lookups, from identifier to id.
+ # NOTE(review): keyed by identifier only, so when several versions of one
+ # item are stored only the last one seen remains reachable - confirm this
+ # is intended.
+ resources = ids_to_resources.items()
+ resources_to_ids = dict((info.identifier, id) for id, info in resources)
+
+ mappings = ids_to_mappings.items()
+ mappings_to_ids = dict((info.identifier, id) for id, info in mappings)
+
+ requirements = [*extra_requirements]
+
+ # Mappings with enabled = 'E' and frozen = 'N' are required by identifier
+ # alone.
+ cursor.execute(
+ '''
+ SELECT
+ i.identifier
+ FROM
+ mapping_statuses AS ms
+ JOIN items AS i USING(item_id)
+ WHERE
+ ms.enabled = 'E' AND ms.frozen = 'N';
+ '''
+ )
+
+ for mapping_identifier, in cursor.fetchall():
+ requirements.append(sds.MappingRequirement(mapping_identifier))
+
+ # Mappings with enabled = 'E' and frozen 'R' or 'E' are required in a
+ # form derived from their currently active version.
+ cursor.execute(
+ '''
+ SELECT
+ active_version_id, frozen
+ FROM
+ mapping_statuses
+ WHERE
+ enabled = 'E' AND frozen IN ('R', 'E');
+ '''
+ )
+
+ for active_version_id, frozen in cursor.fetchall():
+ info = ids_to_mappings[active_version_id]
+
+ requirement: sds.MappingRequirement
+
+ # frozen = 'R' yields a requirement built from the identifier and the
+ # active version's repo, anything else one built from the identifier
+ # and the active version's info itself.
+ if frozen == 'R':
+ requirement = sds.MappingRepoRequirement(info.identifier, info.repo)
+ else:
+ requirement = sds.MappingVersionRequirement(info.identifier, info)
+
+ requirements.append(requirement)
+
+ # NOTE(review): compute_payloads() is defined elsewhere; from its use
+ # below, the values of its result carry `.info`, `.required` and
+ # `.payloads` (a mapping from patterns to payloads).
+ mapping_choices = sds.compute_payloads(
+ ids_to_resources.values(),
+ ids_to_mappings.values(),
+ requirements
+ )
+
+ # Reset the computed columns of all mappings that are not enabled; the
+ # loop below sets them anew for the mappings that were chosen.
+ cursor.execute(
+ '''
+ UPDATE
+ mapping_statuses
+ SET
+ required = FALSE,
+ active_version_id = NULL
+ WHERE
+ enabled != 'E';
+ '''
+ )
+
+ cursor.execute('DELETE FROM payloads;')
+
+ for choice in mapping_choices.values():
+ mapping_ver_id = mappings_to_ids[choice.info.identifier]
+
+ # Find the item that the chosen version belongs to.
+ cursor.execute(
+ '''
+ SELECT
+ item_id
+ FROM
+ item_versions
+ WHERE
+ item_version_id = ?;
+ ''',
+ (mapping_ver_id,)
+ )
+
+ (mapping_item_id,), = cursor.fetchall()
+
+ cursor.execute(
+ '''
+ UPDATE
+ mapping_statuses
+ SET
+ required = ?,
+ active_version_id = ?
+ WHERE
+ item_id = ?;
+ ''',
+ (choice.required, mapping_ver_id, mapping_item_id)
+ )
+
+ # `num` is not used inside the loop.
+ for num, (pattern, payload) in enumerate(choice.payloads.items()):
+ # Despite the column's name, `mapping_item_id` receives the id of
+ # the mapping *version*.
+ cursor.execute(
+ '''
+ INSERT INTO payloads(
+ mapping_item_id,
+ pattern,
+ eval_allowed,
+ cors_bypass_allowed
+ )
+ VALUES (?, ?, ?, ?);
+ ''',
+ (
+ mapping_ver_id,
+ pattern.orig_url,
+ payload.allows_eval,
+ payload.allows_cors_bypass
+ )
+ )
+
+ # Read back the id of the payload row that was just inserted.
+ cursor.execute(
+ '''
+ SELECT
+ payload_id
+ FROM
+ payloads
+ WHERE
+ mapping_item_id = ? AND pattern = ?;
+ ''',
+ (mapping_ver_id, pattern.orig_url)
+ )
+
+ (payload_id,), = cursor.fetchall()
+
+ # Record the payload's resources; `idx` keeps the order in which
+ # they appear in `payload.resources`.
+ for res_num, resource_info in enumerate(payload.resources):
+ resource_ver_id = resources_to_ids[resource_info.identifier]
+ cursor.execute(
+ '''
+ INSERT INTO resolved_depended_resources(
+ payload_id,
+ resource_item_id,
+ idx
+ )
+ VALUES(?, ?, ?);
+ ''',
+ (payload_id, resource_ver_id, res_num)
+ )