From 16eaeb86948349141b1e6072eb6540c7cece10b6 Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Mon, 7 Feb 2022 16:51:11 +0100 Subject: move to a namespace package under 'hydrilla' --- src/hydrilla/__init__.py | 7 + src/hydrilla/builder/__init__.py | 7 + src/hydrilla/builder/__main__.py | 61 ++++++ src/hydrilla/builder/build.py | 375 ++++++++++++++++++++++++++++++++ src/hydrilla/builder/schemas | 1 + src/hydrilla/util/__init__.py | 101 +++++++++ src/hydrilla_builder/__init__.py | 5 - src/hydrilla_builder/__main__.py | 61 ------ src/hydrilla_builder/build.py | 434 -------------------------------------- src/hydrilla_builder/schemas | 1 - src/test/test_hydrilla_builder.py | 7 +- 11 files changed, 556 insertions(+), 504 deletions(-) create mode 100644 src/hydrilla/__init__.py create mode 100644 src/hydrilla/builder/__init__.py create mode 100644 src/hydrilla/builder/__main__.py create mode 100644 src/hydrilla/builder/build.py create mode 160000 src/hydrilla/builder/schemas create mode 100644 src/hydrilla/util/__init__.py delete mode 100644 src/hydrilla_builder/__init__.py delete mode 100644 src/hydrilla_builder/__main__.py delete mode 100644 src/hydrilla_builder/build.py delete mode 160000 src/hydrilla_builder/schemas (limited to 'src') diff --git a/src/hydrilla/__init__.py b/src/hydrilla/__init__.py new file mode 100644 index 0000000..6aeb276 --- /dev/null +++ b/src/hydrilla/__init__.py @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: 0BSD + +# Copyright (C) 2013-2020, PyPA + +# https://packaging.python.org/en/latest/guides/packaging-namespace-packages/#pkgutil-style-namespace-packages + +__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/src/hydrilla/builder/__init__.py b/src/hydrilla/builder/__init__.py new file mode 100644 index 0000000..73dc579 --- /dev/null +++ b/src/hydrilla/builder/__init__.py @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: CC0-1.0 + +# Copyright (C) 2022 Wojtek Kosior +# +# Available under the terms of Creative Commons Zero v1.0 Universal. + +from .build import Build diff --git a/src/hydrilla/builder/__main__.py b/src/hydrilla/builder/__main__.py new file mode 100644 index 0000000..5b98202 --- /dev/null +++ b/src/hydrilla/builder/__main__.py @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + +# Command line interface of Hydrilla package builder. +# +# This file is part of Hydrilla +# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. 
+ +from pathlib import Path + +import click + +from .build import Build + +def validate_dir_path(ctx, param, value): + path = Path(value) + if path.is_dir(): + return path.resolve() + + raise click.BadParameter(f'{param.human_readable_name} must be a directory path') + +def validate_path(ctx, param, value): + return Path(value) + +@click.command() +@click.option('-s', '--srcdir', default='.', type=click.Path(), + callback=validate_dir_path, + help='Source directory to build from.') +@click.option('-i', '--index-json', default='index.json', type=click.Path(), + callback=validate_path, + help='Path to file to be processed instead of index.json (if not absolute, resolved relative to srcdir).') +@click.option('-d', '--dstdir', type=click.Path(), required=True, + callback=validate_dir_path, + help='Destination directory to write built package files to.') +def preform_build(srcdir, index_json, dstdir): + """ + Build Hydrilla package from scrdir and write the resulting files under + dstdir. + """ + build = Build(srcdir, index_json) + build.write_package_files(dstdir) + +preform_build() diff --git a/src/hydrilla/builder/build.py b/src/hydrilla/builder/build.py new file mode 100644 index 0000000..d89ead3 --- /dev/null +++ b/src/hydrilla/builder/build.py @@ -0,0 +1,375 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + +# Building Hydrilla packages. +# +# This file is part of Hydrilla +# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +import json +import re +import zipfile +from pathlib import Path +from hashlib import sha256 +from sys import stderr + +import jsonschema + +from .. import util + +here = Path(__file__).resolve().parent +with open(here / 'schemas' / 'package_source-1.schema.json') as schema_file: + index_json_schema = json.load(schema_file) + +class FileReferenceError(Exception): + """ + Exception used to report various problems concerning files referenced from + source package's index.json. + """ + +class ReuseError(Exception): + """ + Exception used to report various problems when calling the REUSE tool. + """ + +class FileBuffer: + """ + Implement a file-like object that buffers data written to it. + """ + def __init__(self): + """ + Initialize FileBuffer. + """ + self.chunks = [] + + def write(self, b): + """ + Buffer 'b', return number of bytes buffered. + + 'b' is expected to be an instance of 'bytes' or 'str', in which case it + gets encoded as UTF-8. + """ + if type(b) is str: + b = b.encode() + self.chunks.append(b) + return len(b) + + def flush(self): + """ + A no-op mock of file-like object's flush() method. + """ + pass + + def get_bytes(self): + """ + Return all data written so far concatenated into a single 'bytes' + object. 
+ """ + return b''.join(self.chunks) + +def generate_spdx_report(root): + """ + Use REUSE tool to generate an SPDX report for sources under 'root' and + return the report's contents as 'bytes'. + + 'root' shall be an instance of pathlib.Path. + + In case the directory tree under 'root' does not constitute a + REUSE-compliant package, linting report is printed to standard output and + an exception is raised. + + In case the reuse package is not installed, an exception is also raised. + """ + try: + from reuse._main import main as reuse_main + except ModuleNotFoundError: + ReuseError("Could not import 'reuse'. Is the tool installed and visible to this Python instance?") + + mocked_output = FileBuffer() + if reuse_main(args=['--root', str(root), 'lint'], out=mocked_output) != 0: + stderr.write(mocked_output.get_bytes().decode()) + raise ReuseError('Attempt to generate an SPDX report for a REUSE-incompliant package.') + + mocked_output = FileBuffer() + if reuse_main(args=['--root', str(root), 'spdx'], out=mocked_output) != 0: + stderr.write(mocked_output.get_bytes().decode()) + raise ReuseError("Couldn't generate an SPDX report for package.") + + return mocked_output.get_bytes() + +class FileRef: + """Represent reference to a file in the package.""" + def __init__(self, path: Path, contents: bytes): + """Initialize FileRef.""" + self.include_in_distribution = False + self.include_in_zipfile = True + self.path = path + self.contents = contents + + self.contents_hash = sha256(contents).digest().hex() + + def make_ref_dict(self, filename: str): + """ + Represent the file reference through a dict that can be included in JSON + defintions. + """ + return { + 'file': filename, + 'sha256': self.contents_hash + } + +class Build: + """ + Build a Hydrilla package. + """ + def __init__(self, srcdir, index_json_path): + """ + Initialize a build. All files to be included in a distribution package + are loaded into memory, all data gets validated and all necessary + computations (e.g. preparing of hashes) are performed. + + 'srcdir' and 'index_json' are expected to be pathlib.Path objects. + """ + self.srcdir = srcdir.resolve() + self.index_json_path = index_json_path + self.files_by_path = {} + self.resource_list = [] + self.mapping_list = [] + + if not index_json_path.is_absolute(): + self.index_json_path = (self.srcdir / self.index_json_path) + + self.index_json_path = self.index_json_path.resolve() + + with open(self.index_json_path, 'rt') as index_file: + index_json_text = index_file.read() + + index_obj = json.loads(util.strip_json_comments(index_json_text)) + + self.files_by_path[self.srcdir / 'index.json'] = \ + FileRef(self.srcdir / 'index.json', index_json_text.encode()) + + self._process_index_json(index_obj) + + def _process_file(self, filename: str, include_in_distribution: bool=True): + """ + Resolve 'filename' relative to srcdir, load it to memory (if not loaded + before), compute its hash and store its information in + 'self.files_by_path'. + + 'filename' shall represent a relative path using '/' as a separator. + + if 'include_in_distribution' is True it shall cause the file to not only + be included in the source package's zipfile, but also written as one of + built package's files. + + Return file's reference object that can be included in JSON defintions + of various kinds. 
+ """ + path = self.srcdir + for segment in filename.split('/'): + path /= segment + + path = path.resolve() + if not path.is_relative_to(self.srcdir): + raise FileReferenceError(f"Attempt to load '{filename}' which lies outside package source directory.") + + if str(path.relative_to(self.srcdir)) == 'index.json': + raise FileReferenceError("Attempt to load 'index.json' which is a reserved filename.") + + file_ref = self.files_by_path.get(path) + if file_ref is None: + with open(path, 'rb') as file_handle: + contents = file_handle.read() + + file_ref = FileRef(path, contents) + self.files_by_path[path] = file_ref + + if include_in_distribution: + file_ref.include_in_distribution = True + + return file_ref.make_ref_dict(filename) + + def _prepare_source_package_zip(self, root_dir_name: str): + """ + Create and store in memory a .zip archive containing files needed to + build this source package. + + 'root_dir_name' shall not contain any slashes ('/'). + + Return zipfile's sha256 sum's hexstring. + """ + fb = FileBuffer() + root_dir_path = Path(root_dir_name) + + def zippath(file_path): + file_path = root_dir_path / file_path.relative_to(self.srcdir) + return file_path.as_posix() + + with zipfile.ZipFile(fb, 'w') as xpi: + for file_ref in self.files_by_path.values(): + if file_ref.include_in_zipfile: + xpi.writestr(zippath(file_ref.path), file_ref.contents) + + self.source_zip_contents = fb.get_bytes() + + return sha256(self.source_zip_contents).digest().hex() + + def _process_item(self, item_def: dict): + """ + Process 'item_def' as definition of a resource/mapping and store in + memory its processed form and files used by it. + + Return a minimal item reference suitable for using in source + description. + """ + copy_props = ['type', 'identifier', 'long_name', 'uuid', 'description'] + if 'comment' in item_def: + copy_props.append('comment') + + if item_def['type'] == 'resource': + item_list = self.resource_list + + copy_props.append('revision') + + script_file_refs = [self._process_file(f['file']) + for f in item_def.get('scripts', [])] + + new_item_obj = { + 'dependencies': item_def.get('dependencies', []), + 'scripts': script_file_refs + } + else: + item_list = self.mapping_list + + payloads = {} + for pat, res_ref in item_def.get('payloads', {}).items(): + payloads[pat] = {'identifier': res_ref['identifier']} + + new_item_obj = { + 'payloads': payloads + } + + new_item_obj.update([(p, item_def[p]) for p in copy_props]) + + new_item_obj['version'] = util.normalize_version(item_def['version']) + new_item_obj['api_schema_version'] = [1, 0, 1] + new_item_obj['source_copyright'] = self.copyright_file_refs + new_item_obj['source_name'] = self.source_name + + item_list.append(new_item_obj) + + return dict([(prop, new_item_obj[prop]) + for prop in ('type', 'identifier', 'version')]) + + def _process_index_json(self, index_obj: dict): + """ + Process 'index_obj' as contents of source package's index.json and store + in memory this source package's zipfile as well as package's individual + files and computed definitions of the source package and items defined + in it. 
+ """ + jsonschema.validate(index_obj, index_json_schema) + + self.source_name = index_obj['source_name'] + + generate_spdx = index_obj.get('reuse_generate_spdx_report', False) + if generate_spdx: + contents = generate_spdx_report(self.srcdir) + spdx_path = (self.srcdir / 'report.spdx').resolve() + spdx_ref = FileRef(spdx_path, contents) + + spdx_ref.include_in_zipfile = False + self.files_by_path[spdx_path] = spdx_ref + + self.copyright_file_refs = \ + [self._process_file(f['file']) for f in index_obj['copyright']] + + if generate_spdx and not spdx_ref.include_in_distribution: + raise FileReferenceError("Told to generate 'report.spdx' but 'report.spdx' is not listed among copyright files. Refusing to proceed.") + + item_refs = [self._process_item(d) for d in index_obj['definitions']] + + for file_ref in index_obj.get('additional_files', []): + self._process_file(file_ref['file'], include_in_distribution=False) + + root_dir_path = Path(self.source_name) + + source_archives_obj = { + 'zip' : { + 'sha256': self._prepare_source_package_zip(root_dir_path) + } + } + + self.source_description = { + 'api_schema_version': [1, 0, 1], + 'source_name': self.source_name, + 'source_copyright': self.copyright_file_refs, + 'upstream_url': index_obj['upstream_url'], + 'definitions': item_refs, + 'source_archives': source_archives_obj + } + + if 'comment' in index_obj: + self.source_description['comment'] = index_obj['comment'] + + def write_source_package_zip(self, dstpath: Path): + """ + Create a .zip archive containing files needed to build this source + package and write it at 'dstpath'. + """ + with open(dstpath, 'wb') as output: + output.write(self.source_zip_contents) + + def write_package_files(self, dstpath: Path): + """Write package files under 'dstpath' for distribution.""" + file_dir_path = (dstpath / 'file').resolve() + file_dir_path.mkdir(parents=True, exist_ok=True) + + for file_ref in self.files_by_path.values(): + if file_ref.include_in_distribution: + file_name = f'sha256-{file_ref.contents_hash}' + with open(file_dir_path / file_name, 'wb') as output: + output.write(file_ref.contents) + + source_dir_path = (dstpath / 'source').resolve() + source_dir_path.mkdir(parents=True, exist_ok=True) + source_name = self.source_description["source_name"] + + with open(source_dir_path / f'{source_name}.json', 'wt') as output: + json.dump(self.source_description, output) + + with open(source_dir_path / f'{source_name}.zip', 'wb') as output: + output.write(self.source_zip_contents) + + for item_type, item_list in [ + ('resource', self.resource_list), + ('mapping', self.mapping_list) + ]: + item_type_dir_path = (dstpath / item_type).resolve() + + for item_def in item_list: + item_dir_path = item_type_dir_path / item_def['identifier'] + item_dir_path.mkdir(parents=True, exist_ok=True) + + version = '.'.join([str(n) for n in item_def['version']]) + with open(item_dir_path / version, 'wt') as output: + json.dump(item_def, output) diff --git a/src/hydrilla/builder/schemas b/src/hydrilla/builder/schemas new file mode 160000 index 0000000..ca1de2e --- /dev/null +++ b/src/hydrilla/builder/schemas @@ -0,0 +1 @@ +Subproject commit ca1de2ed4a69a71f2f75552ade693d04ea1baa85 diff --git a/src/hydrilla/util/__init__.py b/src/hydrilla/util/__init__.py new file mode 100644 index 0000000..72f73bc --- /dev/null +++ b/src/hydrilla/util/__init__.py @@ -0,0 +1,101 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + +# Building Hydrilla packages. 
+# +# This file is part of Hydrilla +# +# Copyright (C) 2021, 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use this code +# in a proprietary program, I am not going to enforce this in court. + +import re as _re +import json as _json + +from typing import Optional as _Optional + +_strip_comment_re = _re.compile(r''' +^ # match from the beginning of each line +( # catch the part before '//' comment + (?: # this group matches either a string or a single out-of-string character + [^"/] | + " + (?: # this group matches any in-a-string character + [^"\\] | # match any normal character + \\[^u] | # match any escaped character like '\f' or '\n' + \\u[a-fA-F0-9]{4} # match an escape + )* + " + )* +) +# expect either end-of-line or a comment: +# * unterminated strings will cause matching to fail +# * bad comment (with '/' instead of '//') will be indicated by second group +# having length 1 instead of 2 or 0 +(//?|$) +''', _re.VERBOSE) + +def strip_json_comments(text: str) -> str: + """ + Accept JSON text with optional C++-style ('//') comments and return the text + with comments removed. Consecutive slashes inside strings are handled + properly. A spurious single slash ('/') shall generate an error. Errors in + JSON itself shall be ignored. + """ + processed = 0 + stripped_text = [] + for line in text.split('\n'): + match = _strip_comment_re.match(line) + + if match is None: # unterminated string + # ignore this error, let json module report it + stripped = line + elif len(match[2]) == 1: + raise _json.JSONDecodeError('bad comment', text, + processed + len(match[1])) + else: + stripped = match[1] + + stripped_text.append(stripped) + processed += len(line) + 1 + + return '\n'.join(stripped_text) + +def normalize_version(ver: list[int]) -> list[int]: + """Strip right-most zeroes from 'ver'. The original list is not modified.""" + new_len = 0 + for i, num in enumerate(ver): + if num != 0: + new_len = i + 1 + + return ver[:new_len] + +def parse_version(ver_str: str) -> list[int]: + """ + Convert 'ver_str' into an array representation, e.g. for ver_str="4.6.13.0" + return [4, 6, 13, 0]. + """ + return [int(num) for num in ver_str.split('.')] + +def version_string(ver: list[int], rev: _Optional[int]=None) -> str: + """ + Produce version's string representation (optionally with revision), like: + 1.2.3-5 + No version normalization is performed. + """ + return '.'.join([str(n) for n in ver]) + ('' if rev is None else f'-{rev}') diff --git a/src/hydrilla_builder/__init__.py b/src/hydrilla_builder/__init__.py deleted file mode 100644 index d382ead..0000000 --- a/src/hydrilla_builder/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# SPDX-License-Identifier: CC0-1.0 - -# Copyright (C) 2022 Wojtek Kosior -# -# Available under the terms of Creative Commons Zero v1.0 Universal. 
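The one-line src/hydrilla/__init__.py introduced above is the pkgutil-style namespace package pattern referenced in its comment. The short demonstration below is not part of the commit: it fabricates two throwaway "distribution" roots in a temporary directory, each carrying its own copy of that __init__.py plus a hypothetical subpackage (the names builder_demo and server_demo are made up for this sketch), to show how independently shipped subpackages merge under the single 'hydrilla' namespace.

# Demonstration only -- 'builder_demo' and 'server_demo' are invented names
# and do not exist in Hydrilla.
import sys
import tempfile
from pathlib import Path

tmp = Path(tempfile.mkdtemp())
ns_init = "__path__ = __import__('pkgutil').extend_path(__path__, __name__)\n"

for dist_root, subpackage in [('dist_a', 'builder_demo'), ('dist_b', 'server_demo')]:
    pkg_dir = tmp / dist_root / 'hydrilla'
    (pkg_dir / subpackage).mkdir(parents=True)
    # Every distribution ships the same one-line namespace __init__.py...
    (pkg_dir / '__init__.py').write_text(ns_init)
    # ...plus its own subpackage.
    (pkg_dir / subpackage / '__init__.py').write_text(f"WHO = '{subpackage}'\n")
    sys.path.insert(0, str(tmp / dist_root))

# Both subpackages resolve even though they live under different roots,
# because extend_path() merges every 'hydrilla' directory found on sys.path.
from hydrilla import builder_demo, server_demo
print(builder_demo.WHO, server_demo.WHO)   # -> builder_demo server_demo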
diff --git a/src/hydrilla_builder/__main__.py b/src/hydrilla_builder/__main__.py deleted file mode 100644 index 5b98202..0000000 --- a/src/hydrilla_builder/__main__.py +++ /dev/null @@ -1,61 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later - -# Command line interface of Hydrilla package builder. -# -# This file is part of Hydrilla -# -# Copyright (C) 2022 Wojtek Kosior -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# -# -# I, Wojtek Kosior, thereby promise not to sue for violation of this -# file's license. Although I request that you do not make use this code -# in a proprietary program, I am not going to enforce this in court. - -from pathlib import Path - -import click - -from .build import Build - -def validate_dir_path(ctx, param, value): - path = Path(value) - if path.is_dir(): - return path.resolve() - - raise click.BadParameter(f'{param.human_readable_name} must be a directory path') - -def validate_path(ctx, param, value): - return Path(value) - -@click.command() -@click.option('-s', '--srcdir', default='.', type=click.Path(), - callback=validate_dir_path, - help='Source directory to build from.') -@click.option('-i', '--index-json', default='index.json', type=click.Path(), - callback=validate_path, - help='Path to file to be processed instead of index.json (if not absolute, resolved relative to srcdir).') -@click.option('-d', '--dstdir', type=click.Path(), required=True, - callback=validate_dir_path, - help='Destination directory to write built package files to.') -def preform_build(srcdir, index_json, dstdir): - """ - Build Hydrilla package from scrdir and write the resulting files under - dstdir. - """ - build = Build(srcdir, index_json) - build.write_package_files(dstdir) - -preform_build() diff --git a/src/hydrilla_builder/build.py b/src/hydrilla_builder/build.py deleted file mode 100644 index 652e537..0000000 --- a/src/hydrilla_builder/build.py +++ /dev/null @@ -1,434 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later - -# Building Hydrilla packages. -# -# This file is part of Hydrilla -# -# Copyright (C) 2021,2022 Wojtek Kosior -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# -# -# I, Wojtek Kosior, thereby promise not to sue for violation of this -# file's license. Although I request that you do not make use this code -# in a proprietary program, I am not going to enforce this in court. 
- - -import json -import re -import zipfile -from pathlib import Path -from hashlib import sha256 -from sys import stderr - -import jsonschema - -here = Path(__file__).resolve().parent -with open(here / 'schemas' / 'package_source-1.schema.json') as schema_file: - index_json_schema = json.load(schema_file) - -class FileReferenceError(Exception): - """ - Exception used to report various problems concerning files referenced from - source package's index.json. - """ - -class ReuseError(Exception): - """ - Exception used to report various problems when calling the REUSE tool. - """ - -strip_comment_re = re.compile(r''' -^ # match from the beginning of each line -( # catch the part before '//' comment - (?: # this group matches either a string or a single out-of-string character - [^"/] | - " - (?: # this group matches any in-a-string character - [^"\\] | # match any normal character - \\[^u] | # match any escaped character like '\f' or '\n' - \\u[a-fA-F0-9]{4} # match an escape - )* - " - )* -) -# expect either end-of-line or a comment: -# * unterminated strings will cause matching to fail -# * bad comment (with '/' instead of '//') will be indicated by second group -# having length 1 instead of 2 or 0 -(//?|$) -''', re.VERBOSE) - -def strip_json_comments(text): - """ - Accept JSON text with optional C++-style ('//') comments and return the text - with comments removed. Consecutive slashes inside strings are handled - properly. A spurious single slash ('/') shall generate an error. Errors in - JSON itself shall be ignored. - """ - processed = 0 - stripped_text = [] - for line in text.split('\n'): - match = strip_comment_re.match(line) - - if match is None: # unterminated string - # ignore this error, let json module report it - stripped = line - elif len(match[2]) == 1: - raise json.JSONDecodeError('bad comment', text, - processed + len(match[1])) - else: - stripped = match[1] - - stripped_text.append(stripped) - processed += len(line) + 1 - - return '\n'.join(stripped_text) - -def normalize_version(ver): - ''' - 'ver' is an array of integers. Strip right-most zeroes from ver. - - Returns a *new* array. Doesn't modify its argument. - ''' - new_len = 0 - for i, num in enumerate(ver): - if num != 0: - new_len = i + 1 - - return ver[:new_len] - -class FileBuffer: - """ - Implement a file-like object that buffers data written to it. - """ - def __init__(self): - """ - Initialize FileBuffer. - """ - self.chunks = [] - - def write(self, b): - """ - Buffer 'b', return number of bytes buffered. - - 'b' is expected to be an instance of 'bytes' or 'str', in which case it - gets encoded as UTF-8. - """ - if type(b) is str: - b = b.encode() - self.chunks.append(b) - return len(b) - - def flush(self): - """ - A no-op mock of file-like object's flush() method. - """ - pass - - def get_bytes(self): - """ - Return all data written so far concatenated into a single 'bytes' - object. - """ - return b''.join(self.chunks) - -def generate_spdx_report(root): - """ - Use REUSE tool to generate an SPDX report for sources under 'root' and - return the report's contents as 'bytes'. - - 'root' shall be an instance of pathlib.Path. - - In case the directory tree under 'root' does not constitute a - REUSE-compliant package, linting report is printed to standard output and - an exception is raised. - - In case the reuse package is not installed, an exception is also raised. - """ - try: - from reuse._main import main as reuse_main - except ModuleNotFoundError: - ReuseError("Could not import 'reuse'. 
Is the tool installed and visible to this Python instance?") - - mocked_output = FileBuffer() - if reuse_main(args=['--root', str(root), 'lint'], out=mocked_output) != 0: - stderr.write(mocked_output.get_bytes().decode()) - raise ReuseError('Attempt to generate an SPDX report for a REUSE-incompliant package.') - - mocked_output = FileBuffer() - if reuse_main(args=['--root', str(root), 'spdx'], out=mocked_output) != 0: - stderr.write(mocked_output.get_bytes().decode()) - raise ReuseError("Couldn't generate an SPDX report for package.") - - return mocked_output.get_bytes() - -class FileRef: - """Represent reference to a file in the package.""" - def __init__(self, path: Path, contents: bytes): - """Initialize FileRef.""" - self.include_in_distribution = False - self.include_in_zipfile = True - self.path = path - self.contents = contents - - self.contents_hash = sha256(contents).digest().hex() - - def make_ref_dict(self, filename: str): - """ - Represent the file reference through a dict that can be included in JSON - defintions. - """ - return { - 'file': filename, - 'sha256': self.contents_hash - } - -class Build: - """ - Build a Hydrilla package. - """ - def __init__(self, srcdir, index_json_path): - """ - Initialize a build. All files to be included in a distribution package - are loaded into memory, all data gets validated and all necessary - computations (e.g. preparing of hashes) are performed. - - 'srcdir' and 'index_json' are expected to be pathlib.Path objects. - """ - self.srcdir = srcdir.resolve() - self.index_json_path = index_json_path - self.files_by_path = {} - self.resource_list = [] - self.mapping_list = [] - - if not index_json_path.is_absolute(): - self.index_json_path = (self.srcdir / self.index_json_path) - - self.index_json_path = self.index_json_path.resolve() - - with open(self.index_json_path, 'rt') as index_file: - index_json_text = index_file.read() - - index_obj = json.loads(strip_json_comments(index_json_text)) - - self.files_by_path[self.srcdir / 'index.json'] = \ - FileRef(self.srcdir / 'index.json', index_json_text.encode()) - - self._process_index_json(index_obj) - - def _process_file(self, filename: str, include_in_distribution: bool=True): - """ - Resolve 'filename' relative to srcdir, load it to memory (if not loaded - before), compute its hash and store its information in - 'self.files_by_path'. - - 'filename' shall represent a relative path using '/' as a separator. - - if 'include_in_distribution' is True it shall cause the file to not only - be included in the source package's zipfile, but also written as one of - built package's files. - - Return file's reference object that can be included in JSON defintions - of various kinds. 
- """ - path = self.srcdir - for segment in filename.split('/'): - path /= segment - - path = path.resolve() - if not path.is_relative_to(self.srcdir): - raise FileReferenceError(f"Attempt to load '{filename}' which lies outside package source directory.") - - if str(path.relative_to(self.srcdir)) == 'index.json': - raise FileReferenceError("Attempt to load 'index.json' which is a reserved filename.") - - file_ref = self.files_by_path.get(path) - if file_ref is None: - with open(path, 'rb') as file_handle: - contents = file_handle.read() - - file_ref = FileRef(path, contents) - self.files_by_path[path] = file_ref - - if include_in_distribution: - file_ref.include_in_distribution = True - - return file_ref.make_ref_dict(filename) - - def _prepare_source_package_zip(self, root_dir_name: str): - """ - Create and store in memory a .zip archive containing files needed to - build this source package. - - 'root_dir_name' shall not contain any slashes ('/'). - - Return zipfile's sha256 sum's hexstring. - """ - fb = FileBuffer() - root_dir_path = Path(root_dir_name) - - def zippath(file_path): - file_path = root_dir_path / file_path.relative_to(self.srcdir) - return file_path.as_posix() - - with zipfile.ZipFile(fb, 'w') as xpi: - for file_ref in self.files_by_path.values(): - if file_ref.include_in_zipfile: - xpi.writestr(zippath(file_ref.path), file_ref.contents) - - self.source_zip_contents = fb.get_bytes() - - return sha256(self.source_zip_contents).digest().hex() - - def _process_item(self, item_def: dict): - """ - Process 'item_def' as definition of a resource/mapping and store in - memory its processed form and files used by it. - - Return a minimal item reference suitable for using in source - description. - """ - copy_props = ['type', 'identifier', 'long_name', 'uuid', 'description'] - if 'comment' in item_def: - copy_props.append('comment') - - if item_def['type'] == 'resource': - item_list = self.resource_list - - copy_props.append('revision') - - script_file_refs = [self._process_file(f['file']) - for f in item_def.get('scripts', [])] - - new_item_obj = { - 'dependencies': item_def.get('dependencies', []), - 'scripts': script_file_refs - } - else: - item_list = self.mapping_list - - payloads = {} - for pat, res_ref in item_def.get('payloads', {}).items(): - payloads[pat] = {'identifier': res_ref['identifier']} - - new_item_obj = { - 'payloads': payloads - } - - new_item_obj.update([(p, item_def[p]) for p in copy_props]) - - new_item_obj['version'] = normalize_version(item_def['version']) - new_item_obj['api_schema_version'] = [1, 0, 1] - new_item_obj['source_copyright'] = self.copyright_file_refs - new_item_obj['source_name'] = self.source_name - - item_list.append(new_item_obj) - - return dict([(prop, new_item_obj[prop]) - for prop in ('type', 'identifier', 'version')]) - - def _process_index_json(self, index_obj: dict): - """ - Process 'index_obj' as contents of source package's index.json and store - in memory this source package's zipfile as well as package's individual - files and computed definitions of the source package and items defined - in it. 
- """ - jsonschema.validate(index_obj, index_json_schema) - - self.source_name = index_obj['source_name'] - - generate_spdx = index_obj.get('reuse_generate_spdx_report', False) - if generate_spdx: - contents = generate_spdx_report(self.srcdir) - spdx_path = (self.srcdir / 'report.spdx').resolve() - spdx_ref = FileRef(spdx_path, contents) - - spdx_ref.include_in_zipfile = False - self.files_by_path[spdx_path] = spdx_ref - - self.copyright_file_refs = \ - [self._process_file(f['file']) for f in index_obj['copyright']] - - if generate_spdx and not spdx_ref.include_in_distribution: - raise FileReferenceError("Told to generate 'report.spdx' but 'report.spdx' is not listed among copyright files. Refusing to proceed.") - - item_refs = [self._process_item(d) for d in index_obj['definitions']] - - for file_ref in index_obj.get('additional_files', []): - self._process_file(file_ref['file'], include_in_distribution=False) - - root_dir_path = Path(self.source_name) - - source_archives_obj = { - 'zip' : { - 'sha256': self._prepare_source_package_zip(root_dir_path) - } - } - - self.source_description = { - 'api_schema_version': [1, 0, 1], - 'source_name': self.source_name, - 'source_copyright': self.copyright_file_refs, - 'upstream_url': index_obj['upstream_url'], - 'definitions': item_refs, - 'source_archives': source_archives_obj - } - - if 'comment' in index_obj: - self.source_description['comment'] = index_obj['comment'] - - def write_source_package_zip(self, dstpath: Path): - """ - Create a .zip archive containing files needed to build this source - package and write it at 'dstpath'. - """ - with open(dstpath, 'wb') as output: - output.write(self.source_zip_contents) - - def write_package_files(self, dstpath: Path): - """Write package files under 'dstpath' for distribution.""" - file_dir_path = (dstpath / 'file').resolve() - file_dir_path.mkdir(parents=True, exist_ok=True) - - for file_ref in self.files_by_path.values(): - if file_ref.include_in_distribution: - file_name = f'sha256-{file_ref.contents_hash}' - with open(file_dir_path / file_name, 'wb') as output: - output.write(file_ref.contents) - - source_dir_path = (dstpath / 'source').resolve() - source_dir_path.mkdir(parents=True, exist_ok=True) - source_name = self.source_description["source_name"] - - with open(source_dir_path / f'{source_name}.json', 'wt') as output: - json.dump(self.source_description, output) - - with open(source_dir_path / f'{source_name}.zip', 'wb') as output: - output.write(self.source_zip_contents) - - for item_type, item_list in [ - ('resource', self.resource_list), - ('mapping', self.mapping_list) - ]: - item_type_dir_path = (dstpath / item_type).resolve() - - for item_def in item_list: - item_dir_path = item_type_dir_path / item_def['identifier'] - item_dir_path.mkdir(parents=True, exist_ok=True) - - version = '.'.join([str(n) for n in item_def['version']]) - with open(item_dir_path / version, 'wt') as output: - json.dump(item_def, output) diff --git a/src/hydrilla_builder/schemas b/src/hydrilla_builder/schemas deleted file mode 160000 index ca1de2e..0000000 --- a/src/hydrilla_builder/schemas +++ /dev/null @@ -1 +0,0 @@ -Subproject commit ca1de2ed4a69a71f2f75552ade693d04ea1baa85 diff --git a/src/test/test_hydrilla_builder.py b/src/test/test_hydrilla_builder.py index f4a4d2f..410b7a1 100644 --- a/src/test/test_hydrilla_builder.py +++ b/src/test/test_hydrilla_builder.py @@ -16,7 +16,8 @@ from typing import Callable, Optional from jsonschema import ValidationError -from hydrilla_builder import build +from hydrilla 
import util as hydrilla_util +from hydrilla.builder import build here = Path(__file__).resolve().parent @@ -187,7 +188,7 @@ def prepare_modified(tmpdir: Path, modify_cb: ModifyCb) -> CaseSettings: settings.srcdir = tmpdir / 'srcdir_copy' with open(settings.srcdir / 'index.json', 'rt') as file_handle: - obj = json.loads(build.strip_json_comments(file_handle.read())) + obj = json.loads(hydrilla_util.strip_json_comments(file_handle.read())) contents = modify_cb(settings, obj) @@ -438,4 +439,4 @@ def test_build_error(tmpdir: str, break_index_json: tuple[ModifyCb, type]): with pytest.raises(error_type): build.Build(settings.srcdir, settings.index_json_path)\ - .write_package_files(dstdir) + .write_package_files(dstdir) -- cgit v1.2.3
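For reference, a minimal sketch of driving the relocated builder through its new import path, mirroring the imports used by the updated test module. The 'my_source_package' and 'dist' directories are placeholders, not anything defined by this commit; the source directory must contain a valid Hydrilla index.json.

import json
from pathlib import Path

from hydrilla import util as hydrilla_util
from hydrilla.builder import Build

srcdir = Path('my_source_package')   # placeholder: a Hydrilla source package
dstdir = Path('dist')                # placeholder: where built files will go

# index.json may carry C++-style '//' comments; strip them before parsing.
with open(srcdir / 'index.json', 'rt') as index_file:
    index_obj = json.loads(hydrilla_util.strip_json_comments(index_file.read()))
print('building', index_obj['source_name'])

# Build() loads, validates and hashes everything up front; this call then
# writes the file/, source/, resource/ and mapping/ trees under dstdir.
Build(srcdir, Path('index.json')).write_package_files(dstdir)

The same build can be run from the command line through the module entry point added in __main__.py, e.g. python3 -m hydrilla.builder -s my_source_package -d dist (with the CLI, both directories must already exist to pass its path validation).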