From f4edcbe7f4739d6f82a2e1bb180960b003b30862 Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Fri, 19 Nov 2021 10:32:31 +0100 Subject: fill served definitions with computed sha256 sums of files --- src/pydrilla/pydrilla.py | 124 +++++++++++++++++++++++++++++++---------------- 1 file changed, 83 insertions(+), 41 deletions(-) (limited to 'src/pydrilla/pydrilla.py') diff --git a/src/pydrilla/pydrilla.py b/src/pydrilla/pydrilla.py index b0a5974..9e697ba 100644 --- a/src/pydrilla/pydrilla.py +++ b/src/pydrilla/pydrilla.py @@ -28,7 +28,7 @@ from flask import Flask, Blueprint, current_app, url_for, abort, request, \ redirect from jinja2 import Environment, PackageLoader import re -#from hashlib import sha256 +from hashlib import sha256 import os import pathlib import json @@ -333,17 +333,52 @@ class MappingItem: class Content: '''Stores serveable website content.''' - def __init__(self): + def __init__(self, content_dir_path): + ''' + When an instance of Content is constructed, it searches + content_dir_path for custom serveable site content and loads it. 
+ ''' self.resources = {} self.mappings = {} self.licenses = {} self.indexes = {} self.definition_processors = { - 'resource': self.process_resource_or_mapping, - 'mapping': self.process_resource_or_mapping, - 'license': self.process_license + 'resource': self._process_resource_or_mapping, + 'mapping': self._process_resource_or_mapping, + 'license': self._process_license } self.patterns_by_proto = {} + self.file_sha256sums = {} + + self.content_dir_path = pathlib.Path(content_dir_path).resolve() + + if not self.content_dir_path.is_dir(): + raise ValueError(_('content_dir_path_not_dir')) + + for subdir_path in self.content_dir_path.iterdir(): + if not subdir_path.is_dir(): + continue + try: + self._load_content_from_subdir(subdir_path, subdir_path.name) + except Exception as e: + if current_app._pydrilla_werror: + raise e from None + logging.error(_('couldnt_load_content_from_%s'), subdir_path, + exc_info=True) + + self._report_missing() + self._finalize() + + def _load_content_from_subdir(self, subdir_path, source_name): + ''' + Helper function used to load definitions from index.json of a + subdirectory of the content directory. + ''' + index_path = subdir_path / 'index.json' + with open(index_path) as index_file: + index = json.loads(strip_json_comments(index_file.read())) + + self._process_index(index, source_name) @staticmethod def register_item(dict, item): @@ -361,13 +396,38 @@ class Content: @staticmethod def _process_copyright_and_license(definition): - '''Helper function used by other process_*() methods.''' + '''Helper function used by other _process_*() methods.''' for field in ['copyright', 'licenses']: if definition[field] == 'auto': raise MyNotImplError(f'"{{field}}": "auto"', definition['source_name']) - def process_resource_or_mapping(self, definition, index): + def _get_file_sha256sum(self, path): + ''' + Compute sha256 of the file at path. Cache results on this Content + object. 
+ ''' + path = path.resolve() + sha256sum = self.file_sha256sums.get(path) + + if sha256sum is None: + with open(path, mode='rb') as hashed_file: + sha256sum = sha256(hashed_file.read()).digest().hex() + self.file_sha256sums[path] = sha256sum + + return sha256sum + + def _add_file_sha256sum(self, source_name, file_object): + ''' + Expect file_object to be a dict with field "file" holding a file path + relative to content directory's subdirectory source_name. Compute or + fetch from cache the sha256 sum of that file and put it in file_object's + "sha256" field. + ''' + file_path = self.content_dir_path / source_name / file_object['file'] + file_object['sha256'] = self._get_file_sha256sum(file_path) + + def _process_resource_or_mapping(self, definition, index): ''' Sanitizes, autocompletes and registers serveable mapping/resource definition. @@ -378,10 +438,13 @@ class Content: self._process_copyright_and_license(definition) definition['dependencies'] = definition.get('dependencies', []) self.register_item(self.resources, definition) + source_name = definition['source_name'] + for script in definition['scripts']: + self._add_file_sha256sum(source_name, script) else: self.register_item(self.mappings, definition) - def process_license(self, license, index): + def _process_license(self, license, index): '''Sanitizes and registers serveable license definition.''' identifier = license['identifier'] if identifier in self.licenses: @@ -389,7 +452,15 @@ class Content: self.licenses[identifier] = license - def process_index(self, index, source_name): + source_name = license['source_name'] + for legal_text in license['legal_text']: + self._add_file_sha256sum(source_name, legal_text) + + notice = license.get('notice') + if notice is not None: + self._add_file_sha256sum(source_name, notice) + + def _process_index(self, index, source_name): ''' Sanitizes, autocompletes and registers data from a loaded index.json file. 
@@ -429,7 +500,7 @@ class Content: for item in versioned_item.by_version.values(): yield item - def report_missing(self): + def _report_missing(self): ''' Use logger to print information about items that are referenced but were not loaded. @@ -488,7 +559,7 @@ class Content: if payload not in self.resources: report_missing_payload(mapping, payload) - def finalize(self): + def _finalize(self): ''' Initialize structures needed to serve queries. Called once after all data gets loaded. @@ -543,35 +614,6 @@ class Content: return list(mappings.values()) -def load_content_from_subdir(subdir_path, source_name, content): - index_path = subdir_path / 'index.json' - with open(index_path) as index_file: - index = json.loads(strip_json_comments(index_file.read())) - - content.process_index(index, source_name) - -def load_content(path): - if not path.is_dir(): - raise ValueError(_('content_dir_path_not_dir')) - - content = Content() - - for subdir_path in path.iterdir(): - if not subdir_path.is_dir(): - continue - try: - load_content_from_subdir(subdir_path, subdir_path.name, content) - except Exception as e: - if current_app._pydrilla_werror: - raise e from None - logging.error(_('couldnt_load_content_from_%s'), subdir_path, - exc_info=True) - - content.report_missing() - content.finalize() - - return content - def create_app(config_path=(here / 'config.json'), flask_config={}): app = Flask(__package__) app.config.update(flask_config) @@ -603,7 +645,7 @@ def create_app(config_path=(here / 'config.json'), flask_config={}): if not content_dir.is_absolute(): content_dir = config_path.parent / content_dir with app.app_context(): - app._pydrilla_content = load_content(content_dir.resolve()) + app._pydrilla_content = Content(content_dir.resolve()) app.register_blueprint(bp) -- cgit v1.2.3