aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Wojtek Kosior <koszko@koszko.org> 2021-11-19 10:32:31 +0100
committer Wojtek Kosior <koszko@koszko.org> 2021-11-19 10:32:31 +0100
commit f4edcbe7f4739d6f82a2e1bb180960b003b30862 (patch)
tree a862184eb172a88050f101776f55b58546529930
parent 5200c39fca2d870b07c18b395619937b54d9d116 (diff)
download pydrilla-f4edcbe7f4739d6f82a2e1bb180960b003b30862.tar.gz
pydrilla-f4edcbe7f4739d6f82a2e1bb180960b003b30862.zip
fill served definitions with computed sha256 sums of files
-rw-r--r-- src/pydrilla/pydrilla.py 124
-rw-r--r-- src/test/example_content/hello/index.json 21
-rw-r--r-- src/test/test_pydrilla.py 17
3 files changed, 112 insertions, 50 deletions
diff --git a/src/pydrilla/pydrilla.py b/src/pydrilla/pydrilla.py
index b0a5974..9e697ba 100644
--- a/src/pydrilla/pydrilla.py
+++ b/src/pydrilla/pydrilla.py
@@ -28,7 +28,7 @@ from flask import Flask, Blueprint, current_app, url_for, abort, request, \
redirect
from jinja2 import Environment, PackageLoader
import re
-#from hashlib import sha256
+from hashlib import sha256
import os
import pathlib
import json
@@ -333,17 +333,52 @@ class MappingItem:
class Content:
'''Stores serveable website content.'''
- def __init__(self):
+ def __init__(self, content_dir_path):
+ '''
+ When an instance of Content is constructed, it searches
+ content_dir_path for custom serveable site content and loads it.
+ '''
self.resources = {}
self.mappings = {}
self.licenses = {}
self.indexes = {}
self.definition_processors = {
- 'resource': self.process_resource_or_mapping,
- 'mapping': self.process_resource_or_mapping,
- 'license': self.process_license
+ 'resource': self._process_resource_or_mapping,
+ 'mapping': self._process_resource_or_mapping,
+ 'license': self._process_license
}
self.patterns_by_proto = {}
+ self.file_sha256sums = {}
+
+ self.content_dir_path = pathlib.Path(content_dir_path).resolve()
+
+ if not self.content_dir_path.is_dir():
+ raise ValueError(_('content_dir_path_not_dir'))
+
+ for subdir_path in self.content_dir_path.iterdir():
+ if not subdir_path.is_dir():
+ continue
+ try:
+ self._load_content_from_subdir(subdir_path, subdir_path.name)
+ except Exception as e:
+ if current_app._pydrilla_werror:
+ raise e from None
+ logging.error(_('couldnt_load_content_from_%s'), subdir_path,
+ exc_info=True)
+
+ self._report_missing()
+ self._finalize()
+
+ def _load_content_from_subdir(self, subdir_path, source_name):
+ '''
+ Helper function used to load definitions from index.json of a
+ subdirectory of the content directory.
+ '''
+ index_path = subdir_path / 'index.json'
+ with open(index_path) as index_file:
+ index = json.loads(strip_json_comments(index_file.read()))
+
+ self._process_index(index, source_name)
@staticmethod
def register_item(dict, item):
@@ -361,13 +396,38 @@ class Content:
@staticmethod
def _process_copyright_and_license(definition):
- '''Helper function used by other process_*() methods.'''
+ '''Helper function used by other _process_*() methods.'''
for field in ['copyright', 'licenses']:
if definition[field] == 'auto':
raise MyNotImplError(f'"{{field}}": "auto"',
definition['source_name'])
- def process_resource_or_mapping(self, definition, index):
+ def _get_file_sha256sum(self, path):
+ '''
+ Compute sha256 of the file at path. Cache results on this Content
+ object.
+ '''
+ path = path.resolve()
+ sha256sum = self.file_sha256sums.get(path)
+
+ if sha256sum is None:
+ with open(path, mode='rb') as hashed_file:
+ sha256sum = sha256(hashed_file.read()).digest().hex()
+ self.file_sha256sums[path] = sha256sum
+
+ return sha256sum
+
+ def _add_file_sha256sum(self, source_name, file_object):
+ '''
+ Expect file_object to be a dict with field "file" holding a file path
+ relative to content directory's subdirectory source_name. Compute or
+ fetch from cache the sha256 sum of that file and put it in file_object's
+ "sha256" field.
+ '''
+ file_path = self.content_dir_path / source_name / file_object['file']
+ file_object['sha256'] = self._get_file_sha256sum(file_path)
+
+ def _process_resource_or_mapping(self, definition, index):
'''
Sanitizes, autocompletes and registers serveable mapping/resource
definition.
@@ -378,10 +438,13 @@ class Content:
self._process_copyright_and_license(definition)
definition['dependencies'] = definition.get('dependencies', [])
self.register_item(self.resources, definition)
+ source_name = definition['source_name']
+ for script in definition['scripts']:
+ self._add_file_sha256sum(source_name, script)
else:
self.register_item(self.mappings, definition)
- def process_license(self, license, index):
+ def _process_license(self, license, index):
'''Sanitizes and registers serveable license definition.'''
identifier = license['identifier']
if identifier in self.licenses:
@@ -389,7 +452,15 @@ class Content:
self.licenses[identifier] = license
- def process_index(self, index, source_name):
+ source_name = license['source_name']
+ for legal_text in license['legal_text']:
+ self._add_file_sha256sum(source_name, legal_text)
+
+ notice = license.get('notice')
+ if notice is not None:
+ self._add_file_sha256sum(source_name, notice)
+
+ def _process_index(self, index, source_name):
'''
Sanitizes, autocompletes and registers data from a loaded index.json
file.
@@ -429,7 +500,7 @@ class Content:
for item in versioned_item.by_version.values():
yield item
- def report_missing(self):
+ def _report_missing(self):
'''
Use logger to print information about items that are referenced but
were not loaded.
@@ -488,7 +559,7 @@ class Content:
if payload not in self.resources:
report_missing_payload(mapping, payload)
- def finalize(self):
+ def _finalize(self):
'''
Initialize structures needed to serve queries. Called once after all
data gets loaded.
@@ -543,35 +614,6 @@ class Content:
return list(mappings.values())
-def load_content_from_subdir(subdir_path, source_name, content):
- index_path = subdir_path / 'index.json'
- with open(index_path) as index_file:
- index = json.loads(strip_json_comments(index_file.read()))
-
- content.process_index(index, source_name)
-
-def load_content(path):
- if not path.is_dir():
- raise ValueError(_('content_dir_path_not_dir'))
-
- content = Content()
-
- for subdir_path in path.iterdir():
- if not subdir_path.is_dir():
- continue
- try:
- load_content_from_subdir(subdir_path, subdir_path.name, content)
- except Exception as e:
- if current_app._pydrilla_werror:
- raise e from None
- logging.error(_('couldnt_load_content_from_%s'), subdir_path,
- exc_info=True)
-
- content.report_missing()
- content.finalize()
-
- return content
-
def create_app(config_path=(here / 'config.json'), flask_config={}):
app = Flask(__package__)
app.config.update(flask_config)
@@ -603,7 +645,7 @@ def create_app(config_path=(here / 'config.json'), flask_config={}):
if not content_dir.is_absolute():
content_dir = config_path.parent / content_dir
with app.app_context():
- app._pydrilla_content = load_content(content_dir.resolve())
+ app._pydrilla_content = Content(content_dir.resolve())
app.register_blueprint(bp)
diff --git a/src/test/example_content/hello/index.json b/src/test/example_content/hello/index.json
index 12105c2..16843cb 100644
--- a/src/test/example_content/hello/index.json
+++ b/src/test/example_content/hello/index.json
@@ -150,8 +150,9 @@
// Array of javascript files that belong to this resource.
"scripts": [
{
- // Script name. It should also be a valid file path.
- "name": "hello.js",
+ // Script name. It should also be a valid file path relative
+ // to index.json's containing directory.
+ "file": "hello.js",
// Copyright and license info of a script file can be
// specified using the same format as in the case of the
// index.json file itself. If "copyright" or "license" is
@@ -160,7 +161,7 @@
"copyright": "auto",
"licenses": "auto"
}, {
- "name": "bye.js"
+ "file": "bye.js"
}
]
}, {
@@ -175,7 +176,7 @@
"licenses": "CC0-1.0",
// If "dependencies" is empty, it can also be omitted.
// "dependencies": [],
- "scripts": [{"name": "message.js"}]
+ "scripts": [{"file": "message.js"}]
}, {
"type": "mapping",
@@ -262,14 +263,16 @@
//
// "comment": "Expat license is the most common form of the license often called \"MIT\". Many other forms of \"MIT\" license exist. Here the name \"Expat\" is used to avoid ambiguity."
- // If applicable, a "notice" can be included. It shall then be a
- // path (relative to index.json) to a plain text file with that
- // notice.
+ // If applicable, a "notice" can be included. It shall then be an
+ // object with "file" field containing a path (relative to
+ // index.json's directory) to a plain text file with that notice.
//
- // "notice": "license-notice.txt"
+ // "notice": {
+ // "file": "license-notice.txt"
+ // }
//
// This is needed for example in case of GNU licenses (both with and
- // without exceptions). For example,
+ // without exceptions). For instance,
// "GPL-3.0-or-later-with-html-exception" could have the following
// in its notice file:
//
diff --git a/src/test/test_pydrilla.py b/src/test/test_pydrilla.py
index 22022ae..50757a7 100644
--- a/src/test/test_pydrilla.py
+++ b/src/test/test_pydrilla.py
@@ -28,6 +28,7 @@ import pytest
import sys
import shutil
from pathlib import Path
+from hashlib import sha256
from os import mkdir, unlink, environ
import json
from markupsafe import escape
@@ -37,6 +38,7 @@ from pydrilla import pydrilla, create_app
test_dir = Path(__file__).resolve().parent
packages_dir = test_dir.parent
development_config_path = test_dir / 'development_config.json'
+example_content_dir = test_dir / 'example_content'
@pytest.fixture
def client():
@@ -51,6 +53,11 @@ def development_config():
yield json.loads(pydrilla.strip_json_comments(config_file.read()))
def test_api_basic(client, development_config):
+ def verify_sha256sum(source_name, file_object):
+ with open(example_content_dir / source_name / file_object['file'],
+ mode='rb') as file:
+ assert sha256(file.read()).digest().hex() == file_object['sha256']
+
response = client.get('/')
assert b'html' in response.data
sources_uri = development_config['hydrilla_sources_uri']
@@ -63,6 +70,11 @@ def test_api_basic(client, development_config):
assert definition['type'] == item_type
assert definition['source_name'] == 'hello'
assert definition['version'] == [2021, 11, 10]
+ if item_type == 'resource':
+ assert type(definition['scripts']) is list
+ assert len(definition['scripts']) > 0
+ for script_file in definition['scripts']:
+ verify_sha256sum(definition['source_name'], script_file)
response = client.get(f'/{item_type}s/helloapple?ver=2021.11.10.0')
assert response.status_code == 200
@@ -91,6 +103,11 @@ def test_api_basic(client, development_config):
assert definition['long_name'] == 'Creative Commons Zero v1.0 Universal'
assert definition['source_name'] == 'hello'
+ assert type(definition['legal_text']) is list
+ assert len(definition['legal_text']) > 0
+ for license_file in definition['legal_text']:
+ verify_sha256sum(definition['source_name'], license_file)
+
response = client.get('/licenses/random-bad-identifier')
assert response.status_code == 404