about | summary | refs | log | tree | commit | diff
path: root/test/haketilo_test/unit/test_patterns_query_tree.py
diff options
context:
space:
mode:
Diffstat (limited to 'test/haketilo_test/unit/test_patterns_query_tree.py')
-rw-r--r-- test/haketilo_test/unit/test_patterns_query_tree.py 474
1 files changed, 474 insertions, 0 deletions
diff --git a/test/haketilo_test/unit/test_patterns_query_tree.py b/test/haketilo_test/unit/test_patterns_query_tree.py
new file mode 100644
index 0000000..80bf554
--- /dev/null
+++ b/test/haketilo_test/unit/test_patterns_query_tree.py
@@ -0,0 +1,474 @@
+# SPDX-License-Identifier: CC0-1.0
+
+"""
+Haketilo unit tests - URL patterns
+"""
+
+# This file is part of Haketilo
+#
+# Copyright (C) 2021, Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the CC0 1.0 Universal License as published by
+# the Creative Commons Corporation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# CC0 1.0 Universal License for more details.
+
+import pytest
+
+from ..script_loader import load_script
+
+@pytest.mark.get_page('https://gotmyowndoma.in')
+def test_modify_branch(execute_in_page):
+    """
+    patterns_query_tree.js contains Pattern Tree data structure that allows
+    arrays of string labels to be mapped to items.
+    Verify operations modifying a single branch of such tree work properly.
+
+    Items are added to and removed from the branch through modify_sequence().
+    After every step the test checks the resulting branch as well as the
+    number of times the modifier callback got invoked.
+    """
+    execute_in_page(load_script('common/patterns_query_tree.js'))
+
+    # Define the in-page modifier callbacks. item_adder() and item_remover()
+    # reset their respective counter, so each counter only reflects the most
+    # recent modify_sequence() call.
+    execute_in_page(
+        '''
+ let items_added;
+ let items_removed;
+
+ function _item_adder(item, array)
+ {
+ items_added++;
+ return [...(array || []), item];
+ }
+
+ function item_adder(item)
+ {
+ items_added = 0;
+ return array => _item_adder(item, array);
+ }
+
+ function _item_remover(array)
+ {
+ if (array !== null) {
+ items_removed++;
+ array.pop();
+ }
+ return (array && array.length > 0) ? array : null;
+ }
+
+ function item_remover()
+ {
+ items_removed = 0;
+ return _item_remover;
+ }''')
+
+    # Let's construct some tree branch while checking that each addition gives
+    # the right result.
+    branch = execute_in_page(
+        '''{
+ const branch = empty_node();
+ modify_sequence(branch, ['com', 'example'], item_adder('some_item'));
+ returnval(branch);
+ }''')
+    assert branch == {
+        'literal_match': None,
+        'wildcard_matches': [None, None, None],
+        'children': {
+            'com': {
+                'literal_match': None,
+                'wildcard_matches': [None, None, None],
+                'children': {
+                    'example': {
+                        'literal_match': ['some_item'],
+                        'wildcard_matches': [None, None, None],
+                        'children': {
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    branch, items_added = execute_in_page(
+        '''{
+ const branch = arguments[0];
+ modify_sequence(branch, ['com', 'example'], item_adder('other_item'));
+ returnval([branch, items_added]);
+ }''', branch)
+    assert items_added == 1
+    assert branch['children']['com']['children']['example']['literal_match'] \
+        == ['some_item', 'other_item']
+
+    for i in range(3):
+        # Slot i of 'wildcard_matches' is checked against the label made of
+        # i + 1 asterisks.
+        wildcard = '*' * (i + 1)
+        for expected_array in [['third_item'], ['third_item', '4th_item']]:
+            branch, items_added = execute_in_page(
+                '''{
+ const branch = arguments[0];
+ modify_sequence(branch, ['com', 'sample', arguments[1]],
+ item_adder(arguments[2]));
+ returnval([branch, items_added]);
+ }''',
+                branch, wildcard, expected_array[-1])
+            # Two modifier invocations are expected for a wildcard label: the
+            # item has to show up both in the parent's 'wildcard_matches' slot
+            # and in 'literal_match' of the child named after the wildcard.
+            assert items_added == 2
+            sample = branch['children']['com']['children']['sample']
+            assert sample['wildcard_matches'][i] == expected_array
+            assert sample['children'][wildcard]['literal_match'] \
+                == expected_array
+
+    branch, items_added = execute_in_page(
+        '''{
+ const branch = arguments[0];
+ modify_sequence(branch, ['org', 'koszko', '***', '123'],
+ item_adder('5th_item'));
+ returnval([branch, items_added]);
+ }''',
+        branch)
+    assert items_added == 1
+    assert branch['children']['org']['children']['koszko']['children']['***']\
+        ['children']['123']['literal_match'] == ['5th_item']
+
+    # Let's verify that removing a nonexistent element doesn't modify the tree.
+    branch2, items_removed = execute_in_page(
+        '''{
+ const branch = arguments[0];
+ modify_sequence(branch, ['com', 'not', 'registered', '*'],
+ item_remover());
+ returnval([branch, items_removed]);
+ }''',
+        branch)
+    assert branch == branch2
+    assert items_removed == 0
+
+    # Let's remove all elements in the tree branch while checking that each
+    # removal gives the right result.
+    branch, items_removed = execute_in_page(
+        '''{
+ const branch = arguments[0];
+ modify_sequence(branch, ['org', 'koszko', '***', '123'],
+ item_remover());
+ returnval([branch, items_removed]);
+ }''',
+        branch)
+    assert items_removed == 1
+    assert 'org' not in branch['children']
+
+    for i in range(3):
+        wildcard = '*' * (i + 1)
+        for expected_array in [['third_item'], None]:
+            branch, items_removed = execute_in_page(
+                '''{
+ const branch = arguments[0];
+ modify_sequence(branch, ['com', 'sample', arguments[1]],
+ item_remover());
+ returnval([branch, items_removed]);
+ }''',
+                branch, wildcard)
+            assert items_removed == 2
+            # The 'sample' node (or its wildcard child) may already be gone
+            # once its last item got removed, hence the lookups with defaults
+            # that stand for "nothing stored here".
+            sample = branch['children']['com']['children'].get('sample', {})
+            assert sample.get('wildcard_matches', [None, None, None])[i] \
+                == expected_array
+            assert sample.get('children', {}).get(wildcard, {})\
+                .get('literal_match') == expected_array
+
+    for i in range(2):
+        branch, items_removed = execute_in_page(
+            '''{
+ const branch = arguments[0];
+ modify_sequence(branch, ['com', 'example'], item_remover());
+ returnval([branch, items_removed]);
+ }''',
+            branch)
+        assert items_removed == 1
+        if i == 0:
+            assert branch['children']['com']['children']['example']\
+                ['literal_match'] == ['some_item']
+        else:
+            # With the last item gone the branch should be an empty node
+            # again.
+            assert branch == {
+                'literal_match': None,
+                'wildcard_matches': [None, None, None],
+                'children': {
+                }
+            }
+
+@pytest.mark.get_page('https://gotmyowndoma.in')
+def test_search_branch(execute_in_page):
+    """
+    patterns_query_tree.js contains Pattern Tree data structure that allows
+    arrays of string labels to be mapped to items.
+    Verify searching a single branch of such tree works properly.
+
+    A branch is populated in the page once. Then, for each queried sequence,
+    the arrays yielded by search_sequence() are compared, in order, with the
+    expected sets of items.
+    """
+    execute_in_page(load_script('common/patterns_query_tree.js'))
+    execute_in_page(
+        '''
+ const item_adder = item => (array => [...(array || []), item]);
+ ''')
+
+    # Let's construct some tree branch to test on.
+    execute_in_page(
+        '''
+ var branch = empty_node();
+
+ for (const [item, sequence] of [
+ ['(root)', []],
+ ['***', ['***']],
+ ['**', ['**']],
+ ['*', ['*']],
+
+ ['a', ['a']],
+ ['A', ['a']],
+ ['b', ['b']],
+
+ ['a/***', ['a', '***']],
+ ['A/***', ['a', '***']],
+ ['a/**', ['a', '**']],
+ ['A/**', ['a', '**']],
+ ['a/*', ['a', '*']],
+ ['A/*', ['a', '*']],
+ ['a/sth', ['a', 'sth']],
+ ['A/sth', ['a', 'sth']],
+
+ ['b/***', ['b', '***']],
+ ['b/**', ['b', '**']],
+ ['b/*', ['b', '*']],
+ ['b/sth', ['b', 'sth']],
+ ])
+ modify_sequence(branch, sequence, item_adder(item));
+ ''')
+
+    # Let's make the actual searches on our testing branch. Each expected
+    # entry is a set, because the order of items within one yielded array is
+    # not checked - only the order of the arrays themselves is.
+    for sequence, expected in [
+        ([], [{'(root)'}, {'***'}]),
+        (['a'], [{'a', 'A'}, {'a/***', 'A/***'}, {'*'}, {'***'}]),
+        (['b'], [{'b'}, {'b/***'}, {'*'}, {'***'}]),
+        (['c'], [{'*'}, {'***'}]),
+        (['***'], [{'***'}, {'*'}]),
+        (['**'], [{'**'}, {'*'}, {'***'}]),
+        (['*'], [{'*'}, {'***'}]),
+
+        (['a', 'sth'], [{'a/sth', 'A/sth'}, {'a/*', 'A/*'},
+                        {'a/***', 'A/***'}, {'**'}, {'***'}]),
+        (['b', 'sth'], [{'b/sth'}, {'b/*'}, {'b/***'}, {'**'}, {'***'}]),
+        (['a', 'hts'], [{'a/*', 'A/*'}, {'a/***', 'A/***'}, {'**'}, {'***'}]),
+        (['b', 'hts'], [{'b/*'}, {'b/***'}, {'**'}, {'***'}]),
+        (['a', '***'], [{'a/***', 'A/***'}, {'a/*', 'A/*'}, {'**'}, {'***'}]),
+        (['b', '***'], [{'b/***'}, {'b/*'}, {'**'}, {'***'}]),
+        (['a', '**'], [{'a/**', 'A/**'}, {'a/*', 'A/*'},
+                       {'a/***', 'A/***'}, {'**'}, {'***'}]),
+        (['b', '**'], [{'b/**'}, {'b/*'}, {'b/***'}, {'**'}, {'***'}]),
+        (['a', '*'], [{'a/*', 'A/*'}, {'a/***', 'A/***'}, {'**'}, {'***'}]),
+        (['b', '*'], [{'b/*'}, {'b/***'}, {'**'}, {'***'}]),
+
+        (['a', 'c', 'd'], [{'a/**', 'A/**'}, {'a/***', 'A/***'},
+                           {'**'}, {'***'}]),
+        (['b', 'c', 'd'], [{'b/**'}, {'b/***'}, {'**'}, {'***'}])
+    ]:
+        result = execute_in_page(
+            '''
+ returnval([...search_sequence(branch, arguments[0])]);
+ ''',
+            sequence)
+
+        # Attach the failing case to every assertion so that a failure
+        # report identifies the queried sequence.
+        details = 'sequence: {}\nexpected: {}\nresult: {}'.format(
+            sequence, expected, result)
+
+        assert len(result) == len(expected), details
+
+        for expected_set, result_array in zip(expected, result):
+            # Comparing lengths as well catches duplicated items, which a
+            # set comparison alone would hide.
+            assert len(expected_set) == len(result_array), details
+            assert expected_set == set(result_array), details
+
+@pytest.mark.get_page('https://gotmyowndoma.in')
+def test_pattern_tree(execute_in_page):
+    """
+    patterns_query_tree.js contains Pattern Tree data structure that allows
+    arrays of string labels to be mapped to items.
+    Verify that registering URL patterns in an entire tree, searching the
+    tree for a URL and deregistering the patterns all work properly.
+    """
+    execute_in_page(load_script('common/patterns_query_tree.js'))
+
+    # In-page snippet shared by both scenarios below: register every pattern
+    # (both as-is and with '/' appended) in a fresh tree, then return the
+    # tree together with the results of searching it for the given URL.
+    register_and_search = '''{
+ const tree = pattern_tree_make();
+ for (const pattern of arguments[0].concat(arguments[1])) {
+ pattern_tree_register(tree, pattern, 'key', pattern);
+ pattern_tree_register(tree, pattern + '/', 'key', pattern + '/');
+ }
+ returnval([tree, [...pattern_tree_search(tree, arguments[2])]]);
+ }'''
+
+    # Scenario 1: all possible patterns for a simple URL.
+    simple_url = 'https://example.com'
+    matching = [
+        'https://example.com',
+        'https://example.com/***',
+        'https://***.example.com',
+        'https://***.example.com/***'
+    ]
+    non_matching = [
+        'http://example.com',
+        'https://a.example.com',
+        'https://*.example.com',
+        'https://**.example.com',
+        'https://example.com/a',
+        'https://example.com/*',
+        'https://example.com/**',
+    ]
+
+    tree, found = execute_in_page(register_and_search,
+                                  matching, non_matching, simple_url)
+    assert [{'key': pattern} for pattern in matching] == found
+
+    # Deregister the good patterns at odd positions; those at even positions
+    # have to remain findable.
+    dropped = matching[1::2]
+    matching = matching[::2]
+    tree, found = execute_in_page(
+        '''{
+ const tree = arguments[0];
+ for (const pattern of arguments[1]) {
+ pattern_tree_deregister(tree, pattern, 'key');
+ pattern_tree_deregister(tree, pattern + '/', 'key');
+ }
+ returnval([tree, [...pattern_tree_search(tree, arguments[2])]]);
+ }''',
+        tree, dropped, simple_url)
+    assert [{'key': pattern} for pattern in matching] == found
+
+    # Deregistering everything that is still registered has to leave an
+    # empty tree behind.
+    tree = execute_in_page(
+        '''{
+ const tree = arguments[0];
+ for (const pattern of arguments[1].concat(arguments[2])) {
+ pattern_tree_deregister(tree, pattern, 'key');
+ pattern_tree_deregister(tree, pattern + '/', 'key');
+ }
+ returnval(tree);
+ }''',
+        tree, matching, non_matching)
+    assert tree == {}
+
+    # Scenario 2: all possible patterns for a complex URL. The matching
+    # patterns are every host form combined with every path form; the search
+    # is expected to report them in exactly this host-major order.
+    complex_url = \
+        'http://settings.query.example.com/google/tries/destroy/adblockers//'
+    host_forms = [
+        'settings.query.example.com',
+        '***.settings.query.example.com',
+        '*.query.example.com',
+        '***.query.example.com',
+        '**.example.com',
+        '***.example.com'
+    ]
+    path_forms = [
+        '/google/tries/destroy/adblockers',
+        '/google/tries/destroy/adblockers/***',
+        '/google/tries/destroy/*',
+        '/google/tries/destroy/***',
+        '/google/tries/**',
+        '/google/tries/***',
+        '/google/**',
+        '/google/***',
+        '/**',
+        '/***'
+    ]
+    matching = ['http://' + host + path
+                for host in host_forms for path in path_forms]
+    non_matching = [
+        'https://settings.query.example.com/google/tries/destroy/adblockers',
+        'http://settings.query.example.com/google/tries/destroy/adblockers/a',
+        'http://settings.query.example.com/google/tries/destroy/adblockers/*',
+        'http://settings.query.example.com/google/tries/destroy/adblockers/**',
+        'http://settings.query.example.com/google/tries/destroy/a',
+        'http://settings.query.example.com/google/tries/destroy/**',
+        'http://settings.query.example.com/google/tries/*',
+        'http://a.settings.query.example.com/google/tries/destroy/adblockers',
+        'http://*.settings.query.example.com/google/tries/destroy/adblockers',
+        'http://**.settings.query.example.com/google/tries/destroy/adblockers',
+        'http://a.query.example.com/google/tries/destroy/adblockers',
+        'http://**.query.example.com/google/tries/destroy/adblockers',
+        'http://*.example.com/google/tries/destroy/adblockers'
+    ]
+
+    # For this URL each good pattern is expected twice: first its variant
+    # with the trailing slash, then the plain one.
+    both_variants = []
+    for pattern in matching:
+        both_variants.append({'key': pattern + '/'})
+        both_variants.append({'key': pattern})
+
+    tree, found = execute_in_page(register_and_search,
+                                  matching, non_matching, complex_url)
+    assert both_variants == found
+
+    # Once the trailing-slash variants are deregistered, only the plain
+    # patterns should be found.
+    tree, found = execute_in_page(
+        '''{
+ const tree = arguments[0];
+ for (const pattern of arguments[1])
+ pattern_tree_deregister(tree, pattern + '/', 'key');
+ returnval([tree, [...pattern_tree_search(tree, arguments[2])]]);
+ }''',
+        tree, matching, complex_url)
+    assert [{'key': pattern} for pattern in matching] == found
+
+    # Deregistering everything that is still registered has to leave an
+    # empty tree behind.
+    tree = execute_in_page(
+        '''{
+ const tree = arguments[0];
+ for (const pattern of arguments[1])
+ pattern_tree_deregister(tree, pattern, 'key');
+ for (const pattern of arguments[2]) {
+ pattern_tree_deregister(tree, pattern, 'key');
+ pattern_tree_deregister(tree, pattern + '/', 'key');
+ }
+ returnval(tree);
+ }''',
+        tree, matching, non_matching)
+    assert tree == {}