# SPDX-License-Identifier: CC0-1.0

"""
Haketilo unit tests - URL patterns
"""

# This file is part of Haketilo
#
# Copyright (C) 2021, Wojtek Kosior
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the CC0 1.0 Universal License as published by
# the Creative Commons Corporation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# CC0 1.0 Universal License for more details.

import sys

import pytest

from ..script_loader import load_script

# Note on the embedded JavaScript below: `arguments[N]` refers to the extra
# positional values passed to execute_in_page() after the script text, and
# returnval() hands a value back to Python (both as used by the calls in this
# file; the fixture itself is defined elsewhere).


@pytest.mark.get_page('https://gotmyowndoma.in')
def test_modify_branch(execute_in_page):
    """
    patterns_query_tree.js contains Pattern Tree data structure that allows
    arrays of string labels to be mapped to items.
    Verify that operations modifying a single branch of such tree work
    properly.
    """
    execute_in_page(load_script('common/patterns_query_tree.js'))

    # Helper modifiers used by all snippets below. item_adder()/item_remover()
    # reset a counter, so after each modify_sequence() call the counter tells
    # how many times the tree invoked the modifier.
    execute_in_page(
        '''
        let items_added;
        let items_removed;

        function _item_adder(item, array)
        {
            items_added++;
            return [...(array || []), item];
        }

        function item_adder(item)
        {
            items_added = 0;
            return array => _item_adder(item, array);
        }

        function _item_remover(array)
        {
            if (array !== null) {
                items_removed++;
                array.pop();
            }
            return (array && array.length > 0) ? array : null;
        }

        function item_remover()
        {
            items_removed = 0;
            return _item_remover;
        }''')

    # Let's construct some tree branch while checking that each addition gives
    # the right result.
    branch = execute_in_page(
        '''{
        const branch = empty_node();
        modify_sequence(branch, ['com', 'example'], item_adder('some_item'));
        returnval(branch);
        }''')
    assert branch == {
        'literal_match': None,
        'wildcard_matches': [None, None, None],
        'children': {
            'com': {
                'literal_match': None,
                'wildcard_matches': [None, None, None],
                'children': {
                    'example': {
                        'literal_match': ['some_item'],
                        'wildcard_matches': [None, None, None],
                        'children': {
                        }
                    }
                }
            }
        }
    }

    branch, items_added = execute_in_page(
        '''{
        const branch = arguments[0];
        modify_sequence(branch, ['com', 'example'], item_adder('other_item'));
        returnval([branch, items_added]);
        }''',
        branch)
    assert items_added == 1
    assert branch['children']['com']['children']['example']['literal_match'] \
        == ['some_item', 'other_item']

    for i in range(3):
        for expected_array in [['third_item'], ['third_item', '4th_item']]:
            wildcard = '*' * (i + 1)
            branch, items_added = execute_in_page(
                '''{
                const branch = arguments[0];
                modify_sequence(branch, ['com', 'sample', arguments[1]],
                                item_adder(arguments[2]));
                returnval([branch, items_added]);
                }''',
                branch, wildcard, expected_array[-1])
            # A trailing wildcard label is expected to be stored twice: in the
            # parent's wildcard slot and as a literal child named after the
            # wildcard. Both places are checked below.
            assert items_added == 2
            sample = branch['children']['com']['children']['sample']
            assert sample['wildcard_matches'][i] == expected_array
            assert sample['children'][wildcard]['literal_match'] \
                == expected_array

    branch, items_added = execute_in_page(
        '''{
        const branch = arguments[0];
        modify_sequence(branch, ['org', 'koszko', '***', '123'],
                        item_adder('5th_item'));
        returnval([branch, items_added]);
        }''',
        branch)
    # Here the wildcard is not the last label, so only one slot is modified.
    assert items_added == 1
    assert branch['children']['org']['children']['koszko']['children']['***']\
        ['children']['123']['literal_match'] == ['5th_item']

    # Let's verify that removing a nonexistent element doesn't modify the tree.
    branch2, items_removed = execute_in_page(
        '''{
        const branch = arguments[0];
        modify_sequence(branch, ['com', 'not', 'registered', '*'],
                        item_remover());
        returnval([branch, items_removed]);
        }''',
        branch)
    assert branch == branch2
    assert items_removed == 0

    # Let's remove all elements in the tree branch while checking that each
    # removal gives the right result.
    branch, items_removed = execute_in_page(
        '''{
        const branch = arguments[0];
        modify_sequence(branch, ['org', 'koszko', '***', '123'],
                        item_remover());
        returnval([branch, items_removed]);
        }''',
        branch)
    assert items_removed == 1
    # Nodes left without any items are expected to be pruned.
    assert 'org' not in branch['children']

    for i in range(3):
        for expected_array in [['third_item'], None]:
            wildcard = '*' * (i + 1)
            branch, items_removed = execute_in_page(
                '''{
                const branch = arguments[0];
                modify_sequence(branch, ['com', 'sample', arguments[1]],
                                item_remover());
                returnval([branch, items_removed]);
                }''',
                branch, wildcard)
            assert items_removed == 2
            # Once items are gone the corresponding nodes (eventually the
            # whole 'sample' node) may have been pruned, hence the .get()
            # fallbacks that make a missing node read as "no match".
            sample = branch['children']['com']['children'].get('sample', {})
            assert sample.get('wildcard_matches', [None, None, None])[i] \
                == expected_array
            assert sample.get('children', {}).get(wildcard, {})\
                .get('literal_match') == expected_array

    for i in range(2):
        branch, items_removed = execute_in_page(
            '''{
            const branch = arguments[0];
            modify_sequence(branch, ['com', 'example'], item_remover());
            returnval([branch, items_removed]);
            }''',
            branch)
        assert items_removed == 1
        if i == 0:
            assert branch['children']['com']['children']['example']\
                ['literal_match'] == ['some_item']
        else:
            # Everything has been removed - only an empty root should remain.
            assert branch == {
                'literal_match': None,
                'wildcard_matches': [None, None, None],
                'children': {
                }
            }


@pytest.mark.get_page('https://gotmyowndoma.in')
def test_search_branch(execute_in_page):
    """
    patterns_query_tree.js contains Pattern Tree data structure that allows
    arrays of string labels to be mapped to items.
    Verify that searching a single branch of such tree works properly.
    """
    execute_in_page(load_script('common/patterns_query_tree.js'))
    execute_in_page(
        '''
        const item_adder = item => (array => [...(array || []), item]);
        ''')

    # Let's construct some tree branch to test on. Each item is named after
    # the sequence it is stored under; upper-case 'A' items share sequences
    # with their lower-case 'a' counterparts so that some nodes hold 2 items.
    execute_in_page(
        '''
        var branch = empty_node();

        for (const [item, sequence] of [
            ['(root)', []],
            ['***',    ['***']],
            ['**',     ['**']],
            ['*',      ['*']],

            ['a',      ['a']],
            ['A',      ['a']],
            ['b',      ['b']],

            ['a/***',  ['a', '***']],
            ['A/***',  ['a', '***']],
            ['a/**',   ['a', '**']],
            ['A/**',   ['a', '**']],
            ['a/*',    ['a', '*']],
            ['A/*',    ['a', '*']],
            ['a/sth',  ['a', 'sth']],
            ['A/sth',  ['a', 'sth']],

            ['b/***',  ['b', '***']],
            ['b/**',   ['b', '**']],
            ['b/*',    ['b', '*']],
            ['b/sth',  ['b', 'sth']],
        ])
            modify_sequence(branch, sequence, item_adder(item));
        ''')

    # Let's make the actual searches on our testing branch. For every searched
    # sequence we list the expected results in order; each result is compared
    # as a set because the order of items within a single node is irrelevant
    # to this test.
    search_cases = [
        ([],            [{'(root)'},                            {'***'}]),
        (['a'],         [{'a', 'A'}, {'a/***', 'A/***'}, {'*'}, {'***'}]),
        (['b'],         [{'b'},      {'b/***'},          {'*'}, {'***'}]),
        (['c'],         [                                {'*'}, {'***'}]),
        (['***'],       [{'***'},                        {'*'}         ]),
        (['**'],        [{'**'},                         {'*'}, {'***'}]),
        (['*'],         [{'*'},                                 {'***'}]),

        (['a', 'sth'],  [{'a/sth', 'A/sth'}, {'a/*', 'A/*'},
                         {'a/***', 'A/***'}, {'**'}, {'***'}]),
        (['b', 'sth'],  [{'b/sth'}, {'b/*'}, {'b/***'}, {'**'}, {'***'}]),
        (['a', 'hts'],  [{'a/*', 'A/*'}, {'a/***', 'A/***'},
                         {'**'}, {'***'}]),
        (['b', 'hts'],  [{'b/*'}, {'b/***'}, {'**'}, {'***'}]),

        (['a', '***'],  [{'a/***', 'A/***'}, {'a/*', 'A/*'},
                         {'**'}, {'***'}]),
        (['b', '***'],  [{'b/***'}, {'b/*'}, {'**'}, {'***'}]),
        (['a', '**'],   [{'a/**', 'A/**'}, {'a/*', 'A/*'},
                         {'a/***', 'A/***'}, {'**'}, {'***'}]),
        (['b', '**'],   [{'b/**'}, {'b/*'}, {'b/***'}, {'**'}, {'***'}]),
        (['a', '*'],    [{'a/*', 'A/*'}, {'a/***', 'A/***'},
                         {'**'}, {'***'}]),
        (['b', '*'],    [{'b/*'}, {'b/***'}, {'**'}, {'***'}]),

        (['a', 'c', 'd'], [{'a/**', 'A/**'}, {'a/***', 'A/***'},
                           {'**'}, {'***'}]),
        (['b', 'c', 'd'], [{'b/**'}, {'b/***'}, {'**'}, {'***'}])
    ]

    for sequence, expected in search_cases:
        result = execute_in_page(
            '''
            returnval([...search_sequence(branch, arguments[0])]);
            ''',
            sequence)

        try:
            assert len(result) == len(expected)

            for expected_set, result_array in zip(expected, result):
                # The length check catches duplicated items that the set
                # comparison alone would hide.
                assert len(expected_set) == len(result_array)
                assert expected_set == set(result_array)
        except Exception:
            # Tell which of the many cases failed, then let the original
            # exception propagate untouched.
            print('sequence:', sequence, '\nexpected:', expected,
                  '\nresult:', result, file=sys.stderr)
            raise


@pytest.mark.get_page('https://gotmyowndoma.in')
def test_pattern_tree(execute_in_page):
    """
    patterns_query_tree.js contains Pattern Tree data structure that allows
    arrays of string labels to be mapped to items.
    Verify that operations on an entire such tree work properly.
    """
    execute_in_page(load_script('common/patterns_query_tree.js'))

    # Registers every pattern from arguments[0] and arguments[1], each both
    # without and with a trailing slash (the pattern text itself is the stored
    # value), then returns the tree and the search results for arguments[2].
    register_and_search_js = '''{
        const tree = pattern_tree_make();
        for (const pattern of arguments[0].concat(arguments[1])) {
            pattern_tree_register(tree, pattern,       'key', pattern);
            pattern_tree_register(tree, pattern + '/', 'key', pattern + '/');
        }
        returnval([tree, [...pattern_tree_search(tree, arguments[2])]]);
        }'''

    # Perform tests with all possible patterns for a simple URL.
    url = 'https://example.com'
    patterns = [
        'https://example.com',
        'https://example.com/***',
        'https://***.example.com',
        'https://***.example.com/***'
    ]
    bad_patterns = [
        'http://example.com',
        'https://a.example.com',
        'https://*.example.com',
        'https://**.example.com',
        'https://example.com/a',
        'https://example.com/*',
        'https://example.com/**',
    ]

    # The URL has no trailing slash, so only the slash-less variants of the
    # good patterns are expected, in the order listed above.
    expected = [{'key': p} for p in patterns]

    tree, result = execute_in_page(register_and_search_js,
                                   patterns, bad_patterns, url)
    assert expected == result

    # Also verify that deregistering half of the good patterns works correctly.
    patterns_removed = [pattern for i, pattern in enumerate(patterns) if i % 2]
    patterns = [pattern for i, pattern in enumerate(patterns) if not (i % 2)]
    expected = [{'key': p} for p in patterns]
    tree, result = execute_in_page(
        '''{
        const tree = arguments[0];
        for (const pattern of arguments[1]) {
            pattern_tree_deregister(tree, pattern,       'key');
            pattern_tree_deregister(tree, pattern + '/', 'key');
        }
        returnval([tree, [...pattern_tree_search(tree, arguments[2])]]);
        }''',
        tree, patterns_removed, url)
    assert expected == result

    # Also verify that deregistering all the patterns works correctly.
    tree = execute_in_page(
        '''{
        const tree = arguments[0];
        for (const pattern of arguments[1].concat(arguments[2])) {
            pattern_tree_deregister(tree, pattern,       'key');
            pattern_tree_deregister(tree, pattern + '/', 'key');
        }
        returnval(tree);
        }''',
        tree, patterns, bad_patterns)
    assert tree == {}

    # Perform tests with all possible patterns for a complex URL.
    url = 'http://settings.query.example.com/google/tries/destroy/adblockers//'

    # Every host pattern below combined with every path pattern below is
    # expected to match the URL. The expected search results follow exactly
    # this order: hosts in the outer loop, paths in the inner one.
    matching_hosts = [
        'settings.query.example.com',
        '***.settings.query.example.com',
        '*.query.example.com',
        '***.query.example.com',
        '**.example.com',
        '***.example.com'
    ]
    matching_paths = [
        '/google/tries/destroy/adblockers',
        '/google/tries/destroy/adblockers/***',
        '/google/tries/destroy/*',
        '/google/tries/destroy/***',
        '/google/tries/**',
        '/google/tries/***',
        '/google/**',
        '/google/***',
        '/**',
        '/***'
    ]
    patterns = ['http://' + host + path
                for host in matching_hosts for path in matching_paths]
    bad_patterns = [
        'https://settings.query.example.com/google/tries/destroy/adblockers',
        'http://settings.query.example.com/google/tries/destroy/adblockers/a',
        'http://settings.query.example.com/google/tries/destroy/adblockers/*',
        'http://settings.query.example.com/google/tries/destroy/adblockers/**',
        'http://settings.query.example.com/google/tries/destroy/a',
        'http://settings.query.example.com/google/tries/destroy/**',
        'http://settings.query.example.com/google/tries/*',
        'http://a.settings.query.example.com/google/tries/destroy/adblockers',
        'http://*.settings.query.example.com/google/tries/destroy/adblockers',
        'http://**.settings.query.example.com/google/tries/destroy/adblockers',
        'http://a.query.example.com/google/tries/destroy/adblockers',
        'http://**.query.example.com/google/tries/destroy/adblockers',
        'http://*.example.com/google/tries/destroy/adblockers'
    ]

    # This URL ends with slashes, so for each good pattern both registered
    # variants are expected: the one with trailing slash first.
    expected = [{'key': p + s} for p in patterns for s in ['/', '']]

    tree, result = execute_in_page(register_and_search_js,
                                   patterns, bad_patterns, url)
    assert expected == result

    # Also verify that deregistering all patterns with trailing slash works
    # correctly.
    expected = [{'key': p} for p in patterns]
    tree, result = execute_in_page(
        '''{
        const tree = arguments[0];
        for (const pattern of arguments[1])
            pattern_tree_deregister(tree, pattern + '/', 'key');
        returnval([tree, [...pattern_tree_search(tree, arguments[2])]]);
        }''',
        tree, patterns, url)
    assert expected == result

    # Also verify that deregistering all the patterns works correctly.
    # Good patterns only have their slash-less variant left at this point,
    # while bad patterns still have both variants registered.
    tree = execute_in_page(
        '''{
        const tree = arguments[0];
        for (const pattern of arguments[1])
            pattern_tree_deregister(tree, pattern, 'key');
        for (const pattern of arguments[2]) {
            pattern_tree_deregister(tree, pattern,       'key');
            pattern_tree_deregister(tree, pattern + '/', 'key');
        }
        returnval(tree);
        }''',
        tree, patterns, bad_patterns)
    assert tree == {}