diff options
Diffstat (limited to 'tests/test_pattern_tree.py')
-rw-r--r-- | tests/test_pattern_tree.py | 454 |
1 files changed, 454 insertions, 0 deletions
diff --git a/tests/test_pattern_tree.py b/tests/test_pattern_tree.py new file mode 100644 index 0000000..4238d66 --- /dev/null +++ b/tests/test_pattern_tree.py @@ -0,0 +1,454 @@ +# SPDX-License-Identifier: CC0-1.0 + +# Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org> +# +# Available under the terms of Creative Commons Zero v1.0 Universal. + +import pytest +import re +import dataclasses as dc + +from immutables import Map + +from hydrilla import pattern_tree + +from .url_patterns_common import * + +@pytest.mark.parametrize('_in, out', [ + (Map(), True), + ({'children': Map(non_empty='non_emtpy')}, False), + ({'literal_match': 'non-None'}, False), + ({'children': Map(non_empty='non_emtpy')}, False), + ({'literal_match': 'non-None', 'children': 'non-empty'}, False) +]) +def test_pattern_tree_node_is_empty(_in, out): + """....""" + assert pattern_tree.PatternTreeNode(**_in).is_empty() == out + +def test_pattern_tree_node_update_literal_match(): + """....""" + node1 = pattern_tree.PatternTreeNode() + node2 = node1.update_literal_match('dummy match item') + + assert node1.literal_match is None + assert node2.literal_match == 'dummy match item' + +def test_pattern_tree_node_get_child(): + """....""" + node = pattern_tree.PatternTreeNode(children=Map(dummy_key='dummy_val')) + + assert node.get_child('dummy_key') == 'dummy_val' + assert node.get_child('other_key') is None + +def test_pattern_tree_node_remove_child(): + """....""" + node1 = pattern_tree.PatternTreeNode(children=Map(dummy_key='dummy_val')) + node2 = node1.remove_child('dummy_key') + + assert node1.children == Map(dummy_key='dummy_val') + assert node2.children == Map() + +def test_pattern_tree_node_set_child(): + """....""" + node1 = pattern_tree.PatternTreeNode(children=Map(dummy_key='dummy_val')) + node2 = node1.set_child('other_key', 'other_val') + + assert node1.children == Map(dummy_key='dummy_val') + assert node2.children == Map(dummy_key='dummy_val', other_key='other_val') + +@pytest.mark.parametrize('root_empty', [True, False]) +def test_pattern_tree_branch_is_empty(root_empty): + """....""" + class DummyEmptyRoot: + """....""" + is_empty = lambda: root_empty + + branch = pattern_tree.PatternTreeBranch(root_node=DummyEmptyRoot) + assert branch.is_empty() == root_empty + +# def test_pattern_tree_branch_copy(): +# """....""" +# class DummyRoot: +# """....""" +# pass + +# branch1 = pattern_tree.PatternTreeBranch(root_node=DummyRoot) +# branch2 = branch1.copy() + +# assert branch1 is not branch2 +# for val_b1, val_b2 in zip(dc.astuple(branch1), dc.astuple(branch2)): +# assert val_b1 is val_b2 + +@pytest.fixture +def empty_branch(): + """....""" + return pattern_tree.PatternTreeBranch( + root_node = pattern_tree.PatternTreeNode() + ) + +@pytest.fixture +def branch_with_a_b(): + """....""" + return pattern_tree.PatternTreeBranch( + root_node = pattern_tree.PatternTreeNode( + children = Map( + a = pattern_tree.PatternTreeNode( + children = Map( + b = pattern_tree.PatternTreeNode( + literal_match = frozenset({'myitem'}) + ) + ) + ) + ) + ) + ) + +def test_pattern_tree_branch_update_add_first(empty_branch, branch_with_a_b): + """....""" + updated_branch = empty_branch.update( + ['a', 'b'], + lambda s: frozenset({*(s or []), 'myitem'}) + ) + + assert updated_branch == branch_with_a_b + assert empty_branch.root_node.children == Map() + +def test_pattern_tree_branch_update_add_second(branch_with_a_b): + """....""" + updated_branch = branch_with_a_b.update( + ['a', 'b'], + lambda s: frozenset({*(s or []), 'myotheritem'}) + ) + + leaf_node = updated_branch.root_node.children['a'].children['b'] + assert leaf_node.literal_match == frozenset({'myitem', 'myotheritem'}) + +def test_pattern_tree_branch_update_add_different_path(branch_with_a_b): + """....""" + updated_branch = branch_with_a_b.update( + ['a', 'not_b'], + lambda s: frozenset({*(s or []), 'myotheritem'}) + ) + + for segment, item in [('b', 'myitem'), ('not_b', 'myotheritem')]: + leaf_node = updated_branch.root_node.children['a'].children[segment] + assert leaf_node.literal_match == frozenset({item}) + +# def test_pattern_tree_branch_update_is_value_copied(branch_with_a_b): +# """....""" +# updated_branch = branch_with_a_b.update(['a', 'b'], lambda s: s) + +# leaf_node_orig = updated_branch.root_node.children['a'].children['b'] +# leaf_node_new = branch_with_a_b.root_node.children['a'].children['b'] + +# assert leaf_node_orig.literal_match == leaf_node_new.literal_match +# assert leaf_node_orig.literal_match is not leaf_node_new.literal_match + +def test_pattern_tree_branch_remove(branch_with_a_b, empty_branch): + """....""" + updated_branch = branch_with_a_b.update(['a', 'b'], lambda s: None) + + assert updated_branch == empty_branch + +def test_pattern_tree_branch_search_empty(empty_branch): + """....""" + assert [*empty_branch.search(['a', 'b'])] == [] + +@pytest.fixture +def branch_with_wildcards(): + """....""" + return pattern_tree.PatternTreeBranch( + root_node = pattern_tree.PatternTreeNode( + children = Map( + a = pattern_tree.PatternTreeNode( + children = Map( + b = pattern_tree.PatternTreeNode( + children = Map({ + 'c': pattern_tree.PatternTreeNode( + literal_match = 'dummy/c' + ), + '*': pattern_tree.PatternTreeNode( + literal_match = 'dummy/*' + ), + '**': pattern_tree.PatternTreeNode( + literal_match = 'dummy/**' + ), + '***': pattern_tree.PatternTreeNode( + literal_match = 'dummy/***' + ) + }) + ) + ) + ) + ) + ) + ) + +@pytest.mark.parametrize('_in, out', [ + (['a'], []), + (['a', 'x', 'y', 'z'], []), + (['a', 'b'], ['dummy/***']), + (['a', 'b', 'c'], ['dummy/c', 'dummy/*', 'dummy/***']), + (['a', 'b', 'u'], ['dummy/*', 'dummy/***']), + (['a', 'b', '*'], ['dummy/*', 'dummy/***']), + (['a', 'b', '**'], ['dummy/**', 'dummy/*', 'dummy/***']), + (['a', 'b', '***'], ['dummy/***', 'dummy/*']), + (['a', 'b', 'u', 'l'], ['dummy/**', 'dummy/***']), + (['a', 'b', 'u', 'l', 'y'], ['dummy/**', 'dummy/***']) +]) +def test_pattern_tree_branch_search_wildcards(_in, out, branch_with_wildcards): + """....""" + assert [*branch_with_wildcards.search(_in)] == out + +def test_filter_by_trailing_slash(sample_url_parsed): + """....""" + sample_url_parsed2 = dc.replace(sample_url_parsed, has_trailing_slash=True) + item1 = pattern_tree.StoredTreeItem(sample_url_parsed, 'dummy_it1') + item2 = pattern_tree.StoredTreeItem(sample_url_parsed2, 'dummy_it2') + + assert pattern_tree.filter_by_trailing_slash((item1, item2), False) == \ + frozenset({item1}) + + assert pattern_tree.filter_by_trailing_slash((item1, item2), True) == \ + frozenset({item2}) + +@pytest.mark.parametrize('register_mode', [True, False]) +@pytest.mark.parametrize('empty_at_start', [True, False]) +@pytest.mark.parametrize('empty_at_end', [True, False]) +def test_pattern_tree_privatemethod_register( + register_mode, + empty_at_start, + empty_at_end, + monkeypatch, + sample_url_parsed +): + """....""" + dummy_it = pattern_tree.StoredTreeItem(sample_url_parsed, 'dummy_it') + other_dummy_it = pattern_tree.StoredTreeItem( + sample_url_parsed, + 'other_dummy_it' + ) + + class MockedTreeBranch: + """....""" + def is_empty(self): + """....""" + return empty_at_end + + def update(self, segments, item_updater): + """....""" + if segments == ('com', 'example'): + return self._update_as_domain_branch(item_updater) + else: + assert segments == ('aa', 'bb') + return self._update_as_path_branch(item_updater) + + def _update_as_domain_branch(self, item_updater): + """....""" + for updater_input in (None, MockedTreeBranch()): + updated = item_updater(updater_input) + if empty_at_end: + assert updated is None + else: + assert type(updated) is MockedTreeBranch + + return MockedTreeBranch() + + def _update_as_path_branch(self, item_updater): + """....""" + set_with_1_item = frozenset() + set_with_2_items = frozenset({dummy_it, other_dummy_it}) + for updater_input in (None, set_with_1_item, set_with_2_items): + updated = item_updater(updater_input) + if register_mode: + assert dummy_it in updated + elif updater_input is set_with_2_items: + assert dummy_it not in updated + else: + assert updated is None + + return MockedTreeBranch() + + monkeypatch.setattr(pattern_tree, 'PatternTreeBranch', MockedTreeBranch) + + initial_root = Map() if empty_at_start else \ + Map({('http', 80): MockedTreeBranch()}) + + tree = pattern_tree.PatternTree(_by_scheme_and_port=initial_root) + + new_tree = tree._register( + sample_url_parsed, + 'dummy_it', + register=register_mode + ) + + assert new_tree is not tree + + if empty_at_end: + assert new_tree._by_scheme_and_port == Map() + else: + assert len(new_tree._by_scheme_and_port) == 1 + assert type(new_tree._by_scheme_and_port[('http', 80)]) is \ + MockedTreeBranch + +# @pytest.mark.parametrize('register_mode', [True, False]) +# def test_pattern_tree_privatemethod_register( +# register_mode, +# monkeypatch, +# sample_url_parsed +# ): +# """....""" +# registered_count = 0 + +# def mocked_parse_pattern(url_pattern): +# """....""" +# assert url_pattern == 'dummy_pattern' + +# for _ in range(2): +# yield sample_url_parsed + +# monkeypatch.setattr(pattern_tree, 'parse_pattern', mocked_parse_pattern) + +# def mocked_reconstruct_url(self): +# """....""" +# return 'dummy_reconstructed_pattern' + +# monkeypatch.setattr(pattern_tree.ParsedUrl, 'reconstruct_url', +# mocked_reconstruct_url) + +# def mocked_register_with_parsed_pattern( +# self, +# parsed_pat, +# wrapped_item, +# register=True +# ): +# """....""" +# nonlocal registered_count + +# assert parsed_pat is sample_url_parsed +# assert wrapped_item.pattern == 'dummy_reconstructed_pattern' +# assert register == register_mode + +# registered_count += 1 + +# return 'dummy_new_tree' if registered_count == 2 else dc.replace(self) + +# monkeypatch.setattr( +# pattern_tree.PatternTree, +# '_register_with_parsed_pattern', +# mocked_register_with_parsed_pattern +# ) + +# pattern_tree = pattern_tree.PatternTree() + +# new_tree = pattern_tree._register( +# 'dummy_pattern', +# 'dummy_item', +# register_mode +# ) + +# assert new_tree == 'dummy_new_tree' + +@pytest.mark.parametrize('method_name, register_mode', [ + ('register', True), + ('deregister', False) +]) +def test_pattern_tree_register(method_name, register_mode, monkeypatch): + """....""" + def mocked_privatemethod_register(self, parsed_pat, item, register=True): + """....""" + assert (parsed_pat, item, register) == \ + ('dummy_pattern', 'dummy_url', register_mode) + + return 'dummy_new_tree' + + monkeypatch.setattr( + pattern_tree.PatternTree, + '_register', + mocked_privatemethod_register + ) + + method = getattr(pattern_tree.PatternTree(), method_name) + assert method('dummy_pattern', 'dummy_url') == 'dummy_new_tree' + +@pytest.fixture +def mock_parse_url(monkeypatch, sample_url_parsed): + """....""" + def mocked_parse_url(url): + """....""" + assert url == 'dummy_url' + return dc.replace( + sample_url_parsed, + **getattr(mocked_parse_url, 'url_mod', {}) + ) + + monkeypatch.setattr(pattern_tree, 'parse_url', mocked_parse_url) + + return mocked_parse_url + +@pytest.mark.usefixtures('mock_parse_url') +def test_pattern_tree_search_empty(sample_url_parsed): + """....""" + for url in ('dummy_url', sample_url_parsed): + assert [*pattern_tree.PatternTree().search(url)] == [] + +@pytest.mark.parametrize('url_mod, out', [ + ({}, + ['dummy_set_A', 'dummy_set_B', 'dummy_set_C']), + + ({'has_trailing_slash': True}, + ['dummy_set_A_with_slash', 'dummy_set_A', + 'dummy_set_B_with_slash', 'dummy_set_B', + 'dummy_set_C_with_slash', 'dummy_set_C']) +]) +def test_pattern_tree_search( + url_mod, + out, + monkeypatch, + sample_url_parsed, + mock_parse_url, +): + """....""" + mock_parse_url.url_mod = url_mod + + dummy_tree_contents = [ + ['dummy_set_A', 'dummy_set_B'], + [], + ['dummy_empty_set'] * 3, + ['dummy_set_C'] + ] + + def mocked_filter_by_trailing_slash(items, with_slash): + """....""" + if items == 'dummy_empty_set': + return frozenset() + + return items + ('_with_slash' if with_slash else '') + + monkeypatch.setattr(pattern_tree, 'filter_by_trailing_slash', + mocked_filter_by_trailing_slash) + + class MockedDomainBranch: + """....""" + def search(self, labels): + """....""" + assert labels == sample_url_parsed.domain_labels + + for item_sets in dummy_tree_contents: + class MockedPathBranch: + """....""" + def search(self, segments, item_sets=item_sets): + """....""" + assert segments == sample_url_parsed.path_segments + + for dummy_items_set in item_sets: + yield dummy_items_set + + yield MockedPathBranch() + + tree = pattern_tree.PatternTree( + _by_scheme_and_port = {('http', 80): MockedDomainBranch()} + ) + + for url in ('dummy_url', mock_parse_url('dummy_url')): + assert [*tree.search(url)] == out |