# SPDX-License-Identifier: CC0-1.0

"""
Haketilo unit tests - URL patterns
"""

# This file is part of Haketilo
#
# Copyright (C) 2021, 2022 Wojtek Kosior <koszko@koszko.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the CC0 1.0 Universal License as published by
# the Creative Commons Corporation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# CC0 1.0 Universal License for more details.

import pytest

from ..script_loader import load_script

@pytest.mark.get_page('https://gotmyowndoma.in')
def test_modify_branch(execute_in_page):
    """
    patterns_query_tree.js contains Pattern Tree data structure that allows
    arrays of string labels to be mapped to items.
    Verify that operations modifying a single branch of such tree work
    properly.

    Items are first added to and then removed from a branch through
    modify_sequence(). The JavaScript callbacks passed to it maintain
    counters, which lets the test check how many times each callback got
    invoked for a given label sequence.
    """
    execute_in_page(load_script('common/patterns_query_tree.js'))
    # Define the callbacks handed to modify_sequence(). item_adder() and
    # item_remover() reset their respective counter each time a fresh callback
    # is requested, so a counter always describes the most recent operation.
    execute_in_page(
        '''
        let items_added;
        let items_removed;

        function _item_adder(item, array)
        {
            items_added++;
            return [...(array || []), item];
        }

        function item_adder(item)
        {
            items_added = 0;
            return array => _item_adder(item, array);
        }

        function _item_remover(array)
        {
            if (array !== null) {
                items_removed++;
                array.pop();
            }
            return (array && array.length > 0) ? array : null;
        }

        function item_remover()
        {
            items_removed = 0;
            return _item_remover;
        }''')

    # Let's construct some tree branch while checking that each addition gives
    # the right result.
    branch = execute_in_page(
        '''{
        const branch = empty_node();
        modify_sequence(branch, ['com', 'example'], item_adder('some_item'));
        returnval(branch);
        }''')
    assert branch == {
        'c': {
            'com': {
                'c': {
                    'example': {
                        'l': ['some_item']
                    }
                }
            }
        }
    }

    branch, items_added = execute_in_page(
        '''{
        const branch = arguments[0];
        modify_sequence(branch, ['com', 'example'], item_adder('other_item'));
        returnval([branch, items_added]);
        }''', branch)
    assert items_added == 1
    assert branch['c']['com']['c']['example']['l'] \
        == ['some_item', 'other_item']

    # A sequence ending with a wildcard label is expected to be stored twice:
    # directly under the wildcard key of the parent node and as a regular
    # child node. Hence the callback is expected to run 2 times per addition.
    for wildcard in ('*', '**', '***'):
        for expected_array in [['third_item'], ['third_item', '4th_item']]:
            branch, items_added = execute_in_page(
                '''{
                const branch = arguments[0];
                modify_sequence(branch, ['com', 'sample', arguments[1]],
                                item_adder(arguments[2]));
                returnval([branch, items_added]);
                }''',
                branch, wildcard, expected_array[-1])
            assert items_added == 2
            sample = branch['c']['com']['c']['sample']
            assert sample[wildcard] == expected_array
            assert sample['c'][wildcard]['l'] == expected_array

    # A wildcard label in the middle of a sequence is expected to be treated
    # like any other label, i.e. the callback runs only once.
    branch, items_added = execute_in_page(
        '''{
        const branch = arguments[0];
        modify_sequence(branch, ['org', 'koszko', '***', '123'],
                        item_adder('5th_item'));
        returnval([branch, items_added]);
        }''',
        branch)
    assert items_added == 1
    assert branch['c']['org']['c']['koszko']['c']['***']['c']['123']['l'] \
        == ['5th_item']

    # Let's verify that removing a nonexistent element doesn't modify the tree.
    branch2, items_removed = execute_in_page(
        '''{
        const branch = arguments[0];
        modify_sequence(branch, ['com', 'not', 'registered', '*'],
                        item_remover());
        returnval([branch, items_removed]);
        }''',
        branch)
    assert branch == branch2
    assert items_removed == 0

    # Let's remove all elements in the tree branch while checking that each
    # removal gives the right result.
    branch, items_removed = execute_in_page(
        '''{
        const branch = arguments[0];
        modify_sequence(branch, ['org', 'koszko', '***', '123'],
                        item_remover());
        returnval([branch, items_removed]);
        }''',
        branch)
    assert items_removed == 1
    assert 'org' not in branch['c']

    for wildcard in ('*', '**', '***'):
        for expected_array in [['third_item'], None]:
            branch, items_removed = execute_in_page(
                '''{
                const branch = arguments[0];
                modify_sequence(branch, ['com', 'sample', arguments[1]],
                                item_remover());
                returnval([branch, items_removed]);
                }''',
                branch, wildcard)
            assert items_removed == 2
            # Once its last item is gone, an entry (and eventually the whole
            # 'sample' node) is expected to be absent. Tolerant lookups make
            # the "absent" case compare equal to None.
            sample = branch['c']['com']['c'].get('sample', {})
            assert sample.get(wildcard) == expected_array
            assert sample.get('c', {}).get(wildcard, {}).get('l') \
                == expected_array

    for i in range(2):
        branch, items_removed = execute_in_page(
            '''{
            const branch = arguments[0];
            modify_sequence(branch, ['com', 'example'], item_remover());
            returnval([branch, items_removed]);
            }''',
            branch)
        assert items_removed == 1
        # Only after the first of the 2 removals is there anything left to
        # inspect under 'example'.
        if i == 0:
            assert branch['c']['com']['c']['example']['l'] == ['some_item']

    # With every item removed the branch should be an empty object again.
    assert branch == {}

@pytest.mark.get_page('https://gotmyowndoma.in')
def test_search_branch(execute_in_page):
    """
    patterns_query_tree.js contains Pattern Tree data structure that allows
    arrays of string labels to be mapped to items.
    Verify that searching a single branch of such tree works properly.

    A branch is populated once in the page and then queried with
    search_sequence() for a number of label sequences. For every query the
    yielded matches are compared, in order, with the expected ones.
    """
    execute_in_page(load_script('common/patterns_query_tree.js'))
    execute_in_page(
        '''
        const item_adder = item => (array => [...(array || []), item]);
        ''')

    # Let's construct some tree branch to test on. Items 'a' and 'A' (and
    # their derivatives) are registered under the very same sequences so that
    # some matches hold more than one item.
    execute_in_page(
        '''
        var branch = empty_node();

        for (const [item, sequence] of [
            ['(root)', []],
            ['***',    ['***']],
            ['**',     ['**']],
            ['*',      ['*']],

            ['a',      ['a']],
            ['A',      ['a']],
            ['b',      ['b']],

            ['a/***',  ['a', '***']],
            ['A/***',  ['a', '***']],
            ['a/**',   ['a', '**']],
            ['A/**',   ['a', '**']],
            ['a/*',    ['a', '*']],
            ['A/*',    ['a', '*']],
            ['a/sth',  ['a', 'sth']],
            ['A/sth',  ['a', 'sth']],

            ['b/***',  ['b', '***']],
            ['b/**',   ['b', '**']],
            ['b/*',    ['b', '*']],
            ['b/sth',  ['b', 'sth']],
        ])
            modify_sequence(branch, sequence, item_adder(item));
        ''')

    # Let's make the actual searches on our testing branch. Each expected
    # value is a list of sets: the order of matches is checked while the order
    # of items within a single match is not.
    for sequence, expected in [
            ([],      [{'(root)'},                            {'***'}]),
            (['a'],   [{'a', 'A'}, {'a/***', 'A/***'}, {'*'}, {'***'}]),
            (['b'],   [{'b'},      {'b/***'},          {'*'}, {'***'}]),
            (['c'],   [                                {'*'}, {'***'}]),
            (['***'], [{'***'},                        {'*'}         ]),
            (['**'],  [{'**'},                         {'*'}, {'***'}]),
            (['*'],   [{'*'},                                 {'***'}]),

            (['a', 'sth'], [{'a/sth', 'A/sth'}, {'a/*', 'A/*'}, {'a/***', 'A/***'}, {'**'}, {'***'}]),
            (['b', 'sth'], [{'b/sth'},          {'b/*'},        {'b/***'},          {'**'}, {'***'}]),
            (['a', 'hts'], [                    {'a/*', 'A/*'}, {'a/***', 'A/***'}, {'**'}, {'***'}]),
            (['b', 'hts'], [                    {'b/*'},        {'b/***'},          {'**'}, {'***'}]),
            (['a', '***'], [{'a/***', 'A/***'}, {'a/*', 'A/*'},                     {'**'}, {'***'}]),
            (['b', '***'], [{'b/***'},          {'b/*'},                            {'**'}, {'***'}]),
            (['a', '**'],  [{'a/**', 'A/**'},   {'a/*', 'A/*'}, {'a/***', 'A/***'}, {'**'}, {'***'}]),
            (['b', '**'],  [{'b/**'},           {'b/*'},        {'b/***'},          {'**'}, {'***'}]),
            (['a', '*'],   [{'a/*', 'A/*'},                     {'a/***', 'A/***'}, {'**'}, {'***'}]),
            (['b', '*'],   [{'b/*'},                            {'b/***'},          {'**'}, {'***'}]),

            (['a', 'c', 'd'], [{'a/**', 'A/**'}, {'a/***', 'A/***'}, {'**'}, {'***'}]),
            (['b', 'c', 'd'], [{'b/**'},         {'b/***'},          {'**'}, {'***'}])
    ]:
        result = execute_in_page(
            '''
            returnval([...search_sequence(branch, arguments[0])]);
            ''',
            sequence)

        try:
            assert len(result) == len(expected)

            for expected_set, result_array in zip(expected, result):
                # The length check guards against duplicated items, which
                # the conversion to a set would otherwise hide.
                assert len(expected_set) == len(result_array)
                assert expected_set      == set(result_array)
        except Exception as e:
            # Tell which of the many cases failed before propagating the
            # error.
            import sys
            print('sequence:', sequence, '\nexpected:', expected,
                  '\nresult:', result, file=sys.stderr)
            raise e from None

@pytest.mark.get_page('https://gotmyowndoma.in')
def test_pattern_tree(execute_in_page):
    """
    patterns_query_tree.js contains Pattern Tree data structure that allows
    arrays of string labels to be mapped to items.
    Verify that operations on an entire such tree work properly.

    Two scenarios are run, one for a simple URL and one for a complex URL.
    In each, matching and non-matching patterns get registered (every pattern
    both with and without a trailing slash), the URL is searched for and the
    patterns are then deregistered until the tree is expected to be empty.
    """
    execute_in_page(load_script('common/patterns_query_tree.js'))

    # Script used by both scenarios: make a tree, register every pattern from
    # arguments[0] and arguments[1] in both variants and search for the URL
    # given in arguments[2].
    register_and_search = '''{
        const tree = pattern_tree_make();
        for (const pattern of arguments[0].concat(arguments[1])) {
            pattern_tree_register(tree, pattern,       'key', pattern);
            pattern_tree_register(tree, pattern + '/', 'key', pattern + '/');
        }
        returnval([tree, [...pattern_tree_search(tree, arguments[2])]]);
        }'''

    # Perform tests with all possible patterns for a simple URL.
    simple_url = 'https://example.com'
    matching = [
        'https://example.com',
        'https://example.com/***',
        'https://***.example.com',
        'https://***.example.com/***'
    ]
    non_matching = [
        'http://example.com',
        'https://a.example.com',
        'https://*.example.com',
        'https://**.example.com',
        'https://example.com/a',
        'https://example.com/*',
        'https://example.com/**',
    ]

    wanted = [{'key': pattern} for pattern in matching]

    tree, found = execute_in_page(register_and_search,
                                  matching, non_matching, simple_url)
    assert wanted == found

    # Also verify that deregistering half of the good patterns works correctly.
    # Patterns at odd indices get dropped, those at even indices stay.
    dropped = matching[1::2]
    matching = matching[::2]
    wanted = [{'key': pattern} for pattern in matching]
    tree, found = execute_in_page(
        '''{
        const tree = arguments[0];
        for (const pattern of arguments[1]) {
            pattern_tree_deregister(tree, pattern,       'key');
            pattern_tree_deregister(tree, pattern + '/', 'key');
        }
        returnval([tree, [...pattern_tree_search(tree, arguments[2])]]);
        }''',
        tree, dropped, simple_url)
    assert wanted == found

    # Also verify that deregistering all the patterns works correctly.
    tree = execute_in_page(
        '''{
        const tree = arguments[0];
        for (const pattern of arguments[1].concat(arguments[2])) {
            pattern_tree_deregister(tree, pattern,       'key');
            pattern_tree_deregister(tree, pattern + '/', 'key');
        }
        returnval(tree);
        }''',
        tree, matching, non_matching)
    assert tree == {}

    # Perform tests with all possible patterns for a complex URL.
    complex_url = \
        'http://settings.query.example.com/google/tries/destroy/adblockers//'
    matching = [
        'http://settings.query.example.com/google/tries/destroy/adblockers',
        'http://settings.query.example.com/google/tries/destroy/adblockers/***',
        'http://settings.query.example.com/google/tries/destroy/*',
        'http://settings.query.example.com/google/tries/destroy/***',
        'http://settings.query.example.com/google/tries/**',
        'http://settings.query.example.com/google/tries/***',
        'http://settings.query.example.com/google/**',
        'http://settings.query.example.com/google/***',
        'http://settings.query.example.com/**',
        'http://settings.query.example.com/***',

        'http://***.settings.query.example.com/google/tries/destroy/adblockers',
        'http://***.settings.query.example.com/google/tries/destroy/adblockers/***',
        'http://***.settings.query.example.com/google/tries/destroy/*',
        'http://***.settings.query.example.com/google/tries/destroy/***',
        'http://***.settings.query.example.com/google/tries/**',
        'http://***.settings.query.example.com/google/tries/***',
        'http://***.settings.query.example.com/google/**',
        'http://***.settings.query.example.com/google/***',
        'http://***.settings.query.example.com/**',
        'http://***.settings.query.example.com/***',
        'http://*.query.example.com/google/tries/destroy/adblockers',
        'http://*.query.example.com/google/tries/destroy/adblockers/***',
        'http://*.query.example.com/google/tries/destroy/*',
        'http://*.query.example.com/google/tries/destroy/***',
        'http://*.query.example.com/google/tries/**',
        'http://*.query.example.com/google/tries/***',
        'http://*.query.example.com/google/**',
        'http://*.query.example.com/google/***',
        'http://*.query.example.com/**',
        'http://*.query.example.com/***',
        'http://***.query.example.com/google/tries/destroy/adblockers',
        'http://***.query.example.com/google/tries/destroy/adblockers/***',
        'http://***.query.example.com/google/tries/destroy/*',
        'http://***.query.example.com/google/tries/destroy/***',
        'http://***.query.example.com/google/tries/**',
        'http://***.query.example.com/google/tries/***',
        'http://***.query.example.com/google/**',
        'http://***.query.example.com/google/***',
        'http://***.query.example.com/**',
        'http://***.query.example.com/***',
        'http://**.example.com/google/tries/destroy/adblockers',
        'http://**.example.com/google/tries/destroy/adblockers/***',
        'http://**.example.com/google/tries/destroy/*',
        'http://**.example.com/google/tries/destroy/***',
        'http://**.example.com/google/tries/**',
        'http://**.example.com/google/tries/***',
        'http://**.example.com/google/**',
        'http://**.example.com/google/***',
        'http://**.example.com/**',
        'http://**.example.com/***',
        'http://***.example.com/google/tries/destroy/adblockers',
        'http://***.example.com/google/tries/destroy/adblockers/***',
        'http://***.example.com/google/tries/destroy/*',
        'http://***.example.com/google/tries/destroy/***',
        'http://***.example.com/google/tries/**',
        'http://***.example.com/google/tries/***',
        'http://***.example.com/google/**',
        'http://***.example.com/google/***',
        'http://***.example.com/**',
        'http://***.example.com/***'
    ]
    non_matching = [
        'https://settings.query.example.com/google/tries/destroy/adblockers',
        'http://settings.query.example.com/google/tries/destroy/adblockers/a',
        'http://settings.query.example.com/google/tries/destroy/adblockers/*',
        'http://settings.query.example.com/google/tries/destroy/adblockers/**',
        'http://settings.query.example.com/google/tries/destroy/a',
        'http://settings.query.example.com/google/tries/destroy/**',
        'http://settings.query.example.com/google/tries/*',
        'http://a.settings.query.example.com/google/tries/destroy/adblockers',
        'http://*.settings.query.example.com/google/tries/destroy/adblockers',
        'http://**.settings.query.example.com/google/tries/destroy/adblockers',
        'http://a.query.example.com/google/tries/destroy/adblockers',
        'http://**.query.example.com/google/tries/destroy/adblockers',
        'http://*.example.com/google/tries/destroy/adblockers'
    ]

    # For this URL both variants of every good pattern are expected in the
    # result, the one with trailing slash coming first.
    wanted = [{'key': pattern + suffix}
              for pattern in matching
              for suffix in ['/', '']]

    tree, found = execute_in_page(register_and_search,
                                  matching, non_matching, complex_url)
    assert wanted == found

    # Also verify that deregistering all patterns with trailing slash works
    # correctly.
    wanted = [{'key': pattern} for pattern in matching]
    tree, found = execute_in_page(
        '''{
        const tree = arguments[0];
        for (const pattern of arguments[1])
            pattern_tree_deregister(tree, pattern + '/', 'key');
        returnval([tree, [...pattern_tree_search(tree, arguments[2])]]);
        }''',
        tree, matching, complex_url)
    assert wanted == found

    # Also verify that deregistering all the patterns works correctly. Good
    # patterns have only the slash-less variant left at this point while the
    # bad ones are still registered in both variants.
    tree = execute_in_page(
        '''{
        const tree = arguments[0];
        for (const pattern of arguments[1])
            pattern_tree_deregister(tree, pattern,       'key');
        for (const pattern of arguments[2]) {
            pattern_tree_deregister(tree, pattern,       'key');
            pattern_tree_deregister(tree, pattern + '/', 'key');
        }
        returnval(tree);
        }''',
        tree, matching, non_matching)
    assert tree == {}