diff options
Diffstat (limited to 'test/unit/test_patterns.py')
-rw-r--r-- | test/unit/test_patterns.py | 154 |
1 files changed, 154 insertions, 0 deletions
diff --git a/test/unit/test_patterns.py b/test/unit/test_patterns.py new file mode 100644 index 0000000..802bf4e --- /dev/null +++ b/test/unit/test_patterns.py @@ -0,0 +1,154 @@ +# SPDX-License-Identifier: CC0-1.0 + +""" +Haketilo unit tests - URL patterns +""" + +# This file is part of Haketilo +# +# Copyright (C) 2021, Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the CC0 1.0 Universal License as published by +# the Creative Commons Corporation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# CC0 1.0 Universal License for more details. + +import pytest + +from ..script_loader import load_script + +@pytest.fixture(scope="session") +def patterns_code(): + yield load_script('common/patterns.js', ['common']) + +def test_regexes(execute_in_page, patterns_code): + """ + patterns.js contains regexes used for URL parsing. + Verify they work properly. + """ + execute_in_page(patterns_code, page='https://gotmyowndoma.in') + + valid_url = 'https://example.com/a/b?ver=1.2.3#heading2' + valid_url_rest = 'example.com/a/b?ver=1.2.3#heading2' + + # Test matching of URL protocol. + match = execute_in_page('returnval(proto_regex.exec(arguments[0]));', + valid_url) + assert match + assert match[1] == 'https' + assert match[2] == valid_url_rest + + match = execute_in_page('returnval(proto_regex.exec(arguments[0]));', + '://bad-url.missing/protocol') + assert match is None + + # Test matching of http(s) URLs. + match = execute_in_page('returnval(http_regex.exec(arguments[0]));', + valid_url_rest) + assert match + assert match[1] == 'example.com' + assert match[2] == '/a/b' + assert match[3] == '?ver=1.2.3' + + match = execute_in_page('returnval(http_regex.exec(arguments[0]));', + 'another.example.com') + assert match + assert match[1] == 'another.example.com' + assert match[2] == '' + assert match[3] == '' + + match = execute_in_page('returnval(http_regex.exec(arguments[0]));', + '/bad/http/example') + assert match == None + + # Test matching of file URLs. + match = execute_in_page('returnval(file_regex.exec(arguments[0]));', + '/good/file/example') + assert match + assert match[1] == '/good/file/example' + + # Test matching of ftp URLs. + match = execute_in_page('returnval(ftp_regex.exec(arguments[0]));', + 'example.com/a/b#heading2') + assert match + assert match[1] is None + assert match[2] == 'example.com' + assert match[3] == '/a/b' + + match = execute_in_page('returnval(ftp_regex.exec(arguments[0]));', + 'some_user@localhost') + assert match + assert match[1] == 'some_user@' + assert match[2] == 'localhost' + assert match[3] == '' + + match = execute_in_page('returnval(ftp_regex.exec(arguments[0]));', + '@bad.url/') + assert match is None + +def test_deconstruct_url(execute_in_page, patterns_code): + """ + patterns.js contains deconstruct_url() function that handles URL parsing. + Verify it works properly. + """ + execute_in_page(patterns_code, page='https://gotmyowndoma.in') + + deco = execute_in_page('returnval(deconstruct_url(arguments[0]));', + 'https://eXaMpLe.com/a/b?ver=1.2.3#heading2') + assert deco + assert deco['trailing_slash'] == False + assert deco['proto'] == 'https' + assert deco['domain'] == ['example', 'com'] + assert deco['path'] == ['a', 'b'] + + deco = execute_in_page('returnval(deconstruct_url(arguments[0]));', + 'http://**.example.com/') + assert deco + assert deco['trailing_slash'] == True + assert deco['proto'] == 'http' + assert deco['domain'] == ['**', 'example', 'com'] + assert deco['path'] == [] + + deco = execute_in_page('returnval(deconstruct_url(arguments[0]));', + 'ftp://user@ftp.example.com/all///passwords.txt/') + assert deco + assert deco['trailing_slash'] == True + assert deco['proto'] == 'ftp' + assert deco['domain'] == ['ftp', 'example', 'com'] + assert deco['path'] == ['all', 'passwords.txt'] + + deco = execute_in_page('returnval(deconstruct_url(arguments[0]));', + 'ftp://mirror.edu.pl.eu.org') + assert deco + assert deco['trailing_slash'] == False + assert deco['proto'] == 'ftp' + assert deco['domain'] == ['mirror', 'edu', 'pl', 'eu', 'org'] + assert deco['path'] == [] + + deco = execute_in_page('returnval(deconstruct_url(arguments[0]));', + 'file:///mnt/parabola_chroot///etc/passwd') + assert deco + assert deco['trailing_slash'] == False + assert deco['proto'] == 'file' + assert deco['path'] == ['mnt', 'parabola_chroot', 'etc', 'passwd'] + assert 'domain' not in deco + + for bad_url in [ + '://bad-url.missing/protocol', + 'http:/example.com/a/b', + 'unknown://example.com/a/b', + 'idontfancypineapple', + 'ftp://@example.org/', + 'https:///some/path/', + 'file://non-absolute/path' + ]: + with pytest.raises(Exception, match=r'Error in injected script'): + deco = execute_in_page('returnval(deconstruct_url(arguments[0]));', + bad_url) + + # at some point we might also consider testing url deconstruction with + # length limits... |