From 5c583de820c0d5f666a830ca1e8205fe7d55e61e Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Wed, 1 Dec 2021 21:08:03 +0100 Subject: start implementing more efficient querying of URL patterns --- test/unit/test_patterns.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'test/unit/test_patterns.py') diff --git a/test/unit/test_patterns.py b/test/unit/test_patterns.py index 4162fc0..4cfc10c 100644 --- a/test/unit/test_patterns.py +++ b/test/unit/test_patterns.py @@ -89,3 +89,65 @@ def test_regexes(execute_in_page, patterns_code): match = execute_in_page('returnval(ftp_regex.exec(arguments[0]));', '@bad.url/') assert match is None + +def test_deconstruct_url(execute_in_page, patterns_code): + """ + patterns.js contains deconstruct_url() function that handles URL parsing. + Verify it works properly. + """ + execute_in_page(patterns_code, page='https://gotmyowndoma.in') + + deco = execute_in_page('returnval(deconstruct_url(arguments[0]));', + 'https://eXaMpLe.com/a/b?ver=1.2.3#heading2') + assert deco + assert deco['trailing_dash'] == False + assert deco['proto'] == 'https' + assert deco['domain'] == ['example', 'com'] + assert deco['path'] == ['a', 'b'] + + deco = execute_in_page('returnval(deconstruct_url(arguments[0]));', + 'http://**.example.com/') + assert deco + assert deco['trailing_dash'] == True + assert deco['proto'] == 'http' + assert deco['domain'] == ['**', 'example', 'com'] + assert deco['path'] == [] + + deco = execute_in_page('returnval(deconstruct_url(arguments[0]));', + 'ftp://user@ftp.example.com/all///passwords.txt/') + assert deco + assert deco['trailing_dash'] == True + assert deco['proto'] == 'ftp' + assert deco['domain'] == ['ftp', 'example', 'com'] + assert deco['path'] == ['all', 'passwords.txt'] + + deco = execute_in_page('returnval(deconstruct_url(arguments[0]));', + 'ftp://mirror.edu.pl.eu.org') + assert deco + assert deco['trailing_dash'] == False + assert deco['proto'] == 'ftp' + assert deco['domain'] == ['mirror', 'edu', 'pl', 'eu', 'org'] + assert deco['path'] == [] + + deco = execute_in_page('returnval(deconstruct_url(arguments[0]));', + 'file:///mnt/parabola_chroot///etc/passwd') + assert deco + assert deco['trailing_dash'] == False + assert deco['proto'] == 'file' + assert deco['path'] == ['mnt', 'parabola_chroot', 'etc', 'passwd'] + + for bad_url in [ + '://bad-url.missing/protocol', + 'http:/example.com/a/b', + 'unknown://example.com/a/b', + 'idontfancypineapple', + 'ftp://@example.org/', + 'https:///some/path/', + 'file://non-absolute/path' + ]: + with pytest.raises(Exception, match=r'Error in injected script'): + deco = execute_in_page('returnval(deconstruct_url(arguments[0]));', + bad_url) + + # at some point we might also consider testing url deconstruction with + # length limits... -- cgit v1.2.3