diff options
-rw-r--r-- | background/webrequest.js | 136 | ||||
-rw-r--r-- | test/haketilo_test/unit/test_webrequest.py | 120 |
2 files changed, 223 insertions, 33 deletions
diff --git a/background/webrequest.js b/background/webrequest.js index a523772..5ec7b7f 100644 --- a/background/webrequest.js +++ b/background/webrequest.js @@ -3,7 +3,7 @@ * * Function: Modify HTTP traffic using webRequest API. * - * Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org> + * Copyright (C) 2021, 2022 Wojtek Kosior <koszko@koszko.org> * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -41,53 +41,145 @@ * proprietary program, I am not going to enforce this in court. */ -#IMPORT common/indexeddb.js AS haketilodb +#IMPORT common/indexeddb.js AS haketilodb + #IF MOZILLA #IMPORT background/stream_filter.js #ENDIF #FROM common/browser.js IMPORT browser -#FROM common/misc.js IMPORT is_privileged_url, csp_header_regex +#FROM common/misc.js IMPORT is_privileged_url, csp_header_regex, \ + sha256_async AS sha256 #FROM common/policy.js IMPORT decide_policy #FROM background/patterns_query_manager.js IMPORT tree, default_allow let secret; -function on_headers_received(details) -{ +#IF MOZILLA +/* + * Under Mozilla-based browsers, responses are cached together with headers as + * they appear *after* modifications by Haketilo. This means Haketilo's CSP + * script-blocking headers might be present in responses loaded from cache. In + * the meantime the user might have changed Haketilo settings to instead allow + * the scripts on the page in question. This causes a problem and creates the + * need to somehow restore the response headers to the state in which they + * arrived from the server. + * To cope with this, Haketilo will inject some additional headers with private + * data. Those will include a hard-to-guess value derived from extension's + * internal ID. It is assumed the internal ID has a longer lifetime than cached + * responses. 
+ */ + +const settings_page_url = browser.runtime.getURL("html/settings.html"); +const header_prefix_prom = sha256(settings_page_url) + .then(hash => `X-Haketilo-${hash}`); + +/* + * Mozilla, unlike Chrome, allows webRequest callbacks to return promises. Here + * we leverage that to be able to use asynchronous sha256 computation. + */ +async function on_headers_received(details) { +#IF NEVER +} /* Help auto-indent in editors. */ +#ENDIF +#ELSE +function on_headers_received(details) { +#ENDIF const url = details.url; if (is_privileged_url(details.url)) return; let headers = details.responseHeaders; +#IF MOZILLA + const prefix = await header_prefix_prom; + + /* + * We assume that the original CSP headers of a response are always + * preserved under names of the form: + * X-Haketilo-<some_secret>-<original_name> + * In some cases the original response may contain no CSP headers. To still + * be able to tell whether the headers we were provided were modified by + * Haketilo in the past, all modifications are accompanied by addition of an + * extra header with name: + * X-Haketilo-<some_secret> + */ + + const restore_old_headers = details.fromCache && + !!headers.filter(h => h.name === prefix).length; + + if (restore_old_headers) { + const restored_headers = []; + + for (const h of headers) { + if (csp_header_regex.test(h.name) || h.name === prefix) + continue; + + if (h.name.startsWith(prefix)) { + restored_headers.push({ + name: h.name.substring(prefix.length + 1), + value: h.value + }); + } else { + restored_headers.push(h); + } + } + + headers = restored_headers; + } +#ENDIF + const policy = decide_policy(tree, details.url, !!default_allow.value, secret); - if (policy.allow) - return; - if (policy.payload) - headers = headers.filter(h => !csp_header_regex.test(h.name)); + if (!policy.allow) { +#IF MOZILLA + const to_append = [{name: prefix, value: ":)"}]; + + for (const h of headers.filter(h => csp_header_regex.test(h.name))) { + if (!policy.payload) + 
to_append.push(Object.assign({}, h)); + + h.name = `${prefix}-${h.name}`; + } - headers.push({name: "Content-Security-Policy", value: policy.csp}); + headers.push(...to_append); +#ELSE + if (policy.payload) + headers = headers.filter(h => !csp_header_regex.test(h.name)); +#ENDIF + + headers.push({name: "Content-Security-Policy", value: policy.csp}); + } #IF MOZILLA - let skip = false; - for (const header of headers) { - if (header.name.toLowerCase().trim() !== "content-disposition") - continue; - - if (/^\s*attachment\s*(;.*)$/i.test(header.value)) { - skip = true; - } else { - skip = false; - break; + /* + * When page is meant to be viewed in the browser, use streamFilter to + * inject a dummy <script> at the very beginning of it. This <script> + * will cause extension's content scripts to run before page's first <meta> + * tag is rendered so that they can prevent CSP rules inside <meta> tags + * from blocking the payload we want to inject. + */ + + let use_stream_filter = !!policy.payload; + if (use_stream_filter) { + for (const header of headers) { + if (header.name.toLowerCase().trim() !== "content-disposition") + continue; + + if (/^\s*attachment\s*(;.*)$/i.test(header.value)) { + use_stream_filter = false; + } else { + use_stream_filter = true; + break; + } } } - skip = skip || (details.statusCode >= 300 && details.statusCode < 400); + use_stream_filter = use_stream_filter && + (details.statusCode < 300 || details.statusCode >= 400); - if (!skip) + if (use_stream_filter) headers = stream_filter.apply(details, headers, policy); #ENDIF diff --git a/test/haketilo_test/unit/test_webrequest.py b/test/haketilo_test/unit/test_webrequest.py index fb24b3d..1244117 100644 --- a/test/haketilo_test/unit/test_webrequest.py +++ b/test/haketilo_test/unit/test_webrequest.py @@ -24,6 +24,10 @@ import pytest from ..script_loader import load_script from .utils import are_scripts_allowed +allowed_url = 'https://site.with.scripts.allow.ed/' +blocked_url = 
'https://site.with.scripts.block.ed/' +payload_url = 'https://site.with.paylo.ad/' + def webrequest_js(): return (load_script('background/webrequest.js', '#IMPORT common/patterns_query_tree.js AS pqt') + @@ -34,24 +38,118 @@ def webrequest_js(): default_allow = {name: "default_allow", value: true}; // Rule to block scripts. - pqt.register(tree, "https://site.with.scripts.block.ed/***", + pqt.register(tree, "%(blocked)s***", "~allow", 0); // Rule to allow scripts, but overridden by payload assignment. - pqt.register(tree, "https://site.with.paylo.ad/***", "~allow", 1); - pqt.register(tree, "https://site.with.paylo.ad/***", - "somemapping", {identifier: "someresource"}); + pqt.register(tree, "%(payload)s***", "~allow", 1); + pqt.register(tree, "%(payload)s***", "somemapping", + {identifier: "someresource"}); // Mock stream_filter. stream_filter.apply = (details, headers, policy) => headers; + ''' % {'blocked': blocked_url, 'payload': payload_url}) + +def webrequest_js_start_called(): + return webrequest_js() + ';\nstart("somesecret");' + +ext_url = 'moz-extension://49de6ce9-49fc-49e1-8102-7ef35286389c/html/settings.html' +prefix = 'X-Haketilo-' + sha256(ext_url.encode()).digest().hex() + +# Prepare a list of headers as could be sent by a website. 
+sample_csp_header = { + 'name': 'Content-Security-Policy', + 'value': "script-src 'self';" +} +sample_csp_header_idx = 7 + +sample_headers = [ + {'name': 'Content-Type', 'value': 'text/html;charset=utf-8'}, + {'name': 'Content-Length', 'value': '61954'}, + {'name': 'Content-Language', 'value': 'en'}, + {'name': 'Expires', 'value': 'Mon, 12 Mar 2012 11:04...'}, + {'name': 'Last-Modified', 'value': 'Fri, 26 Jul 2013 22:50...'}, + {'name': 'Cache-Control', 'value': 'max-age=0, s-maxage=86...'}, + {'name': 'Age', 'value': '224'}, + {'name': 'Server', 'value': 'nginx/1.1.19'}, + {'name': 'Date', 'value': 'Thu, 10 Mar 2022 12:09...'} +] + +sample_headers.insert(sample_csp_header_idx, sample_csp_header) + +# Prepare a list of headers as would be crafted by Haketilo when there is a +# payload to inject. +nonce_source = f'somemapping:someresource:{payload_url}:somesecret'.encode() +nonce = f'nonce-{sha256(nonce_source).digest().hex()}' + +payload_csp_header = { + 'name': f'Content-Security-Policy', + 'value': ("prefetch-src 'none'; script-src-attr 'none'; " + f"script-src '{nonce}'; script-src-elem '{nonce}';") +} + +sample_payload_headers = [ + *sample_headers, + {'name': prefix, 'value': ':)'}, + payload_csp_header +] + +sample_payload_headers[sample_csp_header_idx] = { + **sample_csp_header, + 'name': f'{prefix}-{sample_csp_header["name"]}', +} + +# Prepare a list of headers as would be crafted by Haketilo when scripts are +# blocked. 
+sample_blocked_headers = [*sample_payload_headers] +sample_blocked_headers.pop() +sample_blocked_headers.append(sample_csp_header) +sample_blocked_headers.append({ + 'name': f'Content-Security-Policy', + 'value': ("prefetch-src 'none'; script-src-attr 'none'; " + f"script-src 'none'; script-src-elem 'none';") +}) + +@pytest.mark.get_page('https://gotmyowndoma.in') +@pytest.mark.parametrize('params', [ + (sample_headers, allowed_url), + (sample_blocked_headers, blocked_url), + (sample_payload_headers, payload_url), +]) +def test_webrequest_on_headers_received(driver, execute_in_page, params): + """Unit-test the on_headers_received() function.""" + headers_out, url = params + + execute_in_page( + '''{ + // Mock browser object. + const url = arguments[0]; + this.browser = {runtime: {getURL: () => url}}; + }''', + ext_url) + + execute_in_page(webrequest_js()) + + execute_in_page('secret = "somesecret";') + + for headers_in in [ + sample_headers, + sample_blocked_headers, + sample_payload_headers + ]: + details = {'url': url, 'responseHeaders': headers_in, 'fromCache': True} + res = execute_in_page('returnval(on_headers_received(arguments[0]));', + details) - // Mock secret and start webrequest operations. - start("somesecret"); - ''') + assert res == {'responseHeaders': headers_out} -@pytest.mark.ext_data({'background_script': webrequest_js}) +@pytest.mark.ext_data({'background_script': webrequest_js_start_called}) @pytest.mark.usefixtures('webextension') -def test_on_headers_received(driver, execute_in_page): +def test_webrequest_real_pages(driver, execute_in_page): + """ + Test webRequest-based header modifications by loading actual pages and + attempting to run scripts within them. 
+ """ for attempt in range(10): driver.get('https://site.with.scripts.block.ed/') @@ -59,10 +157,10 @@ def test_on_headers_received(driver, execute_in_page): break assert attempt != 9 - driver.get('https://site.with.scripts.allow.ed/') + driver.get(allowed_url) assert are_scripts_allowed(driver) - driver.get('https://site.with.paylo.ad/') + driver.get(payload_url) assert not are_scripts_allowed(driver) source = 'somemapping:someresource:https://site.with.paylo.ad/index.html:somesecret' assert are_scripts_allowed(driver, sha256(source.encode()).digest().hex()) |