From 6b53d6c840140fc5df6d7638808b978d96502a35 Mon Sep 17 00:00:00 2001
From: Wojtek Kosior
Date: Mon, 23 Aug 2021 11:05:51 +0200
Subject: use StreamFilter under Mozilla to prevent csp tags from blocking our
 injected scripts

---
 background/main.js            |  60 +++++++++++++-
 background/policy_injector.js |  61 ++++-----------
 background/stream_filter.js   | 176 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 247 insertions(+), 50 deletions(-)
 create mode 100644 background/stream_filter.js

diff --git a/background/main.js b/background/main.js
index 7c50fd5..85f8ce8 100644
--- a/background/main.js
+++ b/background/main.js
@@ -11,18 +11,21 @@
  * IMPORT get_storage
  * IMPORT start_storage_server
  * IMPORT start_page_actions_server
- * IMPORT start_policy_injector
  * IMPORT browser
+ * IMPORT is_privileged_url
+ * IMPORT query_best
+ * IMPORT gen_nonce
+ * IMPORT inject_csp_headers
+ * IMPORT apply_stream_filter
+ * IMPORT is_chrome
  * IMPORTS_END
  */
 
 start_storage_server();
 start_page_actions_server();
-start_policy_injector();
 
 async function init_ext(install_details)
 {
-    console.log("details:", install_details);
     if (install_details.reason != "install")
 	return;
 
@@ -44,4 +47,53 @@ async function init_ext(install_details)
 
 browser.runtime.onInstalled.addListener(init_ext);
 
-console.log("hello, hachette");
+
+let storage;
+
+function on_headers_received(details)
+{
+    const url = details.url;
+    if (is_privileged_url(details.url))
+	return;
+
+    const [pattern, settings] = query_best(storage, details.url);
+    const allow = !!(settings && settings.allow);
+    const nonce = gen_nonce();
+    const policy = {allow, url, nonce};
+
+    let headers = details.responseHeaders;
+    let skip = false;
+    for (const header of headers) {
+	if ((header.name.toLowerCase().trim() === "content-disposition" &&
+	     /^\s*attachment\s*(;.*)?$/i.test(header.value)))
+	    skip = true;
+    }
+
+    headers = inject_csp_headers(details, headers, policy);
+
+    skip = skip || (details.statusCode >= 300 && details.statusCode < 400);
+    if (!skip) {
+	/* Check for API availability. */
+	if (browser.webRequest.filterResponseData)
+	    headers = apply_stream_filter(details, headers, policy);
+    }
+
+    return {responseHeaders: headers};
+}
+
+async function start_webRequest_operations()
+{
+    storage = await get_storage();
+
+    const extra_opts = ["blocking", "responseHeaders"];
+    if (is_chrome)
+	extra_opts.push("extraHeaders");
+
+    browser.webRequest.onHeadersReceived.addListener(
+	on_headers_received,
+	{urls: ["<all_urls>"], types: ["main_frame", "sub_frame"]},
+	extra_opts
+    );
+}
+
+start_webRequest_operations();
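A note on the availability check above: `browser.webRequest.filterResponseData` is only implemented by Mozilla browsers; under Chromium the property is undefined, so rewriting the response body through a StreamFilter is a Firefox-only path, while CSP header injection runs on both engines. A standalone sketch of the same feature-detection pattern (the function name and return values are illustrative, not part of this patch):

    /* Decide how a response can be handled by the current browser. */
    function choose_response_strategy(webRequest) {
        /* Mozilla: the body itself can be rewritten mid-flight. */
        if (typeof webRequest.filterResponseData === "function")
            return "stream-filter";
        /* Chromium: no StreamFilter; only headers can be modified. */
        return "headers-only";
    }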
diff --git a/background/policy_injector.js b/background/policy_injector.js
index 3398b53..1d4db6f 100644
--- a/background/policy_injector.js
+++ b/background/policy_injector.js
@@ -8,36 +8,21 @@
 
 /*
  * IMPORTS_START
- * IMPORT get_storage
- * IMPORT browser
- * IMPORT is_chrome
- * IMPORT gen_nonce
- * IMPORT is_privileged_url
  * IMPORT sign_data
  * IMPORT extract_signed
- * IMPORT query_best
  * IMPORT sanitize_csp_header
  * IMPORT csp_rule
  * IMPORT is_csp_header_name
  * IMPORTS_END
  */
 
-var storage;
-
-function headers_inject(details)
+function inject_csp_headers(details, headers, policy)
 {
     const url = details.url;
-    if (is_privileged_url(url))
-	return;
-
-    const [pattern, settings] = query_best(storage, url);
-    const allow = !!(settings && settings.allow);
-    const nonce = gen_nonce();
 
     let orig_csp_headers;
     let old_signature;
     let hachette_header;
-    let headers = details.responseHeaders;
 
     for (const header of headers.filter(h => h.name === "x-hachette")) {
 	const match = /^([^%])(%.*)$/.exec(header.value);
@@ -50,7 +35,7 @@ function headers_inject(details)
 
 	/* Confirmed- it's the originals, smuggled in! */
 	orig_csp_headers = old_data.csp_headers;
-	old_signature = old_data.policy_signature;
+	old_signature = old_data.policy_sig;
 
 	hachette_header = header;
 	break;
@@ -65,21 +50,20 @@ function headers_inject(details)
 	headers.filter(h => is_csp_header_name(h.name));
 
     /* When blocking remove report-only CSP headers that snitch on us. */
-    headers = headers.filter(h => !is_csp_header_name(h.name, !allow));
+    headers = headers.filter(h => !is_csp_header_name(h.name, !policy.allow));
 
     if (old_signature)
 	headers = headers.filter(h => h.name.search(old_signature) === -1);
 
-    const policy_object = {allow, nonce, url};
-    const sanitizer = h => sanitize_csp_header(h, policy_object);
+    const sanitizer = h => sanitize_csp_header(h, policy);
     headers.push(...orig_csp_headers.map(sanitizer));
 
-    const policy = encodeURIComponent(JSON.stringify(policy_object));
-    const policy_signature = sign_data(policy, new Date());
+    const policy_str = encodeURIComponent(JSON.stringify(policy));
+    const policy_sig = sign_data(policy_str, new Date());
     const later_30sec = new Date(new Date().getTime() + 30000).toGMTString();
     headers.push({
 	name: "Set-Cookie",
-	value: `hachette-${policy_signature}=${policy}; Expires=${later_30sec};`
+	value: `hachette-${policy_sig}=${policy_str}; Expires=${later_30sec};`
     });
 
     /*
@@ -87,37 +71,22 @@ function headers_inject(details)
      * These are signed with a time of 0, as it's not clear there is a limit on
      * how long Firefox might retain headers in the cache.
      */
-    let hachette_data = {csp_headers: orig_csp_headers, policy_signature, url};
+    let hachette_data = {csp_headers: orig_csp_headers, policy_sig, url};
     hachette_data = encodeURIComponent(JSON.stringify(hachette_data));
     hachette_header.value = sign_data(hachette_data, 0) + hachette_data;
 
     /* To ensure there is a CSP header if required */
-    if (!allow)
-	headers.push({name: "content-security-policy", value: csp_rule(nonce)});
+    if (!policy.allow)
+	headers.push({
+	    name: "content-security-policy",
+	    value: csp_rule(policy.nonce)
+	});
 
-    return {responseHeaders: headers};
-}
-
-async function start_policy_injector()
-{
-    storage = await get_storage();
-
-    let extra_opts = ["blocking", "responseHeaders"];
-    if (is_chrome)
-	extra_opts.push("extraHeaders");
-
-    browser.webRequest.onHeadersReceived.addListener(
-	headers_inject,
-	{
-	    urls: ["<all_urls>"],
-	    types: ["main_frame", "sub_frame"]
-	},
-	extra_opts
-    );
+    return headers;
 }
 
 /*
  * EXPORTS_START
- * EXPORT start_policy_injector
+ * EXPORT inject_csp_headers
  * EXPORTS_END
  */
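The `content-security-policy` header pushed in the blocking branch is built by `csp_rule()`, which is defined elsewhere in the extension and not shown in this patch. A plausible shape for such a nonce-based rule (a hypothetical stand-in, illustrative only; the project's exact directives may differ):

    /* Hypothetical stand-in for csp_rule(): block every script except
     * those carrying the nonce that is also handed to our injections. */
    function example_csp_rule(nonce) {
        return `script-src 'nonce-${nonce}';`;
    }

With a rule of this form in effect, only `<script>` elements whose nonce attribute matches pass the page's CSP, which is what later allows the stream filter's injected script to execute while page scripts stay blocked.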
diff --git a/background/stream_filter.js b/background/stream_filter.js
new file mode 100644
index 0000000..2dce811
--- /dev/null
+++ b/background/stream_filter.js
@@ -0,0 +1,176 @@
+/**
+ * Hachette modifying a web page using the StreamFilter API
+ *
+ * Copyright (C) 2018 Giorgio Maone
+ * Copyright (C) 2021 Wojtek Kosior
+ * Redistribution terms are gathered in the `copyright' file.
+ *
+ * Derived from `bg/ResponseProcessor.js' and `bg/ResponseMetaData.js'
+ * in LibreJS.
+ */
+
+/*
+ * IMPORTS_START
+ * IMPORT browser
+ * IMPORTS_END
+ */
+
+function validate_encoding(charset)
+{
+    try {
+	new TextDecoder(charset);
+	return charset;
+    } catch(e) {
+	return undefined;
+    }
+}
+
+function is_content_type_header(header)
+{
+    return header.name.toLowerCase().trim() === "content-type";
+}
+
+const charset_reg = /;\s*charset\s*=\s*([\w-]+)/i;
+
+function properties_from_headers(headers)
+{
+    const properties = {};
+
+    for (const header of headers.filter(is_content_type_header)) {
+	const match = charset_reg.exec(header.value);
+	if (match && !properties.detected_charset &&
+	    validate_encoding(match[1]))
+	    properties.detected_charset = match[1];
+
+	if (/html/i.test(header.value))
+	    properties.html = true;
+    }
+
+    return properties;
+}
+
+const UTF8_BOM = [0xef, 0xbb, 0xbf];
+const BOMs = [
+    [UTF8_BOM, "utf-8"],
+    [[0xfe, 0xff], "utf-16be"],
+    [[0xff, 0xfe], "utf-16le"]
+];
+
+function charset_from_BOM(data)
+{
+    for (const [BOM, charset] of BOMs) {
+	if (BOM.reduce((ac, byte, i) => ac && byte === data[i], true))
+	    return charset;
+    }
+
+    return "";
+}
+
+const charset_attrs =
+      ['charset', 'http-equiv="content-type"', 'content*="charset"'];
+const charset_meta_selector =
+      charset_attrs.map(a => `head>meta[${a}]`).join(", ");
+
+function charset_from_meta_tags(doc)
+{
+    for (const meta of doc.querySelectorAll(charset_meta_selector)) {
+	const maybe_charset = meta.getAttribute("charset");
+	if (maybe_charset && validate_encoding(maybe_charset))
+	    return maybe_charset;
+
+	const match = charset_reg.exec(meta.getAttribute("content"));
+	if (match && validate_encoding(match[1]))
+	    return match[1];
+    }
+
+    return undefined;
+}
+
+function create_decoder(properties, data)
+{
+    let charset = charset_from_BOM(data) || properties.detected_charset;
+    if (!charset && data.indexOf(0) !== -1) {
+	console.debug("Warning: zeroes in bytestream, probable cached encoding mismatch. Trying to decode it as UTF-16.",
+		      properties);
+	return new TextDecoder("utf-16be");
+    }
+
+    /* Missing HTTP charset, sniffing in content... */
+    /*
+     * TODO: I recall there is some standard saying how early in the doc the
+     * charset has to be specified. We could process just this part of data.
+     */
+    const text = new TextDecoder("latin1").decode(data, {stream: true});
+    properties.html = properties.html || /html/i.test(text);
+
+    if (properties.html) {
+	const tmp_doc = new DOMParser().parseFromString(text, "text/html");
+	charset = charset_from_meta_tags(tmp_doc);
+    }
+
+    return new TextDecoder(charset || "latin1");
+}
+
+function filter_data(properties, event)
+{
+    const data = new Uint8Array(event.data);
+    let first_chunk = false;
+    if (!properties.decoder) {
+	first_chunk = true;
+	properties.decoder = create_decoder(properties, data);
+	properties.encoder = new TextEncoder();
+	/* Force UTF-8, this is the only encoding we can produce. */
+	properties.filter.write(new Uint8Array(UTF8_BOM));
+    }
+
+    let decoded = properties.decoder.decode(data);
+
+    if (first_chunk) {
+	/*
+	 * HAX! Our content scripts that execute at `document_start' will
+	 * always run before the first script in the document, but under
+	 * Mozilla some `<meta>' tags might already be loaded at that point.
+	 * Here we inject a dummy `<script>' before all other tags so that
+	 * our `document_start' scripts get to run before any `<meta>' tag
+	 * is processed.
+	 */
+	const dummy_script =
+	      `<script data-hachette-deleteme="${properties.policy.nonce}" nonce="${properties.policy.nonce}">null</script>`;
+	const doctype_decl = /^(\s*<!doctype[^<>"']*>)?/i.exec(decoded)[0];
+	decoded = doctype_decl + dummy_script +
+	    decoded.substring(doctype_decl.length);
+    }
+
+    properties.filter.write(properties.encoder.encode(decoded));
+
+    if (properties.decoder.encoding === "utf-8")
+	properties.filter.disconnect();
+}
+
+function apply_stream_filter(details, headers, policy)
+{
+    if (policy.allow)
+	return headers;
+
+    const properties = properties_from_headers(headers);
+    properties.policy = policy;
+
+    properties.filter =
+	browser.webRequest.filterResponseData(details.requestId);
+
+    properties.filter.ondata = event => filter_data(properties, event);
+    properties.filter.onstop = () => properties.filter.close();
+
+    /*
+     * In the future we might consider modifying the headers that specify
+     * encoding. For now we are not yet doing it, though. However, we
+     * prepend the data with UTF-8 BOM which should be enough.
+     */
+    return headers;
+}
+
+/*
+ * EXPORTS_START
+ * EXPORT apply_stream_filter
+ * EXPORTS_END
+ */
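The decoder selection in `create_decoder()` tries, in order: a byte-order mark, the HTTP `Content-Type` charset, and finally `<meta>` tags sniffed from a latin1-decoded prefix, falling back to latin1. One subtlety worth illustrating: a UTF-8 `TextDecoder` consumes a leading BOM by default, so prepending `UTF8_BOM` to the output marks the stream unambiguously without corrupting the visible text (sketch; the byte values are just an example):

    const bytes = new Uint8Array([0xef, 0xbb, 0xbf, 0x68, 0x69]); // BOM + "hi"
    console.log(new TextDecoder("utf-8").decode(bytes)); // "hi", BOM consumed

This is also why `filter_data()` can call `disconnect()` once the source decoder reports `"utf-8"`: the remaining chunks need no transcoding, and detaching lets the browser stream them through untouched.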
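For reference, the lifecycle used by `apply_stream_filter()` follows Firefox's `webRequest.filterResponseData()` API: `ondata` delivers the body as `ArrayBuffer` chunks, `write()` emits bytes toward the page, `close()` ends the response body, and `disconnect()` detaches while passing any remaining data through unchanged. A minimal pass-through example, independent of this patch:

    /* Echo every response chunk unchanged, then finish the body. */
    function passthrough(requestId) {
        const filter = browser.webRequest.filterResponseData(requestId);
        filter.ondata = event => filter.write(new Uint8Array(event.data));
        filter.onstop = () => filter.close();   /* end of response body */
        filter.onerror = () => console.error(filter.error);
    }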