From 03d041ce03f630d2a28494946ae71588e36d257d Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Fri, 3 Sep 2021 18:46:26 +0200 Subject: only apply stream filter modifications when reasonably necessary --- background/main.js | 5 +++-- background/stream_filter.js | 47 +++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/background/main.js b/background/main.js index 85f8ce8..2e9fa50 100644 --- a/background/main.js +++ b/background/main.js @@ -57,9 +57,10 @@ function on_headers_received(details) return; const [pattern, settings] = query_best(storage, details.url); - const allow = !!(settings && settings.allow); + const has_payload = !!(settings && settings.components); + const allow = !has_payload && !!(settings && settings.allow); const nonce = gen_nonce(); - const policy = {allow, url, nonce}; + const policy = {allow, url, nonce, has_payload}; let headers = details.responseHeaders; let skip = false; diff --git a/background/stream_filter.js b/background/stream_filter.js index 2dce811..96b6132 100644 --- a/background/stream_filter.js +++ b/background/stream_filter.js @@ -12,6 +12,7 @@ /* * IMPORTS_START * IMPORT browser + * IMPORT is_csp_header_name * IMPORTS_END */ @@ -110,6 +111,35 @@ function create_decoder(properties, data) return new TextDecoder(charset || "latin1"); } +function may_define_csp_rules(html) +{ + const doc = new DOMParser().parseFromString(html, "text/html"); + + for (const meta of doc.querySelectorAll("head>meta[http-equiv]")) { + if (is_csp_header_name(meta.getAttribute("http-equiv"), true) && + meta.content) + return true; + } + + /* + * Even if no naughty `<meta>' tags were found, subsequent chunk of HTML + * data could add some. Before we return `false' we need to be sure we + * reached the start of `<body>' where `<meta>' tags are no longer valid. 
+ */ + + if (doc.documentElement.nextSibling || doc.body.nextSibling || + doc.body.childNodes.length > 1) + return false; + + if (!doc.body.firstChild) + return true; + + if (doc.body.firstChild.nodeName !== "#text") + return false; + + return /^(<\/|&#|.)$/.test(doc.body.firstChild.wholeText); +} + function filter_data(properties, event) { const data = new Uint8Array(event.data); @@ -118,13 +148,15 @@ function filter_data(properties, event) first_chunk = true; properties.decoder = create_decoder(properties, data); properties.encoder = new TextEncoder(); - /* Force UTF-8, this is the only encoding we can produce. */ - properties.filter.write(new Uint8Array(UTF8_BOM)); } let decoded = properties.decoder.decode(data); - if (first_chunk) { + /* Force UTF-8, this is the only encoding we can produce. */ + if (first_chunk) + properties.filter.write(new Uint8Array(UTF8_BOM)); + + if (first_chunk && may_define_csp_rules(decoded)) { /* * HAX! Our content scripts that execute at `document_start' will always * run before the first script in the document, but under Mozilla some @@ -133,7 +165,14 @@ function filter_data(properties, event) * will force `document_start' to happen earlier. This way our content * scripts will be able to sanitize `http-equiv' tags with CSP rules * that would otherwise stop our injected scripts from executing. + * + * As we want to only process HTML files that happen to have naughty + * `<meta>' tags in `<head>', we use a DOMParser-based heuristic in + * `may_define_csp_rules()'. We don't do any additional MIME sniffing as it + * is too unreliable (and our heuristic will likely mark non-HTML files + * as harmless anyway). */ + const dummy_script = ``; const doctype_decl = /^(\s*"']*>)?/i.exec(decoded)[0]; @@ -149,7 +188,7 @@ function filter_data(properties, event) function apply_stream_filter(details, headers, policy) { - if (policy.allow) + if (!policy.has_payload) return headers; const properties = properties_from_headers(headers); -- cgit v1.2.3