diff options
author | Wojtek Kosior <koszko@koszko.org> | 2021-09-04 12:32:02 +0200 |
---|---|---|
committer | Wojtek Kosior <koszko@koszko.org> | 2021-09-04 12:32:13 +0200 |
commit | e48e20de13de78a46cd1dec47ef609eb156ca839 (patch) | |
tree | 35b1e9f467bff9b7b717b894088aed5c0aa9e399 /background/stream_filter.js | |
parent | f0951bced86fe20cb4ae4d353fa85fb97c2ab454 (diff) | |
parent | 03d041ce03f630d2a28494946ae71588e36d257d (diff) | |
download | browser-extension-e48e20de13de78a46cd1dec47ef609eb156ca839.tar.gz browser-extension-e48e20de13de78a46cd1dec47ef609eb156ca839.zip |
merge changes before version 0.1
Diffstat (limited to 'background/stream_filter.js')
-rw-r--r-- | background/stream_filter.js | 47 |
1 files changed, 43 insertions, 4 deletions
diff --git a/background/stream_filter.js b/background/stream_filter.js index 2dce811..96b6132 100644 --- a/background/stream_filter.js +++ b/background/stream_filter.js @@ -12,6 +12,7 @@ /* * IMPORTS_START * IMPORT browser + * IMPORT is_csp_header_name * IMPORTS_END */ @@ -110,6 +111,35 @@ function create_decoder(properties, data) return new TextDecoder(charset || "latin1"); } +function may_define_csp_rules(html) +{ + const doc = new DOMParser().parseFromString(html, "text/html"); + + for (const meta of doc.querySelectorAll("head>meta[http-equiv]")) { + if (is_csp_header_name(meta.getAttribute("http-equiv"), true) && + meta.content) + return true; + } + + /* + * Even if no naughty `<meta>' tags were found, subsequent chunk of HTML + * data could add some. Before we return `false' we need to be sure we + * reached the start of `<body>' where `<meta>' tags are no longer valid. + */ + + if (doc.documentElement.nextSibling || doc.body.nextSibling || + doc.body.childNodes.length > 1) + return false; + + if (!doc.body.firstChild) + return true; + + if (doc.body.firstChild.nodeName !== "#text") + return false; + + return /^(<\/|&#|.)$/.test(doc.body.firstChild.wholeText); +} + function filter_data(properties, event) { const data = new Uint8Array(event.data); @@ -118,13 +148,15 @@ function filter_data(properties, event) first_chunk = true; properties.decoder = create_decoder(properties, data); properties.encoder = new TextEncoder(); - /* Force UTF-8, this is the only encoding we can produce. */ - properties.filter.write(new Uint8Array(UTF8_BOM)); } let decoded = properties.decoder.decode(data); - if (first_chunk) { + /* Force UTF-8, this is the only encoding we can produce. */ + if (first_chunk) + properties.filter.write(new Uint8Array(UTF8_BOM)); + + if (first_chunk && may_define_csp_rules(decoded)) { /* * HAX! 
Our content scripts that execute at `document_start' will always * run before the first script in the document, but under Mozilla some @@ -133,7 +165,14 @@ function filter_data(properties, event) * will force `document_start' to happen earlier. This way our content * scripts will be able to sanitize `http-equiv' tags with CSP rules * that would otherwise stop our injected scripts from executing. + * + * As we want to only process HTML files that happen to have naughty + * `<meta>' tags in `<head>', we use a DOMParser-based heuristic in + * `may_define_csp_rules()'. We don't do any additional MIME sniffing as it + * is too unreliable (and our heuristic will likely mark non-HTML files + * as harmless anyway). */ + const dummy_script = `<script data-hachette-deleteme="${properties.policy.nonce}" nonce="${properties.policy.nonce}">null</script>`; const doctype_decl = /^(\s*<!doctype[^<>"']*>)?/i.exec(decoded)[0]; @@ -149,7 +188,7 @@ function filter_data(properties, event) function apply_stream_filter(details, headers, policy) { - if (policy.allow) + if (!policy.has_payload) return headers; const properties = properties_from_headers(headers); |