aboutsummaryrefslogtreecommitdiff
path: root/background/stream_filter.js
diff options
context:
space:
mode:
authorWojtek Kosior <koszko@koszko.org>2021-09-04 12:32:02 +0200
committerWojtek Kosior <koszko@koszko.org>2021-09-04 12:32:13 +0200
commite48e20de13de78a46cd1dec47ef609eb156ca839 (patch)
tree35b1e9f467bff9b7b717b894088aed5c0aa9e399 /background/stream_filter.js
parentf0951bced86fe20cb4ae4d353fa85fb97c2ab454 (diff)
parent03d041ce03f630d2a28494946ae71588e36d257d (diff)
downloadbrowser-extension-e48e20de13de78a46cd1dec47ef609eb156ca839.tar.gz
browser-extension-e48e20de13de78a46cd1dec47ef609eb156ca839.zip
merge changes before version 0.1
Diffstat (limited to 'background/stream_filter.js')
-rw-r--r--background/stream_filter.js47
1 files changed, 43 insertions, 4 deletions
diff --git a/background/stream_filter.js b/background/stream_filter.js
index 2dce811..96b6132 100644
--- a/background/stream_filter.js
+++ b/background/stream_filter.js
@@ -12,6 +12,7 @@
/*
* IMPORTS_START
* IMPORT browser
+ * IMPORT is_csp_header_name
* IMPORTS_END
*/
@@ -110,6 +111,35 @@ function create_decoder(properties, data)
return new TextDecoder(charset || "latin1");
}
+/*
+ * Heuristically decide whether the given piece of HTML text may define CSP
+ * rules through `<meta http-equiv>' tags.
+ *
+ * `html' is parsed as "text/html" with DOMParser and the resulting document
+ * is inspected. Returns `true' when a matching `<meta>' tag was found or when
+ * the parsed data does not yet prove that no such tag can follow; returns
+ * `false' otherwise.
+ */
+function may_define_csp_rules(html)
+{
+ const doc = new DOMParser().parseFromString(html, "text/html");
+
+ /*
+ * Only `<meta>' elements that are direct children of `<head>' and carry an
+ * `http-equiv' attribute are considered. A tag counts when
+ * `is_csp_header_name()' accepts its `http-equiv' value and its `content'
+ * is non-empty. NOTE(review): the meaning of the second argument (`true')
+ * is defined by the imported helper and is not visible here.
+ */
+ for (const meta of doc.querySelectorAll("head>meta[http-equiv]")) {
+ if (is_csp_header_name(meta.getAttribute("http-equiv"), true) &&
+ meta.content)
+ return true;
+ }
+
+ /*
+ * Even if no naughty `<meta>' tags were found, subsequent chunk of HTML
+ * data could add some. Before we return `false' we need to be sure we
+ * reached the start of `<body>' where `<meta>' tags are no longer valid.
+ */
+
+ /*
+ * A node following the root element, a node following `<body>', or more
+ * than one child node inside `<body>' is treated as proof that body
+ * content has already started.
+ */
+ if (doc.documentElement.nextSibling || doc.body.nextSibling ||
+ doc.body.childNodes.length > 1)
+ return false;
+
+ /* An empty `<body>' proves nothing yet, so stay on the safe side. */
+ if (!doc.body.firstChild)
+ return true;
+
+ /* The single child of `<body>' is something other than a text node. */
+ if (doc.body.firstChild.nodeName !== "#text")
+ return false;
+
+ /*
+ * `<body>' holds a single text node. Report `true' only when its whole
+ * text is exactly `</', exactly `&#', or exactly one character other than
+ * a line terminator. Presumably such a short fragment may be markup that
+ * was cut off at the end of the data - TODO confirm the intent.
+ */
+ return /^(<\/|&#|.)$/.test(doc.body.firstChild.wholeText);
+}
+
function filter_data(properties, event)
{
const data = new Uint8Array(event.data);
@@ -118,13 +148,15 @@ function filter_data(properties, event)
first_chunk = true;
properties.decoder = create_decoder(properties, data);
properties.encoder = new TextEncoder();
- /* Force UTF-8, this is the only encoding we can produce. */
- properties.filter.write(new Uint8Array(UTF8_BOM));
}
let decoded = properties.decoder.decode(data);
- if (first_chunk) {
+ /* Force UTF-8, this is the only encoding we can produce. */
+ if (first_chunk)
+ properties.filter.write(new Uint8Array(UTF8_BOM));
+
+ if (first_chunk && may_define_csp_rules(decoded)) {
/*
* HAX! Our content scripts that execute at `document_start' will always
* run before the first script in the document, but under Mozilla some
@@ -133,7 +165,14 @@ function filter_data(properties, event)
* will force `document_start' to happen earlier. This way our content
* scripts will be able to sanitize `http-equiv' tags with CSP rules
* that would otherwise stop our injected scripts from executing.
+ *
+ * As we want to only process HTML files that happen to have naughty
+ * `<meta>' tags in `<head>', we use a DOMParser-based heuristic in
+ * `may_define_csp_rules()'. We don't do any additional MIME sniffing as it
+ * is too unreliable (and our heuristic will likely mark non-HTML files
+ * as harmless anyway).
*/
+
const dummy_script =
`<script data-hachette-deleteme="${properties.policy.nonce}" nonce="${properties.policy.nonce}">null</script>`;
const doctype_decl = /^(\s*<!doctype[^<>"']*>)?/i.exec(decoded)[0];
@@ -149,7 +188,7 @@ function filter_data(properties, event)
function apply_stream_filter(details, headers, policy)
{
- if (policy.allow)
+ if (!policy.has_payload)
return headers;
const properties = properties_from_headers(headers);