aboutsummaryrefslogtreecommitdiff
path: root/background
diff options
context:
space:
mode:
authorjahoti <jahoti@tilde.team>2021-12-03 00:00:00 +0000
committerjahoti <jahoti@tilde.team>2021-12-03 00:00:00 +0000
commitd16e763e240a2aefe3d4490cddff61893a35a1ea (patch)
tree1e90890a39798f6cd9a1c0886d1234ccc187f5b3 /background
parent591c48a6903bbf324361610f81c628302cae7049 (diff)
parent93dd73600e91eb19e11f5ca57f9429a85cf0150f (diff)
downloadbrowser-extension-d16e763e240a2aefe3d4490cddff61893a35a1ea.tar.gz
browser-extension-d16e763e240a2aefe3d4490cddff61893a35a1ea.zip
Merge branch 'koszko' into jahoti
Diffstat (limited to 'background')
-rw-r--r--background/main.js168
-rw-r--r--background/page_actions_server.js31
-rw-r--r--background/policy_injector.js183
-rw-r--r--background/storage.js121
-rw-r--r--background/storage_server.js4
-rw-r--r--background/stream_filter.js214
6 files changed, 441 insertions, 280 deletions
diff --git a/background/main.js b/background/main.js
index 7c50fd5..358d549 100644
--- a/background/main.js
+++ b/background/main.js
@@ -1,5 +1,7 @@
/**
- * Hachette main background script
+ * This file is part of Haketilo.
+ *
+ * Function: Main background script.
*
* Copyright (C) 2021 Wojtek Kosior
* Redistribution terms are gathered in the `copyright' file.
@@ -9,20 +11,24 @@
* IMPORTS_START
* IMPORT TYPE_PREFIX
* IMPORT get_storage
+ * IMPORT light_storage
* IMPORT start_storage_server
* IMPORT start_page_actions_server
- * IMPORT start_policy_injector
* IMPORT browser
+ * IMPORT is_privileged_url
+ * IMPORT query_best
+ * IMPORT inject_csp_headers
+ * IMPORT apply_stream_filter
+ * IMPORT is_chrome
+ * IMPORT is_mozilla
* IMPORTS_END
*/
start_storage_server();
start_page_actions_server();
-start_policy_injector();
async function init_ext(install_details)
{
- console.log("details:", install_details);
if (install_details.reason != "install")
return;
@@ -44,4 +50,156 @@ async function init_ext(install_details)
browser.runtime.onInstalled.addListener(init_ext);
-console.log("hello, hachette");
+/*
+ * The function below implements a more practical interface for what it does by
+ * wrapping the old query_best() function.
+ */
+function decide_policy_for_url(storage, policy_observable, url)
+{
+ if (storage === undefined)
+ return {allow: false};
+
+ const settings =
+ {allow: policy_observable !== undefined && policy_observable.value};
+
+ const [pattern, queried_settings] = query_best(storage, url);
+
+ if (queried_settings) {
+ settings.payload = queried_settings.components;
+ settings.allow = !!queried_settings.allow && !settings.payload;
+ settings.pattern = pattern;
+ }
+
+ return settings;
+}
+
+let storage;
+let policy_observable = {};
+
+function sanitize_web_page(details)
+{
+ const url = details.url;
+ if (is_privileged_url(details.url))
+ return;
+
+ const policy =
+ decide_policy_for_url(storage, policy_observable, details.url);
+
+ let headers = details.responseHeaders;
+
+ headers = inject_csp_headers(headers, policy);
+
+ let skip = false;
+ for (const header of headers) {
+ if ((header.name.toLowerCase().trim() === "content-disposition" &&
+ /^\s*attachment\s*(;.*)$/i.test(header.value)))
+ skip = true;
+ }
+ skip = skip || (details.statusCode >= 300 && details.statusCode < 400);
+
+ if (!skip) {
+ /* Check for API availability. */
+ if (browser.webRequest.filterResponseData)
+ headers = apply_stream_filter(details, headers, policy);
+ }
+
+ return {responseHeaders: headers};
+}
+
+const request_url_regex = /^[^?]*\?url=(.*)$/;
+const redirect_url_template = browser.runtime.getURL("dummy") + "?settings=";
+
+function synchronously_smuggle_policy(details)
+{
+ /*
+ * Content script will make a synchronous XmlHttpRequest to extension's
+ * `dummy` file to query settings for given URL. We smuggle that
+ * information in query parameter of the URL we redirect to.
+ * A risk of fingerprinting arises if a page with script execution allowed
+ * guesses the dummy file URL and makes an AJAX call to it. It is currently
+ * a problem in ManifestV2 Chromium-family port of Haketilo because Chromium
+ * uses predictable URLs for web-accessible resources. We plan to fix it in
+ * the future ManifestV3 port.
+ */
+ if (details.type !== "xmlhttprequest")
+ return {cancel: true};
+
+ console.debug(`Settings queried using XHR for '${details.url}'.`);
+
+ let policy = {allow: false};
+
+ try {
+ /*
+ * request_url should be of the following format:
+ * <url_for_extension's_dummy_file>?url=<valid_urlencoded_url>
+ */
+ const match = request_url_regex.exec(details.url);
+ const queried_url = decodeURIComponent(match[1]);
+
+ if (details.initiator && !queried_url.startsWith(details.initiator)) {
+ console.warn(`Blocked suspicious query of '${url}' by '${details.initiator}'. This might be the result of page fingerprinting the browser.`);
+ return {cancel: true};
+ }
+
+ policy = decide_policy_for_url(storage, policy_observable, queried_url);
+ } catch (e) {
+ console.warn(`Bad request! Expected ${browser.runtime.getURL("dummy")}?url=<valid_urlencoded_url>. Got ${request_url}. This might be the result of page fingerprinting the browser.`);
+ }
+
+ const encoded_policy = encodeURIComponent(JSON.stringify(policy));
+
+ return {redirectUrl: redirect_url_template + encoded_policy};
+}
+
+const all_types = [
+ "main_frame", "sub_frame", "stylesheet", "script", "image", "font",
+ "object", "xmlhttprequest", "ping", "csp_report", "media", "websocket",
+ "other", "main_frame", "sub_frame"
+];
+
+async function start_webRequest_operations()
+{
+ storage = await get_storage();
+
+ const extra_opts = ["blocking"];
+ if (is_chrome)
+ extra_opts.push("extraHeaders");
+
+ browser.webRequest.onHeadersReceived.addListener(
+ sanitize_web_page,
+ {urls: ["<all_urls>"], types: ["main_frame", "sub_frame"]},
+ extra_opts.concat("responseHeaders")
+ );
+
+ const dummy_url_pattern = browser.runtime.getURL("dummy") + "?url=*";
+ browser.webRequest.onBeforeRequest.addListener(
+ synchronously_smuggle_policy,
+ {urls: [dummy_url_pattern], types: ["xmlhttprequest"]},
+ extra_opts
+ );
+
+ policy_observable = await light_storage.observe_var("default_allow");
+}
+
+start_webRequest_operations();
+
+const code = `\
+console.warn("Hi, I'm Mr Dynamic!");
+
+console.debug("let's see how window.haketilo_exports looks like now");
+
+console.log("haketilo_exports", window.haketilo_exports);
+`
+
+async function test_dynamic_content_scripts()
+{
+ browser.contentScripts.register({
+ "js": [{code}],
+ "matches": ["<all_urls>"],
+ "allFrames": true,
+ "runAt": "document_start"
+});
+}
+
+if (is_mozilla)
+ test_dynamic_content_scripts();
diff --git a/background/page_actions_server.js b/background/page_actions_server.js
index 58a0073..74783c9 100644
--- a/background/page_actions_server.js
+++ b/background/page_actions_server.js
@@ -1,5 +1,7 @@
/**
- * Hachette serving of page actions to content scripts
+ * This file is part of Haketilo.
+ *
+ * Function: Serving page actions to content scripts.
*
* Copyright (C) 2021 Wojtek Kosior
* Redistribution terms are gathered in the `copyright' file.
@@ -8,12 +10,12 @@
/*
* IMPORTS_START
* IMPORT get_storage
+ * IMPORT light_storage
* IMPORT TYPE_PREFIX
* IMPORT CONNECTION_TYPE
* IMPORT browser
* IMPORT listen_for_connection
* IMPORT sha256
- * IMPORT query_best
* IMPORT make_ajax_request
* IMPORTS_END
*/
@@ -21,23 +23,6 @@
var storage;
var handler;
-function send_actions(url, port)
-{
- const [pattern, settings] = query_best(storage, url);
- const repos = storage.get_all(TYPE_PREFIX.REPO);
-
- port.postMessage(["settings", [pattern, settings, repos]]);
-
- if (settings === undefined)
- return;
-
- let components = settings.components;
- let processed_bags = new Set();
-
- if (components !== undefined)
- send_scripts([components], port, processed_bags);
-}
-
// TODO: parallelize script fetching
async function send_scripts(components, port, processed_bags)
{
@@ -109,9 +94,11 @@ async function fetch_remote_script(script_data)
function handle_message(port, message, handler)
{
port.onMessage.removeListener(handler[0]);
- let url = message.url;
- console.log({url});
- send_actions(url, port);
+ console.debug(`Loading payload '${message.payload}'.`);
+
+ const processed_bags = new Set();
+
+ send_scripts([message.payload], port, processed_bags);
}
function new_connection(port)
diff --git a/background/policy_injector.js b/background/policy_injector.js
index 9725e99..b49ec47 100644
--- a/background/policy_injector.js
+++ b/background/policy_injector.js
@@ -1,5 +1,7 @@
/**
- * Hachette injecting policy to page using webRequest
+ * This file is part of Haketilo.
+ *
+ * Function: Injecting policy to page by modifying HTTP headers.
*
* Copyright (C) 2021 Wojtek Kosior
* Copyright (C) 2021 jahoti
@@ -8,186 +10,39 @@
/*
* IMPORTS_START
- * IMPORT TYPE_PREFIX
- * IMPORT get_storage
- * IMPORT browser
- * IMPORT is_chrome
- * IMPORT is_mozilla
- * IMPORT gen_unique
- * IMPORT gen_nonce
- * IMPORT is_privileged_url
- * IMPORT url_item
- * IMPORT url_extract_target
- * IMPORT sign_policy
- * IMPORT query_best
- * IMPORT sanitize_csp_header
+ * IMPORT make_csp_rule
+ * IMPORT csp_header_regex
+ * Re-enable the import below once nonce stuff here is ready
+ * !mport gen_nonce
* IMPORTS_END
*/
-var storage;
-
-const csp_header_names = new Set([
- "content-security-policy",
- "x-webkit-csp",
- "x-content-security-policy"
-]);
-
-/* TODO: variable no longer in use; remove if not needed */
-const unwanted_csp_directives = new Set([
- "report-to",
- "report-uri",
- "script-src",
- "script-src-elem",
- "prefetch-src"
-]);
-
-const report_only = "content-security-policy-report-only";
-
-function url_inject(details)
-{
- if (is_privileged_url(details.url))
- return;
-
- const targets = url_extract_target(details.url);
- if (targets.current)
- return;
-
- /* Redirect; update policy */
- if (targets.policy)
- targets.target = "";
-
- let [pattern, settings] = query_best(storage, targets.base_url);
- /* Defaults */
- if (!pattern)
- settings = {};
-
- const policy = encodeURIComponent(
- JSON.stringify({
- allow: settings.allow,
- nonce: gen_nonce(),
- base_url: targets.base_url
- })
- );
-
- return {
- redirectUrl: [
- targets.base_url,
- '#', sign_policy(policy, new Date()), policy,
- targets.target,
- targets.target2
- ].join("")
- };
-}
-
-function headers_inject(details)
+function inject_csp_headers(headers, policy)
{
- const targets = url_extract_target(details.url);
- /* Block mis-/unsigned requests */
- if (!targets.current)
- return {cancel: true};
-
- let orig_csp_headers = is_chrome ? null : [];
- let headers = [];
- let csp_headers = is_chrome ? headers : [];
+ let csp_headers;
- const rule = `'nonce-${targets.policy.nonce}'`;
- const block = !targets.policy.allow;
+ if (policy.payload) {
+ headers = headers.filter(h => !csp_header_regex.test(h.name));
- for (const header of details.responseHeaders) {
- if (!csp_header_names.has(header)) {
- /* Remove headers that only snitch on us */
- if (header.name.toLowerCase() === report_only && block)
- continue;
- headers.push(header);
+ // TODO: make CSP rules with nonces and facilitate passing them to
+ // content scripts via dynamic content script registration or
+ // synchronous XHRs
- /* If these are the original CSP headers, use them instead */
- /* Test based on url_extract_target() in misc.js */
- if (is_mozilla && header.name === "x-orig-csp") {
- let index = header.value.indexOf('%5B');
- if (index === -1)
- continue;
-
- let sig = header.value.substring(0, index);
- let data = header.value.substring(index);
- if (sig !== sign_policy(data, 0))
- continue;
-
- /* Confirmed- it's the originals, smuggled in! */
- try {
- data = JSON.parse(decodeURIComponent(data));
- } catch (e) {
- /* This should not be reached -
- it's our self-produced valid JSON. */
- console.log("Unexpected internal error - invalid JSON smuggled!", e);
- }
-
- orig_csp_headers = csp_headers = null;
- for (const header of data)
- headers.push(sanitize_csp_header(header, rule, block));
- }
- } else if (is_chrome || !orig_csp_headers) {
- csp_headers.push(sanitize_csp_header(header, rule, block));
- if (is_mozilla)
- orig_csp_headers.push(header);
- }
- }
-
- if (orig_csp_headers) {
- /** Smuggle in the original CSP headers for future use.
- * These are signed with a time of 0, as it's not clear there
- * is a limit on how long Firefox might retain these headers in
- * the cache.
- */
- orig_csp_headers = encodeURIComponent(JSON.stringify(orig_csp_headers));
- headers.push({
- name: "x-orig-csp",
- value: sign_policy(orig_csp_headers, 0) + orig_csp_headers
- });
-
- headers = headers.concat(csp_headers);
+ // policy.nonce = gen_nonce();
}
- /* To ensure there is a CSP header if required */
- if (block) {
+ if (!policy.allow && (policy.nonce || !policy.payload)) {
headers.push({
name: "content-security-policy",
- value: `script-src ${rule}; script-src-elem ${rule}; ` +
- "script-src-attr 'none'; prefetch-src 'none';"
+ value: make_csp_rule(policy)
});
}
- return {responseHeaders: headers};
-}
-
-async function start_policy_injector()
-{
- storage = await get_storage();
-
- let extra_opts = ["blocking", "responseHeaders"];
- if (is_chrome)
- extra_opts.push("extraHeaders");
-
- browser.webRequest.onBeforeRequest.addListener(
- url_inject,
- {
- urls: ["<all_urls>"],
- types: ["main_frame", "sub_frame"]
- },
- ["blocking"]
- );
-
- browser.webRequest.onHeadersReceived.addListener(
- headers_inject,
- {
- urls: ["<all_urls>"],
- types: ["main_frame", "sub_frame"]
- },
- extra_opts
- );
+ return headers;
}
/*
* EXPORTS_START
- * EXPORT start_policy_injector
+ * EXPORT inject_csp_headers
* EXPORTS_END
*/
diff --git a/background/storage.js b/background/storage.js
index c2160b0..a4e626a 100644
--- a/background/storage.js
+++ b/background/storage.js
@@ -1,5 +1,7 @@
/**
- * Hachette storage manager
+ * This file is part of Haketilo.
+ *
+ * Function: Storage manager.
*
* Copyright (C) 2021 Wojtek Kosior
* Redistribution terms are gathered in the `copyright' file.
@@ -7,7 +9,7 @@
/*
* IMPORTS_START
- * IMPORT TYPE_PREFIX
+ * IMPORT raw_storage
* IMPORT TYPE_NAME
* IMPORT list_prefixes
* IMPORT make_lock
@@ -15,76 +17,17 @@
* IMPORT unlock
* IMPORT make_once
* IMPORT browser
- * IMPORT is_chrome
* IMPORT observables
* IMPORTS_END
*/
var exports = {};
-/* We're yet to decide how to handle errors... */
-
-/* Here are some basic wrappers for storage API functions */
-
-async function get(key)
-{
- try {
- /* Fix for fact that Chrome does not use promises here */
- let promise = is_chrome ?
- new Promise((resolve, reject) =>
- chrome.storage.local.get(key,
- val => resolve(val))) :
- browser.storage.local.get(key);
-
- return (await promise)[key];
- } catch (e) {
- console.log(e);
- }
-}
-
-async function set(key, value)
-{
- try {
- return browser.storage.local.set({[key]: value});
- } catch (e) {
- console.log(e);
- }
-}
-
-async function setn(keys_and_values)
-{
- let obj = Object();
- while (keys_and_values.length > 1) {
- let value = keys_and_values.pop();
- let key = keys_and_values.pop();
- obj[key] = value;
- }
-
- try {
- return browser.storage.local.set(obj);
- } catch (e) {
- console.log(e);
- }
-}
-
-async function set_var(name, value)
-{
- return set(TYPE_PREFIX.VAR + name, value);
-}
-
-async function get_var(name)
-{
- return get(TYPE_PREFIX.VAR + name);
-}
-
-/*
- * A special case of persisted variable is one that contains list
- * of items.
- */
+/* A special case of persisted variable is one that contains list of items. */
async function get_list_var(name)
{
- let list = await get_var(name);
+ let list = await raw_storage.get_var(name);
return list === undefined ? [] : list;
}
@@ -97,7 +40,7 @@ async function list(prefix)
let map = new Map();
for (let item of await get_list_var(name))
- map.set(item, await get(prefix + item));
+ map.set(item, await raw_storage.get(prefix + item));
return {map, prefix, name, observable: observables.make(),
lock: make_lock()};
@@ -175,19 +118,19 @@ async function set_item(item, value, list)
}
async function _set_item(item, value, list)
{
- let key = list.prefix + item;
- let old_val = list.map.get(item);
+ const key = list.prefix + item;
+ const old_val = list.map.get(item);
+ const set_obj = {[key]: value};
if (old_val === undefined) {
- let items = list_items(list);
+ const items = list_items(list);
items.push(item);
- await setn([key, value, "_" + list.name, items]);
- } else {
- await set(key, value);
+ set_obj["_" + list.name] = items;
}
- list.map.set(item, value)
+ await raw_storage.set(set_obj);
+ list.map.set(item, value);
- let change = {
+ const change = {
prefix : list.prefix,
item,
old_val,
@@ -212,20 +155,21 @@ async function remove_item(item, list)
}
async function _remove_item(item, list)
{
- let old_val = list.map.get(item);
+ const old_val = list.map.get(item);
if (old_val === undefined)
return;
- let key = list.prefix + item;
- let items = list_items(list);
- let index = items.indexOf(item);
+ const items = list_items(list);
+ const index = items.indexOf(item);
items.splice(index, 1);
- await setn([key, undefined, "_" + list.name, items]);
-
+ await raw_storage.set({
+ [list.prefix + item]: undefined,
+ ["_" + list.name]: items
+ });
list.map.delete(item);
- let change = {
+ const change = {
prefix : list.prefix,
item,
old_val,
@@ -247,11 +191,11 @@ async function replace_item(old_item, new_item, list, new_val=undefined)
}
async function _replace_item(old_item, new_item, list, new_val=undefined)
{
- let old_val = list.map.get(old_item);
+ const old_val = list.map.get(old_item);
if (new_val === undefined) {
if (old_val === undefined)
return;
- new_val = old_val
+ new_val = old_val;
} else if (new_val === old_val && new_item === old_item) {
return old_val;
}
@@ -261,17 +205,18 @@ async function _replace_item(old_item, new_item, list, new_val=undefined)
return old_val;
}
- let new_key = list.prefix + new_item;
- let old_key = list.prefix + old_item;
- let items = list_items(list);
- let index = items.indexOf(old_item);
+ const items = list_items(list);
+ const index = items.indexOf(old_item);
items[index] = new_item;
- await setn([old_key, undefined, new_key, new_val,
- "_" + list.name, items]);
+ await raw_storage.set({
+ [list.prefix + old_item]: undefined,
+ [list.prefix + new_item]: new_val,
+ ["_" + list.name]: items
+ });
list.map.delete(old_item);
- let change = {
+ const change = {
prefix : list.prefix,
item : old_item,
old_val,
diff --git a/background/storage_server.js b/background/storage_server.js
index 2252eb5..73126d4 100644
--- a/background/storage_server.js
+++ b/background/storage_server.js
@@ -1,5 +1,7 @@
/**
- * Hachette storage through connection (server side)
+ * This file is part of Haketilo.
+ *
+ * Function: Storage through messages (server side).
*
* Copyright (C) 2021 Wojtek Kosior
* Redistribution terms are gathered in the `copyright' file.
diff --git a/background/stream_filter.js b/background/stream_filter.js
new file mode 100644
index 0000000..e5d124c
--- /dev/null
+++ b/background/stream_filter.js
@@ -0,0 +1,214 @@
+/**
+ * This file is part of Haketilo.
+ *
+ * Function: Modifying a web page using the StreamFilter API.
+ *
+ * Copyright (C) 2018 Giorgio Maone <giorgio@maone.net>
+ * Copyright (C) 2021 Wojtek Kosior
+ * Redistribution terms are gathered in the `copyright' file.
+ *
+ * Derived from `bg/ResponseProcessor.js' and `bg/ResponseMetaData.js'
+ * in LibreJS.
+ */
+
+/*
+ * IMPORTS_START
+ * IMPORT browser
+ * IMPORT csp_header_regex
+ * IMPORTS_END
+ */
+
+function validate_encoding(charset)
+{
+ try {
+ new TextDecoder();
+ return charset;
+ } catch(e) {
+ return undefined;
+ }
+}
+
+function is_content_type_header(header)
+{
+ header.name.toLowerCase().trim() === "content-type";
+}
+
+const charset_reg = /;\s*charset\s*=\s*([\w-]+)/i;
+
+function properties_from_headers(headers)
+{
+ const properties = {};
+
+ for (const header of headers.filter(is_content_type_header)) {
+ const match = charset_reg.exec(header.value);
+ if (!properties.detected_charset && validate_encoding(match[1]))
+ properties.detected_charset = match[1];
+
+ if (/html/i.test(header.value))
+ properties.html = true;
+ }
+
+ return properties;
+}
+
+const UTF8_BOM = [0xef, 0xbb, 0xbf];
+const BOMs = [
+ [UTF8_BOM, "utf-8"],
+ [[0xfe, 0xff], "utf-16be"],
+ [[0xff, 0xfe], "utf-16le"]
+];
+
+function charset_from_BOM(data)
+{
+ for (const [BOM, charset] of BOMs) {
+ if (BOM.reduce((ac, byte, i) => ac && byte === data[i], true))
+ return charset;
+ }
+
+ return "";
+}
+
+const charset_attrs =
+ ['charset', 'http-equiv="content-type"', 'content*="charset"'];
+const charset_meta_selector =
+ charset_attrs.map(a => `head>meta[${a}]`).join(", ");
+
+function charset_from_meta_tags(doc)
+{
+ for (const meta of doc.querySelectorAll(charset_meta_selector)) {
+ const maybe_charset = meta.getAttribute("charset");
+ if (maybe_charset && validate_encoding(maybe_charset))
+ return maybe_charset;
+
+ const match = charset_reg.exec(meta.getAttribute("content"));
+ if (match && validate_encoding(match[1]))
+ return match[1];
+ }
+
+ return undefined;
+}
+
+function create_decoder(properties, data)
+{
+ let charset = charset_from_BOM(data) || properties.detected_charset;
+ if (!charset && data.indexOf(0) !== -1) {
+ console.debug("Warning: zeroes in bytestream, probable cached encoding mismatch. Trying to decode it as UTF-16.",
+ properties);
+ return new TextDecoder("utf-16be");
+ }
+
+ /* Missing HTTP charset, sniffing in content... */
+ /*
+ * TODO: I recall there is some standard saying how early in the doc the
+ * charset has to be specified. We could process just this part of data.
+ */
+ const text = new TextDecoder("latin1").decode(data, {stream: true});
+ properties.html = properties.html || /html/i.test(text);
+
+ if (properties.html) {
+ const tmp_doc = new DOMParser().parseFromString(text, "text/html");
+ charset = charset_from_meta_tags(tmp_doc);
+ }
+
+ return new TextDecoder(charset || "latin1");
+}
+
+function may_define_csp_rules(html)
+{
+ const doc = new DOMParser().parseFromString(html, "text/html");
+
+ for (const meta of doc.querySelectorAll("head>meta[http-equiv]")) {
+ if (csp_header_regex.test(meta.httpEquiv) && meta.content)
+ return true;
+ }
+
+ /*
+ * Even if no naughty `<meta>' tags were found, subsequent chunk of HTML
+ * data could add some. Before we return `false' we need to be sure we
+ * reached the start of `<body>' where `<meta>' tags are no longer valid.
+ */
+
+ if (doc.documentElement.nextSibling || doc.body.nextSibling ||
+ doc.body.childNodes.length > 1)
+ return false;
+
+ if (!doc.body.firstChild)
+ return true;
+
+ if (doc.body.firstChild.nodeName !== "#text")
+ return false;
+
+ return /^(<\/|&#|.)$/.test(doc.body.firstChild.wholeText);
+}
+
+function filter_data(properties, event)
+{
+ const data = new Uint8Array(event.data);
+ let first_chunk = false;
+ if (!properties.decoder) {
+ first_chunk = true;
+ properties.decoder = create_decoder(properties, data);
+ properties.encoder = new TextEncoder();
+ }
+
+ let decoded = properties.decoder.decode(data);
+
+ /* Force UTF-8, this is the only encoding we can produce. */
+ if (first_chunk)
+ properties.filter.write(new Uint8Array(UTF8_BOM));
+
+ if (first_chunk && may_define_csp_rules(decoded)) {
+ /*
+ * HAX! Our content scripts that execute at `document_start' will always
+ * run before the first script in the document, but under Mozilla some
+ * `<meta>' tags might already be loaded at that point. Here we inject a
+ * dummy `<script>' at the beginning (before any `<meta>' tags) that
+ * will force `document_start' to happen earlier. This way our content
+ * scripts will be able to sanitize `http-equiv' tags with CSP rules
+ * that would otherwise stop our injected scripts from executing.
+ *
+ * As we want to only process HTML files that happen to have naughty
+ * `<meta>' tags in `<head>', we use a DOMParser-based heuristic in
+     * `may_define_csp_rules()'. We don't do any additional MIME sniffing as it
+ * is too unreliable (and our heuristic will likely mark non-HTML files
+ * as harmless anyway).
+ */
+
+ const dummy_script = `<script>null</script>`;
+ const doctype_decl = /^(\s*<!doctype[^<>"']*>)?/i.exec(decoded)[0];
+ decoded = doctype_decl + dummy_script +
+ decoded.substring(doctype_decl.length);
+ }
+
+ properties.filter.write(properties.encoder.encode(decoded));
+
+ if (properties.decoder.encoding === "utf-8")
+ properties.filter.disconnect();
+}
+
+function apply_stream_filter(details, headers, policy)
+{
+ if (!policy.payload)
+ return headers;
+
+ const properties = properties_from_headers(headers);
+
+ properties.filter =
+ browser.webRequest.filterResponseData(details.requestId);
+
+ properties.filter.ondata = event => filter_data(properties, event);
+ properties.filter.onstop = () => properties.filter.close();
+
+ /*
+ * In the future we might consider modifying the headers that specify
+ * encoding. For now we are not yet doing it, though. However, we
+ * prepend the data with UTF-8 BOM which should be enough.
+ */
+ return headers;
+}
+
+/*
+ * EXPORTS_START
+ * EXPORT apply_stream_filter
+ * EXPORTS_END
+ */