aboutsummaryrefslogtreecommitdiff
path: root/background
diff options
context:
space:
mode:
authorjahoti <jahoti@tilde.team>2021-12-03 00:00:00 +0000
committerjahoti <jahoti@tilde.team>2021-12-03 00:00:00 +0000
commitd16e763e240a2aefe3d4490cddff61893a35a1ea (patch)
tree1e90890a39798f6cd9a1c0886d1234ccc187f5b3 /background
parent591c48a6903bbf324361610f81c628302cae7049 (diff)
parent93dd73600e91eb19e11f5ca57f9429a85cf0150f (diff)
downloadbrowser-extension-d16e763e240a2aefe3d4490cddff61893a35a1ea.tar.gz
browser-extension-d16e763e240a2aefe3d4490cddff61893a35a1ea.zip
Merge branch 'koszko' into jahoti
Diffstat (limited to 'background')
-rw-r--r--background/main.js168
-rw-r--r--background/page_actions_server.js31
-rw-r--r--background/policy_injector.js183
-rw-r--r--background/storage.js121
-rw-r--r--background/storage_server.js4
-rw-r--r--background/stream_filter.js214
6 files changed, 441 insertions, 280 deletions
diff --git a/background/main.js b/background/main.js
index 7c50fd5..358d549 100644
--- a/background/main.js
+++ b/background/main.js
@@ -1,5 +1,7 @@
/**
- * Hachette main background script
+ * This file is part of Haketilo.
+ *
+ * Function: Main background script.
*
* Copyright (C) 2021 Wojtek Kosior
* Redistribution terms are gathered in the `copyright' file.
@@ -9,20 +11,24 @@
* IMPORTS_START
* IMPORT TYPE_PREFIX
* IMPORT get_storage
+ * IMPORT light_storage
* IMPORT start_storage_server
* IMPORT start_page_actions_server
- * IMPORT start_policy_injector
* IMPORT browser
+ * IMPORT is_privileged_url
+ * IMPORT query_best
+ * IMPORT inject_csp_headers
+ * IMPORT apply_stream_filter
+ * IMPORT is_chrome
+ * IMPORT is_mozilla
* IMPORTS_END
*/
start_storage_server();
start_page_actions_server();
-start_policy_injector();
async function init_ext(install_details)
{
- console.log("details:", install_details);
if (install_details.reason != "install")
return;
@@ -44,4 +50,156 @@ async function init_ext(install_details)
browser.runtime.onInstalled.addListener(init_ext);
-console.log("hello, hachette");
+/*
+ * The function below implements a more practical interface for what it does by
+ * wrapping the old query_best() function.
+ */
+function decide_policy_for_url(storage, policy_observable, url)
+{
+ if (storage === undefined)
+ return {allow: false};
+
+ const settings =
+ {allow: policy_observable !== undefined && policy_observable.value};
+
+ const [pattern, queried_settings] = query_best(storage, url);
+
+ if (queried_settings) {
+ settings.payload = queried_settings.components;
+ settings.allow = !!queried_settings.allow && !settings.payload;
+ settings.pattern = pattern;
+ }
+
+ return settings;
+}
+
+let storage;
+let policy_observable = {};
+
+function sanitize_web_page(details)
+{
+ const url = details.url;
+ if (is_privileged_url(details.url))
+ return;
+
+ const policy =
+ decide_policy_for_url(storage, policy_observable, details.url);
+
+ let headers = details.responseHeaders;
+
+ headers = inject_csp_headers(headers, policy);
+
+ let skip = false;
+ for (const header of headers) {
+ if ((header.name.toLowerCase().trim() === "content-disposition" &&
+ /^\s*attachment\s*(;.*)$/i.test(header.value)))
+ skip = true;
+ }
+ skip = skip || (details.statusCode >= 300 && details.statusCode < 400);
+
+ if (!skip) {
+ /* Check for API availability. */
+ if (browser.webRequest.filterResponseData)
+ headers = apply_stream_filter(details, headers, policy);
+ }
+
+ return {responseHeaders: headers};
+}
+
+const request_url_regex = /^[^?]*\?url=(.*)$/;
+const redirect_url_template = browser.runtime.getURL("dummy") + "?settings=";
+
+function synchronously_smuggle_policy(details)
+{
+ /*
+ * Content script will make a synchronous XmlHttpRequest to extension's
+ * `dummy` file to query settings for given URL. We smuggle that
+ * information in query parameter of the URL we redirect to.
+ * A risk of fingerprinting arises if a page with script execution allowed
+ * guesses the dummy file URL and makes an AJAX call to it. It is currently
+ * a problem in ManifestV2 Chromium-family port of Haketilo because Chromium
+ * uses predictable URLs for web-accessible resources. We plan to fix it in
+ * the future ManifestV3 port.
+ */
+ if (details.type !== "xmlhttprequest")
+ return {cancel: true};
+
+ console.debug(`Settings queried using XHR for '${details.url}'.`);
+
+ let policy = {allow: false};
+
+ try {
+ /*
+ * request_url should be of the following format:
+ * <url_for_extension's_dummy_file>?url=<valid_urlencoded_url>
+ */
+ const match = request_url_regex.exec(details.url);
+ const queried_url = decodeURIComponent(match[1]);
+
+ if (details.initiator && !queried_url.startsWith(details.initiator)) {
+ console.warn(`Blocked suspicious query of '${url}' by '${details.initiator}'. This might be the result of page fingerprinting the browser.`);
+ return {cancel: true};
+ }
+
+ policy = decide_policy_for_url(storage, policy_observable, queried_url);
+ } catch (e) {
+ console.warn(`Bad request! Expected ${browser.runtime.getURL("dummy")}?url=<valid_urlencoded_url>. Got ${request_url}. This might be the result of page fingerprinting the browser.`);
+ }
+
+ const encoded_policy = encodeURIComponent(JSON.stringify(policy));
+
+ return {redirectUrl: redirect_url_template + encoded_policy};
+}
+
+const all_types = [
+ "main_frame", "sub_frame", "stylesheet", "script", "image", "font",
+ "object", "xmlhttprequest", "ping", "csp_report", "media", "websocket",
+ "other", "main_frame", "sub_frame"
+];
+
+async function start_webRequest_operations()
+{
+ storage = await get_storage();
+
+ const extra_opts = ["blocking"];
+ if (is_chrome)
+ extra_opts.push("extraHeaders");
+
+ browser.webRequest.onHeadersReceived.addListener(
+ sanitize_web_page,
+ {urls: ["<all_urls>"], types: ["main_frame", "sub_frame"]},
+ extra_opts.concat("responseHeaders")
+ );
+
+ const dummy_url_pattern = browser.runtime.getURL("dummy") + "?url=*";
+ browser.webRequest.onBeforeRequest.addListener(
+ synchronously_smuggle_policy,
+ {urls: [dummy_url_pattern], types: ["xmlhttprequest"]},
+ extra_opts
+ );
+
+ policy_observable = await light_storage.observe_var("default_allow");
+}
+
+start_webRequest_operations();
+
+const code = `\
+console.warn("Hi, I'm Mr Dynamic!");
+
+console.debug("let's see how window.haketilo_exports looks like now");
+
+console.log("haketilo_exports", window.haketilo_exports);
+`
+
+async function test_dynamic_content_scripts()
+{
+ browser.contentScripts.register({
+ "js": [{code}],
+ "matches": ["<all_urls>"],
+ "allFrames": true,
+ "runAt": "document_start"
+});
+}
+
+if (is_mozilla)
+ test_dynamic_content_scripts();
diff --git a/background/page_actions_server.js b/background/page_actions_server.js
index 58a0073..74783c9 100644
--- a/background/page_actions_server.js
+++ b/background/page_actions_server.js
@@ -1,5 +1,7 @@
/**
- * Hachette serving of page actions to content scripts
+ * This file is part of Haketilo.
+ *
+ * Function: Serving page actions to content scripts.
*
* Copyright (C) 2021 Wojtek Kosior
* Redistribution terms are gathered in the `copyright' file.
@@ -8,12 +10,12 @@
/*
* IMPORTS_START
* IMPORT get_storage
+ * IMPORT light_storage
* IMPORT TYPE_PREFIX
* IMPORT CONNECTION_TYPE
* IMPORT browser
* IMPORT listen_for_connection
* IMPORT sha256
- * IMPORT query_best
* IMPORT make_ajax_request
* IMPORTS_END
*/
@@ -21,23 +23,6 @@
var storage;
var handler;
-function send_actions(url, port)
-{
- const [pattern, settings] = query_best(storage, url);
- const repos = storage.get_all(TYPE_PREFIX.REPO);
-
- port.postMessage(["settings", [pattern, settings, repos]]);
-
- if (settings === undefined)
- return;
-
- let components = settings.components;
- let processed_bags = new Set();
-
- if (components !== undefined)
- send_scripts([components], port, processed_bags);
-}
-
// TODO: parallelize script fetching
async function send_scripts(components, port, processed_bags)
{
@@ -109,9 +94,11 @@ async function fetch_remote_script(script_data)
function handle_message(port, message, handler)
{
port.onMessage.removeListener(handler[0]);
- let url = message.url;
- console.log({url});
- send_actions(url, port);
+ console.debug(`Loading payload '${message.payload}'.`);
+
+ const processed_bags = new Set();
+
+ send_scripts([message.payload], port, processed_bags);
}
function new_connection(port)
diff --git a/background/policy_injector.js b/background/policy_injector.js
index 9725e99..b49ec47 100644
--- a/background/policy_injector.js
+++ b/background/policy_injector.js
@@ -1,5 +1,7 @@
/**
- * Hachette injecting policy to page using webRequest
+ * This file is part of Haketilo.
+ *
+ * Function: Injecting policy to page by modifying HTTP headers.
*
* Copyright (C) 2021 Wojtek Kosior
* Copyright (C) 2021 jahoti
@@ -8,186 +10,39 @@
/*
* IMPORTS_START
- * IMPORT TYPE_PREFIX
- * IMPORT get_storage
- * IMPORT browser
- * IMPORT is_chrome
- * IMPORT is_mozilla
- * IMPORT gen_unique
- * IMPORT gen_nonce
- * IMPORT is_privileged_url
- * IMPORT url_item
- * IMPORT url_extract_target
- * IMPORT sign_policy
- * IMPORT query_best
- * IMPORT sanitize_csp_header
+ * IMPORT make_csp_rule
+ * IMPORT csp_header_regex
+ * Re-enable the import below once nonce stuff here is ready
+ * !mport gen_nonce
* IMPORTS_END
*/
-var storage;
-
-const csp_header_names = new Set([
- "content-security-policy",
- "x-webkit-csp",
- "x-content-security-policy"
-]);
-
-/* TODO: variable no longer in use; remove if not needed */
-const unwanted_csp_directives = new Set([
- "report-to",
- "report-uri",
- "script-src",
- "script-src-elem",
- "prefetch-src"
-]);
-
-const report_only = "content-security-policy-report-only";
-
-function url_inject(details)
-{
- if (is_privileged_url(details.url))
- return;
-
- const targets = url_extract_target(details.url);
- if (targets.current)
- return;
-
- /* Redirect; update policy */
- if (targets.policy)
- targets.target = "";
-
- let [pattern, settings] = query_best(storage, targets.base_url);
- /* Defaults */
- if (!pattern)
- settings = {};
-
- const policy = encodeURIComponent(
- JSON.stringify({
- allow: settings.allow,
- nonce: gen_nonce(),
- base_url: targets.base_url
- })
- );
-
- return {
- redirectUrl: [
- targets.base_url,
- '#', sign_policy(policy, new Date()), policy,
- targets.target,
- targets.target2
- ].join("")
- };
-}
-
-function headers_inject(details)
+function inject_csp_headers(headers, policy)
{
- const targets = url_extract_target(details.url);
- /* Block mis-/unsigned requests */
- if (!targets.current)
- return {cancel: true};
-
- let orig_csp_headers = is_chrome ? null : [];
- let headers = [];
- let csp_headers = is_chrome ? headers : [];
+ let csp_headers;
- const rule = `'nonce-${targets.policy.nonce}'`;
- const block = !targets.policy.allow;
+ if (policy.payload) {
+ headers = headers.filter(h => !csp_header_regex.test(h.name));
- for (const header of details.responseHeaders) {
- if (!csp_header_names.has(header)) {
- /* Remove headers that only snitch on us */
- if (header.name.toLowerCase() === report_only && block)
- continue;
- headers.push(header);
+ // TODO: make CSP rules with nonces and facilitate passing them to
+ // content scripts via dynamic content script registration or
+ // synchronous XHRs
- /* If these are the original CSP headers, use them instead */
- /* Test based on url_extract_target() in misc.js */
- if (is_mozilla && header.name === "x-orig-csp") {
- let index = header.value.indexOf('%5B');
- if (index === -1)
- continue;
-
- let sig = header.value.substring(0, index);
- let data = header.value.substring(index);
- if (sig !== sign_policy(data, 0))
- continue;
-
- /* Confirmed- it's the originals, smuggled in! */
- try {
- data = JSON.parse(decodeURIComponent(data));
- } catch (e) {
- /* This should not be reached -
- it's our self-produced valid JSON. */
- console.log("Unexpected internal error - invalid JSON smuggled!", e);
- }
-
- orig_csp_headers = csp_headers = null;
- for (const header of data)
- headers.push(sanitize_csp_header(header, rule, block));
- }
- } else if (is_chrome || !orig_csp_headers) {
- csp_headers.push(sanitize_csp_header(header, rule, block));
- if (is_mozilla)
- orig_csp_headers.push(header);
- }
- }
-
- if (orig_csp_headers) {
- /** Smuggle in the original CSP headers for future use.
- * These are signed with a time of 0, as it's not clear there
- * is a limit on how long Firefox might retain these headers in
- * the cache.
- */
- orig_csp_headers = encodeURIComponent(JSON.stringify(orig_csp_headers));
- headers.push({
- name: "x-orig-csp",
- value: sign_policy(orig_csp_headers, 0) + orig_csp_headers
- });
-
- headers = headers.concat(csp_headers);
+ // policy.nonce = gen_nonce();
}
- /* To ensure there is a CSP header if required */
- if (block) {
+ if (!policy.allow && (policy.nonce || !policy.payload)) {
headers.push({
name: "content-security-policy",
- value: `script-src ${rule}; script-src-elem ${rule}; ` +
- "script-src-attr 'none'; prefetch-src 'none';"
+ value: make_csp_rule(policy)
});
}
- return {responseHeaders: headers};
-}
-
-async function start_policy_injector()
-{
- storage = await get_storage();
-
- let extra_opts = ["blocking", "responseHeaders"];
- if (is_chrome)
- extra_opts.push("extraHeaders");
-
- browser.webRequest.onBeforeRequest.addListener(
- url_inject,
- {
- urls: ["<all_urls>"],
- types: ["main_frame", "sub_frame"]
- },
- ["blocking"]
- );
-
- browser.webRequest.onHeadersReceived.addListener(
- headers_inject,
- {
- urls: ["<all_urls>"],
- types: ["main_frame", "sub_frame"]
- },
- extra_opts
- );
+ return headers;
}
/*
* EXPORTS_START
- * EXPORT start_policy_injector
+ * EXPORT inject_csp_headers
* EXPORTS_END
*/
diff --git a/background/storage.js b/background/storage.js
index c2160b0..a4e626a 100644
--- a/background/storage.js
+++ b/background/storage.js
@@ -1,5 +1,7 @@
/**
- * Hachette storage manager
+ * This file is part of Haketilo.
+ *
+ * Function: Storage manager.
*
* Copyright (C) 2021 Wojtek Kosior
* Redistribution terms are gathered in the `copyright' file.
@@ -7,7 +9,7 @@
/*
* IMPORTS_START
- * IMPORT TYPE_PREFIX
+ * IMPORT raw_storage
* IMPORT TYPE_NAME
* IMPORT list_prefixes
* IMPORT make_lock
@@ -15,76 +17,17 @@
* IMPORT unlock
* IMPORT make_once
* IMPORT browser
- * IMPORT is_chrome
* IMPORT observables
* IMPORTS_END
*/
var exports = {};
-/* We're yet to decide how to handle errors... */
-
-/* Here are some basic wrappers for storage API functions */
-
-async function get(key)
-{
- try {
- /* Fix for fact that Chrome does not use promises here */
- let promise = is_chrome ?
- new Promise((resolve, reject) =>
- chrome.storage.local.get(key,
- val => resolve(val))) :
- browser.storage.local.get(key);
-
- return (await promise)[key];
- } catch (e) {
- console.log(e);
- }
-}
-
-async function set(key, value)
-{
- try {
- return browser.storage.local.set({[key]: value});
- } catch (e) {
- console.log(e);
- }
-}
-
-async function setn(keys_and_values)
-{
- let obj = Object();
- while (keys_and_values.length > 1) {
- let value = keys_and_values.pop();
- let key = keys_and_values.pop();
- obj[key] = value;
- }
-
- try {
- return browser.storage.local.set(obj);
- } catch (e) {
- console.log(e);
- }
-}
-
-async function set_var(name, value)
-{
- return set(TYPE_PREFIX.VAR + name, value);
-}
-
-async function get_var(name)
-{
- return get(TYPE_PREFIX.VAR + name);
-}
-
-/*
- * A special case of persisted variable is one that contains list
- * of items.
- */
+/* A special case of persisted variable is one that contains list of items. */
async function get_list_var(name)
{
- let list = await get_var(name);
+ let list = await raw_storage.get_var(name);
return list === undefined ? [] : list;
}
@@ -97,7 +40,7 @@ async function list(prefix)
let map = new Map();
for (let item of await get_list_var(name))
- map.set(item, await get(prefix + item));
+ map.set(item, await raw_storage.get(prefix + item));
return {map, prefix, name, observable: observables.make(),
lock: make_lock()};
@@ -175,19 +118,19 @@ async function set_item(item, value, list)
}
async function _set_item(item, value, list)
{
- let key = list.prefix + item;
- let old_val = list.map.get(item);
+ const key = list.prefix + item;
+ const old_val = list.map.get(item);
+ const set_obj = {[key]: value};
if (old_val === undefined) {
- let items = list_items(list);
+ const items = list_items(list);
items.push(item);
- await setn([key, value, "_" + list.name, items]);
- } else {
- await set(key, value);
+ set_obj["_" + list.name] = items;
}
- list.map.set(item, value)
+ await raw_storage.set(set_obj);
+ list.map.set(item, value);
- let change = {
+ const change = {
prefix : list.prefix,
item,
old_val,
@@ -212,20 +155,21 @@ async function remove_item(item, list)
}
async function _remove_item(item, list)
{
- let old_val = list.map.get(item);
+ const old_val = list.map.get(item);
if (old_val === undefined)
return;
- let key = list.prefix + item;
- let items = list_items(list);
- let index = items.indexOf(item);
+ const items = list_items(list);
+ const index = items.indexOf(item);
items.splice(index, 1);
- await setn([key, undefined, "_" + list.name, items]);
-
+ await raw_storage.set({
+ [list.prefix + item]: undefined,
+ ["_" + list.name]: items
+ });
list.map.delete(item);
- let change = {
+ const change = {
prefix : list.prefix,
item,
old_val,
@@ -247,11 +191,11 @@ async function replace_item(old_item, new_item, list, new_val=undefined)
}
async function _replace_item(old_item, new_item, list, new_val=undefined)
{
- let old_val = list.map.get(old_item);
+ const old_val = list.map.get(old_item);
if (new_val === undefined) {
if (old_val === undefined)
return;
- new_val = old_val
+ new_val = old_val;
} else if (new_val === old_val && new_item === old_item) {
return old_val;
}
@@ -261,17 +205,18 @@ async function _replace_item(old_item, new_item, list, new_val=undefined)
return old_val;
}
- let new_key = list.prefix + new_item;
- let old_key = list.prefix + old_item;
- let items = list_items(list);
- let index = items.indexOf(old_item);
+ const items = list_items(list);
+ const index = items.indexOf(old_item);
items[index] = new_item;
- await setn([old_key, undefined, new_key, new_val,
- "_" + list.name, items]);
+ await raw_storage.set({
+ [list.prefix + old_item]: undefined,
+ [list.prefix + new_item]: new_val,
+ ["_" + list.name]: items
+ });
list.map.delete(old_item);
- let change = {
+ const change = {
prefix : list.prefix,
item : old_item,
old_val,
diff --git a/background/storage_server.js b/background/storage_server.js
index 2252eb5..73126d4 100644
--- a/background/storage_server.js
+++ b/background/storage_server.js
@@ -1,5 +1,7 @@
/**
- * Hachette storage through connection (server side)
+ * This file is part of Haketilo.
+ *
+ * Function: Storage through messages (server side).
*
* Copyright (C) 2021 Wojtek Kosior
* Redistribution terms are gathered in the `copyright' file.
diff --git a/background/stream_filter.js b/background/stream_filter.js
new file mode 100644
index 0000000..e5d124c
--- /dev/null
+++ b/background/stream_filter.js
@@ -0,0 +1,214 @@
+/**
+ * This file is part of Haketilo.
+ *
+ * Function: Modifying a web page using the StreamFilter API.
+ *
+ * Copyright (C) 2018 Giorgio Maone <giorgio@maone.net>
+ * Copyright (C) 2021 Wojtek Kosior
+ * Redistribution terms are gathered in the `copyright' file.
+ *
+ * Derived from `bg/ResponseProcessor.js' and `bg/ResponseMetaData.js'
+ * in LibreJS.
+ */
+
+/*
+ * IMPORTS_START
+ * IMPORT browser
+ * IMPORT csp_header_regex
+ * IMPORTS_END
+ */
+
+function validate_encoding(charset)
+{
+ try {
+ new TextDecoder();
+ return charset;
+ } catch(e) {
+ return undefined;
+ }
+}
+
+function is_content_type_header(header)
+{
+ header.name.toLowerCase().trim() === "content-type";
+}
+
+const charset_reg = /;\s*charset\s*=\s*([\w-]+)/i;
+
+function properties_from_headers(headers)
+{
+ const properties = {};
+
+ for (const header of headers.filter(is_content_type_header)) {
+ const match = charset_reg.exec(header.value);
+ if (!properties.detected_charset && validate_encoding(match[1]))
+ properties.detected_charset = match[1];
+
+ if (/html/i.test(header.value))
+ properties.html = true;
+ }
+
+ return properties;
+}
+
+const UTF8_BOM = [0xef, 0xbb, 0xbf];
+const BOMs = [
+ [UTF8_BOM, "utf-8"],
+ [[0xfe, 0xff], "utf-16be"],
+ [[0xff, 0xfe], "utf-16le"]
+];
+
+function charset_from_BOM(data)
+{
+ for (const [BOM, charset] of BOMs) {
+ if (BOM.reduce((ac, byte, i) => ac && byte === data[i], true))
+ return charset;
+ }
+
+ return "";
+}
+
+const charset_attrs =
+ ['charset', 'http-equiv="content-type"', 'content*="charset"'];
+const charset_meta_selector =
+ charset_attrs.map(a => `head>meta[${a}]`).join(", ");
+
+function charset_from_meta_tags(doc)
+{
+ for (const meta of doc.querySelectorAll(charset_meta_selector)) {
+ const maybe_charset = meta.getAttribute("charset");
+ if (maybe_charset && validate_encoding(maybe_charset))
+ return maybe_charset;
+
+ const match = charset_reg.exec(meta.getAttribute("content"));
+ if (match && validate_encoding(match[1]))
+ return match[1];
+ }
+
+ return undefined;
+}
+
+function create_decoder(properties, data)
+{
+ let charset = charset_from_BOM(data) || properties.detected_charset;
+ if (!charset && data.indexOf(0) !== -1) {
+ console.debug("Warning: zeroes in bytestream, probable cached encoding mismatch. Trying to decode it as UTF-16.",
+ properties);
+ return new TextDecoder("utf-16be");
+ }
+
+ /* Missing HTTP charset, sniffing in content... */
+ /*
+ * TODO: I recall there is some standard saying how early in the doc the
+ * charset has to be specified. We could process just this part of data.
+ */
+ const text = new TextDecoder("latin1").decode(data, {stream: true});
+ properties.html = properties.html || /html/i.test(text);
+
+ if (properties.html) {
+ const tmp_doc = new DOMParser().parseFromString(text, "text/html");
+ charset = charset_from_meta_tags(tmp_doc);
+ }
+
+ return new TextDecoder(charset || "latin1");
+}
+
+function may_define_csp_rules(html)
+{
+ const doc = new DOMParser().parseFromString(html, "text/html");
+
+ for (const meta of doc.querySelectorAll("head>meta[http-equiv]")) {
+ if (csp_header_regex.test(meta.httpEquiv) && meta.content)
+ return true;
+ }
+
+ /*
+ * Even if no naughty `<meta>' tags were found, subsequent chunk of HTML
+ * data could add some. Before we return `false' we need to be sure we
+ * reached the start of `<body>' where `<meta>' tags are no longer valid.
+ */
+
+ if (doc.documentElement.nextSibling || doc.body.nextSibling ||
+ doc.body.childNodes.length > 1)
+ return false;
+
+ if (!doc.body.firstChild)
+ return true;
+
+ if (doc.body.firstChild.nodeName !== "#text")
+ return false;
+
+ return /^(<\/|&#|.)$/.test(doc.body.firstChild.wholeText);
+}
+
+function filter_data(properties, event)
+{
+ const data = new Uint8Array(event.data);
+ let first_chunk = false;
+ if (!properties.decoder) {
+ first_chunk = true;
+ properties.decoder = create_decoder(properties, data);
+ properties.encoder = new TextEncoder();
+ }
+
+ let decoded = properties.decoder.decode(data);
+
+ /* Force UTF-8, this is the only encoding we can produce. */
+ if (first_chunk)
+ properties.filter.write(new Uint8Array(UTF8_BOM));
+
+ if (first_chunk && may_define_csp_rules(decoded)) {
+ /*
+ * HAX! Our content scripts that execute at `document_start' will always
+ * run before the first script in the document, but under Mozilla some
+ * `<meta>' tags might already be loaded at that point. Here we inject a
+ * dummy `<script>' at the beginning (before any `<meta>' tags) that
+ * will force `document_start' to happen earlier. This way our content
+ * scripts will be able to sanitize `http-equiv' tags with CSP rules
+ * that would otherwise stop our injected scripts from executing.
+ *
+ * As we want to only process HTML files that happen to have naughty
+ * `<meta>' tags in `<head>', we use a DOMParser-based heuristic in
+     * `may_define_csp_rules()'. We don't do any additional MIME sniffing as it
+ * is too unreliable (and our heuristic will likely mark non-HTML files
+ * as harmless anyway).
+ */
+
+ const dummy_script = `<script>null</script>`;
+ const doctype_decl = /^(\s*<!doctype[^<>"']*>)?/i.exec(decoded)[0];
+ decoded = doctype_decl + dummy_script +
+ decoded.substring(doctype_decl.length);
+ }
+
+ properties.filter.write(properties.encoder.encode(decoded));
+
+ if (properties.decoder.encoding === "utf-8")
+ properties.filter.disconnect();
+}
+
+function apply_stream_filter(details, headers, policy)
+{
+ if (!policy.payload)
+ return headers;
+
+ const properties = properties_from_headers(headers);
+
+ properties.filter =
+ browser.webRequest.filterResponseData(details.requestId);
+
+ properties.filter.ondata = event => filter_data(properties, event);
+ properties.filter.onstop = () => properties.filter.close();
+
+ /*
+ * In the future we might consider modifying the headers that specify
+ * encoding. For now we are not yet doing it, though. However, we
+ * prepend the data with UTF-8 BOM which should be enough.
+ */
+ return headers;
+}
+
+/*
+ * EXPORTS_START
+ * EXPORT apply_stream_filter
+ * EXPORTS_END
+ */