content/main.js


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354

/**
 * This file is part of Haketilo.
 *
 * Function: Main content script that runs in all frames.
 *
 * Copyright (C) 2021 Wojtek Kosior
 * Copyright (C) 2021 jahoti
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * As additional permission under GNU GPL version 3 section 7, you
 * may distribute forms of that code without the copy of the GNU
 * GPL normally required by section 4, provided you include this
 * license notice and, in case of non-source distribution, a URL
 * through which recipients can access the Corresponding Source.
 * If you modify file(s) with this exception, you may extend this
 * exception to your version of the file(s), but you are not
 * obligated to do so. If you do not wish to do so, delete this
 * exception statement from your version.
 *
 * As a special exception to the GPL, any HTML file which merely
 * makes function calls to this code, and for that purpose
 * includes it by reference shall be deemed a separate work for
 * copyright law purposes. If you modify this code, you may extend
 * this exception to your version of the code, but you are not
 * obligated to do so. If you do not wish to do so, delete this
 * exception statement from your version.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
 * license. Although I request that you do not make use this code in a
 * proprietary program, I am not going to enforce this in court.
 */

/*
 * IMPORTS_START
 * IMPORT handle_page_actions
 * IMPORT gen_nonce
 * IMPORT is_privileged_url
 * IMPORT browser
 * IMPORT is_chrome
 * IMPORT is_mozilla
 * IMPORT start_activity_info_server
 * IMPORT make_csp_rule
 * IMPORT csp_header_regex
 * IMPORT report_settings
 * IMPORTS_END
 */

/*
 * Remember on the document object itself whether its content has already been
 * loaded, so that later wait_loaded() calls can resolve right away.
 */
document.content_loaded = document.readyState === "complete";

/*
 * Return a promise that is already resolved when `e' has a truthy
 * `content_loaded' property and otherwise resolves (with the event object) the
 * first time `DOMContentLoaded' is dispatched on `e'.
 */
const wait_loaded = e => {
    if (e.content_loaded)
	return Promise.resolve();

    return new Promise(resolve => {
	e.addEventListener("DOMContentLoaded", resolve, {once: true});
    });
};

wait_loaded(document).then(() => {
    document.content_loaded = true;
});

/*
 * In the case of HTML documents:
 * 1. When injecting some payload we need to sanitize <meta> CSP tags before
 *    they reach the document.
 * 2. Only <meta> tags inside <head> are considered valid by the browser and
 *    need to be considered.
 * 3. We want to detach <html> from document, wait until its <head> completes
 *    loading, sanitize it and re-attach <html>.
 * 4. We shall wait for anything to appear in or after <body> and take that as
 *    a sign <head> has finished loading.
 * 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also
 *    be a sign that <head> is fully loaded.
 */

/*
 * Create a MutationObserver that re-runs try_body_started() for `waiting' each
 * time the list of direct children of `DOM_element' changes. The observer is
 * returned already started, so that the caller can later disconnect it.
 */
function make_body_start_observer(DOM_element, waiting)
{
    const on_children_changed = () => try_body_started(waiting);
    const child_list_observer = new MutationObserver(on_children_changed);

    child_list_observer.observe(DOM_element, {childList: true});

    return child_list_observer;
}

/*
 * Check whether anything has already appeared inside or after `<body>' of the
 * detached `<html>' element, or after the document's current root element. If
 * so, finish the wait and return `true'. Otherwise, if `<body>' exists and
 * fewer than 2 observers are registered, start observing `<body>' as well;
 * nothing is returned in that case.
 */
function try_body_started(waiting)
{
    const body_elem = waiting.detached_html.querySelector("body");

    const body_has_content =
	  Boolean(body_elem && (body_elem.firstChild || body_elem.nextSibling));
    const head_is_complete = body_has_content ||
	  Boolean(waiting.doc.documentElement.nextSibling);

    if (head_is_complete) {
	finish_waiting(waiting);
	return true;
    }

    if (!body_elem || waiting.observers.length >= 2)
	return;

    waiting.observers.push(make_body_start_observer(body_elem, waiting));
}

/*
 * End the wait described by `waiting': mark it as finished, disconnect all of
 * its observers and schedule its callback through setTimeout(). Calls made
 * after the first one have no effect.
 */
function finish_waiting(waiting)
{
    if (!waiting.finished) {
	waiting.finished = true;

	for (const observer of waiting.observers)
	    observer.disconnect();

	setTimeout(waiting.callback, 0);
    }
}

/*
 * Arrange for `callback' to be called once `<head>' of `detached_html' can be
 * assumed to be complete, i.e. once try_body_started() succeeds or `doc' gets
 * loaded, whichever happens first.
 *
 * `doc' is the document being loaded and `detached_html' is its original root
 * element that has been replaced in the document.
 */
function _wait_for_head(doc, detached_html, callback)
{
    const waiting = {doc, detached_html, callback, observers: []};

    if (try_body_started(waiting))
	return;

    /*
     * The call above might have already registered an observer on `<body>'.
     * Append to the list instead of overwriting it, so that such observer
     * also gets disconnected by finish_waiting() and is counted by
     * try_body_started() when it decides whether to add another one.
     */
    waiting.observers.push(make_body_start_observer(detached_html, waiting));

    wait_loaded(doc).then(() => finish_waiting(waiting));
}

/*
 * Promise-returning wrapper around _wait_for_head(). The promise's resolve
 * function is what gets passed as the callback.
 */
function wait_for_head(doc, detached_html)
{
    return new Promise(resolve => {
	_wait_for_head(doc, detached_html, resolve);
    });
}

const blocked_str = "blocked";

/*
 * Disable attribute `attr' of `node' by moving its value to an attribute named
 * `blocked-<attr>' and removing the original one. When `ns' is a string, the
 * namespace-aware (`*AttributeNS') DOM methods are used with `ns' as the
 * namespace; otherwise the plain ones are used.
 *
 * If `blocked-<attr>' is already taken, its value is first moved to
 * `blocked-blocked-<attr>' (and so on), so that no previously blocked value
 * gets overwritten.
 */
function block_attribute(node, attr, ns=null)
{
    const [hasa, geta, seta, rema] = ["has", "get", "set", "remove"]
	  .map(m => (n, ...args) => typeof ns === "string" ?
	       n[`${m}AttributeNS`](ns, ...args) : n[`${m}Attribute`](...args));
    /*
     * Disabling attributes by prepending `blocked-' allows them to still be
     * relatively easily accessed in case they contain some useful data.
     */
    const construct_name = [attr];

    /*
     * Find the shortest free name. Name parts have to be joined with "-"
     * here because that is also how the names written below are built.
     */
    while (hasa(node, construct_name.join("-")))
	construct_name.unshift(blocked_str);

    /* Shift each value by one `blocked-' prefix, outermost name first. */
    while (construct_name.length > 1) {
	construct_name.shift();
	const name = construct_name.join("-");
	seta(node, `${blocked_str}-${name}`, geta(node, name));
    }

    rema(node, attr);
}

/*
 * sanitize_meta() and sanitize_script() are used to disable `<meta>'s and
 * `<script>'s that have not yet been added to live DOM (they don't work for
 * those already added).
 *
 * sanitize_meta() blocks the `content' attribute of `meta' if its `http-equiv'
 * matches `csp_header_regex' and its `content' is non-empty.
 */
function sanitize_meta(meta)
{
    const is_csp_meta = csp_header_regex.test(meta.httpEquiv);

    if (is_csp_meta && meta.content)
	block_attribute(meta, "content");
}

/*
 * Stash the current value of the `type' attribute of `script' (null if there
 * is none) in its `haketilo_blocked_type' property and change the type to
 * "text/plain". desanitize_script() reverts this.
 */
function sanitize_script(script)
{
    const original_type = script.getAttribute("type");

    script.haketilo_blocked_type = original_type;
    script.type = "text/plain";
}

/*
 * Revert sanitize_script(). Meant to be executed after `<script>' has been
 * connected to the DOM, when it is no longer eligible for being executed by the
 * browser.
 *
 * The `type' attribute is restored from the `haketilo_blocked_type' property
 * (and removed altogether if the stashed value is null or undefined). The
 * property itself is deleted afterwards.
 */
function desanitize_script(script)
{
    const stashed_type = script.haketilo_blocked_type;

    script.setAttribute("type", stashed_type);

    /* There was no `type' attribute originally, so there shall be none now. */
    if (stashed_type === null || stashed_type === undefined)
	script.removeAttribute("type");

    delete script.haketilo_blocked_type;
}

/*
 * Matches `data:' URLs whose media type ends with "ml" or is
 * "unknown-content-type".
 */
const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)/i;

/*
 * Block every `href', `src' and `data' attribute of `element' whose value
 * matches `bad_url_reg'. Nodes without attributes are left alone.
 */
function sanitize_urls(element)
{
    const url_attr_name_reg = /^(href|src|data)$/i;

    /* Collect the offenders first; blocking modifies element's attributes. */
    const offending_attrs = [];
    for (const attr of element.attributes || []) {
	if (url_attr_name_reg.test(attr.localName) &&
	    bad_url_reg.test(attr.value))
	    offending_attrs.push(attr);
    }

    for (const {localName, namespaceURI} of offending_attrs)
	block_attribute(element, localName, namespaceURI);
}

/*
 * Run sanitize_urls() on all elements of `doc' that currently have an `href',
 * `src' or `data' attribute. If `doc' is not yet marked as loaded, also run it
 * on every node that gets added anywhere under `doc', until wait_loaded(doc)
 * resolves.
 */
function start_data_urls_sanitizing(doc)
{
    const url_carriers = doc.querySelectorAll("*[href], *[src], *[data]");
    url_carriers.forEach(sanitize_urls);

    if (doc.content_loaded)
	return;

    const sanitize_added_nodes = records => {
	for (const record of records)
	    record.addedNodes.forEach(sanitize_urls);
    };
    const added_nodes_observer = new MutationObserver(sanitize_added_nodes);

    added_nodes_observer.observe(doc, {childList: true, subtree: true});
    wait_loaded(doc).then(() => added_nodes_observer.disconnect());
}

/*
 * Normally, we block scripts with CSP. However, Mozilla does optimizations that
 * cause part of the DOM to be loaded when our content scripts get to run. Thus,
 * before the CSP rules we inject (for non-HTTP pages) become effective, we need
 * to somehow block the execution of `<script>'s and intrinsics that were
 * already there. Additionally, some browsers (IceCat 60) seem to have problems
 * applying this CSP to non-inline `<script>'s in certain scenarios.
 */
function prevent_script_execution(event)
{
    /*
     * Targets with a truthy `haketilo_payload' property are left alone. The
     * default action of the event is prevented for all the other ones.
     */
    if (event.target.haketilo_payload)
	return;

    event.preventDefault();
}

/*
 * Block scripts and intrinsic event handlers that are already present in
 * `doc': register prevent_script_execution() for `beforescriptexecute' events
 * and, for every element, set to null each non-empty `on*' property (accessed
 * through `wrappedJSObject') that corresponds to one of its attributes.
 */
function mozilla_initial_block(doc)
{
    doc.addEventListener("beforescriptexecute", prevent_script_execution);

    for (const elem of doc.querySelectorAll("*")) {
	/* First find out which handlers are set, only then clear them. */
	const handler_names = [];
	for (const {localName} of elem.attributes) {
	    if (/^on/.test(localName) && elem.wrappedJSObject[localName])
		handler_names.push(localName);
	}

	for (const handler_name of handler_names)
	    elem.wrappedJSObject[handler_name] = null;
    }
}

/*
 * Here we block all scripts of a document which might be either an
 * HTMLDocument or an XMLDocument. Modifying an XML document might disrupt
 * Mozilla's XML preview. This is an unfortunate thing we have to accept for
 * now. XML documents *have to* be sanitized as well because they might
 * contain `<script>' tags (or on* attributes) with namespace declared as
 * "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows
 * javascript execution.
 *
 * `doc' is the document to sanitize. `policy' is passed to make_csp_rule() to
 * build the injected CSP and its `has_payload' property decides whether the
 * document's own CSP `<meta>' tags get blocked. Being an async function, this
 * returns a promise; it fulfills after the original root element has been
 * re-attached and sanitizing of data URLs has been started.
 */
async function sanitize_document(doc, policy)
{
    /*
     * Blocking of scripts that are in the DOM from the beginning. Needed for
     * Mozilla.
     */
    if (is_mozilla)
	mozilla_initial_block(doc);

    /*
     * Ensure our CSP rules are employed from the beginning. This CSP injection
     * method is, when possible, going to be applied together with CSP rules
     * injected using webRequest.
     * Using elements namespaced as HTML makes this CSP injection also work for
     * non-HTML documents.
     */
    const html = new DOMParser().parseFromString(`<html><head><meta \
http-equiv="Content-Security-Policy" content="${make_csp_rule(policy)}"\
/></head><body>Loading...</body></html>`, "text/html").documentElement;

    /*
     * Root node gets hijacked now, to be re-attached after <head> is loaded
     * and sanitized.
     */
    const root = doc.documentElement;
    root.replaceWith(html);

    /*
     * When we don't inject payload, we neither block document's CSP `<meta>'
     * tags nor wait for `<head>' to be parsed.
     */
    if (policy.has_payload) {
	await wait_for_head(doc, root);

	root.querySelectorAll("head meta")
	    .forEach(m => sanitize_meta(m, policy));
    }

    /*
     * Scripts of the detached root get their type changed before the root is
     * put back in place of our temporary `<html>' element. The original types
     * are only restored once the scripts are in the document again.
     */
    root.querySelectorAll("script").forEach(s => sanitize_script(s, policy));
    html.replaceWith(root);
    root.querySelectorAll("script").forEach(s => desanitize_script(s, policy));

    start_data_urls_sanitizing(doc);
}

/*
 * Unregister all Service Workers found through `navigator.serviceWorker' and
 * reload the page. Resolves normally when there is no Service Worker API or no
 * registrations exist. Otherwise the page reload is requested (even if
 * unregistering fails) and, unless an exception was thrown, the returned
 * promise never settles.
 */
async function _disable_service_workers()
{
    const service_workers = navigator.serviceWorker;
    if (!service_workers)
	return;

    const registrations = await service_workers.getRegistrations();
    if (registrations.length === 0)
	return;

    console.warn("Service Workers detected on this page! Unregistering and reloading.");

    try {
	const pending = registrations.map(registration =>
					  registration.unregister());
	await Promise.all(pending);
    } finally {
	location.reload();
    }

    /* Never actually return! */
    return new Promise(() => 0);
}

/*
 * Trying to use service worker APIs might result in exceptions, for example
 * when in a non-HTML document. Because of this, the function that does the
 * actual work is called inside a try {} block and a failure is only reported
 * with console.debug().
 */
async function disable_service_workers()
{
    try {
	await _disable_service_workers();
    } catch (error) {
	const message =
	      "Exception thrown during an attempt to detect and disable service workers.";
	console.debug(message, error);
    }
}

/*
 * Obtain the policy for `url' by making a synchronous XMLHttpRequest to an
 * extension URL (`browser.runtime.getURL("dummy")' with `url' passed in the
 * query string) and decoding the JSON found in the `settings' query parameter
 * of the URL the response ended up coming from.
 *
 * Returns the decoded policy object. If the request fails or the response URL
 * does not carry a valid policy, the error is logged and `{allow: false}' is
 * returned, so that the caller falls back to blocking instead of being
 * interrupted by an exception.
 */
function synchronously_get_policy(url)
{
    const encoded_url = encodeURIComponent(url);
    const request_url = `${browser.runtime.getURL("dummy")}?url=${encoded_url}`;

    try {
	const xhttp = new XMLHttpRequest();
	/* The third argument being `false' makes the request synchronous. */
	xhttp.open("GET", request_url, false);
	xhttp.send();

	const match = /^[^?]*\?settings=(.*)$/.exec(xhttp.responseURL);
	if (match === null)
	    throw new Error(`Unexpected response URL: ${xhttp.responseURL}`);

	return JSON.parse(decodeURIComponent(match[1]));
    } catch(e) {
	console.error("Failure to synchronously fetch policy for url.", e);
	return {allow: false};
    }
}

/*
 * Entry point. Nothing is done on privileged pages. Everywhere else, the
 * policy for the document's URL is fetched and reported, script blocking is
 * set up (unless the policy allows scripts) and page actions are handled.
 */
if (!is_privileged_url(document.URL)) {
    const policy = synchronously_get_policy(document.URL);

    /* Payload is only kept for HTML documents. */
    const is_html_document = document instanceof HTMLDocument;
    if (!is_html_document)
	delete policy.payload;

    console.debug("current policy", policy);

    report_settings(policy);

    policy.nonce = gen_nonce();

    /* When the policy allows scripts, there is nothing to block. */
    const blocking_done = policy.allow ?
	  [Promise.resolve(), Promise.resolve()] :
	  [sanitize_document(document, policy), disable_service_workers()];

    const doc_ready = Promise.all([...blocking_done, wait_loaded(document)]);

    handle_page_actions(policy, doc_ready);

    start_activity_info_server();
}