summaryrefslogtreecommitdiff
path: root/content/policy_enforcing.js
blob: 45529ea7be0fb51010317b373edd2c6a330d7ff7 (about) (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
/**
 * This file is part of Haketilo.
 *
 * Function: Enforcing script blocking rules on a given page, working from a
 *           content script.
 *
 * Copyright (C) 2021,2022 Wojtek Kosior
 * Copyright (C) 2021 jahoti
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * As additional permission under GNU GPL version 3 section 7, you
 * may distribute forms of that code without the copy of the GNU
 * GPL normally required by section 4, provided you include this
 * license notice and, in case of non-source distribution, a URL
 * through which recipients can access the Corresponding Source.
 * If you modify file(s) with this exception, you may extend this
 * exception to your version of the file(s), but you are not
 * obligated to do so. If you do not wish to do so, delete this
 * exception statement from your version.
 *
 * As a special exception to the GPL, any HTML file which merely
 * makes function calls to this code, and for that purpose
 * includes it by reference shall be deemed a separate work for
 * copyright law purposes. If you modify this code, you may extend
 * this exception to your version of the code, but you are not
 * obligated to do so. If you do not wish to do so, delete this
 * exception statement from your version.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
 * license. Although I request that you do not make use of this code in a
 * proprietary program, I am not going to enforce this in court.
 */

#FROM common/misc.js IMPORT gen_nonce, csp_header_regex

/*
 * Flag consulted by wait_loaded(). It starts out true only when the document
 * has already reached the "complete" ready state.
 */
document.content_loaded = document.readyState === "complete";

/*
 * Return a promise that resolves once `target' is considered loaded: right
 * away when its `content_loaded' flag is set, otherwise upon the next
 * `DOMContentLoaded' event dispatched on it.
 */
const wait_loaded = target => {
    if (target.content_loaded)
	return Promise.resolve();

    return new Promise(resolve => {
	target.addEventListener("DOMContentLoaded", resolve, {once: true});
    });
};

/* Keep the flag up to date so later wait_loaded() calls resolve at once. */
wait_loaded(document).then(() => {
    document.content_loaded = true;
});

/*
 * In the case of HTML documents:
 * 1. When injecting some payload we need to sanitize <meta> CSP tags before
 *    they reach the document.
 * 2. Only <meta> tags inside <head> are considered valid by the browser and
 *    need to be considered.
 * 3. We want to detach <html> from document, wait until its <head> completes
 *    loading, sanitize it and re-attach <html>.
 * 4. We shall wait for anything to appear in or after <body> and take that as
 *    a sign <head> has finished loading.
 * 5. Otherwise, getting the `DOMContentLoaded' event on the document shall also
 *    be a sign that <head> is fully loaded.
 */

/*
 * Watch the direct children of `DOM_element' and re-run try_body_started()
 * for `waiting' whenever they change. The observer is returned so that it can
 * be disconnected later.
 */
function make_body_start_observer(DOM_element, waiting) {
    const recheck = () => try_body_started(waiting);
    const child_watcher = new MutationObserver(recheck);

    child_watcher.observe(DOM_element, {childList: true});

    return child_watcher;
}

/*
 * Check whether parsing has moved past <head>. That is assumed when <body> of
 * the detached <html> has a child or a following sibling, or when something
 * follows the document's current root element. If so, waiting is finished and
 * `true' is returned. Otherwise nothing is returned and, if <body> already
 * exists and fewer than 2 observers are registered, <body> gets observed too.
 */
function try_body_started(waiting) {
    const body = waiting.detached_html.querySelector("body");
    const body_has_started = body && (body.firstChild || body.nextSibling);

    if (body_has_started || waiting.doc.documentElement.nextSibling) {
	finish_waiting(waiting);
	return true;
    }

    const should_watch_body = body && waiting.observers.length < 2;
    if (should_watch_body)
	waiting.observers.push(make_body_start_observer(body, waiting));
}

/*
 * Conclude the wait described by `waiting': mark it finished, disconnect all
 * of its observers and schedule its callback on a zero-delay timer. Calls
 * after the first one do nothing.
 */
function finish_waiting(waiting) {
    if (!waiting.finished) {
	waiting.finished = true;

	for (const observer of waiting.observers)
	    observer.disconnect();

	setTimeout(waiting.callback, 0);
    }
}

/*
 * Call `callback' once <head> of `detached_html' can be assumed complete,
 * i.e. when try_body_started() reports that <body> has started or when `doc'
 * has loaded, whichever happens first.
 *
 * doc            document the root element got detached from
 * detached_html  the detached root element that is still being parsed into
 * callback       function invoked (asynchronously) when waiting is over
 */
function _wait_for_head(doc, detached_html, callback) {
    const waiting = {doc, detached_html, callback, observers: []};

    if (try_body_started(waiting))
	return;

    /*
     * try_body_started() might have already registered an observer on <body>.
     * Append to the list instead of overwriting it, so that this observer
     * stays tracked and gets disconnected by finish_waiting().
     */
    waiting.observers.push(make_body_start_observer(detached_html, waiting));

    wait_loaded(doc).then(() => finish_waiting(waiting));
}

/*
 * Promise-returning wrapper around _wait_for_head(). The promise's resolve
 * function is what gets passed as the callback.
 */
function wait_for_head(doc, detached_html) {
    const executor = resolve => _wait_for_head(doc, detached_html, resolve);

    return new Promise(executor);
}

const blocked_str = "blocked";

/*
 * Disable attribute `attr' of `node' while keeping its value available under
 * the name `blocked-<attr>'.
 *
 * node          element to operate on
 * attr          (local) name of the attribute to disable
 * ns            when a string, the *AttributeNS() methods are used with it as
 *               the namespace; otherwise the plain *Attribute() methods
 * replace_with  when not null, `attr' is set again to this value after being
 *               removed
 */
function block_attribute(node, attr, ns=null, replace_with=null) {
    const [hasa, geta, seta, rema] = ["has", "get", "set", "remove"]
	  .map(m => (n, ...args) => typeof ns === "string" ?
	       n[`${m}AttributeNS`](ns, ...args) : n[`${m}Attribute`](...args));
    /*
     * Disabling attributes by prepending `blocked-' allows them to still be
     * relatively easily accessed in case they contain some useful data.
     *
     * Find the first free name in the sequence `attr', `blocked-attr',
     * `blocked-blocked-attr', ... The parts have to be joined with "-" here
     * because that is how the names get written below; probing a differently
     * spelled name would let an existing `blocked-attr' be overwritten.
     */
    const construct_name = [attr];
    while (hasa(node, construct_name.join("-")))
	construct_name.unshift(blocked_str);

    /* Shift each value one `blocked-' level up, longest name first. */
    while (construct_name.length > 1) {
	construct_name.shift();
	const name = construct_name.join("-");
	seta(node, `${blocked_str}-${name}`, geta(node, name));
    }

    rema(node, attr);
    if (replace_with !== null)
	seta(node, attr, replace_with);
}

/*
 * Used to disable `<script>'s and `<meta>'s that have not yet been added to
 * live DOM (doesn't work for those already added).
 */
/*
 * Disable the `content' attribute of a `<meta>' whose `http-equiv' matches
 * `csp_header_regex' and whose `content' is non-empty.
 */
function sanitize_meta(meta) {
    const is_csp_meta = csp_header_regex.test(meta.httpEquiv);

    if (is_csp_meta && meta.content)
	block_attribute(meta, "content");
}

/*
 * Remember the current `type' attribute of `script' (null when absent) in its
 * `haketilo_blocked_type' property and switch the type to "text/plain".
 */
function sanitize_script(script) {
    const original_type = script.getAttribute("type");

    script.haketilo_blocked_type = original_type;
    script.type = "text/plain";
}

/*
 * Executed after `<script>' has been connected to the DOM, when it is no longer
 * eligible for being executed by the browser.
 */
/*
 * Undo sanitize_script(): put back the `type' attribute value remembered in
 * `haketilo_blocked_type' (dropping the attribute if there was none) and
 * delete that helper property.
 */
function desanitize_script(script) {
    const saved_type = script.haketilo_blocked_type;

    script.setAttribute("type", saved_type);

    if (saved_type === null || saved_type === undefined)
	script.removeAttribute("type");

    delete script.haketilo_blocked_type;
}

/*
 * Blocking certain attributes that might allow 'javascript:' URLs. Some of
 * these are: <iframe>'s 'src' attributes (would normally execute js in URL upon
 * frame's load), <object>'s 'data' attribute (would also execute upon load) and
 * <a>'s 'href' attribute (would execute upon link click).
 */
const bad_url_reg = /^data:([^,;]*ml|unknown-content-type)|^javascript:/i;

/*
 * Bring an attribute value closer to what a URL parser actually looks at
 * before `bad_url_reg' is applied to it: leading C0 control and space
 * characters are stripped and all ASCII tabs and newlines are removed.
 * Without this, values like " javascript:..." or "java\tscript:..." would
 * not be recognized.
 */
function normalize_url_for_check(value) {
    return String(value)
	.replace(/^[\u0000-\u0020]+/, "")
	.replace(/[\t\n\r]/g, "");
}

/*
 * Neutralize `href', `src' and `data' attributes of `element' that hold a
 * URL matched by `bad_url_reg'. Each element is only processed once (it gets
 * marked with the `haketilo_sanitized_urls' property). Nodes without
 * attributes (e.g. text nodes) are accepted and left alone.
 */
function sanitize_element_urls(element) {
    if (element.haketilo_sanitized_urls)
	return;

    element.haketilo_sanitized_urls = true;

    let some_attr_blocked = false;

    /* Iterate over a copy, block_attribute() modifies the attribute list. */
    for (const attr of [...element.attributes || []]
	       .filter(attr => /^(href|src|data)$/i.test(attr.localName))
	       .filter(attr => bad_url_reg
		       .test(normalize_url_for_check(attr.value)))) {
	/*
	 * Under some browsers (Mozilla) removing attributes doesn't stop their
	 * javascript from executing, but replacing them does. For 'src' and
	 * 'data' I chose to replace the attribute with a 'data:' URL and have
	 * it replace bad <iframe>'s/<object>'s contents with a "blocked"
	 * string. For 'href' (which appears on <a>'s) I chose to use a
	 * 'javascript:' URL to avoid having the page reloaded upon a link
	 * click.
	 */
	const replacement_value = /^href$/i.test(attr.localName) ?
	      "javascript:void('blocked');" : "data:text/plain,blocked";
	some_attr_blocked = true;
	block_attribute(element, attr.localName, attr.namespaceURI,
			replacement_value);
    }

    /*
     * Trial and error shows that under certain browsers additional element
     * removal and re-addition might be necessary to prevent execution of a
     * 'javascript:' URL (Parabola's Iceweasel 75 requires it for 'src' URL of
     * an <iframe>).
     */
    if (some_attr_blocked) {
	const replacement_elem = document.createElement("a");
	element.replaceWith(replacement_elem);
	replacement_elem.replaceWith(element);
    }
}

/*
 * Run sanitize_element_urls() on every element under `root' that carries a
 * `href', `src' or `data' attribute and on `root' itself.
 *
 * querySelectorAll() only returns descendants, hence `root' has to be handled
 * explicitly (in an XML document the root can be an arbitrary element). It is
 * processed last, so that its descendants are already sanitized in case
 * sanitize_element_urls() has to re-insert it.
 */
function sanitize_tree_urls(root) {
    root.querySelectorAll("*[href], *[src], *[data]")
	.forEach(element => sanitize_element_urls(element));

    sanitize_element_urls(root);
}

#IF MOZILLA
/*
 * Neutralize intrinsic event handlers (`on*' attributes) of `element'. For
 * every such attribute the corresponding property of the page's view of the
 * element is reset to null and the attribute gets blocked through
 * block_attribute(). Nodes without attributes are accepted and left alone.
 */
function sanitize_element_onevent(element) {
    const replacement_value = "javascript:void('blocked');";

    /*
     * `element.attributes' is a live collection and block_attribute() removes
     * and re-adds attributes. Iterating it directly would skip the attribute
     * that follows a blocked one and visit the replaced one again. Work on a
     * snapshot instead.
     */
    for (const attribute_node of [...element.attributes || []]) {
	const attr = attribute_node.localName, attr_lo = attr.toLowerCase();
	if (!/^on/.test(attr_lo) || !(attr_lo in element.wrappedJSObject))
	    continue;

	/*
	 * Guard against redefined getter on DOM object property. This is a
	 * supplemental security measure since page's own scripts should be
	 * blocked and unable to redefine properties, anyway.
	 */
	if (Object.getOwnPropertyDescriptor(element.wrappedJSObject, attr)) {
	    console.error("Redefined property on a DOM object! The page might have bypassed our script blocking measures!");
	    continue;
	}
	element.wrappedJSObject[attr] = null;

	/*
	 * The same element may get sanitized more than once. Blocking an
	 * attribute that already holds our replacement would only clobber the
	 * original value preserved under the `blocked-' name.
	 */
	if (attribute_node.value === replacement_value)
	    continue;

	block_attribute(element, attr, attribute_node.namespaceURI,
			replacement_value);
    }
}

/*
 * Run sanitize_element_onevent() on `root' and on all elements below it.
 *
 * querySelectorAll() only returns descendants, so `root' (which may carry
 * `on*' attributes as well) has to be included explicitly.
 */
function sanitize_tree_onevent(root) {
    for (const element of [root, ...root.querySelectorAll("*")])
	sanitize_element_onevent(element);
}
#ENDIF

function start_mo_sanitizing(doc) {
    if (!doc.content_loaded) {
	function mutation_handler(mutation) {
	    mutation.addedNodes.forEach(sanitize_element_urls);
#IF MOZILLA
	    mutation.addedNodes.forEach(sanitize_element_onevent);
#ENDIF
	}
	const mo = new MutationObserver(ms => ms.forEach(mutation_handler));
	mo.observe(doc, {childList: true, subtree: true});
	wait_loaded(doc).then(() => mo.disconnect());
    }
}

#IF MOZILLA
/*
 * Normally, we block scripts with CSP. However, Mozilla does optimizations that
 * cause part of the DOM to be loaded when our content scripts get to run. Thus,
 * before the CSP rules we inject (for non-HTTP pages) become effective, we need
 * to somehow block the execution of `<script>'s and intrinsics that were
 * already there. Additionally, some browsers (IceCat 60) seem to have problems
 * applying this CSP to non-inline `<scripts>' in certain scenarios.
 */
/*
 * Event listener that cancels `event' unless its target is marked with a
 * truthy `haketilo_payload' property.
 */
function prevent_script_execution(event) {
    const is_payload = event.target.haketilo_payload;

    if (is_payload)
	return;

    event.preventDefault();
}
#ENDIF

/*
 * Here we block all scripts of a document which might be either an
 * HTMLDocument or an XMLDocument. Modifying an XML document might disrupt
 * Mozilla's XML preview. This is an unfortunate thing we have to accept for
 * now. XML documents *have to* be sanitized as well because they might
 * contain `<script>' tags (or on* attributes) with namespace declared as
 * "http://www.w3.org/1999/xhtml" or "http://www.w3.org/2000/svg" which allows
 * javascript execution.
 *
 * Parameters:
 *   doc     the document to sanitize
 *   policy  object of which 2 properties are read here: `csp' (text placed in
 *           the `content' attribute of the injected CSP `<meta>') and
 *           `payload' (when truthy, document's own CSP `<meta>' tags get
 *           blocked, too)
 *
 * Returns a promise that resolves after the original root element has been
 * sanitized and re-attached and mutation-observer-based sanitizing has been
 * started. The order of the steps below matters and should be kept.
 */
async function sanitize_document(doc, policy) {
#IF MOZILLA
    /*
     * Blocking of scripts that are in the DOM from the beginning. Needed for
     * Mozilla.
     */
    const listener_args = ["beforescriptexecute", prevent_script_execution];
    doc.addEventListener(...listener_args);
    /* The listener is only kept until the document has loaded. */
    wait_loaded(doc).then(() => doc.removeEventListener(...listener_args));

    /* First pass, done while the original root is still attached. */
    sanitize_tree_urls(doc.documentElement);
    sanitize_tree_onevent(doc.documentElement);
#ENDIF

    /*
     * Ensure our CSP rules are employed from the beginning. This CSP injection
     * method is, when possible, going to be applied together with CSP rules
     * injected using webRequest.
     * Using elements namespaced as HTML makes this CSP injection also work for
     * non-HTML documents.
     *
     * NOTE(review): `policy.csp' is put into the attribute verbatim, without
     * escaping. This presumably relies on it never containing a double
     * quote - confirm with the code that builds the policy.
     */
    const source = `\
<!DOCTYPE html>
<html>
  <head>
    <meta http-equiv="Content-Security-Policy" content="${policy.csp}"/>
  </head>
  <body>
    Loading...
  </body>
</html>`;
    const temporary_html =
	  new DOMParser().parseFromString(source, "text/html").documentElement;

    /*
     * Root node gets hijacked now, to be re-attached after <head> is loaded
     * and sanitized.
     */
    const root = doc.documentElement;
    root.replaceWith(temporary_html);

    /*
     * When we don't inject payload, we neither block document's CSP `<meta>'
     * tags nor wait for `<head>' to be parsed.
     */
    if (policy.payload) {
	await wait_for_head(doc, root);

	/*
	 * sanitize_meta() as defined in this file takes a single parameter,
	 * the extra `policy' argument is ignored. The same holds for
	 * sanitize_script() and desanitize_script() below.
	 */
	root.querySelectorAll("head meta")
	    .forEach(m => sanitize_meta(m, policy));
    }

    sanitize_tree_urls(root);
    /*
     * Scripts get their type switched to "text/plain" for the moment of
     * re-attaching the root and have the original type restored right
     * afterwards.
     */
    root.querySelectorAll("script").forEach(s => sanitize_script(s, policy));
    temporary_html.replaceWith(root);
    root.querySelectorAll("script").forEach(s => desanitize_script(s, policy));
#IF MOZILLA
    sanitize_tree_onevent(root);
#ENDIF

    /* Nodes added from now on are handled by a mutation observer. */
    start_mo_sanitizing(doc);
}

/*
 * Look for Service Workers registered for the current page. If there are any,
 * try to unregister all of them and reload the page regardless of whether
 * that succeeded. When unregistering succeeds the returned promise never
 * settles; when there is nothing to do it resolves with no value.
 */
async function _disable_service_workers() {
    const {serviceWorker} = navigator;
    if (!serviceWorker)
	return;

    const registrations = await serviceWorker.getRegistrations();
    if (registrations.length === 0)
	return;

    console.warn("Service Workers detected on this page! Unregistering and reloading.");

    try {
	const unregistrations =
	      registrations.map(registration => registration.unregister());
	await Promise.all(unregistrations);
    } finally {
	location.reload();
    }

    /* Never actually return! */
    return new Promise(() => 0);
}

/*
 * Trying to use service worker APIs might result in exceptions, for example
 * when in a non-HTML document. Because of this, the function that does the
 * actual work is called from within a try {} block and a failure is only
 * reported with console.debug().
 */
async function disable_service_workers() {
    const report_failure = error =>
	  console.debug("Exception thrown during an attempt to detect and disable service workers.", error);

    try {
	await _disable_service_workers();
    } catch (error) {
	report_failure(error);
    }
}

/*
 * Entry point. Does nothing (and returns nothing) when `policy.allow' is
 * truthy. Otherwise starts document sanitizing and Service Worker removal and
 * returns a promise that is fulfilled when both of them are done and the
 * document has loaded.
 */
function enforce_blocking(policy) {
    if (!policy.allow) {
	const tasks = [
	    sanitize_document(document, policy),
	    disable_service_workers(),
	    wait_loaded(document)
	];

	return Promise.all(tasks);
    }
}
#EXPORT enforce_blocking