aboutsummaryrefslogtreecommitdiff
path: root/content/sanitize_document.js
blob: 1533526d00982ee4457c190a520547f529b7f55b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
/**
 * Hachette modify HTML document as it loads and reconstruct HTML code from it
 *
 * Copyright (C) 2021 Wojtek Kosior
 * Redistribution terms are gathered in the `copyright' file.
 */

/*
 * IMPORTS_START
 * IMPORT gen_nonce
 * IMPORT csp_rule
 * IMPORT is_csp_header_name
 * IMPORT sanitize_csp_header
 * IMPORT sanitize_attributes
 * IMPORTS_END
 */

/*
 * Functions that sanitize elements. The script blocking measures are, when
 * possible, going to be applied together with CSP rules injected using
 * webRequest.
 */

/* Prefix put (followed by a hyphen) in front of the name of a disabled attribute. */
const blocked = "blocked";

/*
 * Disable attribute `attr' of `node' by renaming it to `blocked-<attr>'.
 *
 * Disabling attributes this way allows them to still be relatively easily
 * accessed in case they contain some useful data. If `blocked-<attr>' already
 * exists, its value is first moved to `blocked-blocked-<attr>' (and so on
 * along the chain), so that no previously blocked value gets overwritten.
 *
 * `node' has to provide hasAttribute(), getAttribute(), setAttribute() and
 * removeAttribute(). Nothing is returned.
 */
function block_attribute(node, attr)
{
    /*
     * Find the first unused name in the chain `attr', `blocked-attr',
     * `blocked-blocked-attr', ... The parts are joined with a hyphen so that
     * the names probed here are exactly the names written below.
     */
    const construct_name = [attr];
    while (node.hasAttribute(construct_name.join("-")))
	construct_name.unshift(blocked);

    /* Move every value one step up the chain, starting from the far end. */
    while (construct_name.length > 1) {
	construct_name.shift();
	const name = construct_name.join("-");
	node.setAttribute(`${blocked}-${name}`, node.getAttribute(name));
    }

    node.removeAttribute(attr);
}

/*
 * Neutralise a <script> element unless the policy allows scripts: its current
 * `type' attribute gets blocked and a new `type' of "application/json" is set
 * in its place.
 */
function sanitize_script(script, policy)
{
    if (!policy.allow) {
	block_attribute(script, "type");
	script.setAttribute("type", "application/json");
    }
}

/*
 * Unless the policy allows scripts, put a Content-Security-Policy <meta>
 * element at the very beginning of `head'. The rule text comes from
 * csp_rule() called with the policy's nonce.
 */
function inject_csp(head, policy)
{
    if (!policy.allow) {
	const csp_meta = document.createElement("meta");
	csp_meta.setAttribute("http-equiv", "Content-Security-Policy");

	const rule = csp_rule(policy.nonce);
	csp_meta.setAttribute("content", rule);

	/* Elements carrying this flag are skipped by handle_added_node(). */
	csp_meta.hachette_ignore = true;

	head.prepend(csp_meta);
    }
}

/*
 * Deal with a CSP rule delivered through a <meta http-equiv="..."> element.
 *
 * Elements whose `http-equiv' value is_csp_header_name() does not recognise
 * are left alone. For recognised ones either the `content' attribute gets
 * blocked and replaced with the output of sanitize_csp_header(), or - when
 * scripts are not allowed and the name is only recognised with the second
 * argument of is_csp_header_name() being true - the `http-equiv' attribute
 * itself gets blocked.
 * NOTE(review): the second argument of is_csp_header_name() presumably
 * decides whether "report-only" header names count - confirm in its module.
 */
function sanitize_http_equiv_csp_rule(meta, policy)
{
    const header_name = meta.getAttribute("http-equiv");
    const scripts_blocked = !policy.allow;

    if (!is_csp_header_name(header_name, scripts_blocked))
	return;

    if (scripts_blocked && !is_csp_header_name(header_name, false)) {
	block_attribute(meta, "http-equiv");
	return;
    }

    /* Keep the original rule under a blocked name, then write a safe one. */
    const original_rule = meta.getAttribute("content");
    block_attribute(meta, "content");
    if (!original_rule)
	return;

    const sanitized = sanitize_csp_header({value: original_rule}, policy);
    meta.setAttribute("content", sanitized.value);
}

/*
 * Apply the sanitizer matching the node's tag name (SCRIPT, HEAD and META have
 * dedicated ones) and then, unless the policy allows scripts, sanitize the
 * node's attributes as well.
 */
function sanitize_node(node, policy)
{
    switch (node.tagName) {
    case "SCRIPT":
	sanitize_script(node, policy);
	break;
    case "HEAD":
	inject_csp(node, policy);
	break;
    case "META":
	sanitize_http_equiv_csp_rule(node, policy);
	break;
    }

    if (policy.allow)
	return;

    sanitize_attributes(node, policy);
}

/* Used by finish_node() to serialize nodes that yield no `outerHTML'. */
const serializer = new XMLSerializer();

/*
 * Emit the opening part of `node' through data.writer and flag the node as
 * started. Does nothing at all when there is no writer.
 *
 * The opening part is obtained by serializing a childless copy of the node
 * whose text is set to the marker data.uniq and taking what data.uniq_reg
 * captures in front of that marker.
 */
function start_node(node, data)
{
    if (data.writer) {
	node.hachette_started = true;

	const shallow_copy = node.cloneNode(false);
	shallow_copy.textContent = data.uniq;

	const match = data.uniq_reg.exec(shallow_copy.outerHTML);
	data.writer(match[1]);
    }
}

/*
 * Remove `node' together with the whole chain of nodes reachable from it
 * through `hachette_last_added' and, when a writer is present, emit their
 * HTML. The deepest node of the chain is handled first, `node' itself last.
 *
 * A node flagged as started only gets its closing part written (what
 * data.uniq_reg captures after the marker data.uniq); any other node is
 * written whole. `node' may be undefined, in which case nothing happens.
 */
function finish_node(node, data)
{
    /* Walk down the chain until a missing link is met. */
    const chain = [];
    for (let current = node; current; current = current.hachette_last_added)
	chain.push(current);

    for (const finished of chain.reverse()) {
	finished.remove();

	if (!data.writer)
	    continue;

	let html;
	if (finished.hachette_started) {
	    finished.textContent = data.uniq;
	    html = data.uniq_reg.exec(finished.outerHTML)[2];
	} else {
	    /* Nodes without `outerHTML' go through the serializer. */
	    html = finished.outerHTML || serializer.serializeToString(finished);
	}

	data.writer(html);
    }
}

/*
 * Process one node that has just been added under `true_parent'.
 *
 * Important! Due to some weirdness node.parentElement is not always correct
 * under Chromium. That is why the parent is passed in explicitly and node
 * relations are tracked manually through the `hachette_last_added' property.
 */
function handle_added_node(node, true_parent, data)
{
    if ([node, true_parent].some(n => n.hachette_ignore))
	return;

    if (!true_parent.hachette_started)
	start_node(true_parent, data);

    sanitize_node(node, data.policy);

    if (data.node_eater)
	data.node_eater(node, true_parent);

    /*
     * Flush the child that was added before this one and remember the new
     * one as the latest child of this parent.
     */
    const previous = true_parent.hachette_last_added;
    finish_node(previous, data);
    true_parent.hachette_last_added = node;
}

/*
 * Feed every node added by the given mutation records to handle_added_node().
 */
function handle_mutation(mutations, data)
{
    /*
     * Chromium: for an unknown reason the record's target is not always the
     * same as node.parentElement. The target is the correct one, so it is what
     * gets passed on as the parent.
     */
    for (const {target, addedNodes} of mutations) {
	for (const added of addedNodes)
	    handle_added_node(added, target, data);
    }
}

/*
 * Wrap up the on-the-fly modification: handle the mutation records still
 * queued in the observer, finish the root element and finally disconnect the
 * observer.
 */
function finish_processing(data)
{
    const {observer} = data;

    const pending = observer.takeRecords();
    handle_mutation(pending, data);

    finish_node(data.html_element, data);

    observer.disconnect();
}

/*
 * Start sanitizing the document rooted at `html_element' while it loads.
 *
 * html_element - root element to start with and to observe
 * policy       - policy object handed to the sanitizing functions
 * consumers    - object whose properties get merged into the shared state;
 *                `writer' and `node_eater' are the ones read in this file
 *
 * Returns a function that, when called, processes the remaining mutation
 * records, finishes the root element and disconnects the observer.
 */
function modify_on_the_fly(html_element, policy, consumers)
{
    const uniq = gen_nonce();
    /*
     * The marker splits a serialized element into its opening and closing
     * part. The `s' flag makes `.' match line terminators too; without it the
     * expression fails to match (and exec() yields null) for any element
     * whose serialized form contains a newline, e.g. inside an attribute.
     */
    const uniq_reg = new RegExp(`^(.*)${uniq}(.*)$`, "s");
    const data = {policy, html_element, uniq, uniq_reg, ...consumers};

    start_node(data.html_element, data);

    const observer = new MutationObserver(m => handle_mutation(m, data));
    observer.observe(data.html_element, {
	attributes: true,
	childList: true,
	subtree: true
    });

    data.observer = observer;

    return () => finish_processing(data);
}

/*
 * EXPORTS_START
 * EXPORT modify_on_the_fly
 * EXPORTS_END
 */