aboutsummaryrefslogtreecommitdiff
path: root/background/ResponseHandler.mjs
blob: 6b979e6166c024a622bb1998797b9b35f66124bf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
/**
* GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript.
* *
* Copyright (C) 2017, 2018 Nathan Nichols
* Copyright (C) 2018 Ruben Rodriguez <ruben@gnu.org>
*
* This file is part of GNU LibreJS.
*
* GNU LibreJS is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNU LibreJS is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU LibreJS.  If not, see <http://www.gnu.org/licenses/>.
*/

/**
*	This listener gets called as soon as we've got all the HTTP headers, can guess
* content type and encoding, and therefore correctly parse HTML documents
* and external script inclusions in search of crappy JavaScript
*/

import inject_scripts from './script_injector.mjs';
import {ResponseProcessor} from './ResponseProcessor.mjs';

"use strict";

/**
 * Pair of hooks (`pre` / `post`) applied to an intercepted HTTP response.
 * `pre` returns one of the ResponseProcessor verdict constants; `post`
 * delegates to handle_html() to rewrite the response body.
 */
const ResponseHandler = {
	/**
	 * Enforce white/black lists for url/site early (hashes will be handled later).
	 *
	 * List enforcement is currently switched off: the method unconditionally
	 * resolves to ResponseProcessor.CONTINUE, so every response goes on to
	 * post(). Everything below the early return is unreachable and is kept
	 * only as the starting point for the TODO.
	 *
	 * @param {object} response - intercepted response; the disabled code reads
	 *   `response.request` and writes `response.whitelisted`
	 * @returns {Promise<*>} currently always ResponseProcessor.CONTINUE
	 */
	async pre(response) {
		// TODO: reimplement blacklisting/whitelisting later
		if (true) return ResponseProcessor.CONTINUE;

		// ---- Unreachable until the TODO above is resolved ----
		// NOTE(review): ListStore, ListManager, blacklist, whitelist,
		// activityReports, createReport, addReportEntry and ExternalLicenses
		// are not imported in the visible part of this module; they must be
		// brought into scope before this code is re-enabled.
		let {request} = response;
		let {url, type, tabId, frameId, documentUrl} = request;

		let fullUrl = url;
		url = ListStore.urlItem(url);
		let site = ListStore.siteItem(url);

		let blacklistedSite = ListManager.siteMatch(site, blacklist);
		let blacklisted = blacklistedSite || blacklist.contains(url);
		// NOTE(review): pop() mutates request.frameAncestors, and its elements
		// are presumably objects rather than URL strings - confirm before reuse.
		let topUrl = type === "sub_frame" && request.frameAncestors && request.frameAncestors.pop() || documentUrl;

		if (blacklisted) {
			if (type === "script") {
				// this shouldn't happen, because we intercept earlier in blockBlacklistedScripts()
				return ResponseProcessor.REJECT;
			}
			if (type === "main_frame") { // we handle the page change here too, since we won't call edit_html()
				activityReports[tabId] = await createReport({url: fullUrl, tabId});
				// Go on without parsing the page: it was explicitly blacklisted
				let reason = blacklistedSite
					? `All ${blacklistedSite} blacklisted by user`
					: "Address blacklisted by user";
				await addReportEntry(tabId, url, {"blacklisted": [blacklistedSite || url, reason], url: fullUrl});
			}
			// use CSP to restrict JavaScript execution in the page
			request.responseHeaders.unshift({
				name: `Content-security-policy`,
				value: `script-src 'none';`
			});
			return {responseHeaders: request.responseHeaders}; // let's skip the inline script parsing, since we block by CSP
		} else {
			let whitelistedSite = ListManager.siteMatch(site, whitelist);
			let whitelisted = response.whitelisted = whitelistedSite || whitelist.contains(url);
			if (type === "script") {
				if (whitelisted) {
					// accept the script and stop processing
					addReportEntry(tabId, url, {url: topUrl,
						"whitelisted": [url, whitelistedSite ? `User whitelisted ${whitelistedSite}` : "Whitelisted by user"]});
					return ResponseProcessor.ACCEPT;
				} else {
					let scriptInfo = await ExternalLicenses.check({url: fullUrl, tabId, frameId, documentUrl});
					if (scriptInfo) {
						let verdict, ret;
						let msg = scriptInfo.toString();
						if (scriptInfo.free) {
							verdict = "accepted";
							ret = ResponseProcessor.ACCEPT;
						} else {
							verdict = "blocked";
							ret = ResponseProcessor.REJECT;
						}
						addReportEntry(tabId, url, {url, [verdict]: [url, msg]});
						return ret;
					}
				}
			}
		}
		// it's a page (it's too early to report) or an unknown script:
		//  let's keep processing
		return ResponseProcessor.CONTINUE;
	},

	/**
	 * Here we do the heavy lifting: the response is handed to handle_html()
	 * together with the `whitelisted` flag stored on it.
	 *
	 * Because pre() never reaches the code that sets `response.whitelisted`,
	 * that flag is whatever the caller put on the response (normally undefined).
	 *
	 * @param {object} response - intercepted response
	 * @returns {Promise<*>} whatever handle_html() resolves to
	 */
	async post(response) {
		return await handle_html(response, response.whitelisted);
	}
};

/**
 * Serializes an HTMLDocument to a string: the DOCTYPE declaration (when the
 * document has one) on its own line, followed by the root element's outer
 * HTML.
 *
 * The DOCTYPE follows the standard serialization rules: a public identifier
 * is introduced by `PUBLIC`; a system identifier without a public one is
 * introduced by `SYSTEM`. Omitting that keyword yields a malformed
 * declaration.
 *
 * @param {Document} doc - document to serialize
 * @returns {string} the serialized markup
 */
function doc2HTML(doc) {
	const markup = doc.documentElement.outerHTML;
	const dt = doc.doctype;
	if (!dt) {
		return markup;
	}
	let sDoctype = `<!DOCTYPE ${dt.name || "html"}`;
	if (dt.publicId) {
		sDoctype += ` PUBLIC "${dt.publicId}"`;
	} else if (dt.systemId) {
		// A lone system identifier needs its own keyword.
		sDoctype += " SYSTEM";
	}
	if (dt.systemId) {
		sDoctype += ` "${dt.systemId}"`;
	}
	return `${sDoctype}>\n${markup}`;
}

/**
 * Creates an element in the XHTML namespace, so that it is a proper HTML
 * element whatever kind of document `doc` is.
 *
 * @param {Document} doc - document that will own the element
 * @param {string} name - tag name of the element to create
 * @returns {Element} the newly created (still detached) element
 */
function createHTMLElement(doc, name) {
  const XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml";
  const created = doc.createElementNS(XHTML_NAMESPACE, name);
  return created;
}

/**
 * Swaps an element for a <span> carrying the same inner markup (useful to
 * make NOSCRIPT elements visible, the same way NoScript and uBlock do).
 *
 * @param {Document} doc - document owning `element`
 * @param {Element} element - element to replace in place
 * @returns {Element} the <span> now standing where `element` was
 */
function forceElement(doc, element) {
	const span = createHTMLElement(doc, "span");
	// Copy the markup first, then put the span in the element's place.
	span.innerHTML = element.innerHTML;
	element.replaceWith(span);
	return span;
}

/**
 * Makes <noscript> content visible: every <noscript> element that does not
 * carry the "data-librejs-nodisplay" attribute is replaced by a <span> with
 * the same content (see forceElement()).
 *
 * If the replacement contains a `<meta http-equiv="refresh">`, that meta
 * element is moved into the document's <head>.
 *
 * @param {Document} doc - document to modify in place
 * @returns {number} how many <noscript> elements were replaced
 */
function forceNoscriptElements(doc) {
	let shown = 0;
	// inspired by NoScript's onScriptDisabled.js
	for (const noscript of doc.querySelectorAll("noscript:not([data-librejs-nodisplay])")) {
		const replacement = forceElement(doc, noscript);
		// emulate meta-refresh
		// (the former `refresh = true` assignment targeted an undeclared,
		// never-read identifier and threw a ReferenceError in this strict-mode
		// module, aborting the whole rewrite)
		const meta = replacement.querySelector('meta[http-equiv="refresh"]');
		if (meta) {
			doc.head.appendChild(meta);
		}
		shown++;
	}
	return shown;
}

/**
 * Forces displaying every element of `doc` that has the
 * "data-librejs-display" attribute, by replacing it with a <span> holding the
 * same content (see forceElement()).
 *
 * @param {Document} doc - document to search and modify in place
 * @returns {number} how many elements were replaced
 */
function showConditionalElements(doc) {
	let shown = 0;
	// Query the document being rewritten, not the global `document` of the
	// context this module runs in.
	for (const element of doc.querySelectorAll("[data-librejs-display]")) {
		forceElement(doc, element);
		shown++;
	}
	return shown;
}

/**
 * Reads/changes the HTML of a page and the scripts within it.
 *
 * For a non-whitelisted page the markup is parsed and then:
 *  - the value of every attribute whose name starts with "on" is replaced
 *    with a harmless console.log() call;
 *  - every `href` starting with "javascript:" is pointed at a view-source:
 *    URL of the page instead;
 *  - every <script> gets its content replaced with a comment, and an
 *    external script additionally has its `src` moved to `blocked-src`;
 *  - showConditionalElements(), forceNoscriptElements() and inject_scripts()
 *    are applied to the document.
 *
 * @param {string} html - markup of the page
 * @param {string} documentUrl - URL of the page, used in view-source: links
 *   and passed to inject_scripts()
 * @param {number} tabId - currently unused
 * @param {number} frameId - currently unused
 * @param {*} whitelisted - truthy when the page must not be rewritten
 * @returns {Promise<?string>} the rewritten markup, or null when
 *   `whitelisted` is truthy (nothing to rewrite)
 */
async function editHtml(html, documentUrl, tabId, frameId, whitelisted) {
    // Don't bother rewriting (nor even parsing) whitelisted pages.
    if (whitelisted) {
        return null;
    }

    const html_doc = new DOMParser().parseFromString(html, "text/html");

    // Returns the 1-based line on which the next match of `finder` ends, or 0
    // when there is no further match. `finder` must be a /g regex: each call
    // resumes from its lastIndex, so successive calls walk through the
    // successive occurrences in the source text.
    const findLine = finder => finder.test(html) && html.substring(0, finder.lastIndex).split(/\n/).length || 0;

    // Deal with intrinsic events
    const intrinsicFinder = /<[a-z][^>]*\b(on\w+|href\s*=\s*['"]?javascript:)/gi;
    for (const element of html_doc.all) {
        // Looked up lazily, at most once per element.
        let line = -1;
        for (const attr of element.attributes) {
            const {name} = attr;
            const value = attr.value.trim();
            if (name.startsWith("on")) {
                attr.value = "console.log(\"event script blocked by myext\")";
            } else if (name === "href" && value.toLowerCase().startsWith("javascript:")) {
                if (line === -1) {
                    line = findLine(intrinsicFinder);
                }
                try {
                    attr.value = `view-source:${documentUrl}#line${line}`;
                } catch (e) {
                    console.error(e);
                }
            }
        }
    }

    const scriptFinder = /<script\b/ig;
    for (const script of html_doc.scripts) {
        // Advance the finder for every script, external ones included, so
        // that later inline scripts keep getting their own line number.
        const line = findLine(scriptFinder);
        if (!script.src) {
            script.textContent = `//script blocked, you can examine it at view-source:${documentUrl}#line${line}`;
        } else {
            const src = script.src;
            script.removeAttribute("src");
            script.setAttribute("blocked-src", src);
            script.textContent = "//script blocked";
        }
    }

    showConditionalElements(html_doc);
    forceNoscriptElements(html_doc);
    await inject_scripts(documentUrl, html_doc);
    return doc2HTML(html_doc);
}

/**
 * Handles an HTML document response by running its body through editHtml().
 *
 * @param {object} response - carries `text` (the body) and `request`
 *   (from which `url`, `tabId` and `frameId` are read)
 * @param {*} whitelisted - forwarded unchanged to editHtml()
 * @returns {Promise<*>} whatever editHtml() resolves to
 */
async function handle_html(response, whitelisted) {
    const {text: html, request: {url: documentUrl, tabId, frameId}} = response;
    // Per-tab report creation and badge update for "main_frame" requests used
    // to happen here; those calls are currently disabled, so the request type
    // makes no difference.
    const edited = await editHtml(html, documentUrl, tabId, frameId, whitelisted);
    return edited;
}

export default ResponseHandler;