summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--TODOS.org4
-rw-r--r--background/ResponseHandler.mjs257
-rw-r--r--background/ResponseMetaData.mjs107
-rw-r--r--background/ResponseProcessor.mjs145
-rw-r--r--background/main.mjs2
-rw-r--r--background/script_injector.mjs122
6 files changed, 2 insertions, 635 deletions
diff --git a/TODOS.org b/TODOS.org
index 2e3c210..72aa13c 100644
--- a/TODOS.org
+++ b/TODOS.org
@@ -9,7 +9,6 @@ TODO:
those scripts are already free, as is often the case)
- also, find some convenient way to automatically re-add "on" events ("onclick" & friends)
- add some good, sane error handling
-- implement whitelisting (LibreJS had some code doing it, but we'll see if it's of any use for us) -- CRUCIAL
- make it possible to export page settings in some format -- CRUCIAL
- get rid of those warnings and exceptions in console (many are not even related to this extension;
who invented this thing?) (gecko-only)
@@ -18,7 +17,6 @@ TODO:
settings and settings for pages that currently happen to
live in iframes
- add some nice styling to settings page
-- clean up the remnants of LibreJS
- stop using modules (not available on all browsers) -- CRUCIAL
- use non-predictable value in place of "myext-allow", utilizing hashes -- CRUCIAL
- rename the extension to something good
@@ -41,6 +39,8 @@ TODO:
- rearrange files in extension, add some mechanism to build the extension
DONE:
+- clean up the remnants of LibreJS -- DONE 2021-05-12
+- implement whitelisting -- DONE 2021-05-07
- find way to also block scripts in non-http pages (e.g. file://) -- DONE 2021-05-07 (via content scripts, may not be perfect)
(NoScript seems to be doing this through CSP)
- make page settings easily and conveniently editable in a separate window/tab -- DONE 2021-05-05
diff --git a/background/ResponseHandler.mjs b/background/ResponseHandler.mjs
deleted file mode 100644
index 6b979e6..0000000
--- a/background/ResponseHandler.mjs
+++ /dev/null
@@ -1,257 +0,0 @@
-/**
-* GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript.
-* *
-* Copyright (C) 2017, 2018 Nathan Nichols
-* Copyright (C) 2018 Ruben Rodriguez <ruben@gnu.org>
-*
-* This file is part of GNU LibreJS.
-*
-* GNU LibreJS is free software: you can redistribute it and/or modify
-* it under the terms of the GNU General Public License as published by
-* the Free Software Foundation, either version 3 of the License, or
-* (at your option) any later version.
-*
-* GNU LibreJS is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU General Public License for more details.
-*
-* You should have received a copy of the GNU General Public License
-* along with GNU LibreJS. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/**
-* This listener gets called as soon as we've got all the HTTP headers, can guess
-* content type and encoding, and therefore correctly parse HTML documents
-* and external script inclusions in search of crappy JavaScript
-*/
-
-import inject_scripts from './script_injector.mjs';
-import {ResponseProcessor} from './ResponseProcessor.mjs';
-
-"use strict";
-
-var ResponseHandler = {
- /**
- * Enforce white/black lists for url/site early (hashes will be handled later)
- */
- async pre(response) {
- // TODO: reimplement blacklisting/whitelisting later
- if (true) return ResponseProcessor.CONTINUE;
-
- let {request} = response;
- let {url, type, tabId, frameId, documentUrl} = request;
-
- let fullUrl = url;
- url = ListStore.urlItem(url);
- let site = ListStore.siteItem(url);
-
- let blacklistedSite = ListManager.siteMatch(site, blacklist);
- let blacklisted = blacklistedSite || blacklist.contains(url);
- let topUrl = type === "sub_frame" && request.frameAncestors && request.frameAncestors.pop() || documentUrl;
-
- if (blacklisted) {
- if (type === "script") {
- // this shouldn't happen, because we intercept earlier in blockBlacklistedScripts()
- return ResponseProcessor.REJECT;
- }
- if (type === "main_frame") { // we handle the page change here too, since we won't call edit_html()
- activityReports[tabId] = await createReport({url: fullUrl, tabId});
- // Go on without parsing the page: it was explicitly blacklisted
- let reason = blacklistedSite
- ? `All ${blacklistedSite} blacklisted by user`
- : "Address blacklisted by user";
- await addReportEntry(tabId, url, {"blacklisted": [blacklistedSite || url, reason], url: fullUrl});
- }
- // use CSP to restrict JavaScript execution in the page
- request.responseHeaders.unshift({
- name: `Content-security-policy`,
- value: `script-src 'none';`
- });
- return {responseHeaders: request.responseHeaders}; // let's skip the inline script parsing, since we block by CSP
- } else {
-
- let whitelistedSite = ListManager.siteMatch(site, whitelist);
- let whitelisted = response.whitelisted = whitelistedSite || whitelist.contains(url);
- if (type === "script") {
- if (whitelisted) {
- // accept the script and stop processing
- addReportEntry(tabId, url, {url: topUrl,
- "whitelisted": [url, whitelistedSite ? `User whitelisted ${whitelistedSite}` : "Whitelisted by user"]});
- return ResponseProcessor.ACCEPT;
- } else {
- let scriptInfo = await ExternalLicenses.check({url: fullUrl, tabId, frameId, documentUrl});
- if (scriptInfo) {
- let verdict, ret;
- let msg = scriptInfo.toString();
- if (scriptInfo.free) {
- verdict = "accepted";
- ret = ResponseProcessor.ACCEPT;
- } else {
- verdict = "blocked";
- ret = ResponseProcessor.REJECT;
- }
- addReportEntry(tabId, url, {url, [verdict]: [url, msg]});
- return ret;
- }
- }
- }
- }
- // it's a page (it's too early to report) or an unknown script:
- // let's keep processing
- return ResponseProcessor.CONTINUE;
- },
-
- /**
- * Here we do the heavylifting, analyzing unknown scripts
- */
- async post(response) {
- let {type} = response.request;
- return await handle_html(response, response.whitelisted);
- }
-}
-
-/**
-* Serializes HTMLDocument objects including the root element and
-* the DOCTYPE declaration
-*/
-function doc2HTML(doc) {
- let s = doc.documentElement.outerHTML;
- if (doc.doctype) {
- let dt = doc.doctype;
- let sDoctype = `<!DOCTYPE ${dt.name || "html"}`;
- if (dt.publicId) sDoctype += ` PUBLIC "${dt.publicId}"`;
- if (dt.systemId) sDoctype += ` "${dt.systemId}"`;
- s = `${sDoctype}>\n${s}`;
- }
- return s;
-}
-
-/**
-* Shortcut to create a correctly namespaced DOM HTML elements
-*/
-function createHTMLElement(doc, name) {
- return doc.createElementNS("http://www.w3.org/1999/xhtml", name);
-}
-
-/**
-* Replace any element with a span having the same content (useful to force
-* NOSCRIPT elements to visible the same way as NoScript and uBlock do)
-*/
-function forceElement(doc, element) {
- let replacement = createHTMLElement(doc, "span");
- replacement.innerHTML = element.innerHTML;
- element.replaceWith(replacement);
- return replacement;
-}
-
-/**
-* Forces displaying any element having the "data-librejs-display" attribute and
-* <noscript> elements on pages where LibreJS disabled inline scripts (unless
-* they have the "data-librejs-nodisplay" attribute).
-*/
-function forceNoscriptElements(doc) {
- let shown = 0;
- // inspired by NoScript's onScriptDisabled.js
- for (let noscript of doc.querySelectorAll("noscript:not([data-librejs-nodisplay])")) {
- let replacement = forceElement(doc, noscript);
- // emulate meta-refresh
- let meta = replacement.querySelector('meta[http-equiv="refresh"]');
- if (meta) {
- refresh = true;
- doc.head.appendChild(meta);
- }
- shown++;
- }
- return shown;
-}
-
-/**
-* Forces displaying any element having the "data-librejs-display" attribute and
-* <noscript> elements on pages where LibreJS disabled inline scripts (unless
-* they have the "data-librejs-nodisplay" attribute).
-*/
-function showConditionalElements(doc) {
- let shown = 0;
- for (let element of document.querySelectorAll("[data-librejs-display]")) {
- forceElement(doc, element);
- shown++;
- }
- return shown;
-}
-
-/**
-
-* Reads/changes the HTML of a page and the scripts within it.
-*/
-async function editHtml(html, documentUrl, tabId, frameId, whitelisted){
-
- var parser = new DOMParser();
- var html_doc = parser.parseFromString(html, "text/html");
-
- if (whitelisted) { // don't bother rewriting
- return null;
- }
-
- var scripts = html_doc.scripts;
-
- let findLine = finder => finder.test(html) && html.substring(0, finder.lastIndex).split(/\n/).length || 0;
-
- let modified = false;
- // Deal with intrinsic events
- let intrinsicFinder = /<[a-z][^>]*\b(on\w+|href\s*=\s*['"]?javascript:)/gi;
- for (let element of html_doc.all) {
- let line = -1;
- for (let attr of element.attributes) {
- let {name, value} = attr;
- value = value.trim();
- if (name.startsWith("on")) {
- attr.value = "console.log(\"event script blocked by myext\")";
- } else if (name === "href" && value.toLowerCase().startsWith("javascript:")){
- if (line === -1) {
- line = findLine(intrinsicFinder);
- }
- try {
- attr.value = `view-source:${documentUrl}#line${line}`;
- } catch (e) {
- console.error(e);
- }
- }
- }
- }
-
- let modifiedInline = false;
- let scriptFinder = /<script\b/ig;
- for(let i = 0, len = scripts.length; i < len; i++) {
- let script = scripts[i];
- let line = findLine(scriptFinder);
- if (!script.src) {
- script.textContent = `//script blocked, you can examine it at view-source:${documentUrl}#line${line}`;
- } else {
- let src = script.src;
- script.removeAttribute("src");
- script.setAttribute("blocked-src", src);
- script.textContent = "//script blocked";
- }
- }
-
- showConditionalElements(html_doc);
- forceNoscriptElements(html_doc);
- await inject_scripts(documentUrl, html_doc);
- return doc2HTML(html_doc);
-}
-
-/**
-* Here we handle html document responses
-*/
-async function handle_html(response, whitelisted) {
- let {text, request} = response;
- let {url, tabId, frameId, type} = request;
- if (type === "main_frame") {
- //activityReports[tabId] = await createReport({url, tabId});
- //updateBadge(tabId);
- }
- return await editHtml(text, url, tabId, frameId, whitelisted);
-}
-
-export default ResponseHandler;
diff --git a/background/ResponseMetaData.mjs b/background/ResponseMetaData.mjs
deleted file mode 100644
index 345fc54..0000000
--- a/background/ResponseMetaData.mjs
+++ /dev/null
@@ -1,107 +0,0 @@
-/**
-* GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript.
-*
-* Copyright (C) 2018 Giorgio Maone <giorgio@maone.net>
-*
-* This file is part of GNU LibreJS.
-*
-* GNU LibreJS is free software: you can redistribute it and/or modify
-* it under the terms of the GNU General Public License as published by
-* the Free Software Foundation, either version 3 of the License, or
-* (at your option) any later version.
-*
-* GNU LibreJS is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU General Public License for more details.
-*
-* You should have received a copy of the GNU General Public License
-* along with GNU LibreJS. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/**
- This class parses HTTP response headers to extract both the
- MIME Content-type and the character set to be used, if specified,
- to parse textual data through a decoder.
-*/
-
-"use strict";
-
-const BOM = [0xEF, 0xBB, 0xBF];
-const DECODER_PARAMS = {stream: true};
-
-class ResponseMetaData {
- constructor(request) {
- let {responseHeaders} = request;
- this.headers = {};
- for (let h of responseHeaders) {
- if (/^\s*Content-(Type|Disposition)\s*$/i.test(h.name)) {
- let propertyName = h.name.split("-")[1].trim();
- propertyName = `content${propertyName.charAt(0).toUpperCase()}${propertyName.substring(1).toLowerCase()}`;
- this[propertyName] = h.value;
- this.headers[propertyName] = h;
- }
- }
- this.computedCharset = "";
- }
-
- get charset() {
- let charset = "";
- if (this.contentType) {
- let m = this.contentType.match(/;\s*charset\s*=\s*(\S+)/);
- if (m) {
- charset = m[1];
- }
- }
- Object.defineProperty(this, "charset", { value: charset, writable: false, configurable: true });
- return this.computedCharset = charset;
- }
-
- decode(data) {
- let charset = this.charset;
- let decoder = this.createDecoder();
- let text = decoder.decode(data, DECODER_PARAMS);
- if (!charset && /html/i.test(this.contentType)) {
- // missing HTTP charset, sniffing in content...
-
- if (data[0] === BOM[0] && data[1] === BOM[1] && data[2] === BOM[2]) {
- // forced UTF-8, nothing to do
- return text;
- }
-
- // let's try figuring out the charset from <meta> tags
- let parser = new DOMParser();
- let doc = parser.parseFromString(text, "text/html");
- let meta = doc.querySelectorAll('meta[charset], meta[http-equiv="content-type"], meta[content*="charset"]');
- for (let m of meta) {
- charset = m.getAttribute("charset");
- if (!charset) {
- let match = m.getAttribute("content").match(/;\s*charset\s*=\s*([\w-]+)/i)
- if (match) charset = match[1];
- }
- if (charset) {
- decoder = this.createDecoder(charset, null);
- if (decoder) {
- this.computedCharset = charset;
- return decoder.decode(data, DECODER_PARAMS);
- }
- }
- }
- }
- return text;
- }
-
- createDecoder(charset = this.charset, def = "latin1") {
- if (charset) {
- try {
- return new TextDecoder(charset);
- } catch (e) {
- console.error(e);
- }
- }
- return def ? new TextDecoder(def) : null;
- }
-};
-ResponseMetaData.UTF8BOM = new Uint8Array(BOM);
-
-export default ResponseMetaData;
diff --git a/background/ResponseProcessor.mjs b/background/ResponseProcessor.mjs
deleted file mode 100644
index 85c2655..0000000
--- a/background/ResponseProcessor.mjs
+++ /dev/null
@@ -1,145 +0,0 @@
-/**
-* GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript.
-*
-* Copyright (C) 2018 Giorgio Maone <giorgio@maone.net>
-*
-* This file is part of GNU LibreJS.
-*
-* GNU LibreJS is free software: you can redistribute it and/or modify
-* it under the terms of the GNU General Public License as published by
-* the Free Software Foundation, either version 3 of the License, or
-* (at your option) any later version.
-*
-* GNU LibreJS is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU General Public License for more details.
-*
-* You should have received a copy of the GNU General Public License
-* along with GNU LibreJS. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/**
- An abstraction layer over the StreamFilter API, allowing its clients to process
- only the "interesting" HTML and script requests and leaving the other alone
-*/
-
-import ResponseMetaData from './ResponseMetaData.mjs';
-import browser from '/common/browser.mjs';
-
-let listeners = new WeakMap();
-let webRequestEvent = browser.webRequest.onHeadersReceived;
-
-class ResponseProcessor {
-
- static install(handler, types = ["main_frame", "sub_frame"]) {
- if (listeners.has(handler)) return false;
- let listener =
- async request => await new ResponseTextFilter(request).process(handler);
- listeners.set(handler, listener);
- webRequestEvent.addListener(
- listener,
- {urls: ["<all_urls>"], types},
- ["blocking", "responseHeaders"]
- );
- return true;
- }
-
- static uninstall(handler) {
- let listener = listeners.get(handler);
- if (listener) {
- webRequestEvent.removeListener(listener);
- }
- }
-}
-
-Object.assign(ResponseProcessor, {
- // control flow values to be returned by handler.pre() callbacks
- ACCEPT: {},
- REJECT: {cancel: true},
- CONTINUE: null
-});
-
-class ResponseTextFilter {
- constructor(request) {
- this.request = request;
- let {type, statusCode} = request;
- let md = this.metaData = new ResponseMetaData(request);
- this.canProcess = // we want to process html documents and scripts only
- (statusCode < 300 || statusCode >= 400) && // skip redirections
- !md.disposition && // skip forced downloads
- (type === "script" || /\bhtml\b/i.test(md.contentType));
- }
-
- async process(handler) {
- if (!this.canProcess) return ResponseProcessor.ACCEPT;
- let {metaData, request} = this;
- let response = {request, metaData}; // we keep it around allowing callbacks to store state
- if (typeof handler.pre === "function") {
- let res = await handler.pre(response);
- if (res) return res;
- if (handler.post) handler = handler.post;
- if (typeof handler !== "function") return ResponseProcessor.ACCEPT;
- }
-
- return ResponseProcessor.ACCEPT;
-
- let {requestId, responseHeaders} = request;
- let filter = browser.webRequest.filterResponseData(requestId);
- let buffer = [];
-
- filter.ondata = event => {
- buffer.push(event.data);
- };
-
- filter.onstop = async event => {
- // concatenate chunks
- let size = buffer.reduce((sum, chunk, n) => sum + chunk.byteLength, 0)
- let allBytes = new Uint8Array(size);
- let pos = 0;
- for (let chunk of buffer) {
- allBytes.set(new Uint8Array(chunk), pos);
- pos += chunk.byteLength;
- }
- buffer = null; // allow garbage collection
- if (allBytes.indexOf(0) !== -1) {
- console.debug("Warning: zeroes in bytestream, probable cached encoding mismatch.", request);
- if (request.type === "script") {
- console.debug("It's a script, trying to refetch it.");
- response.text = await (await fetch(request.url, {cache: "reload", credentials: "include"})).text();
- } else {
- console.debug("It's a %s, trying to decode it as UTF-16.", request.type);
- response.text = new TextDecoder("utf-16be").decode(allBytes, {stream: true});
- }
- } else {
- response.text = metaData.decode(allBytes);
- }
- let editedText = null;
- try {
- editedText = await handler(response);
- } catch(e) {
- console.error(e);
- }
- if (editedText !== null) {
- // we changed the content, let's re-encode
- let encoded = new TextEncoder().encode(editedText);
- // pre-pending the UTF-8 BOM will force the charset per HTML 5 specs
- allBytes = new Uint8Array(encoded.byteLength + 3);
- allBytes.set(ResponseMetaData.UTF8BOM, 0); // UTF-8 BOM
- allBytes.set(encoded, 3);
- }
- filter.write(allBytes);
- filter.close();
- }
-
- return ResponseProcessor.ACCEPT;
- }
-}
-
-/* The following was originally in Storage.js */
-function url_item(url) {
- let queryPos = url.indexOf("?");
- return queryPos === -1 ? url : url.substring(0, queryPos);
-}
-
-export {ResponseProcessor, url_item};
diff --git a/background/main.mjs b/background/main.mjs
index 5d32d98..ee14c74 100644
--- a/background/main.mjs
+++ b/background/main.mjs
@@ -10,8 +10,6 @@
import {TYPE_PREFIX} from '/common/stored_types.mjs';
import get_storage from './storage.mjs';
-//import {ResponseProcessor} from './ResponseProcessor.mjs';
-//import ResponseHandler from './ResponseHandler.mjs';
import start_storage_server from './storage_server.mjs';
import start_page_actions_server from './page_actions_server.mjs';
import start_policy_smuggler from './policy_smuggler.mjs';
diff --git a/background/script_injector.mjs b/background/script_injector.mjs
deleted file mode 100644
index 3298a43..0000000
--- a/background/script_injector.mjs
+++ /dev/null
@@ -1,122 +0,0 @@
-/**
-* Myext script injector
-*
-* Copyright (C) 2021 Wojtek Kosior
-*
-* Dual-licensed under:
-* - 0BSD license
-* - GPLv3 or (at your option) any later version
-*/
-
-import {TYPE_PREFIX} from '/common/stored_types.mjs';
-import sha256 from './sha256.mjs';
-import {url_item} from './ResponseProcessor.mjs';
-import get_storage from './storage.mjs';
-
-"use strict";
-
-var storage;
-
-function ajax_callback()
-{
- if (this.readyState == 4)
- this.resolve_callback(this);
-}
-
-function initiate_ajax_request(resolve, method, url)
-{
- var xhttp = new XMLHttpRequest();
- xhttp.resolve_callback = resolve;
- xhttp.onreadystatechange = ajax_callback;
- xhttp.open(method, url, true);
- xhttp.send();
-}
-
-function make_ajax_request(method, url)
-{
- return new Promise((resolve, reject) =>
- initiate_ajax_request(resolve, method, url));
-}
-
-async function fetch_remote_script(script_data)
-{
- try {
- let xhttp = await make_ajax_request("GET", script_data.url);
- if (xhttp.status === 200) {
- let computed_hash = sha256(xhttp.responseText);
- if (computed_hash !== script_data.hash) {
- console.log(`Bad hash for ${script_data.url}\n got ${computed_hash} instead of ${script_data.hash}`);
- return;
- }
- return xhttp.responseText;
- } else {
- console.log("script not fetched: " + script_data.url);
- return;
- }
- } catch (e) {
- console.log(e);
- }
-}
-
-async function get_script_text(script_name)
-{
- try {
- let script_data = storage.get(TYPE_PREFIX.SCRIPT, script_name);
- if (script_data === undefined) {
- console.log(`missing data for ${script_name}`);
- return;
- }
- let script_text = script_data.text;
- if (!script_text)
- script_text = await fetch_remote_script(script_data);
- return script_text;
- } catch (e) {
- console.log(e);
- }
-}
-
-// TODO: parallelize script fetching
-// TODO: guard against infinite recursion
-
-async function inject_scripts_rec(components, doc)
-{
- for (let [prefix, name] of components) {
- if (prefix === TYPE_PREFIX.BUNDLE) {
- var bundle = storage.get(TYPE_PREFIX.BUNDLE, name);
-
- if (bundle === undefined) {
- console.log(`no bundle in storage for key ${elem_key}`);
- continue;
- }
- await inject_scripts_rec(bundle, doc);
- } else {
- let script_text = await get_script_text(name,);
- if (script_text === undefined)
- continue;
-
- let script = doc.createElement("script");
- script.textContent = script_text;
- doc.body.appendChild(script);
- }
- }
-}
-
-async function inject_scripts(url, doc)
-{
- storage = await get_storage();
-
- url = url_item(url);
-
- let components = storage.get(TYPE_PREFIX.PAGE, url);
-
- if (components === undefined) {
- console.log(`got nothing for ${url}`);
- return
- } else {
- console.log(`got ${components.length} component(s) for ${url}`);
- }
-
- await inject_scripts_rec(components, doc);
-}
-
-export default inject_scripts;