diff options
-rw-r--r-- | TODOS.org | 4 | ||||
-rw-r--r-- | background/ResponseHandler.mjs | 257 | ||||
-rw-r--r-- | background/ResponseMetaData.mjs | 107 | ||||
-rw-r--r-- | background/ResponseProcessor.mjs | 145 | ||||
-rw-r--r-- | background/main.mjs | 2 | ||||
-rw-r--r-- | background/script_injector.mjs | 122 |
6 files changed, 2 insertions, 635 deletions
@@ -9,7 +9,6 @@ TODO: those scripts are already free, as is often the case) - also, find some convenient way to automatically re-add "on" events ("onclick" & friends) - add some good, sane error handling -- implement whitelisting (LibreJS had some code doing it, but we'll see if it's of any use for us) -- CRUCIAL - make it possible to export page settings in some format -- CRUCIAL - get rid of those warnings and exceptions in console (many are not even related to this extension; who invented this thing?) (gecko-only) @@ -18,7 +17,6 @@ TODO: settings and settings for pages that currently happen to live in iframes - add some nice styling to settings page -- clean up the remnants of LibreJS - stop using modules (not available on all browsers) -- CRUCIAL - use non-predictable value in place of "myext-allow", utilizing hashes -- CRUCIAL - rename the extension to something good @@ -41,6 +39,8 @@ TODO: - rearrange files in extension, add some mechanism to build the extension DONE: +- clean up the remnants of LibreJS -- DONE 2021-05-12 +- implement whitelisting -- DONE 2021-05-07 - find way to also block scripts in non-http pages (e.g. file://) -- DONE 2021-05-07 (via content scripts, may not be perfect) (NoScript seems to be doing this through CSP) - make page settings easily and conveniently editable in a separate window/tab -- DONE 2021-05-05 diff --git a/background/ResponseHandler.mjs b/background/ResponseHandler.mjs deleted file mode 100644 index 6b979e6..0000000 --- a/background/ResponseHandler.mjs +++ /dev/null @@ -1,257 +0,0 @@ -/** -* GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript. -* * -* Copyright (C) 2017, 2018 Nathan Nichols -* Copyright (C) 2018 Ruben Rodriguez <ruben@gnu.org> -* -* This file is part of GNU LibreJS. -* -* GNU LibreJS is free software: you can redistribute it and/or modify -* it under the terms of the GNU General Public License as published by -* the Free Software Foundation, either version 3 of the License, or -* (at your option) any later version. -* -* GNU LibreJS is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with GNU LibreJS. If not, see <http://www.gnu.org/licenses/>. -*/ - -/** -* This listener gets called as soon as we've got all the HTTP headers, can guess -* content type and encoding, and therefore correctly parse HTML documents -* and external script inclusions in search of crappy JavaScript -*/ - -import inject_scripts from './script_injector.mjs'; -import {ResponseProcessor} from './ResponseProcessor.mjs'; - -"use strict"; - -var ResponseHandler = { - /** - * Enforce white/black lists for url/site early (hashes will be handled later) - */ - async pre(response) { - // TODO: reimplement blacklisting/whitelisting later - if (true) return ResponseProcessor.CONTINUE; - - let {request} = response; - let {url, type, tabId, frameId, documentUrl} = request; - - let fullUrl = url; - url = ListStore.urlItem(url); - let site = ListStore.siteItem(url); - - let blacklistedSite = ListManager.siteMatch(site, blacklist); - let blacklisted = blacklistedSite || blacklist.contains(url); - let topUrl = type === "sub_frame" && request.frameAncestors && request.frameAncestors.pop() || documentUrl; - - if (blacklisted) { - if (type === "script") { - // this shouldn't happen, because we intercept earlier in blockBlacklistedScripts() - return ResponseProcessor.REJECT; - } - if (type === "main_frame") { // we handle the page change here too, since we won't call edit_html() - activityReports[tabId] = await createReport({url: fullUrl, tabId}); - // Go on without parsing the page: it was explicitly blacklisted - let reason = blacklistedSite - ? `All ${blacklistedSite} blacklisted by user` - : "Address blacklisted by user"; - await addReportEntry(tabId, url, {"blacklisted": [blacklistedSite || url, reason], url: fullUrl}); - } - // use CSP to restrict JavaScript execution in the page - request.responseHeaders.unshift({ - name: `Content-security-policy`, - value: `script-src 'none';` - }); - return {responseHeaders: request.responseHeaders}; // let's skip the inline script parsing, since we block by CSP - } else { - - let whitelistedSite = ListManager.siteMatch(site, whitelist); - let whitelisted = response.whitelisted = whitelistedSite || whitelist.contains(url); - if (type === "script") { - if (whitelisted) { - // accept the script and stop processing - addReportEntry(tabId, url, {url: topUrl, - "whitelisted": [url, whitelistedSite ? `User whitelisted ${whitelistedSite}` : "Whitelisted by user"]}); - return ResponseProcessor.ACCEPT; - } else { - let scriptInfo = await ExternalLicenses.check({url: fullUrl, tabId, frameId, documentUrl}); - if (scriptInfo) { - let verdict, ret; - let msg = scriptInfo.toString(); - if (scriptInfo.free) { - verdict = "accepted"; - ret = ResponseProcessor.ACCEPT; - } else { - verdict = "blocked"; - ret = ResponseProcessor.REJECT; - } - addReportEntry(tabId, url, {url, [verdict]: [url, msg]}); - return ret; - } - } - } - } - // it's a page (it's too early to report) or an unknown script: - // let's keep processing - return ResponseProcessor.CONTINUE; - }, - - /** - * Here we do the heavylifting, analyzing unknown scripts - */ - async post(response) { - let {type} = response.request; - return await handle_html(response, response.whitelisted); - } -} - -/** -* Serializes HTMLDocument objects including the root element and -* the DOCTYPE declaration -*/ -function doc2HTML(doc) { - let s = doc.documentElement.outerHTML; - if (doc.doctype) { - let dt = doc.doctype; - let sDoctype = `<!DOCTYPE ${dt.name || "html"}`; - if (dt.publicId) sDoctype += ` PUBLIC "${dt.publicId}"`; - if (dt.systemId) sDoctype += ` "${dt.systemId}"`; - s = `${sDoctype}>\n${s}`; - } - return s; -} - -/** -* Shortcut to create a correctly namespaced DOM HTML elements -*/ -function createHTMLElement(doc, name) { - return doc.createElementNS("http://www.w3.org/1999/xhtml", name); -} - -/** -* Replace any element with a span having the same content (useful to force -* NOSCRIPT elements to visible the same way as NoScript and uBlock do) -*/ -function forceElement(doc, element) { - let replacement = createHTMLElement(doc, "span"); - replacement.innerHTML = element.innerHTML; - element.replaceWith(replacement); - return replacement; -} - -/** -* Forces displaying any element having the "data-librejs-display" attribute and -* <noscript> elements on pages where LibreJS disabled inline scripts (unless -* they have the "data-librejs-nodisplay" attribute). -*/ -function forceNoscriptElements(doc) { - let shown = 0; - // inspired by NoScript's onScriptDisabled.js - for (let noscript of doc.querySelectorAll("noscript:not([data-librejs-nodisplay])")) { - let replacement = forceElement(doc, noscript); - // emulate meta-refresh - let meta = replacement.querySelector('meta[http-equiv="refresh"]'); - if (meta) { - refresh = true; - doc.head.appendChild(meta); - } - shown++; - } - return shown; -} - -/** -* Forces displaying any element having the "data-librejs-display" attribute and -* <noscript> elements on pages where LibreJS disabled inline scripts (unless -* they have the "data-librejs-nodisplay" attribute). -*/ -function showConditionalElements(doc) { - let shown = 0; - for (let element of document.querySelectorAll("[data-librejs-display]")) { - forceElement(doc, element); - shown++; - } - return shown; -} - -/** - -* Reads/changes the HTML of a page and the scripts within it. -*/ -async function editHtml(html, documentUrl, tabId, frameId, whitelisted){ - - var parser = new DOMParser(); - var html_doc = parser.parseFromString(html, "text/html"); - - if (whitelisted) { // don't bother rewriting - return null; - } - - var scripts = html_doc.scripts; - - let findLine = finder => finder.test(html) && html.substring(0, finder.lastIndex).split(/\n/).length || 0; - - let modified = false; - // Deal with intrinsic events - let intrinsicFinder = /<[a-z][^>]*\b(on\w+|href\s*=\s*['"]?javascript:)/gi; - for (let element of html_doc.all) { - let line = -1; - for (let attr of element.attributes) { - let {name, value} = attr; - value = value.trim(); - if (name.startsWith("on")) { - attr.value = "console.log(\"event script blocked by myext\")"; - } else if (name === "href" && value.toLowerCase().startsWith("javascript:")){ - if (line === -1) { - line = findLine(intrinsicFinder); - } - try { - attr.value = `view-source:${documentUrl}#line${line}`; - } catch (e) { - console.error(e); - } - } - } - } - - let modifiedInline = false; - let scriptFinder = /<script\b/ig; - for(let i = 0, len = scripts.length; i < len; i++) { - let script = scripts[i]; - let line = findLine(scriptFinder); - if (!script.src) { - script.textContent = `//script blocked, you can examine it at view-source:${documentUrl}#line${line}`; - } else { - let src = script.src; - script.removeAttribute("src"); - script.setAttribute("blocked-src", src); - script.textContent = "//script blocked"; - } - } - - showConditionalElements(html_doc); - forceNoscriptElements(html_doc); - await inject_scripts(documentUrl, html_doc); - return doc2HTML(html_doc); -} - -/** -* Here we handle html document responses -*/ -async function handle_html(response, whitelisted) { - let {text, request} = response; - let {url, tabId, frameId, type} = request; - if (type === "main_frame") { - //activityReports[tabId] = await createReport({url, tabId}); - //updateBadge(tabId); - } - return await editHtml(text, url, tabId, frameId, whitelisted); -} - -export default ResponseHandler; diff --git a/background/ResponseMetaData.mjs b/background/ResponseMetaData.mjs deleted file mode 100644 index 345fc54..0000000 --- a/background/ResponseMetaData.mjs +++ /dev/null @@ -1,107 +0,0 @@ -/** -* GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript. -* -* Copyright (C) 2018 Giorgio Maone <giorgio@maone.net> -* -* This file is part of GNU LibreJS. -* -* GNU LibreJS is free software: you can redistribute it and/or modify -* it under the terms of the GNU General Public License as published by -* the Free Software Foundation, either version 3 of the License, or -* (at your option) any later version. -* -* GNU LibreJS is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with GNU LibreJS. If not, see <http://www.gnu.org/licenses/>. -*/ - -/** - This class parses HTTP response headers to extract both the - MIME Content-type and the character set to be used, if specified, - to parse textual data through a decoder. -*/ - -"use strict"; - -const BOM = [0xEF, 0xBB, 0xBF]; -const DECODER_PARAMS = {stream: true}; - -class ResponseMetaData { - constructor(request) { - let {responseHeaders} = request; - this.headers = {}; - for (let h of responseHeaders) { - if (/^\s*Content-(Type|Disposition)\s*$/i.test(h.name)) { - let propertyName = h.name.split("-")[1].trim(); - propertyName = `content${propertyName.charAt(0).toUpperCase()}${propertyName.substring(1).toLowerCase()}`; - this[propertyName] = h.value; - this.headers[propertyName] = h; - } - } - this.computedCharset = ""; - } - - get charset() { - let charset = ""; - if (this.contentType) { - let m = this.contentType.match(/;\s*charset\s*=\s*(\S+)/); - if (m) { - charset = m[1]; - } - } - Object.defineProperty(this, "charset", { value: charset, writable: false, configurable: true }); - return this.computedCharset = charset; - } - - decode(data) { - let charset = this.charset; - let decoder = this.createDecoder(); - let text = decoder.decode(data, DECODER_PARAMS); - if (!charset && /html/i.test(this.contentType)) { - // missing HTTP charset, sniffing in content... - - if (data[0] === BOM[0] && data[1] === BOM[1] && data[2] === BOM[2]) { - // forced UTF-8, nothing to do - return text; - } - - // let's try figuring out the charset from <meta> tags - let parser = new DOMParser(); - let doc = parser.parseFromString(text, "text/html"); - let meta = doc.querySelectorAll('meta[charset], meta[http-equiv="content-type"], meta[content*="charset"]'); - for (let m of meta) { - charset = m.getAttribute("charset"); - if (!charset) { - let match = m.getAttribute("content").match(/;\s*charset\s*=\s*([\w-]+)/i) - if (match) charset = match[1]; - } - if (charset) { - decoder = this.createDecoder(charset, null); - if (decoder) { - this.computedCharset = charset; - return decoder.decode(data, DECODER_PARAMS); - } - } - } - } - return text; - } - - createDecoder(charset = this.charset, def = "latin1") { - if (charset) { - try { - return new TextDecoder(charset); - } catch (e) { - console.error(e); - } - } - return def ? new TextDecoder(def) : null; - } -}; -ResponseMetaData.UTF8BOM = new Uint8Array(BOM); - -export default ResponseMetaData; diff --git a/background/ResponseProcessor.mjs b/background/ResponseProcessor.mjs deleted file mode 100644 index 85c2655..0000000 --- a/background/ResponseProcessor.mjs +++ /dev/null @@ -1,145 +0,0 @@ -/** -* GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript. -* -* Copyright (C) 2018 Giorgio Maone <giorgio@maone.net> -* -* This file is part of GNU LibreJS. -* -* GNU LibreJS is free software: you can redistribute it and/or modify -* it under the terms of the GNU General Public License as published by -* the Free Software Foundation, either version 3 of the License, or -* (at your option) any later version. -* -* GNU LibreJS is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with GNU LibreJS. If not, see <http://www.gnu.org/licenses/>. -*/ - -/** - An abstraction layer over the StreamFilter API, allowing its clients to process - only the "interesting" HTML and script requests and leaving the other alone -*/ - -import ResponseMetaData from './ResponseMetaData.mjs'; -import browser from '/common/browser.mjs'; - -let listeners = new WeakMap(); -let webRequestEvent = browser.webRequest.onHeadersReceived; - -class ResponseProcessor { - - static install(handler, types = ["main_frame", "sub_frame"]) { - if (listeners.has(handler)) return false; - let listener = - async request => await new ResponseTextFilter(request).process(handler); - listeners.set(handler, listener); - webRequestEvent.addListener( - listener, - {urls: ["<all_urls>"], types}, - ["blocking", "responseHeaders"] - ); - return true; - } - - static uninstall(handler) { - let listener = listeners.get(handler); - if (listener) { - webRequestEvent.removeListener(listener); - } - } -} - -Object.assign(ResponseProcessor, { - // control flow values to be returned by handler.pre() callbacks - ACCEPT: {}, - REJECT: {cancel: true}, - CONTINUE: null -}); - -class ResponseTextFilter { - constructor(request) { - this.request = request; - let {type, statusCode} = request; - let md = this.metaData = new ResponseMetaData(request); - this.canProcess = // we want to process html documents and scripts only - (statusCode < 300 || statusCode >= 400) && // skip redirections - !md.disposition && // skip forced downloads - (type === "script" || /\bhtml\b/i.test(md.contentType)); - } - - async process(handler) { - if (!this.canProcess) return ResponseProcessor.ACCEPT; - let {metaData, request} = this; - let response = {request, metaData}; // we keep it around allowing callbacks to store state - if (typeof handler.pre === "function") { - let res = await handler.pre(response); - if (res) return res; - if (handler.post) handler = handler.post; - if (typeof handler !== "function") return ResponseProcessor.ACCEPT; - } - - return ResponseProcessor.ACCEPT; - - let {requestId, responseHeaders} = request; - let filter = browser.webRequest.filterResponseData(requestId); - let buffer = []; - - filter.ondata = event => { - buffer.push(event.data); - }; - - filter.onstop = async event => { - // concatenate chunks - let size = buffer.reduce((sum, chunk, n) => sum + chunk.byteLength, 0) - let allBytes = new Uint8Array(size); - let pos = 0; - for (let chunk of buffer) { - allBytes.set(new Uint8Array(chunk), pos); - pos += chunk.byteLength; - } - buffer = null; // allow garbage collection - if (allBytes.indexOf(0) !== -1) { - console.debug("Warning: zeroes in bytestream, probable cached encoding mismatch.", request); - if (request.type === "script") { - console.debug("It's a script, trying to refetch it."); - response.text = await (await fetch(request.url, {cache: "reload", credentials: "include"})).text(); - } else { - console.debug("It's a %s, trying to decode it as UTF-16.", request.type); - response.text = new TextDecoder("utf-16be").decode(allBytes, {stream: true}); - } - } else { - response.text = metaData.decode(allBytes); - } - let editedText = null; - try { - editedText = await handler(response); - } catch(e) { - console.error(e); - } - if (editedText !== null) { - // we changed the content, let's re-encode - let encoded = new TextEncoder().encode(editedText); - // pre-pending the UTF-8 BOM will force the charset per HTML 5 specs - allBytes = new Uint8Array(encoded.byteLength + 3); - allBytes.set(ResponseMetaData.UTF8BOM, 0); // UTF-8 BOM - allBytes.set(encoded, 3); - } - filter.write(allBytes); - filter.close(); - } - - return ResponseProcessor.ACCEPT; - } -} - -/* The following was originally in Storage.js */ -function url_item(url) { - let queryPos = url.indexOf("?"); - return queryPos === -1 ? url : url.substring(0, queryPos); -} - -export {ResponseProcessor, url_item}; diff --git a/background/main.mjs b/background/main.mjs index 5d32d98..ee14c74 100644 --- a/background/main.mjs +++ b/background/main.mjs @@ -10,8 +10,6 @@ import {TYPE_PREFIX} from '/common/stored_types.mjs'; import get_storage from './storage.mjs'; -//import {ResponseProcessor} from './ResponseProcessor.mjs'; -//import ResponseHandler from './ResponseHandler.mjs'; import start_storage_server from './storage_server.mjs'; import start_page_actions_server from './page_actions_server.mjs'; import start_policy_smuggler from './policy_smuggler.mjs'; diff --git a/background/script_injector.mjs b/background/script_injector.mjs deleted file mode 100644 index 3298a43..0000000 --- a/background/script_injector.mjs +++ /dev/null @@ -1,122 +0,0 @@ -/** -* Myext script injector -* -* Copyright (C) 2021 Wojtek Kosior -* -* Dual-licensed under: -* - 0BSD license -* - GPLv3 or (at your option) any later version -*/ - -import {TYPE_PREFIX} from '/common/stored_types.mjs'; -import sha256 from './sha256.mjs'; -import {url_item} from './ResponseProcessor.mjs'; -import get_storage from './storage.mjs'; - -"use strict"; - -var storage; - -function ajax_callback() -{ - if (this.readyState == 4) - this.resolve_callback(this); -} - -function initiate_ajax_request(resolve, method, url) -{ - var xhttp = new XMLHttpRequest(); - xhttp.resolve_callback = resolve; - xhttp.onreadystatechange = ajax_callback; - xhttp.open(method, url, true); - xhttp.send(); -} - -function make_ajax_request(method, url) -{ - return new Promise((resolve, reject) => - initiate_ajax_request(resolve, method, url)); -} - -async function fetch_remote_script(script_data) -{ - try { - let xhttp = await make_ajax_request("GET", script_data.url); - if (xhttp.status === 200) { - let computed_hash = sha256(xhttp.responseText); - if (computed_hash !== script_data.hash) { - console.log(`Bad hash for ${script_data.url}\n got ${computed_hash} instead of ${script_data.hash}`); - return; - } - return xhttp.responseText; - } else { - console.log("script not fetched: " + script_data.url); - return; - } - } catch (e) { - console.log(e); - } -} - -async function get_script_text(script_name) -{ - try { - let script_data = storage.get(TYPE_PREFIX.SCRIPT, script_name); - if (script_data === undefined) { - console.log(`missing data for ${script_name}`); - return; - } - let script_text = script_data.text; - if (!script_text) - script_text = await fetch_remote_script(script_data); - return script_text; - } catch (e) { - console.log(e); - } -} - -// TODO: parallelize script fetching -// TODO: guard against infinite recursion - -async function inject_scripts_rec(components, doc) -{ - for (let [prefix, name] of components) { - if (prefix === TYPE_PREFIX.BUNDLE) { - var bundle = storage.get(TYPE_PREFIX.BUNDLE, name); - - if (bundle === undefined) { - console.log(`no bundle in storage for key ${elem_key}`); - continue; - } - await inject_scripts_rec(bundle, doc); - } else { - let script_text = await get_script_text(name,); - if (script_text === undefined) - continue; - - let script = doc.createElement("script"); - script.textContent = script_text; - doc.body.appendChild(script); - } - } -} - -async function inject_scripts(url, doc) -{ - storage = await get_storage(); - - url = url_item(url); - - let components = storage.get(TYPE_PREFIX.PAGE, url); - - if (components === undefined) { - console.log(`got nothing for ${url}`); - return - } else { - console.log(`got ${components.length} component(s) for ${url}`); - } - - await inject_scripts_rec(components, doc); -} - -export default inject_scripts; |