From 68d557db3eb0c050ff0027429922202ef97c1fe1 Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Fri, 14 May 2021 18:18:51 +0200 Subject: support wildcard urls in settings --- TODOS.org | 4 +- background/background.html | 1 + background/page_actions_server.js | 6 +- background/policy_smuggler.js | 8 ++- background/settings_query.js | 130 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 142 insertions(+), 7 deletions(-) create mode 100644 background/settings_query.js diff --git a/TODOS.org b/TODOS.org index b389d06..bf449c9 100644 --- a/TODOS.org +++ b/TODOS.org @@ -4,8 +4,7 @@ TODO: or indirectly in a bag should be included multiple times or once - make it possible to provide backup urls for remote scripts - make it possible to cache remote scripts -- make it possible to use wildcards or something similar to be able to assign a script set to -- CRUCIAL - a set of domains or to a set of possible queries at a url +- optimize url querying - make it possible to automatically download page's served scripts and save them (of course, this by itself -- CRUCIAL would give little benefit, but it will make it easy to modify this set of scripts - useful, if some of those scripts are already free, as is often the case) @@ -42,6 +41,7 @@ TODO: - perform never-ending refactoring of already-written code DONE: +- make it possible to use wildcard urls in settings -- DONE 2021-05-14 - port to gecko-based browsers -- DONE 2021-05-13 - find a way to additionally block all other scripts using CSP -- DONE 2021-05-13 - only allow a single injection payload for page -- DONE 2021-05-13 diff --git a/background/background.html b/background/background.html index c6621e2..aac8ec8 100644 --- a/background/background.html +++ b/background/background.html @@ -12,6 +12,7 @@ + diff --git a/background/page_actions_server.js b/background/page_actions_server.js index 2dfcf9a..f96b659 100644 --- a/background/page_actions_server.js +++ b/background/page_actions_server.js @@ -16,15 +16,16 @@ const 
CONNECTION_TYPE = window.CONNECTION_TYPE; const browser = window.browser; const listen_for_connection = window.listen_for_connection; - const url_item = window.url_item; const sha256 = window.sha256; + const get_query_best = window.get_query_best; var storage; + var query_best; var handler; function send_scripts(url, port) { - let settings = storage.get(TYPE_PREFIX.PAGE, url_item(url)); + let [pattern, settings] = query_best(url); if (settings === undefined) return; @@ -142,6 +143,7 @@ async function start() { storage = await get_storage(); + query_best = await get_query_best(); listen_for_connection(CONNECTION_TYPE.PAGE_ACTIONS, new_connection); } diff --git a/background/policy_smuggler.js b/background/policy_smuggler.js index 180dcb7..ad8d565 100644 --- a/background/policy_smuggler.js +++ b/background/policy_smuggler.js @@ -16,8 +16,10 @@ const browser = window.browser; const url_item = window.url_item; const gen_unique = window.gen_unique; + const get_query_best = window.get_query_best; var storage; + var query_best; function redirect(request) { @@ -35,12 +37,11 @@ return {cancel : false}; } - let settings = storage.get(TYPE_PREFIX.PAGE, url); - console.log("got", storage.get(TYPE_PREFIX.PAGE, url), "for", url); + let [pattern, settings] = query_best(url); if (settings === undefined || !settings.allow) return {cancel : false}; - second_target = (first_target || "") + (second_target || "") + second_target = (first_target || "") + (second_target || ""); console.log(["redirecting", request.url, (base_url + unique + second_target)]); @@ -52,6 +53,7 @@ async function start() { storage = await get_storage(); + query_best = await get_query_best(); chrome.webRequest.onBeforeRequest.addListener( redirect, diff --git a/background/settings_query.js b/background/settings_query.js new file mode 100644 index 0000000..9101913 --- /dev/null +++ b/background/settings_query.js @@ -0,0 +1,130 @@ +/** +* Myext querying page settings with regard to wildcard records +* +* Copyright 
(C) 2021 Wojtek Kosior
+*
+* Dual-licensed under:
+*   - 0BSD license
+*   - GPLv3 or (at your option) any later version
+*/
+
+"use strict";
+
+(() => {
+    const make_once = window.make_once;
+    const get_storage = window.get_storage;
+
+    /* Settings storage handle; assigned by init() before query() is used. */
+    var storage;
+
+    /*
+     * Pieces of the regex used to split a url. The capture groups are, in
+     * order: protocol (including "://"), domain, path segments, query string.
+     * Anything from "#" onwards is matched but not captured.
+     */
+    const proto_re = "[a-zA-Z]*://";
+    const domain_re = "[^/?#]+";
+    const segments_re = "/[^?#]*";
+    const query_re = "\\?[^#]*";
+
+    /* Built once here rather than on every query() call. */
+    const url_regex = new RegExp(
+        `^(${proto_re})(${domain_re})(${segments_re})?(${query_re})?#?.*$`
+    );
+
+    /*
+     * Await get_storage(), remember the result in `storage` and then return
+     * `fun` unchanged, so that a query function is only handed out once the
+     * storage it reads from is available.
+     */
+    async function init(fun)
+    {
+        storage = await get_storage();
+
+        return fun;
+    }
+
+    /*
+     * Look up page settings for `url`, trying the url itself as well as the
+     * wildcard patterns that cover it.
+     *
+     * Every candidate pattern is proto + domain pattern + path pattern.
+     * Domain patterns go from the full domain towards shorter suffixes and
+     * path patterns from the full path towards shorter prefixes; candidates
+     * are looked up in storage in exactly that order. The query string and
+     * the fragment of `url` take no part in matching.
+     *
+     * Returns:
+     *   - when `multiple` is falsy: [pattern, settings] for the first
+     *     candidate that has settings stored, or [undefined, undefined] when
+     *     there is none or `url` could not be parsed;
+     *   - when `multiple` is truthy: an array of [pattern, settings] pairs
+     *     for all candidates that have settings stored ([] when `url` could
+     *     not be parsed).
+     */
+    // TODO: also support urls with specified ports as well as `data:' urls
+    function query(url, multiple)
+    {
+        let regex_match = url_regex.exec(url);
+        if (regex_match === null) {
+            console.log("bad url format", url);
+            return multiple ? [] : [undefined, undefined];
+        }
+
+        /* Group 0 (whole match) and group 4 (query string) are not needed. */
+        let [, proto, domain, segments] = regex_match;
+
+        domain = domain.split(".");
+        let has_trailing_slash =
+            segments !== undefined && segments.endsWith("/");
+        /*
+         * "/a/b/" becomes ["", "a", "b"]: empty segments are dropped and a
+         * single leading "" is put back so that join("/") yields "/a/b".
+         */
+        segments = (segments || "").split("/").filter(s => s !== "");
+        segments.unshift("");
+
+        let matched = [];
+
+        /* d_slice is the number of leading domain labels that are dropped. */
+        for (let d_slice = 0; d_slice < domain.length; d_slice++) {
+            let domain_part = domain.slice(d_slice).join(".");
+            let domain_wildcards = [];
+            /* No wildcard: only for the complete domain. */
+            if (d_slice === 0)
+                domain_wildcards.push("");
+            /* "*.": exactly one label was dropped. */
+            if (d_slice === 1)
+                domain_wildcards.push("*.");
+            /* "**.": at least one label was dropped. */
+            if (d_slice > 0)
+                domain_wildcards.push("**.");
+            /* "***.": tried for every suffix, the complete domain included. */
+            domain_wildcards.push("***.");
+
+            for (let domain_wildcard of domain_wildcards) {
+                let domain_pattern = domain_wildcard + domain_part;
+
+                /* s_slice is the number of leading path segments kept. */
+                for (let s_slice = segments.length; s_slice > 0; s_slice--) {
+                    let segments_part = segments.slice(0, s_slice).join("/");
+                    let segments_wildcards = [];
+                    if (s_slice === segments.length) {
+                        if (has_trailing_slash)
+                            segments_wildcards.push("/");
+                        segments_wildcards.push("");
+                    }
+                    /*
+                     * In the three checks below a wildcard is skipped when
+                     * the only dropped segment is literally that wildcard's
+                     * text: prefix + wildcard would then spell the complete
+                     * path, which has already been tried at
+                     * s_slice === segments.length, and with `multiple` set
+                     * the same pattern would be reported twice.
+                     */
+                    if (s_slice === segments.length - 1) {
+                        if (segments[s_slice] !== "*")
+                            segments_wildcards.push("/*");
+                    }
+                    if (s_slice < segments.length &&
+                        (segments[s_slice] !== "**" ||
+                         s_slice < segments.length - 1))
+                        segments_wildcards.push("/**");
+                    /*
+                     * The bound has to be segments.length - 1 here as well;
+                     * comparing against segments.length makes the condition
+                     * always true and the check above a no-op. When
+                     * s_slice === segments.length, segments[s_slice] is
+                     * undefined, so "/***" is still tried for the full path.
+                     */
+                    if (segments[s_slice] !== "***" ||
+                        s_slice < segments.length - 1)
+                        segments_wildcards.push("/***");
+
+                    for (let segments_wildcard of segments_wildcards) {
+                        let segments_pattern =
+                            segments_part + segments_wildcard;
+
+                        let pattern = proto + domain_pattern + segments_pattern;
+                        /*
+                         * NOTE(review): TYPE_PREFIX is resolved as a global
+                         * here, while make_once and get_storage are read from
+                         * `window' explicitly above -- confirm this is
+                         * intended.
+                         */
+                        let settings = storage.get(TYPE_PREFIX.PAGE, pattern);
+
+                        if (settings === undefined)
+                            continue;
+
+                        if (!multiple)
+                            return [pattern, settings];
+
+                        matched.push([pattern, settings]);
+                    }
+                }
+            }
+        }
+
+        return multiple ? matched : [undefined, undefined];
+    }
+
+    /* First match for `url': [pattern, settings], both undefined if none. */
+    function query_best(url)
+    {
+        return query(url, false);
+    }
+
+    /* All matches for `url': array of [pattern, settings] pairs. */
+    function query_all(url)
+    {
+        return query(url, true);
+    }
+
+    /*
+     * Both getters go through init(), so awaiting them yields a query function
+     * whose storage is already set. make_once is defined elsewhere;
+     * presumably it makes the wrapped initializer run only once -- confirm.
+     */
+    window.get_query_best = make_once(() => init(query_best));
+    window.get_query_all = make_once(() => init(query_all));
+})();
-- 
cgit v1.2.3