summaryrefslogtreecommitdiff
path: root/common
diff options
context:
space:
mode:
Diffstat (limited to 'common')
-rw-r--r--common/misc.js2
-rw-r--r--common/patterns.js75
-rw-r--r--common/patterns_query_tree.js296
-rw-r--r--common/signing.js108
-rw-r--r--common/storage_light.js1
5 files changed, 344 insertions, 138 deletions
diff --git a/common/misc.js b/common/misc.js
index 2d744ea..a960d35 100644
--- a/common/misc.js
+++ b/common/misc.js
@@ -83,7 +83,7 @@ function gen_nonce(length=16)
function make_csp_rule(policy)
{
let rule = "prefetch-src 'none'; script-src-attr 'none';";
- const script_src = policy.has_payload ?
+ const script_src = policy.nonce !== undefined ?
`'nonce-${policy.nonce}'` : "'none'";
rule += ` script-src ${script_src}; script-src-elem ${script_src};`;
return rule;
diff --git a/common/patterns.js b/common/patterns.js
index e198482..7d28dfe 100644
--- a/common/patterns.js
+++ b/common/patterns.js
@@ -41,50 +41,67 @@
* proprietary program, I am not going to enforce this in court.
*/
-const MAX_URL_PATH_LEN = 12;
-const MAX_URL_PATH_CHARS = 255;
-const MAX_DOMAIN_LEN = 7;
-const MAX_DOMAIN_CHARS = 100;
+const MAX = {
+ URL_PATH_LEN: 12,
+ URL_PATH_CHARS: 255,
+ DOMAIN_LEN: 7,
+ DOMAIN_CHARS: 100
+};
const proto_regex = /^(\w+):\/\/(.*)$/;
const user_re = "[^/?#@]+@"
-const domain_re = "[^/?#]+";
+const domain_re = "[.*a-zA-Z0-9-]+";
const path_re = "[^?#]*";
const query_re = "\\??[^#]*";
const http_regex = new RegExp(`^(${domain_re})(${path_re})(${query_re}).*`);
-const file_regex = new RegExp(`^(${path_re}).*`);
+const file_regex = new RegExp(`^(/${path_re}).*`);
const ftp_regex = new RegExp(`^(${user_re})?(${domain_re})(${path_re}).*`);
-function deconstruct_url(url)
+function match_or_throw(regex, string, error_msg)
{
- const proto_match = proto_regex.exec(url);
- if (proto_match === null)
- return undefined;
+ const match = regex.exec(string);
+ if (match === null)
+ throw error_msg;
+ return match;
+}
+
+function deconstruct_url(url, use_limits=true)
+{
+ const max = MAX;
+ if (!use_limits) {
+ for (key in MAX)
+ max[key] = Infinity;
+ }
+
+ const matcher = (re, str) => match_or_throw(re, str, `bad url '${url}'`)
+
+ const proto_match = matcher(proto_regex, url);
const deco = {proto: proto_match[1]};
if (deco.proto === "file") {
- deco.path = file_regex.exec(proto_match[2])[1];
+ deco.path = matcher(file_regex, proto_match[2])[1];
} else if (deco.proto === "ftp") {
- [deco.domain, deco.path] = ftp_regex.exec(proto_match[2]).slice(2, 4);
+ [deco.domain, deco.path] =
+ matcher(ftp_regex, proto_match[2]).slice(2, 4);
+ } else if (deco.proto === "http" || deco.proto === "https") {
+ [deco.domain, deco.path, deco.query] =
+ matcher(http_regex, proto_match[2]).slice(1, 4);
+ deco.domain = deco.domain.toLowerCase();
} else {
- const http_match = http_regex.exec(proto_match[2]);
- if (!http_match)
- return undefined;
- [deco.domain, deco.path, deco.query] = http_match.slice(1, 4);
+ throw `unsupported protocol in url '${url}'`;
}
- const leading_dash = deco.path[0] === "/";
- deco.trailing_dash = deco.path[deco.path.length - 1] === "/";
+ deco.trailing_slash = deco.path[deco.path.length - 1] === "/";
if (deco.domain) {
- if (deco.domain.length > MAX_DOMAIN_CHARS) {
+ if (deco.domain.length > max.DOMAIN_CHARS) {
const idx = deco.domain.indexOf(".", deco.domain.length -
- MAX_DOMAIN_CHARS);
+ max.DOMAIN_CHARS);
if (idx === -1)
deco.domain = [];
else
@@ -93,7 +110,7 @@ function deconstruct_url(url)
deco.domain_truncated = true;
}
- if (deco.path.length > MAX_URL_PATH_CHARS) {
+ if (deco.path.length > max.URL_PATH_CHARS) {
deco.path = deco.path.substring(0, deco.path.lastIndexOf("/"));
deco.path_truncated = true;
}
@@ -101,16 +118,14 @@ function deconstruct_url(url)
if (typeof deco.domain === "string") {
deco.domain = deco.domain.split(".");
- if (deco.domain.splice(0, deco.domain.length - MAX_DOMAIN_LEN).length
+ if (deco.domain.splice(0, deco.domain.length - max.DOMAIN_LEN).length
> 0)
deco.domain_truncated = true;
}
deco.path = deco.path.split("/").filter(s => s !== "");
- if (deco.domain && deco.path.splice(MAX_URL_PATH_LEN).length > 0)
+ if (deco.domain && deco.path.splice(max.URL_PATH_LEN).length > 0)
deco.path_truncated = true;
- if (leading_dash || deco.path.length === 0)
- deco.path.unshift("");
return deco;
}
@@ -132,13 +147,14 @@ function* each_domain_pattern(deco)
function* each_path_pattern(deco)
{
- for (let slice = deco.path.length; slice > 0; slice--) {
- const path_part = deco.path.slice(0, slice).join("/");
+ for (let slice = deco.path.length; slice >= 0; slice--) {
+ const path_part = ["", ...deco.path.slice(0, slice)].join("/");
const path_wildcards = [];
if (slice === deco.path.length && !deco.path_truncated) {
- if (deco.trailing_dash)
+ if (deco.trailing_slash)
yield path_part + "/";
- yield path_part;
+ if (slice > 0 || deco.proto !== "file")
+ yield path_part;
}
if (slice === deco.path.length - 1 && !deco.path_truncated &&
deco.path[slice] !== "*")
@@ -171,5 +187,6 @@ function* each_url_pattern(url)
/*
* EXPORTS_START
* EXPORT each_url_pattern
+ * EXPORT deconstruct_url
* EXPORTS_END
*/
diff --git a/common/patterns_query_tree.js b/common/patterns_query_tree.js
new file mode 100644
index 0000000..49205c5
--- /dev/null
+++ b/common/patterns_query_tree.js
@@ -0,0 +1,296 @@
+/**
+ * This file is part of Haketilo.
+ *
+ * Function: Data structure to query items by URL patterns.
+ *
+ * Copyright (C) 2021 Wojtek Kosior
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * As additional permission under GNU GPL version 3 section 7, you
+ * may distribute forms of that code without the copy of the GNU
+ * GPL normally required by section 4, provided you include this
+ * license notice and, in case of non-source distribution, a URL
+ * through which recipients can access the Corresponding Source.
+ * If you modify file(s) with this exception, you may extend this
+ * exception to your version of the file(s), but you are not
+ * obligated to do so. If you do not wish to do so, delete this
+ * exception statement from your version.
+ *
+ * As a special exception to the GPL, any HTML file which merely
+ * makes function calls to this code, and for that purpose
+ * includes it by reference shall be deemed a separate work for
+ * copyright law purposes. If you modify this code, you may extend
+ * this exception to your version of the code, but you are not
+ * obligated to do so. If you do not wish to do so, delete this
+ * exception statement from your version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
+ * license. Although I request that you do not make use this code in a
+ * proprietary program, I am not going to enforce this in court.
+ */
+
+/*
+ * IMPORTS_START
+ * IMPORT deconstruct_url
+ * IMPORTS_END
+ */
+
+/* "Pattern Tree" is how we refer to the data structure used for querying
+ * Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal
+ * is to make it possible for given URL to quickly retrieve all known patterns
+ * that match it.
+ */
+function empty_node() {
+ return {
+ wildcard_matches: [null, null, null],
+ literal_match: null,
+ children: {}
+ };
+}
+
+function is_empty_node(tree_node) {
+ const children = tree_node.children;
+ for (const key in children) {
+ if (children.hasOwnProperty(key))
+ return false;
+ }
+
+ if (Array.reduce(tree_node.wildcard_matches, (a, b) => b && a !== null, 1))
+ return false;
+
+ return tree_node.literal_match === null;
+}
+
+const is_wildcard = segment => ["*", "**", "***"].lastIndexOf(segment) >= 0;
+
+/*
+ * Yields all matches of this segments sequence against the tree that starts at
+ * this node. Results are produces in order from greatest to lowest pattern
+ * specificity.
+ */
+function* search_sequence(tree_node, segments)
+{
+ const nodes = [tree_node];
+
+ for (const segment of segments) {
+ const next_node = nodes[nodes.length - 1].children[segment];
+ if (next_node === undefined)
+ break;
+
+ nodes.push(next_node);
+ }
+
+ const nsegments = segments.length;
+
+ const conds = [
+ /* literal pattern match */
+ () => nodes.length == nsegments,
+ /* wildcard pattern matches */
+ () => nodes.length + 1 == nsegments && segments[nsegments - 1] != "*",
+ () => nodes.length + 1 < nsegments,
+ () => nodes.length + 1 != nsegments || segments[nsegments - 1] != "***"
+ ];
+
+ while (nodes.length) {
+ const node = nodes.pop();
+ const items = [node.literal_match, ...node.wildcard_matches];
+
+ for (let i = 0; i < 4; i++) {
+ if (items[i] !== null && conds[i]())
+ yield items[i];
+ }
+ }
+}
+
+/*
+ * Make item queryable through (this branch of) the Pattern Tree or remove its
+ * path from there.
+ *
+ * item_modifier should be a function that accepts 1 argument, the item stored
+ * in the tree (or `null` if there wasn't any item there), and returns an item
+ * that should be used in place of the first one. It is also legal for it to
+ * return the same item modifying it first. If it returns `null`, it means the
+ * item should be deleted from the Tree.
+ *
+ * If there was not yet any item associated with the tree path designated by
+ * segments and value returned by item_modifier is not `null`, make the value
+ * queryable by this path.
+ */
+function modify_sequence(tree_node, segments, item_modifier)
+{
+ const nodes = [tree_node];
+ let removed = true;
+
+ for (var current_segment of segments) {
+ wildcards = tree_node.wildcard_matches;
+
+ const child = tree_node.children[current_segment] || empty_node();
+ tree_node.children[current_segment] = child;
+ tree_node = child;
+ nodes.push(tree_node);
+ }
+
+ tree_node.literal_match = item_modifier(tree_node.literal_match);
+ if (tree_node.literal_match !== null)
+ removed = false;
+
+ let i = segments.length;
+
+ if (is_wildcard(current_segment)) {
+ const asterisks = current_segment.length - 1;
+ const wildcards = nodes[i - 1].wildcard_matches;
+ wildcards[asterisks] = item_modifier(wildcards[asterisks]);
+ if (wildcards[asterisks] !== null)
+ removed = false;
+ }
+
+ while (!removed)
+ return;
+
+ while (i > 0) {
+ tree_node = nodes[i--];
+ if (is_empty_node(tree_node))
+ delete nodes[i].children[segments[i]];
+ }
+}
+
+/* Helper function for modify_tree(). */
+function modify_path(tree_node, deco, item_modifier)
+{
+ tree_node = tree_node || empty_node();
+ modify_sequence(tree_node, deco.path, item_modifier);
+ return is_empty_node(tree_node) ? null : tree_node;
+}
+
+/* Helper function for modify_tree(). */
+function modify_domain(tree_node, deco, item_modifier)
+{
+ const path_modifier = branch => modify_path(branch, deco, item_modifier);
+ tree_node = tree_node || empty_node();
+ /* We need an array of domain labels ordered most-significant-first. */
+ modify_sequence(tree_node, [...deco.domain].reverse(), path_modifier);
+ return is_empty_node(tree_node) ? null : tree_node;
+}
+
+/* Helper function for pattern_tree_register() and pattern_tree_deregister(). */
+function modify_tree(patterns_by_proto, pattern, item_modifier)
+{
+ /*
+ * We pass 'false' to disable length limits on URL parts. Length limits are
+ * mostly useful in case of iteration over all patterns matching given URL.
+ * Here we don't do that.
+ */
+ const deco = deconstruct_url(pattern, false);
+
+ let tree_for_proto = patterns_by_proto[deco.proto];
+
+ tree_for_proto = deco.domain === undefined ?
+ modify_path(tree_for_proto, deco, item_modifier) :
+ modify_domain(tree_for_proto, deco, item_modifier);
+
+ patterns_by_proto[deco.proto] = tree_for_proto;
+ if (tree_for_proto === null)
+ delete patterns_by_proto[deco.proto];
+}
+
+/*
+ * Make item queryable through the Pattern Tree that starts with the protocols
+ * dictionary object passed in the first argument.
+ */
+function pattern_tree_register(patterns_by_proto, pattern, item_name, item)
+{
+ const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_';
+ item_name = key_prefix + item_name;
+ const add_item = obj => Object.assign(obj || {}, {[item_name]: item});
+ modify_tree(patterns_by_proto, pattern, add_item);
+}
+
+/* Helper function for pattern_tree_deregister(). */
+function _remove_item(obj, item_name)
+{
+ obj = obj || {};
+ delete obj[item_name];
+ for (const key in obj)
+ return obj;
+ return null;
+}
+
+/*
+ * Remove registered item from the Pattern Tree that starts with the protocols
+ * dictionary object passed in the first argument. The remaining 2 arguments
+ * should be pattern and name that have been earlier passed to
+ * pattern_tree_register().
+ */
+function pattern_tree_deregister(patterns_by_proto, pattern, item_name)
+{
+ const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_';
+ item_name = key_prefix + item_name;
+ const remove_item = obj => _remove_item(obj, item_name);
+ modify_tree(patterns_by_proto, pattern, remove_item);
+}
+
+/*
+ * Yield registered items that match url. Each yielded value is an object with
+ * key being matched item names and values being the items. One such object
+ * shall contain all items matched with given pattern specificity. Objects are
+ * yielded in order from greatest to lowest pattern specificity.
+ */
+function* pattern_tree_search(patterns_by_proto, url)
+{
+ const deco = deconstruct_url(url, false);
+
+ const tree_for_proto = patterns_by_proto[deco.proto] || empty_node();
+ let by_path = [tree_for_proto];
+
+ /* We need an array of domain labels ordered most-significant-first. */
+ if (deco.domain !== undefined)
+ by_path = search_sequence(tree_for_proto, [...deco.domain].reverse());
+
+ for (const path_tree of by_path) {
+ for (const match_obj of search_sequence(path_tree, deco.path)) {
+ let result_obj_slash = null;
+ let result_obj_no_slash = null;
+
+ for (const [key, item] of Object.entries(match_obj)) {
+ if (deco.trailing_slash && key[0] === '/') {
+ result_obj_slash = result_obj_slash || {};
+ result_obj_slash[key.substring(1)] = item;
+ } else if (key[0] !== '/') {
+ result_obj_no_slash = result_obj_no_slash || {};
+ result_obj_no_slash[key.substring(1)] = item;
+ }
+ }
+
+ if (deco.trailing_slash && result_obj_slash)
+ yield result_obj_slash;
+
+ if (result_obj_no_slash)
+ yield result_obj_no_slash;
+ }
+ }
+}
+
+const pattern_tree = {
+ make: () => ({}),
+ register: pattern_tree_register,
+ deregister: pattern_tree_deregister,
+ search: pattern_tree_search
+}
+
+/*
+ * EXPORTS_START
+ * EXPORT pattern_tree
+ * EXPORTS_END
+ */
diff --git a/common/signing.js b/common/signing.js
deleted file mode 100644
index db2aa92..0000000
--- a/common/signing.js
+++ /dev/null
@@ -1,108 +0,0 @@
-/**
- * This file is part of Haketilo.
- *
- * Functions: Operations related to "signing" of data.
- *
- * Copyright (C) 2021 Wojtek Kosior
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * As additional permission under GNU GPL version 3 section 7, you
- * may distribute forms of that code without the copy of the GNU
- * GPL normally required by section 4, provided you include this
- * license notice and, in case of non-source distribution, a URL
- * through which recipients can access the Corresponding Source.
- * If you modify file(s) with this exception, you may extend this
- * exception to your version of the file(s), but you are not
- * obligated to do so. If you do not wish to do so, delete this
- * exception statement from your version.
- *
- * As a special exception to the GPL, any HTML file which merely
- * makes function calls to this code, and for that purpose
- * includes it by reference shall be deemed a separate work for
- * copyright law purposes. If you modify this code, you may extend
- * this exception to your version of the code, but you are not
- * obligated to do so. If you do not wish to do so, delete this
- * exception statement from your version.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
- *
- * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
- * license. Although I request that you do not make use this code in a
- * proprietary program, I am not going to enforce this in court.
- */
-
-/*
- * IMPORTS_START
- * IMPORT sha256
- * IMPORT browser
- * IMPORT is_mozilla
- * IMPORTS_END
- */
-
-/*
- * In order to make certain data synchronously accessible in certain contexts,
- * Haketilo smuggles it in string form in places like cookies, URLs and headers.
- * When using the smuggled data, we first need to make sure it isn't spoofed.
- * For that, we use this pseudo-signing mechanism.
- *
- * Despite what name suggests, no assymetric cryptography is involved, as it
- * would bring no additional benefits and would incur bigger performance
- * overhead. Instead, we hash the string data together with some secret value
- * that is supposed to be known only by this browser instance. Resulting hash
- * sum plays the role of the signature. In the hash we also include current
- * time. This way, even if signed data leaks (which shouldn't happen in the
- * first place), an attacker won't be able to re-use it indefinitely.
- *
- * The secret shared between execution contexts has to be available
- * synchronously. Under Mozilla, this is the extension's per-session id. Under
- * Chromium, this is a dummy web-accessible-resource name that resides in the
- * manifest and is supposed to be constructed by each user using a unique value
- * (this is done automatically by `build.sh').
- */
-
-function get_secret()
-{
- if (is_mozilla)
- return browser.runtime.getURL("dummy");
-
- return chrome.runtime.getManifest().web_accessible_resources
- .map(r => /^chromium-key-dummy-file-(.*)/.exec(r)).filter(r => r)[0][1];
-}
-
-function extract_signed(signature, signed_data)
-{
- const match = /^([1-9][0-9]{12}|0)_(.*)$/.exec(signed_data);
- if (!match)
- return {fail: "bad format"};
-
- const result = {time: parseInt(match[1]), data: match[2]};
- if (sign_data(result.data, result.time)[0] !== signature)
- result.fail = "bad signature";
-
- return result;
-}
-
-/*
- * Sign a given string for a given time. Time should be either 0 or in the range
- * 10^12 <= time < 10^13.
- */
-function sign_data(data, time) {
- return [sha256(get_secret() + time + data), `${time}_${data}`];
-}
-
-/*
- * EXPORTS_START
- * EXPORT extract_signed
- * EXPORT sign_data
- * EXPORTS_END
- */
diff --git a/common/storage_light.js b/common/storage_light.js
index 4e6a041..a315858 100644
--- a/common/storage_light.js
+++ b/common/storage_light.js
@@ -47,6 +47,7 @@
* IMPORT raw_storage
* IMPORT is_mozilla
* IMPORT observables
+ * IMPORTS_END
*/
const reg_spec = new Set(["\\", "[", "]", "(", ")", "{", "}", ".", "*", "+"]);