diff options
Diffstat (limited to 'common')
-rw-r--r-- | common/misc.js | 2 | ||||
-rw-r--r-- | common/patterns.js | 71 | ||||
-rw-r--r-- | common/patterns_query_tree.js | 271 | ||||
-rw-r--r-- | common/sha256.js | 2 | ||||
-rw-r--r-- | common/signing.js | 108 | ||||
-rw-r--r-- | common/storage_light.js | 1 |
6 files changed, 318 insertions, 137 deletions
diff --git a/common/misc.js b/common/misc.js index 2d744ea..a960d35 100644 --- a/common/misc.js +++ b/common/misc.js @@ -83,7 +83,7 @@ function gen_nonce(length=16) function make_csp_rule(policy) { let rule = "prefetch-src 'none'; script-src-attr 'none';"; - const script_src = policy.has_payload ? + const script_src = policy.nonce !== undefined ? `'nonce-${policy.nonce}'` : "'none'"; rule += ` script-src ${script_src}; script-src-elem ${script_src};`; return rule; diff --git a/common/patterns.js b/common/patterns.js index e198482..5d84a73 100644 --- a/common/patterns.js +++ b/common/patterns.js @@ -41,50 +41,67 @@ * proprietary program, I am not going to enforce this in court. */ -const MAX_URL_PATH_LEN = 12; -const MAX_URL_PATH_CHARS = 255; -const MAX_DOMAIN_LEN = 7; -const MAX_DOMAIN_CHARS = 100; +const MAX = { + URL_PATH_LEN: 12, + URL_PATH_CHARS: 255, + DOMAIN_LEN: 7, + DOMAIN_CHARS: 100 +}; const proto_regex = /^(\w+):\/\/(.*)$/; const user_re = "[^/?#@]+@" -const domain_re = "[^/?#]+"; +const domain_re = "[.*a-zA-Z0-9-]+"; const path_re = "[^?#]*"; const query_re = "\\??[^#]*"; const http_regex = new RegExp(`^(${domain_re})(${path_re})(${query_re}).*`); -const file_regex = new RegExp(`^(${path_re}).*`); +const file_regex = new RegExp(`^(/${path_re}).*`); const ftp_regex = new RegExp(`^(${user_re})?(${domain_re})(${path_re}).*`); -function deconstruct_url(url) +function match_or_throw(regex, string, error_msg) { - const proto_match = proto_regex.exec(url); - if (proto_match === null) - return undefined; + const match = regex.exec(string); + if (match === null) + throw error_msg; + return match; +} + +function deconstruct_url(url, use_limits=true) +{ + const max = MAX; + if (!use_limits) { + for (key in MAX) + max[key] = Infinity; + } + + const matcher = (re, str) => match_or_throw(re, str, `bad url '${url}'`) + + const proto_match = matcher(proto_regex, url); const deco = {proto: proto_match[1]}; if (deco.proto === "file") { - deco.path = file_regex.exec(proto_match[2])[1]; + deco.path = matcher(file_regex, proto_match[2])[1]; } else if (deco.proto === "ftp") { - [deco.domain, deco.path] = ftp_regex.exec(proto_match[2]).slice(2, 4); + [deco.domain, deco.path] = + matcher(ftp_regex, proto_match[2]).slice(2, 4); + } else if (deco.proto === "http" || deco.proto === "https") { + [deco.domain, deco.path, deco.query] = + matcher(http_regex, proto_match[2]).slice(1, 4); + deco.domain = deco.domain.toLowerCase(); } else { - const http_match = http_regex.exec(proto_match[2]); - if (!http_match) - return undefined; - [deco.domain, deco.path, deco.query] = http_match.slice(1, 4); + throw `unsupported protocol in url '${url}'`; } - const leading_dash = deco.path[0] === "/"; deco.trailing_dash = deco.path[deco.path.length - 1] === "/"; if (deco.domain) { - if (deco.domain.length > MAX_DOMAIN_CHARS) { + if (deco.domain.length > max.DOMAIN_CHARS) { const idx = deco.domain.indexOf(".", deco.domain.length - - MAX_DOMAIN_CHARS); + max.DOMAIN_CHARS); if (idx === -1) deco.domain = []; else @@ -93,7 +110,7 @@ function deconstruct_url(url) deco.domain_truncated = true; } - if (deco.path.length > MAX_URL_PATH_CHARS) { + if (deco.path.length > max.URL_PATH_CHARS) { deco.path = deco.path.substring(0, deco.path.lastIndexOf("/")); deco.path_truncated = true; } @@ -101,16 +118,14 @@ function deconstruct_url(url) if (typeof deco.domain === "string") { deco.domain = deco.domain.split("."); - if (deco.domain.splice(0, deco.domain.length - MAX_DOMAIN_LEN).length + if (deco.domain.splice(0, deco.domain.length - max.DOMAIN_LEN).length > 0) deco.domain_truncated = true; } deco.path = deco.path.split("/").filter(s => s !== ""); - if (deco.domain && deco.path.splice(MAX_URL_PATH_LEN).length > 0) + if (deco.domain && deco.path.splice(max.URL_PATH_LEN).length > 0) deco.path_truncated = true; - if (leading_dash || deco.path.length === 0) - deco.path.unshift(""); return deco; } @@ -132,13 +147,14 @@ function* each_domain_pattern(deco) function* each_path_pattern(deco) { - for (let slice = deco.path.length; slice > 0; slice--) { - const path_part = deco.path.slice(0, slice).join("/"); + for (let slice = deco.path.length; slice >= 0; slice--) { + const path_part = ["", ...deco.path.slice(0, slice)].join("/"); const path_wildcards = []; if (slice === deco.path.length && !deco.path_truncated) { if (deco.trailing_dash) yield path_part + "/"; - yield path_part; + if (slice > 0 || deco.proto !== "file") + yield path_part; } if (slice === deco.path.length - 1 && !deco.path_truncated && deco.path[slice] !== "*") @@ -171,5 +187,6 @@ function* each_url_pattern(url) /* * EXPORTS_START * EXPORT each_url_pattern + * EXPORT deconstruct_url * EXPORTS_END */ diff --git a/common/patterns_query_tree.js b/common/patterns_query_tree.js new file mode 100644 index 0000000..326c7b7 --- /dev/null +++ b/common/patterns_query_tree.js @@ -0,0 +1,271 @@ +/** + * This file is part of Haketilo. + * + * Function: Data structure to query items by URL patterns. + * + * Copyright (C) 2021 Wojtek Kosior + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * As additional permission under GNU GPL version 3 section 7, you + * may distribute forms of that code without the copy of the GNU + * GPL normally required by section 4, provided you include this + * license notice and, in case of non-source distribution, a URL + * through which recipients can access the Corresponding Source. + * If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not + * obligated to do so. If you do not wish to do so, delete this + * exception statement from your version. + * + * As a special exception to the GPL, any HTML file which merely + * makes function calls to this code, and for that purpose + * includes it by reference shall be deemed a separate work for + * copyright law purposes. If you modify this code, you may extend + * this exception to your version of the code, but you are not + * obligated to do so. If you do not wish to do so, delete this + * exception statement from your version. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + * + * I, Wojtek Kosior, thereby promise not to sue for violation of this file's + * license. Although I request that you do not make use this code in a + * proprietary program, I am not going to enforce this in court. + */ + +/* + * IMPORTS_START + * IMPORT deconstruct_url + * IMPORTS_END + */ + +/* + * This is a javascript rewrite of Python implementation of the algorithm here: + * https://git.koszko.org/pydrilla/tree/src/pydrilla/pydrilla.py?id=f4edcbe7f4739d6f82a2e1bb180960b003b30862#n205 + */ + +/* "Pattern Tree" is how we refer to the data structure used for querying + * Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal + * is to make it possible for given URL to quickly retrieve all known patterns + * that match it. + */ +function make_tree_node() { + return { + wildcard_matches: [null, null, null], + literal_match: null, + children: {} + }; +} + +function is_empty_node(tree_node) { + const children = tree_node.children; + for (const key in children) { + if (children.hasOwnProperty(key)) + return false; + } + + if (Array.reduce(tree_node.wildcard_matches, (a, b) => b && a !== null, 1)) + return false; + + return tree_node.literal_match === null; +} + +const is_wildcard = segment => ["*", "**", "***"].lastIndexOf(segment) >= 0; + +/* + * Yields all matches of this segments sequence against the tree that starts at + * this node. Results are produces in order from greatest to lowest pattern + * specificity. + */ +function* search_sequence(tree_node, segments) +{ + const nodes = [tree_node]; + + for (const segment of segments) { + const next_node = nodes[nodes.length - 1].children[segment]; + if (next_node === undefined) + break; + + nodes.push(next_node); + } + + const nsegments = segments.length; + + const conds = [ + /* literal pattern match */ + () => nodes.length == nsegments, + /* wildcard pattern matches */ + () => nodes.length + 1 == nsegments && segments[nsegments - 1] != "*", + () => nodes.length + 1 < nsegments, + () => nodes.length + 1 != nsegments || segments[nsegments - 1] != "***" + ]; + + while (nodes.length) { + const node = nodes.pop(); + const items = [node.literal_match, ...node.wildcard_matches]; + + for (let i = 0; i < 4; i++) { + if (items[i] !== null && conds[i]()) + yield items[i]; + } + } +} + +/* + * Make item queryable through (this branch of) the Pattern Tree or remove its + * path from there. + * + * item_modifier should be a function that accepts 1 argument, the item stored + * in the tree (or `null` if there wasn't any item there), and returns an item + * that should be used in place of the first one. It is also legal for it to + * return the same item modifying it first. If it returns `null`, it means the + * item should be deleted from the Tree. + * + * If there was not yet any item associated with the tree path designated by + * segments and value returned by item_modifier is not `null`, make the value + * queryable by this path. + */ +function modify_sequence(tree_node, segments, item_modifier) +{ + const nodes = [tree_node]; + let removed = true; + + for (var current_segment of segments) { + wildcards = tree_node.wildcard_matches; + + const child = tree_node.children[current_segment] || make_tree_node(); + tree_node.children[current_segment] = child; + tree_node = child; + nodes.push(tree_node); + } + + tree_node.literal_match = item_modifier(tree_node.literal_match); + if (tree_node.literal_match !== null) + removed = false; + + let i = segments.length; + + if (is_wildcard(current_segment)) { + const asterisks = current_segment.length - 1; + const wildcards = nodes[i - 1].wildcard_matches; + wildcards[asterisks] = item_modifier(wildcards[asterisks]); + if (wildcards[asterisks] !== null) + removed = false; + } + + while (!removed) + return; + + while (i > 0) { + tree_node = nodes[i--]; + if (is_empty_node(tree_node)) + delete nodes[i].children[segments[i]]; + } +} + +/* + * Make item queryable through the Pattern Tree that starts with the protocols + * dictionary object passed in the first argument. + */ +function pattern_tree_register(patterns_by_proto, pattern, item_name, item) +{ + /* + * We pass 'false' to disable length limits on URL parts. Length limits are + * mostly useful in case of iteration over all patterns matching given URL. + * Here we don't do that. + */ + const deco = deconstruct_url(pattern, false); + + const tree = patterns_by_proto[deco.proto] || make_tree_node(); + patterns_by_proto[deco.proto] = tree; + + let path_trees; + + if (deco.proto === "file") { + path_trees = [tree]; + } else { + /* We need an aray of domain labels ordered most-significant-first. */ + const domain = [...deco.domain].reverse(); + path_trees = add_sequence(tree, domain, make_tree_node); + } + + for (const path_tree of path_trees) { + for (const match_obj of add_sequence(path_tree, deco.path, () => {})) + match_obj[item_name] = item; + } +} + +// /* +// * Remove registered item from the Pattern Tree that starts with the protocols +// * dictionary object passed in the first argument. The remaining 2 arguments +// * should be pattern and name that have been earlier passed to +// * pattern_tree_register(). +// */ +// function pattern_tree_deregister(patterns_by_proto, pattern, item_name) +// { +// const deco = deconstruct_url(pattern, false); + +// const tree = patterns_by_proto[deco.proto] || make_tree_node(); +// patterns_by_proto[deco.proto] = tree; + +// let path_trees; + +// if (deco.proto === "file") { +// path_trees = [tree]; +// } else { +// /* We need an aray of domain labels ordered most-significant-first. */ +// const domain = [...deco.domain].reverse(); +// .......................... +// path_trees = add_sequence(tree, domain, make_tree_node); +// } +// } + +/* + * Yield registered items (as [item_name, item] arrays) that match url. For + * cosistent behavior, please don't modify the pattern tree while iterating over + * the results. + */ +function* pattern_tree_search(patterns_by_proto, url) +{ + const deco = deconstruct_url(url, false); + + tree = patterns_by_proto[deco.proto] || make_tree_node(); + + let path_trees; + + if (deco.proto === "file") { + path_trees = [tree]; + } else { + /* We need an aray of domain labels ordered most-significant-first. */ + const domain = [...deco.domain].reverse(); + path_trees = search_sequence(tree, domain); + } + + for (const path_tree of path_trees) { + for (const match_obj of search_sequence(path_tree, deco.path)) { + for (const entry of Object.entries(match_obj)) + yield entry; + } + } +} + +const pattern_tree = { + make: () => {}, + register: pattern_tree_register, + // deregister: pattern_tree_deregister, + search: pattern_tree_search +} + +/* + * EXPORTS_START + * EXPORT pattern_tree + * EXPORTS_END + */ diff --git a/common/sha256.js b/common/sha256.js index 9aae7a1..3a02d7a 100644 --- a/common/sha256.js +++ b/common/sha256.js @@ -4,7 +4,7 @@ * @version 0.9.0 * * Copyright (C) 2014-2017, Yi-Cyuan Chen <emn178@gmail.com> - * + * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including diff --git a/common/signing.js b/common/signing.js deleted file mode 100644 index db2aa92..0000000 --- a/common/signing.js +++ /dev/null @@ -1,108 +0,0 @@ -/** - * This file is part of Haketilo. - * - * Functions: Operations related to "signing" of data. - * - * Copyright (C) 2021 Wojtek Kosior - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * As additional permission under GNU GPL version 3 section 7, you - * may distribute forms of that code without the copy of the GNU - * GPL normally required by section 4, provided you include this - * license notice and, in case of non-source distribution, a URL - * through which recipients can access the Corresponding Source. - * If you modify file(s) with this exception, you may extend this - * exception to your version of the file(s), but you are not - * obligated to do so. If you do not wish to do so, delete this - * exception statement from your version. - * - * As a special exception to the GPL, any HTML file which merely - * makes function calls to this code, and for that purpose - * includes it by reference shall be deemed a separate work for - * copyright law purposes. If you modify this code, you may extend - * this exception to your version of the code, but you are not - * obligated to do so. If you do not wish to do so, delete this - * exception statement from your version. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <https://www.gnu.org/licenses/>. - * - * I, Wojtek Kosior, thereby promise not to sue for violation of this file's - * license. Although I request that you do not make use this code in a - * proprietary program, I am not going to enforce this in court. - */ - -/* - * IMPORTS_START - * IMPORT sha256 - * IMPORT browser - * IMPORT is_mozilla - * IMPORTS_END - */ - -/* - * In order to make certain data synchronously accessible in certain contexts, - * Haketilo smuggles it in string form in places like cookies, URLs and headers. - * When using the smuggled data, we first need to make sure it isn't spoofed. - * For that, we use this pseudo-signing mechanism. - * - * Despite what name suggests, no assymetric cryptography is involved, as it - * would bring no additional benefits and would incur bigger performance - * overhead. Instead, we hash the string data together with some secret value - * that is supposed to be known only by this browser instance. Resulting hash - * sum plays the role of the signature. In the hash we also include current - * time. This way, even if signed data leaks (which shouldn't happen in the - * first place), an attacker won't be able to re-use it indefinitely. - * - * The secret shared between execution contexts has to be available - * synchronously. Under Mozilla, this is the extension's per-session id. Under - * Chromium, this is a dummy web-accessible-resource name that resides in the - * manifest and is supposed to be constructed by each user using a unique value - * (this is done automatically by `build.sh'). - */ - -function get_secret() -{ - if (is_mozilla) - return browser.runtime.getURL("dummy"); - - return chrome.runtime.getManifest().web_accessible_resources - .map(r => /^chromium-key-dummy-file-(.*)/.exec(r)).filter(r => r)[0][1]; -} - -function extract_signed(signature, signed_data) -{ - const match = /^([1-9][0-9]{12}|0)_(.*)$/.exec(signed_data); - if (!match) - return {fail: "bad format"}; - - const result = {time: parseInt(match[1]), data: match[2]}; - if (sign_data(result.data, result.time)[0] !== signature) - result.fail = "bad signature"; - - return result; -} - -/* - * Sign a given string for a given time. Time should be either 0 or in the range - * 10^12 <= time < 10^13. - */ -function sign_data(data, time) { - return [sha256(get_secret() + time + data), `${time}_${data}`]; -} - -/* - * EXPORTS_START - * EXPORT extract_signed - * EXPORT sign_data - * EXPORTS_END - */ diff --git a/common/storage_light.js b/common/storage_light.js index 4e6a041..a315858 100644 --- a/common/storage_light.js +++ b/common/storage_light.js @@ -47,6 +47,7 @@ * IMPORT raw_storage * IMPORT is_mozilla * IMPORT observables + * IMPORTS_END */ const reg_spec = new Set(["\\", "[", "]", "(", ")", "{", "}", ".", "*", "+"]); |