aboutsummaryrefslogtreecommitdiff
path: root/common
diff options
context:
space:
mode:
authorWojtek Kosior <koszko@koszko.org>2021-12-04 19:31:43 +0100
committerWojtek Kosior <koszko@koszko.org>2021-12-04 19:31:43 +0100
commite1282a63d6e41d437dd1b14a08baf89b78ab56cc (patch)
tree623f6682b70c7eb304d47506bf465bc77ab2cda6 /common
parent44bb618aa99fa99a6df81e6a2b1dbec7a720f442 (diff)
downloadbrowser-extension-e1282a63d6e41d437dd1b14a08baf89b78ab56cc.tar.gz
browser-extension-e1282a63d6e41d437dd1b14a08baf89b78ab56cc.zip
finish implementing more efficient querying of URL patterns
The algorithm is implemented and tested. However, it is yet to be hooked into the actual extension.
Diffstat (limited to 'common')
-rw-r--r--common/patterns.js4
-rw-r--r--common/patterns_query_tree.js165
2 files changed, 97 insertions, 72 deletions
diff --git a/common/patterns.js b/common/patterns.js
index 5d84a73..7d28dfe 100644
--- a/common/patterns.js
+++ b/common/patterns.js
@@ -96,7 +96,7 @@ function deconstruct_url(url, use_limits=true)
throw `unsupported protocol in url '${url}'`;
}
- deco.trailing_dash = deco.path[deco.path.length - 1] === "/";
+ deco.trailing_slash = deco.path[deco.path.length - 1] === "/";
if (deco.domain) {
if (deco.domain.length > max.DOMAIN_CHARS) {
@@ -151,7 +151,7 @@ function* each_path_pattern(deco)
const path_part = ["", ...deco.path.slice(0, slice)].join("/");
const path_wildcards = [];
if (slice === deco.path.length && !deco.path_truncated) {
- if (deco.trailing_dash)
+ if (deco.trailing_slash)
yield path_part + "/";
if (slice > 0 || deco.proto !== "file")
yield path_part;
diff --git a/common/patterns_query_tree.js b/common/patterns_query_tree.js
index 326c7b7..49205c5 100644
--- a/common/patterns_query_tree.js
+++ b/common/patterns_query_tree.js
@@ -47,17 +47,12 @@
* IMPORTS_END
*/
-/*
- * This is a javascript rewrite of Python implementation of the algorithm here:
- * https://git.koszko.org/pydrilla/tree/src/pydrilla/pydrilla.py?id=f4edcbe7f4739d6f82a2e1bb180960b003b30862#n205
- */
-
/* "Pattern Tree" is how we refer to the data structure used for querying
* Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal
* is to make it possible for given URL to quickly retrieve all known patterns
* that match it.
*/
-function make_tree_node() {
+function empty_node() {
return {
wildcard_matches: [null, null, null],
literal_match: null,
@@ -141,7 +136,7 @@ function modify_sequence(tree_node, segments, item_modifier)
for (var current_segment of segments) {
wildcards = tree_node.wildcard_matches;
- const child = tree_node.children[current_segment] || make_tree_node();
+ const child = tree_node.children[current_segment] || empty_node();
tree_node.children[current_segment] = child;
tree_node = child;
nodes.push(tree_node);
@@ -171,11 +166,26 @@ function modify_sequence(tree_node, segments, item_modifier)
}
}
-/*
- * Make item queryable through the Pattern Tree that starts with the protocols
- * dictionary object passed in the first argument.
- */
-function pattern_tree_register(patterns_by_proto, pattern, item_name, item)
+/* Helper function for modify_tree(). */
+function modify_path(tree_node, deco, item_modifier)
+{
+ tree_node = tree_node || empty_node();
+ modify_sequence(tree_node, deco.path, item_modifier);
+ return is_empty_node(tree_node) ? null : tree_node;
+}
+
+/* Helper function for modify_tree(). */
+function modify_domain(tree_node, deco, item_modifier)
+{
+ const path_modifier = branch => modify_path(branch, deco, item_modifier);
+ tree_node = tree_node || empty_node();
+ /* We need an array of domain labels ordered most-significant-first. */
+ modify_sequence(tree_node, [...deco.domain].reverse(), path_modifier);
+ return is_empty_node(tree_node) ? null : tree_node;
+}
+
+/* Helper function for pattern_tree_register() and pattern_tree_deregister(). */
+function modify_tree(patterns_by_proto, pattern, item_modifier)
{
/*
* We pass 'false' to disable length limits on URL parts. Length limits are
@@ -184,84 +194,99 @@ function pattern_tree_register(patterns_by_proto, pattern, item_name, item)
*/
const deco = deconstruct_url(pattern, false);
- const tree = patterns_by_proto[deco.proto] || make_tree_node();
- patterns_by_proto[deco.proto] = tree;
+ let tree_for_proto = patterns_by_proto[deco.proto];
- let path_trees;
+ tree_for_proto = deco.domain === undefined ?
+ modify_path(tree_for_proto, deco, item_modifier) :
+ modify_domain(tree_for_proto, deco, item_modifier);
- if (deco.proto === "file") {
- path_trees = [tree];
- } else {
- /* We need an aray of domain labels ordered most-significant-first. */
- const domain = [...deco.domain].reverse();
- path_trees = add_sequence(tree, domain, make_tree_node);
- }
+ patterns_by_proto[deco.proto] = tree_for_proto;
+ if (tree_for_proto === null)
+ delete patterns_by_proto[deco.proto];
+}
- for (const path_tree of path_trees) {
- for (const match_obj of add_sequence(path_tree, deco.path, () => {}))
- match_obj[item_name] = item;
- }
+/*
+ * Make item queryable through the Pattern Tree that starts with the protocols
+ * dictionary object passed in the first argument.
+ */
+function pattern_tree_register(patterns_by_proto, pattern, item_name, item)
+{
+ const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_';
+ item_name = key_prefix + item_name;
+ const add_item = obj => Object.assign(obj || {}, {[item_name]: item});
+ modify_tree(patterns_by_proto, pattern, add_item);
+}
+
+/* Helper function for pattern_tree_deregister(). */
+function _remove_item(obj, item_name)
+{
+ obj = obj || {};
+ delete obj[item_name];
+ for (const key in obj)
+ return obj;
+ return null;
}
-// /*
-// * Remove registered item from the Pattern Tree that starts with the protocols
-// * dictionary object passed in the first argument. The remaining 2 arguments
-// * should be pattern and name that have been earlier passed to
-// * pattern_tree_register().
-// */
-// function pattern_tree_deregister(patterns_by_proto, pattern, item_name)
-// {
-// const deco = deconstruct_url(pattern, false);
-
-// const tree = patterns_by_proto[deco.proto] || make_tree_node();
-// patterns_by_proto[deco.proto] = tree;
-
-// let path_trees;
-
-// if (deco.proto === "file") {
-// path_trees = [tree];
-// } else {
-// /* We need an aray of domain labels ordered most-significant-first. */
-// const domain = [...deco.domain].reverse();
-// ..........................
-// path_trees = add_sequence(tree, domain, make_tree_node);
-// }
-// }
+/*
+ * Remove registered item from the Pattern Tree that starts with the protocols
+ * dictionary object passed in the first argument. The remaining 2 arguments
+ * should be pattern and name that have been earlier passed to
+ * pattern_tree_register().
+ */
+function pattern_tree_deregister(patterns_by_proto, pattern, item_name)
+{
+ const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_';
+ item_name = key_prefix + item_name;
+ const remove_item = obj => _remove_item(obj, item_name);
+ modify_tree(patterns_by_proto, pattern, remove_item);
+}
/*
- * Yield registered items (as [item_name, item] arrays) that match url. For
- * cosistent behavior, please don't modify the pattern tree while iterating over
- * the results.
+ * Yield registered items that match url. Each yielded value is an object with
+ * key being matched item names and values being the items. One such object
+ * shall contain all items matched with given pattern specificity. Objects are
+ * yielded in order from greatest to lowest pattern specificity.
*/
function* pattern_tree_search(patterns_by_proto, url)
{
const deco = deconstruct_url(url, false);
- tree = patterns_by_proto[deco.proto] || make_tree_node();
+ const tree_for_proto = patterns_by_proto[deco.proto] || empty_node();
+ let by_path = [tree_for_proto];
- let path_trees;
-
- if (deco.proto === "file") {
- path_trees = [tree];
- } else {
- /* We need an aray of domain labels ordered most-significant-first. */
- const domain = [...deco.domain].reverse();
- path_trees = search_sequence(tree, domain);
- }
+ /* We need an array of domain labels ordered most-significant-first. */
+ if (deco.domain !== undefined)
+ by_path = search_sequence(tree_for_proto, [...deco.domain].reverse());
- for (const path_tree of path_trees) {
+ for (const path_tree of by_path) {
for (const match_obj of search_sequence(path_tree, deco.path)) {
- for (const entry of Object.entries(match_obj))
- yield entry;
+ let result_obj_slash = null;
+ let result_obj_no_slash = null;
+
+ for (const [key, item] of Object.entries(match_obj)) {
+ if (deco.trailing_slash && key[0] === '/') {
+ result_obj_slash = result_obj_slash || {};
+ result_obj_slash[key.substring(1)] = item;
+ } else if (key[0] !== '/') {
+ result_obj_no_slash = result_obj_no_slash || {};
+ result_obj_no_slash[key.substring(1)] = item;
+ }
+ }
+
+ if (deco.trailing_slash && result_obj_slash)
+ yield result_obj_slash;
+
+ if (result_obj_no_slash)
+ yield result_obj_no_slash;
}
}
}
const pattern_tree = {
- make: () => {},
- register: pattern_tree_register,
- // deregister: pattern_tree_deregister,
- search: pattern_tree_search
+ make: () => ({}),
+ register: pattern_tree_register,
+ deregister: pattern_tree_deregister,
+ search: pattern_tree_search
}
/*