aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWojtek Kosior <koszko@koszko.org>2022-03-02 11:58:57 +0100
committerWojtek Kosior <koszko@koszko.org>2022-03-04 16:13:39 +0100
commitc8294257727aec36017e1dea0cbbe34f1aaa625e (patch)
tree698d98a60b6f53ad9975f791b87b37afcad3c32c
parent241f58b66f5a245c3dd85101dee052bba358bbc8 (diff)
downloadbrowser-extension-c8294257727aec36017e1dea0cbbe34f1aaa625e.tar.gz
browser-extension-c8294257727aec36017e1dea0cbbe34f1aaa625e.zip
optimize Pattern Query Tree for size of its JSON.stringify()'ed representation
-rw-r--r--common/patterns_query_tree.js113
-rw-r--r--test/haketilo_test/unit/test_patterns_query_tree.py51
2 files changed, 77 insertions, 87 deletions
diff --git a/common/patterns_query_tree.js b/common/patterns_query_tree.js
index ea3607e..8d01cc1 100644
--- a/common/patterns_query_tree.js
+++ b/common/patterns_query_tree.js
@@ -3,7 +3,7 @@
*
* Function: Data structure to query items by URL patterns.
*
- * Copyright (C) 2021 Wojtek Kosior
+ * Copyright (C) 2021, 2022 Wojtek Kosior <koszko@koszko.org>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -41,8 +41,6 @@
* proprietary program, I am not going to enforce this in court.
*/
-// TODO! Modify the code to use `Object.create(null)` instead of `{}`.
-
#FROM common/patterns.js IMPORT deconstruct_url
/* "Pattern Tree" is how we refer to the data structure used for querying
@@ -53,44 +51,48 @@
const pattern_tree_make = () => ({})
#EXPORT pattern_tree_make AS make
-function empty_node() {
- return {
- wildcard_matches: [null, null, null],
- literal_match: null,
- children: {}
- };
-}
+const empty_node = () => ({});
function is_empty_node(tree_node) {
- const children = tree_node.children;
- for (const key in children) {
- if (children.hasOwnProperty(key))
- return false;
- }
-
- if (tree_node.wildcard_matches.reduce((a, b) => b && a !== null, 1))
+ for (const key in tree_node)
return false;
- return tree_node.literal_match === null;
+ return true;
}
+const wildcard_matches = node => [node["*"], node["**"], node["***"]];
+
const is_wildcard = segment => ["*", "**", "***"].lastIndexOf(segment) >= 0;
/*
+ * Remove reference to given child fron node and leave the node in consistent
+ * state afterwards, i.e. remove the "c" property if no childs are left.
+ */
+function delete_child(node, child_name) {
+ if (node.c) {
+ delete node.c[child_name];
+
+ for (const key in node.c)
+ return;
+
+ delete node.c;
+ }
+}
+
+/*
* Yields all matches of this segments sequence against the tree that starts at
* this node. Results are produces in order from greatest to lowest pattern
* specificity.
*/
-function* search_sequence(tree_node, segments)
-{
+function* search_sequence(tree_node, segments) {
const nodes = [tree_node];
- for (const segment of segments) {
- const next_node = nodes[nodes.length - 1].children[segment];
- if (next_node === undefined)
+ for (const current_segment of segments) {
+ const children = nodes[nodes.length - 1].c || {};
+ if (!Object.hasOwnProperty.call(children, current_segment))
break;
- nodes.push(next_node);
+ nodes.push(children[current_segment]);
}
const nsegments = segments.length;
@@ -106,10 +108,11 @@ function* search_sequence(tree_node, segments)
while (nodes.length) {
const node = nodes.pop();
- const items = [node.literal_match, ...node.wildcard_matches];
+ const literal_match = node.l;
+ const items = [literal_match, ...wildcard_matches(node)];
- for (let i = 0; i < 4; i++) {
- if (items[i] !== null && conds[i]())
+ for (let i = 0; i < items.length; i++) {
+ if (items[i] !== undefined && conds[i]())
yield items[i];
}
}
@@ -129,53 +132,60 @@ function* search_sequence(tree_node, segments)
* segments and value returned by item_modifier is not `null`, make the value
* queryable by this path.
*/
-function modify_sequence(tree_node, segments, item_modifier)
-{
+function modify_sequence(tree_node, segments, item_modifier) {
const nodes = [tree_node];
let removed = true;
for (var current_segment of segments) {
- const child = tree_node.children[current_segment] || empty_node();
- tree_node.children[current_segment] = child;
+ const children = tree_node.c || {};
+ tree_node.c = children;
+
+ const child = Object.hasOwnProperty.call(children, current_segment) ?
+ children[current_segment] : empty_node();
+ children[current_segment] = child;
+
tree_node = child;
nodes.push(tree_node);
}
- tree_node.literal_match = item_modifier(tree_node.literal_match);
- if (tree_node.literal_match !== null)
+ tree_node.l = item_modifier(tree_node.l || null);
+ if (tree_node.l === null)
+ delete tree_node.l;
+ else
removed = false;
let i = segments.length;
if (is_wildcard(current_segment)) {
- const asterisks = current_segment.length - 1;
- const wildcards = nodes[i - 1].wildcard_matches;
- wildcards[asterisks] = item_modifier(wildcards[asterisks]);
- if (wildcards[asterisks] !== null)
+ nodes[i - 1][current_segment] =
+ item_modifier(nodes[i - 1][current_segment] || null);
+ if (nodes[i - 1][current_segment] === null)
+ delete nodes[i - 1][current_segment];
+ else
removed = false;
}
- while (!removed)
+ if (!removed)
return;
while (i > 0) {
tree_node = nodes[i--];
if (is_empty_node(tree_node))
- delete nodes[i].children[segments[i]];
+ delete_child(nodes[i], segments[i]);
+ else
+ break;
}
}
/* Helper function for modify_tree(). */
-function modify_path(tree_node, deco, item_modifier)
-{
+function modify_path(tree_node, deco, item_modifier) {
tree_node = tree_node || empty_node();
modify_sequence(tree_node, deco.path, item_modifier);
return is_empty_node(tree_node) ? null : tree_node;
}
/* Helper function for modify_tree(). */
-function modify_domain(tree_node, deco, item_modifier)
-{
+function modify_domain(tree_node, deco, item_modifier) {
const path_modifier = branch => modify_path(branch, deco, item_modifier);
tree_node = tree_node || empty_node();
/* We need an array of domain labels ordered most-significant-first. */
@@ -184,8 +194,7 @@ function modify_domain(tree_node, deco, item_modifier)
}
/* Helper function for pattern_tree_register() and pattern_tree_deregister(). */
-function modify_tree(patterns_by_proto, pattern, item_modifier)
-{
+function modify_tree(patterns_by_proto, pattern, item_modifier) {
/*
* We pass 'false' to disable length limits on URL parts. Length limits are
* mostly useful in case of iteration over all patterns matching given URL.
@@ -208,8 +217,7 @@ function modify_tree(patterns_by_proto, pattern, item_modifier)
* Make item queryable through the Pattern Tree that starts with the protocols
* dictionary object passed in the first argument.
*/
-function pattern_tree_register(patterns_by_proto, pattern, item_name, item)
-{
+function pattern_tree_register(patterns_by_proto, pattern, item_name, item) {
const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_';
item_name = key_prefix + item_name;
const add_item = obj => Object.assign(obj || {}, {[item_name]: item});
@@ -218,8 +226,7 @@ function pattern_tree_register(patterns_by_proto, pattern, item_name, item)
#EXPORT pattern_tree_register AS register
/* Helper function for pattern_tree_deregister(). */
-function _remove_item(obj, item_name)
-{
+function _remove_item(obj, item_name) {
obj = obj || {};
delete obj[item_name];
for (const key in obj)
@@ -233,8 +240,7 @@ function _remove_item(obj, item_name)
* should be pattern and name that have been earlier passed to
* pattern_tree_register().
*/
-function pattern_tree_deregister(patterns_by_proto, pattern, item_name)
-{
+function pattern_tree_deregister(patterns_by_proto, pattern, item_name) {
const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_';
item_name = key_prefix + item_name;
const remove_item = obj => _remove_item(obj, item_name);
@@ -244,12 +250,11 @@ function pattern_tree_deregister(patterns_by_proto, pattern, item_name)
/*
* Yield registered items that match url. Each yielded value is an object with
- * key being matched item names and values being the items. One such object
+ * keys being matched item names and values being the items. One such object
* shall contain all items matched with given pattern specificity. Objects are
* yielded in order from greatest to lowest pattern specificity.
*/
-function* pattern_tree_search(patterns_by_proto, url)
-{
+function* pattern_tree_search(patterns_by_proto, url) {
const deco = deconstruct_url(url, false);
const tree_for_proto = patterns_by_proto[deco.proto] || empty_node();
diff --git a/test/haketilo_test/unit/test_patterns_query_tree.py b/test/haketilo_test/unit/test_patterns_query_tree.py
index 80bf554..1660aa2 100644
--- a/test/haketilo_test/unit/test_patterns_query_tree.py
+++ b/test/haketilo_test/unit/test_patterns_query_tree.py
@@ -6,7 +6,7 @@ Haketilo unit tests - URL patterns
# This file is part of Haketilo
#
-# Copyright (C) 2021, Wojtek Kosior
+# Copyright (C) 2021, 2022 Wojtek Kosior <koszko@koszko.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the CC0 1.0 Universal License as published by
@@ -70,18 +70,11 @@ def test_modify_branch(execute_in_page):
returnval(branch);
}''')
assert branch == {
- 'literal_match': None,
- 'wildcard_matches': [None, None, None],
- 'children': {
+ 'c': {
'com': {
- 'literal_match': None,
- 'wildcard_matches': [None, None, None],
- 'children': {
+ 'c': {
'example': {
- 'literal_match': ['some_item'],
- 'wildcard_matches': [None, None, None],
- 'children': {
- }
+ 'l': ['some_item']
}
}
}
@@ -95,8 +88,8 @@ def test_modify_branch(execute_in_page):
returnval([branch, items_added]);
}''', branch)
assert items_added == 1
- assert branch['children']['com']['children']['example']['literal_match'] \
- == ['some_item', 'other_item']
+ assert branch['c']['com']['c']['example']['l'] \
+ == ['some_item', 'other_item']
for i in range(3):
for expected_array in [['third_item'], ['third_item', '4th_item']]:
@@ -110,10 +103,9 @@ def test_modify_branch(execute_in_page):
}''',
branch, wildcard, expected_array[-1])
assert items_added == 2
- sample = branch['children']['com']['children']['sample']
- assert sample['wildcard_matches'][i] == expected_array
- assert sample['children'][wildcard]['literal_match'] \
- == expected_array
+ sample = branch['c']['com']['c']['sample']
+ assert sample[wildcard] == expected_array
+ assert sample['c'][wildcard]['l'] == expected_array
branch, items_added = execute_in_page(
'''{
@@ -124,8 +116,8 @@ def test_modify_branch(execute_in_page):
}''',
branch)
assert items_added == 1
- assert branch['children']['org']['children']['koszko']['children']['***']\
- ['children']['123']['literal_match'] == ['5th_item']
+ assert branch['c']['org']['c']['koszko']['c']['***']['c']['123']['l'] \
+ == ['5th_item']
# Let's verify that removing a nonexistent element doesn't modify the tree.
branch2, items_removed = execute_in_page(
@@ -150,7 +142,7 @@ def test_modify_branch(execute_in_page):
}''',
branch)
assert items_removed == 1
- assert 'org' not in branch['children']
+ assert 'org' not in branch['c']
for i in range(3):
for expected_array in [['third_item'], None]:
@@ -166,11 +158,10 @@ def test_modify_branch(execute_in_page):
assert items_removed == 2
if i == 2 and expected_array == []:
break
- sample = branch['children']['com']['children'].get('sample', {})
- assert sample.get('wildcard_matches', [None, None, None])[i] \
+ sample = branch['c']['com']['c'].get('sample', {})
+ assert sample.get(wildcard) == expected_array
+ assert sample.get('c', {}).get(wildcard, {}).get('l') \
== expected_array
- assert sample.get('children', {}).get(wildcard, {})\
- .get('literal_match') == expected_array
for i in range(2):
branch, items_removed = execute_in_page(
@@ -182,15 +173,9 @@ def test_modify_branch(execute_in_page):
branch)
assert items_removed == 1
if i == 0:
- assert branch['children']['com']['children']['example']\
- ['literal_match'] == ['some_item']
- else:
- assert branch == {
- 'literal_match': None,
- 'wildcard_matches': [None, None, None],
- 'children': {
- }
- }
+ assert branch['c']['com']['c']['example']['l'] == ['some_item']
+
+ assert branch == {}
@pytest.mark.get_page('https://gotmyowndoma.in')
def test_search_branch(execute_in_page):