From c8294257727aec36017e1dea0cbbe34f1aaa625e Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Wed, 2 Mar 2022 11:58:57 +0100 Subject: optimize Pattern Query Tree for size of its JSON.stringify()'ed representation --- common/patterns_query_tree.js | 113 +++++++++++---------- .../haketilo_test/unit/test_patterns_query_tree.py | 51 ++++------ 2 files changed, 77 insertions(+), 87 deletions(-) diff --git a/common/patterns_query_tree.js b/common/patterns_query_tree.js index ea3607e..8d01cc1 100644 --- a/common/patterns_query_tree.js +++ b/common/patterns_query_tree.js @@ -3,7 +3,7 @@ * * Function: Data structure to query items by URL patterns. * - * Copyright (C) 2021 Wojtek Kosior + * Copyright (C) 2021, 2022 Wojtek Kosior * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -41,8 +41,6 @@ * proprietary program, I am not going to enforce this in court. */ -// TODO! Modify the code to use `Object.create(null)` instead of `{}`. - #FROM common/patterns.js IMPORT deconstruct_url /* "Pattern Tree" is how we refer to the data structure used for querying @@ -53,44 +51,48 @@ const pattern_tree_make = () => ({}) #EXPORT pattern_tree_make AS make -function empty_node() { - return { - wildcard_matches: [null, null, null], - literal_match: null, - children: {} - }; -} +const empty_node = () => ({}); function is_empty_node(tree_node) { - const children = tree_node.children; - for (const key in children) { - if (children.hasOwnProperty(key)) - return false; - } - - if (tree_node.wildcard_matches.reduce((a, b) => b && a !== null, 1)) + for (const key in tree_node) return false; - return tree_node.literal_match === null; + return true; } +const wildcard_matches = node => [node["*"], node["**"], node["***"]]; + const is_wildcard = segment => ["*", "**", "***"].lastIndexOf(segment) >= 0; +/* + * Remove reference to given child fron node and leave the node in consistent + * state afterwards, i.e. remove the "c" property if no childs are left. + */ +function delete_child(node, child_name) { + if (node.c) { + delete node.c[child_name]; + + for (const key in node.c) + return; + + delete node.c; + } +} + /* * Yields all matches of this segments sequence against the tree that starts at * this node. Results are produces in order from greatest to lowest pattern * specificity. */ -function* search_sequence(tree_node, segments) -{ +function* search_sequence(tree_node, segments) { const nodes = [tree_node]; - for (const segment of segments) { - const next_node = nodes[nodes.length - 1].children[segment]; - if (next_node === undefined) + for (const current_segment of segments) { + const children = nodes[nodes.length - 1].c || {}; + if (!Object.hasOwnProperty.call(children, current_segment)) break; - nodes.push(next_node); + nodes.push(children[current_segment]); } const nsegments = segments.length; @@ -106,10 +108,11 @@ function* search_sequence(tree_node, segments) while (nodes.length) { const node = nodes.pop(); - const items = [node.literal_match, ...node.wildcard_matches]; + const literal_match = node.l; + const items = [literal_match, ...wildcard_matches(node)]; - for (let i = 0; i < 4; i++) { - if (items[i] !== null && conds[i]()) + for (let i = 0; i < items.length; i++) { + if (items[i] !== undefined && conds[i]()) yield items[i]; } } @@ -129,53 +132,60 @@ function* search_sequence(tree_node, segments) * segments and value returned by item_modifier is not `null`, make the value * queryable by this path. */ -function modify_sequence(tree_node, segments, item_modifier) -{ +function modify_sequence(tree_node, segments, item_modifier) { const nodes = [tree_node]; let removed = true; for (var current_segment of segments) { - const child = tree_node.children[current_segment] || empty_node(); - tree_node.children[current_segment] = child; + const children = tree_node.c || {}; + tree_node.c = children; + + const child = Object.hasOwnProperty.call(children, current_segment) ? + children[current_segment] : empty_node(); + children[current_segment] = child; + tree_node = child; nodes.push(tree_node); } - tree_node.literal_match = item_modifier(tree_node.literal_match); - if (tree_node.literal_match !== null) + tree_node.l = item_modifier(tree_node.l || null); + if (tree_node.l === null) + delete tree_node.l; + else removed = false; let i = segments.length; if (is_wildcard(current_segment)) { - const asterisks = current_segment.length - 1; - const wildcards = nodes[i - 1].wildcard_matches; - wildcards[asterisks] = item_modifier(wildcards[asterisks]); - if (wildcards[asterisks] !== null) + nodes[i - 1][current_segment] = + item_modifier(nodes[i - 1][current_segment] || null); + if (nodes[i - 1][current_segment] === null) + delete nodes[i - 1][current_segment]; + else removed = false; } - while (!removed) + if (!removed) return; while (i > 0) { tree_node = nodes[i--]; if (is_empty_node(tree_node)) - delete nodes[i].children[segments[i]]; + delete_child(nodes[i], segments[i]); + else + break; } } /* Helper function for modify_tree(). */ -function modify_path(tree_node, deco, item_modifier) -{ +function modify_path(tree_node, deco, item_modifier) { tree_node = tree_node || empty_node(); modify_sequence(tree_node, deco.path, item_modifier); return is_empty_node(tree_node) ? null : tree_node; } /* Helper function for modify_tree(). */ -function modify_domain(tree_node, deco, item_modifier) -{ +function modify_domain(tree_node, deco, item_modifier) { const path_modifier = branch => modify_path(branch, deco, item_modifier); tree_node = tree_node || empty_node(); /* We need an array of domain labels ordered most-significant-first. */ @@ -184,8 +194,7 @@ function modify_domain(tree_node, deco, item_modifier) } /* Helper function for pattern_tree_register() and pattern_tree_deregister(). */ -function modify_tree(patterns_by_proto, pattern, item_modifier) -{ +function modify_tree(patterns_by_proto, pattern, item_modifier) { /* * We pass 'false' to disable length limits on URL parts. Length limits are * mostly useful in case of iteration over all patterns matching given URL. @@ -208,8 +217,7 @@ function modify_tree(patterns_by_proto, pattern, item_modifier) * Make item queryable through the Pattern Tree that starts with the protocols * dictionary object passed in the first argument. */ -function pattern_tree_register(patterns_by_proto, pattern, item_name, item) -{ +function pattern_tree_register(patterns_by_proto, pattern, item_name, item) { const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_'; item_name = key_prefix + item_name; const add_item = obj => Object.assign(obj || {}, {[item_name]: item}); @@ -218,8 +226,7 @@ function pattern_tree_register(patterns_by_proto, pattern, item_name, item) #EXPORT pattern_tree_register AS register /* Helper function for pattern_tree_deregister(). */ -function _remove_item(obj, item_name) -{ +function _remove_item(obj, item_name) { obj = obj || {}; delete obj[item_name]; for (const key in obj) @@ -233,8 +240,7 @@ function _remove_item(obj, item_name) * should be pattern and name that have been earlier passed to * pattern_tree_register(). */ -function pattern_tree_deregister(patterns_by_proto, pattern, item_name) -{ +function pattern_tree_deregister(patterns_by_proto, pattern, item_name) { const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_'; item_name = key_prefix + item_name; const remove_item = obj => _remove_item(obj, item_name); @@ -244,12 +250,11 @@ function pattern_tree_deregister(patterns_by_proto, pattern, item_name) /* * Yield registered items that match url. Each yielded value is an object with - * key being matched item names and values being the items. One such object + * keys being matched item names and values being the items. One such object * shall contain all items matched with given pattern specificity. Objects are * yielded in order from greatest to lowest pattern specificity. */ -function* pattern_tree_search(patterns_by_proto, url) -{ +function* pattern_tree_search(patterns_by_proto, url) { const deco = deconstruct_url(url, false); const tree_for_proto = patterns_by_proto[deco.proto] || empty_node(); diff --git a/test/haketilo_test/unit/test_patterns_query_tree.py b/test/haketilo_test/unit/test_patterns_query_tree.py index 80bf554..1660aa2 100644 --- a/test/haketilo_test/unit/test_patterns_query_tree.py +++ b/test/haketilo_test/unit/test_patterns_query_tree.py @@ -6,7 +6,7 @@ Haketilo unit tests - URL patterns # This file is part of Haketilo # -# Copyright (C) 2021, Wojtek Kosior +# Copyright (C) 2021, 2022 Wojtek Kosior # # This program is free software: you can redistribute it and/or modify # it under the terms of the CC0 1.0 Universal License as published by @@ -70,18 +70,11 @@ def test_modify_branch(execute_in_page): returnval(branch); }''') assert branch == { - 'literal_match': None, - 'wildcard_matches': [None, None, None], - 'children': { + 'c': { 'com': { - 'literal_match': None, - 'wildcard_matches': [None, None, None], - 'children': { + 'c': { 'example': { - 'literal_match': ['some_item'], - 'wildcard_matches': [None, None, None], - 'children': { - } + 'l': ['some_item'] } } } @@ -95,8 +88,8 @@ def test_modify_branch(execute_in_page): returnval([branch, items_added]); }''', branch) assert items_added == 1 - assert branch['children']['com']['children']['example']['literal_match'] \ - == ['some_item', 'other_item'] + assert branch['c']['com']['c']['example']['l'] \ + == ['some_item', 'other_item'] for i in range(3): for expected_array in [['third_item'], ['third_item', '4th_item']]: @@ -110,10 +103,9 @@ def test_modify_branch(execute_in_page): }''', branch, wildcard, expected_array[-1]) assert items_added == 2 - sample = branch['children']['com']['children']['sample'] - assert sample['wildcard_matches'][i] == expected_array - assert sample['children'][wildcard]['literal_match'] \ - == expected_array + sample = branch['c']['com']['c']['sample'] + assert sample[wildcard] == expected_array + assert sample['c'][wildcard]['l'] == expected_array branch, items_added = execute_in_page( '''{ @@ -124,8 +116,8 @@ def test_modify_branch(execute_in_page): }''', branch) assert items_added == 1 - assert branch['children']['org']['children']['koszko']['children']['***']\ - ['children']['123']['literal_match'] == ['5th_item'] + assert branch['c']['org']['c']['koszko']['c']['***']['c']['123']['l'] \ + == ['5th_item'] # Let's verify that removing a nonexistent element doesn't modify the tree. branch2, items_removed = execute_in_page( @@ -150,7 +142,7 @@ def test_modify_branch(execute_in_page): }''', branch) assert items_removed == 1 - assert 'org' not in branch['children'] + assert 'org' not in branch['c'] for i in range(3): for expected_array in [['third_item'], None]: @@ -166,11 +158,10 @@ def test_modify_branch(execute_in_page): assert items_removed == 2 if i == 2 and expected_array == []: break - sample = branch['children']['com']['children'].get('sample', {}) - assert sample.get('wildcard_matches', [None, None, None])[i] \ + sample = branch['c']['com']['c'].get('sample', {}) + assert sample.get(wildcard) == expected_array + assert sample.get('c', {}).get(wildcard, {}).get('l') \ == expected_array - assert sample.get('children', {}).get(wildcard, {})\ - .get('literal_match') == expected_array for i in range(2): branch, items_removed = execute_in_page( @@ -182,15 +173,9 @@ def test_modify_branch(execute_in_page): branch) assert items_removed == 1 if i == 0: - assert branch['children']['com']['children']['example']\ - ['literal_match'] == ['some_item'] - else: - assert branch == { - 'literal_match': None, - 'wildcard_matches': [None, None, None], - 'children': { - } - } + assert branch['c']['com']['c']['example']['l'] == ['some_item'] + + assert branch == {} @pytest.mark.get_page('https://gotmyowndoma.in') def test_search_branch(execute_in_page): -- cgit v1.2.3