diff options
Diffstat (limited to 'common/patterns_query_tree.js')
-rw-r--r-- | common/patterns_query_tree.js | 296 |
1 files changed, 296 insertions, 0 deletions
diff --git a/common/patterns_query_tree.js b/common/patterns_query_tree.js new file mode 100644 index 0000000..49205c5 --- /dev/null +++ b/common/patterns_query_tree.js @@ -0,0 +1,296 @@ +/** + * This file is part of Haketilo. + * + * Function: Data structure to query items by URL patterns. + * + * Copyright (C) 2021 Wojtek Kosior + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * As additional permission under GNU GPL version 3 section 7, you + * may distribute forms of that code without the copy of the GNU + * GPL normally required by section 4, provided you include this + * license notice and, in case of non-source distribution, a URL + * through which recipients can access the Corresponding Source. + * If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not + * obligated to do so. If you do not wish to do so, delete this + * exception statement from your version. + * + * As a special exception to the GPL, any HTML file which merely + * makes function calls to this code, and for that purpose + * includes it by reference shall be deemed a separate work for + * copyright law purposes. If you modify this code, you may extend + * this exception to your version of the code, but you are not + * obligated to do so. If you do not wish to do so, delete this + * exception statement from your version. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + * + * I, Wojtek Kosior, thereby promise not to sue for violation of this file's + * license. Although I request that you do not make use this code in a + * proprietary program, I am not going to enforce this in court. + */ + +/* + * IMPORTS_START + * IMPORT deconstruct_url + * IMPORTS_END + */ + +/* "Pattern Tree" is how we refer to the data structure used for querying + * Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal + * is to make it possible for given URL to quickly retrieve all known patterns + * that match it. + */ +function empty_node() { + return { + wildcard_matches: [null, null, null], + literal_match: null, + children: {} + }; +} + +function is_empty_node(tree_node) { + const children = tree_node.children; + for (const key in children) { + if (children.hasOwnProperty(key)) + return false; + } + + if (Array.reduce(tree_node.wildcard_matches, (a, b) => b && a !== null, 1)) + return false; + + return tree_node.literal_match === null; +} + +const is_wildcard = segment => ["*", "**", "***"].lastIndexOf(segment) >= 0; + +/* + * Yields all matches of this segments sequence against the tree that starts at + * this node. Results are produces in order from greatest to lowest pattern + * specificity. + */ +function* search_sequence(tree_node, segments) +{ + const nodes = [tree_node]; + + for (const segment of segments) { + const next_node = nodes[nodes.length - 1].children[segment]; + if (next_node === undefined) + break; + + nodes.push(next_node); + } + + const nsegments = segments.length; + + const conds = [ + /* literal pattern match */ + () => nodes.length == nsegments, + /* wildcard pattern matches */ + () => nodes.length + 1 == nsegments && segments[nsegments - 1] != "*", + () => nodes.length + 1 < nsegments, + () => nodes.length + 1 != nsegments || segments[nsegments - 1] != "***" + ]; + + while (nodes.length) { + const node = nodes.pop(); + const items = [node.literal_match, ...node.wildcard_matches]; + + for (let i = 0; i < 4; i++) { + if (items[i] !== null && conds[i]()) + yield items[i]; + } + } +} + +/* + * Make item queryable through (this branch of) the Pattern Tree or remove its + * path from there. + * + * item_modifier should be a function that accepts 1 argument, the item stored + * in the tree (or `null` if there wasn't any item there), and returns an item + * that should be used in place of the first one. It is also legal for it to + * return the same item modifying it first. If it returns `null`, it means the + * item should be deleted from the Tree. + * + * If there was not yet any item associated with the tree path designated by + * segments and value returned by item_modifier is not `null`, make the value + * queryable by this path. + */ +function modify_sequence(tree_node, segments, item_modifier) +{ + const nodes = [tree_node]; + let removed = true; + + for (var current_segment of segments) { + wildcards = tree_node.wildcard_matches; + + const child = tree_node.children[current_segment] || empty_node(); + tree_node.children[current_segment] = child; + tree_node = child; + nodes.push(tree_node); + } + + tree_node.literal_match = item_modifier(tree_node.literal_match); + if (tree_node.literal_match !== null) + removed = false; + + let i = segments.length; + + if (is_wildcard(current_segment)) { + const asterisks = current_segment.length - 1; + const wildcards = nodes[i - 1].wildcard_matches; + wildcards[asterisks] = item_modifier(wildcards[asterisks]); + if (wildcards[asterisks] !== null) + removed = false; + } + + while (!removed) + return; + + while (i > 0) { + tree_node = nodes[i--]; + if (is_empty_node(tree_node)) + delete nodes[i].children[segments[i]]; + } +} + +/* Helper function for modify_tree(). */ +function modify_path(tree_node, deco, item_modifier) +{ + tree_node = tree_node || empty_node(); + modify_sequence(tree_node, deco.path, item_modifier); + return is_empty_node(tree_node) ? null : tree_node; +} + +/* Helper function for modify_tree(). */ +function modify_domain(tree_node, deco, item_modifier) +{ + const path_modifier = branch => modify_path(branch, deco, item_modifier); + tree_node = tree_node || empty_node(); + /* We need an array of domain labels ordered most-significant-first. */ + modify_sequence(tree_node, [...deco.domain].reverse(), path_modifier); + return is_empty_node(tree_node) ? null : tree_node; +} + +/* Helper function for pattern_tree_register() and pattern_tree_deregister(). */ +function modify_tree(patterns_by_proto, pattern, item_modifier) +{ + /* + * We pass 'false' to disable length limits on URL parts. Length limits are + * mostly useful in case of iteration over all patterns matching given URL. + * Here we don't do that. + */ + const deco = deconstruct_url(pattern, false); + + let tree_for_proto = patterns_by_proto[deco.proto]; + + tree_for_proto = deco.domain === undefined ? + modify_path(tree_for_proto, deco, item_modifier) : + modify_domain(tree_for_proto, deco, item_modifier); + + patterns_by_proto[deco.proto] = tree_for_proto; + if (tree_for_proto === null) + delete patterns_by_proto[deco.proto]; +} + +/* + * Make item queryable through the Pattern Tree that starts with the protocols + * dictionary object passed in the first argument. + */ +function pattern_tree_register(patterns_by_proto, pattern, item_name, item) +{ + const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_'; + item_name = key_prefix + item_name; + const add_item = obj => Object.assign(obj || {}, {[item_name]: item}); + modify_tree(patterns_by_proto, pattern, add_item); +} + +/* Helper function for pattern_tree_deregister(). */ +function _remove_item(obj, item_name) +{ + obj = obj || {}; + delete obj[item_name]; + for (const key in obj) + return obj; + return null; +} + +/* + * Remove registered item from the Pattern Tree that starts with the protocols + * dictionary object passed in the first argument. The remaining 2 arguments + * should be pattern and name that have been earlier passed to + * pattern_tree_register(). + */ +function pattern_tree_deregister(patterns_by_proto, pattern, item_name) +{ + const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_'; + item_name = key_prefix + item_name; + const remove_item = obj => _remove_item(obj, item_name); + modify_tree(patterns_by_proto, pattern, remove_item); +} + +/* + * Yield registered items that match url. Each yielded value is an object with + * key being matched item names and values being the items. One such object + * shall contain all items matched with given pattern specificity. Objects are + * yielded in order from greatest to lowest pattern specificity. + */ +function* pattern_tree_search(patterns_by_proto, url) +{ + const deco = deconstruct_url(url, false); + + const tree_for_proto = patterns_by_proto[deco.proto] || empty_node(); + let by_path = [tree_for_proto]; + + /* We need an array of domain labels ordered most-significant-first. */ + if (deco.domain !== undefined) + by_path = search_sequence(tree_for_proto, [...deco.domain].reverse()); + + for (const path_tree of by_path) { + for (const match_obj of search_sequence(path_tree, deco.path)) { + let result_obj_slash = null; + let result_obj_no_slash = null; + + for (const [key, item] of Object.entries(match_obj)) { + if (deco.trailing_slash && key[0] === '/') { + result_obj_slash = result_obj_slash || {}; + result_obj_slash[key.substring(1)] = item; + } else if (key[0] !== '/') { + result_obj_no_slash = result_obj_no_slash || {}; + result_obj_no_slash[key.substring(1)] = item; + } + } + + if (deco.trailing_slash && result_obj_slash) + yield result_obj_slash; + + if (result_obj_no_slash) + yield result_obj_no_slash; + } + } +} + +const pattern_tree = { + make: () => ({}), + register: pattern_tree_register, + deregister: pattern_tree_deregister, + search: pattern_tree_search +} + +/* + * EXPORTS_START + * EXPORT pattern_tree + * EXPORTS_END + */ |