/** * This file is part of Haketilo. * * Function: Data structure to query items by URL patterns. * * Copyright (C) 2021, 2022 Wojtek Kosior <koszko@koszko.org> * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * As additional permission under GNU GPL version 3 section 7, you * may distribute forms of that code without the copy of the GNU * GPL normally required by section 4, provided you include this * license notice and, in case of non-source distribution, a URL * through which recipients can access the Corresponding Source. * If you modify file(s) with this exception, you may extend this * exception to your version of the file(s), but you are not * obligated to do so. If you do not wish to do so, delete this * exception statement from your version. * * As a special exception to the GPL, any HTML file which merely * makes function calls to this code, and for that purpose * includes it by reference shall be deemed a separate work for * copyright law purposes. If you modify this code, you may extend * this exception to your version of the code, but you are not * obligated to do so. If you do not wish to do so, delete this * exception statement from your version. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <https://www.gnu.org/licenses/>. * * I, Wojtek Kosior, thereby promise not to sue for violation of this file's * license. Although I request that you do not make use of this code in a * proprietary program, I am not going to enforce this in court. */ #FROM common/patterns.js IMPORT deconstruct_url /* "Pattern Tree" is how we refer to the data structure used for querying * Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal * is to make it possible for given URL to quickly retrieve all known patterns * that match it. */ const pattern_tree_make = () => ({}) #EXPORT pattern_tree_make AS make const empty_node = () => ({}); function is_empty_node(tree_node) { for (const key in tree_node) return false; return true; } const wildcard_matches = node => [node["*"], node["**"], node["***"]]; const is_wildcard = segment => ["*", "**", "***"].lastIndexOf(segment) >= 0; /* * Remove reference to given child fron node and leave the node in consistent * state afterwards, i.e. remove the "c" property if no childs are left. */ function delete_child(node, child_name) { if (node.c) { delete node.c[child_name]; for (const key in node.c) return; delete node.c; } } /* * Yields all matches of this segments sequence against the tree that starts at * this node. Results are produces in order from greatest to lowest pattern * specificity. */ function* search_sequence(tree_node, segments) { const nodes = [tree_node]; for (const current_segment of segments) { const children = nodes[nodes.length - 1].c || {}; if (!Object.hasOwnProperty.call(children, current_segment)) break; nodes.push(children[current_segment]); } const nsegments = segments.length; const conds = [ /* literal pattern match */ () => nodes.length == nsegments, /* wildcard pattern matches */ () => nodes.length + 1 == nsegments && segments[nsegments - 1] != "*", () => nodes.length + 1 < nsegments, () => nodes.length + 1 != nsegments || segments[nsegments - 1] != "***" ]; while (nodes.length) { const node = nodes.pop(); const literal_match = node.l; const items = [literal_match, ...wildcard_matches(node)]; for (let i = 0; i < items.length; i++) { if (items[i] !== undefined && conds[i]()) yield items[i]; } } } /* * Make item queryable through (this branch of) the Pattern Tree or remove its * path from there. * * item_modifier should be a function that accepts 1 argument, the item stored * in the tree (or `null` if there wasn't any item there), and returns an item * that should be used in place of the first one. It is also legal for it to * return the same item modifying it first. If it returns `null`, it means the * item should be deleted from the Tree. * * If there was not yet any item associated with the tree path designated by * segments and value returned by item_modifier is not `null`, make the value * queryable by this path. */ function modify_sequence(tree_node, segments, item_modifier) { const nodes = [tree_node]; let removed = true; for (var current_segment of segments) { const children = tree_node.c || {}; tree_node.c = children; const child = Object.hasOwnProperty.call(children, current_segment) ? children[current_segment] : empty_node(); children[current_segment] = child; tree_node = child; nodes.push(tree_node); } tree_node.l = item_modifier(tree_node.l || null); if (tree_node.l === null) delete tree_node.l; else removed = false; let i = segments.length; if (is_wildcard(current_segment)) { nodes[i - 1][current_segment] = item_modifier(nodes[i - 1][current_segment] || null); if (nodes[i - 1][current_segment] === null) delete nodes[i - 1][current_segment]; else removed = false; } if (!removed) return; while (i > 0) { tree_node = nodes[i--]; if (is_empty_node(tree_node)) delete_child(nodes[i], segments[i]); else break; } } /* Helper function for modify_tree(). */ function modify_path(tree_node, deco, item_modifier) { tree_node = tree_node || empty_node(); modify_sequence(tree_node, deco.path, item_modifier); return is_empty_node(tree_node) ? null : tree_node; } /* Helper function for modify_tree(). */ function modify_domain(tree_node, deco, item_modifier) { const path_modifier = branch => modify_path(branch, deco, item_modifier); tree_node = tree_node || empty_node(); /* We need an array of domain labels ordered most-significant-first. */ modify_sequence(tree_node, [...deco.domain].reverse(), path_modifier); return is_empty_node(tree_node) ? null : tree_node; } /* Helper function for pattern_tree_register() and pattern_tree_deregister(). */ function modify_tree(patterns_by_proto, pattern, item_modifier) { /* * We pass 'false' to disable length limits on URL parts. Length limits are * mostly useful in case of iteration over all patterns matching given URL. * Here we don't do that. */ const deco = deconstruct_url(pattern, false); let tree_for_proto = patterns_by_proto[deco.proto]; tree_for_proto = deco.domain === undefined ? modify_path(tree_for_proto, deco, item_modifier) : modify_domain(tree_for_proto, deco, item_modifier); patterns_by_proto[deco.proto] = tree_for_proto; if (tree_for_proto === null) delete patterns_by_proto[deco.proto]; } /* * Make item queryable through the Pattern Tree that starts with the protocols * dictionary object passed in the first argument. */ function pattern_tree_register(patterns_by_proto, pattern, item_name, item) { const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_'; item_name = key_prefix + item_name; const add_item = obj => Object.assign(obj || {}, {[item_name]: item}); modify_tree(patterns_by_proto, pattern, add_item); } #EXPORT pattern_tree_register AS register /* Helper function for pattern_tree_deregister(). */ function _remove_item(obj, item_name) { obj = obj || {}; delete obj[item_name]; for (const key in obj) return obj; return null; } /* * Remove registered item from the Pattern Tree that starts with the protocols * dictionary object passed in the first argument. The remaining 2 arguments * should be pattern and name that have been earlier passed to * pattern_tree_register(). */ function pattern_tree_deregister(patterns_by_proto, pattern, item_name) { const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_'; item_name = key_prefix + item_name; const remove_item = obj => _remove_item(obj, item_name); modify_tree(patterns_by_proto, pattern, remove_item); } #EXPORT pattern_tree_deregister AS deregister /* * Yield registered items that match url. Each yielded value is an object with * keys being matched item names and values being the items. One such object * shall contain all items matched with given pattern specificity. Objects are * yielded in order from greatest to lowest pattern specificity. */ function* pattern_tree_search(patterns_by_proto, url) { const deco = deconstruct_url(url, false); const tree_for_proto = patterns_by_proto[deco.proto] || empty_node(); let by_path = [tree_for_proto]; /* We need an array of domain labels ordered most-significant-first. */ if (deco.domain !== undefined) by_path = search_sequence(tree_for_proto, [...deco.domain].reverse()); for (const path_tree of by_path) { for (const match_obj of search_sequence(path_tree, deco.path)) { let result_obj_slash = null; let result_obj_no_slash = null; for (const [key, item] of Object.entries(match_obj)) { if (deco.trailing_slash && key[0] === '/') { result_obj_slash = result_obj_slash || {}; result_obj_slash[key.substring(1)] = item; } else if (key[0] !== '/') { result_obj_no_slash = result_obj_no_slash || {}; result_obj_no_slash[key.substring(1)] = item; } } if (deco.trailing_slash && result_obj_slash) yield result_obj_slash; if (result_obj_no_slash) yield result_obj_no_slash; } } } #EXPORT pattern_tree_search AS search