/** * This file is part of Haketilo. * * Function: Data structure to query items by URL patterns. * * Copyright (C) 2021 Wojtek Kosior * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * As additional permission under GNU GPL version 3 section 7, you * may distribute forms of that code without the copy of the GNU * GPL normally required by section 4, provided you include this * license notice and, in case of non-source distribution, a URL * through which recipients can access the Corresponding Source. * If you modify file(s) with this exception, you may extend this * exception to your version of the file(s), but you are not * obligated to do so. If you do not wish to do so, delete this * exception statement from your version. * * As a special exception to the GPL, any HTML file which merely * makes function calls to this code, and for that purpose * includes it by reference shall be deemed a separate work for * copyright law purposes. If you modify this code, you may extend * this exception to your version of the code, but you are not * obligated to do so. If you do not wish to do so, delete this * exception statement from your version. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . * * I, Wojtek Kosior, thereby promise not to sue for violation of this file's * license. Although I request that you do not make use this code in a * proprietary program, I am not going to enforce this in court. */ /* * IMPORTS_START * IMPORT deconstruct_url * IMPORTS_END */ /* * This is a javascript rewrite of Python implementation of the algorithm here: * https://git.koszko.org/pydrilla/tree/src/pydrilla/pydrilla.py?id=f4edcbe7f4739d6f82a2e1bb180960b003b30862#n205 */ /* "Pattern Tree" is how we refer to the data structure used for querying * Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal * is to make it possible for given URL to quickly retrieve all known patterns * that match it. */ function make_tree_node() { return { wildcard_matches: [null, null, null], literal_match: null, children: null }; } const is_wildcard = segment => ["*", "**", "***"].lastIndexOf(segment) >= 0; /* * Yields all matches of this segments sequence against the tree that starts at * this node. Results are produces in order from greatest to lowest pattern * specificity. */ function* search_sequence(tree_node, segments) { const nodes = [tree_node]; for (const segment of segments) { const next_node = nodes[nodes.length - 1].children[segment]; if (next_node === undefined) break; nodes.push(next_node); } const nsegments = segments.length; const conds = [ /* literal pattern match */ () => nodes.length == nsegments, /* wildcard pattern matches */ () => nodes.length + 1 == nsegments && segments[nsegments - 1] != "*", () => nodes.length + 1 < nsegments, () => nodes.length + 1 != nsegments || segments[nsegments - 1] != "***" ]; while (nodes.length) { const node = nodes.pop(); const items = [node.literal_match, ...node.wildcard_matches]; for (let i = 0; i < 4; i++) { if (items[i] !== null && conds[i]()) yield items[i]; } } } /* * Make item queryable through (this branch of) the Pattern Tree. If there was * not yet any item associated with the tree path designated by segments, create * a new one using item_instantiator() function. Return all items matching this * path (both the ones that existed and the ones just created). */ function add_sequence(tree_node, segments, item_instantiator) { let wildcards; for (var segment of segments) { wildcards = tree_node.wildcard_matches; const child = tree_node.children[segment] || make_tree_node(); tree_node.children[segment] = child; tree_node = child; } if (tree_node.literal_match === null) tree_node.literal_match = item_instantiator(); if (!is_wildcard(segment)) return [tree_node.literal_match]; if (wildcards[segment.length - 1] === null) wildcards[segment.length - 1] = item_instantiator(); return [tree_node.literal_match, wildcards[segment.length - 1]]; } /* * Can be used to remove item from (this branch of) the Pattern Tree. * item_modifier should be a function that accepts 1 argument, the item stored * in the tree, and returns an item that should be used in place of the first * one. It is also legal for it to return the same item modifying it first. If * it returns `undefined`, it means the item should be deleted from the Tree. * * item_modifier() will be called with all items in (this branch of) the Pattern * Tree that correspond to the path designated by segments. The Tree will be * updated accordingly. */ function modify_sequence(tree_node, segments, item_modifier) { const nodes = [tree_node]; for (const segment of segments) { const next_node = nodes[nodes.length - 1].children[segment]; nodes.push(next_node); } const nsegments = segments.length; let i = segments.length - 1; if (nsegments > 0 && is_wildcard(segments[i])) { const node = nodes[nodes.length - 2]; const asterisks = segments[i].length; } ............................ for (let i = segments.length - 1; i >= 0; i--) { if (i } segments const conds = [ /* literal pattern match */ () => nodes.length == nsegments, /* wildcard pattern matches */ () => nodes.length + 1 == nsegments && segments[nsegments - 1] != "*", () => nodes.length + 1 < nsegments, () => nodes.length + 1 != nsegments || segments[nsegments - 1] != "***" ]; while (nodes.length) { const node = nodes.pop(); const items = [node.literal_match, ...node.wildcard_matches]; for (let i = 0; i < 4; i++) { if (items[i] !== null && conds[i]()) yield items[i]; } } ................. } /* * Make item queryable through the Pattern Tree that starts with the protocols * dictionary object passed in the first argument. */ function pattern_tree_register(patterns_by_proto, pattern, item_name, item) { /* * We pass 'false' to disable length limits on URL parts. Length limits are * mostly useful in case of iteration over all patterns matching given URL. * Here we don't do that. */ const deco = deconstruct_url(pattern, false); const tree = patterns_by_proto[deco.proto] || make_tree_node(); patterns_by_proto[deco.proto] = tree; let path_trees; if (deco.proto === "file") { path_trees = [tree]; } else { /* We need an aray of domain labels ordered most-significant-first. */ const domain = [...deco.domain].reverse(); path_trees = add_sequence(tree, domain, make_tree_node); } for (const path_tree of path_trees) { for (const match_obj of add_sequence(path_tree, deco.path, () => {})) match_obj[item_name] = item; } } /* * Remove registered item from the Pattern Tree that starts with the protocols * dictionary object passed in the first argument. The remaining 2 arguments * should be pattern and name that have been earlier passed to * pattern_tree_register(). */ function pattern_tree_deregister(patterns_by_proto, pattern, item_name) { const deco = deconstruct_url(pattern, false); const tree = patterns_by_proto[deco.proto] || make_tree_node(); patterns_by_proto[deco.proto] = tree; let path_trees; if (deco.proto === "file") { path_trees = [tree]; } else { /* We need an aray of domain labels ordered most-significant-first. */ const domain = [...deco.domain].reverse(); .......................... path_trees = add_sequence(tree, domain, make_tree_node); } } /* * Yield registered items (as [item_name, item] arrays) that match url. For * cosistent behavior, please don't modify the pattern tree while iterating over * the results. */ function* pattern_tree_search(patterns_by_proto, url) { const deco = deconstruct_url(url, false); tree = patterns_by_proto[deco.proto] || make_tree_node(); let path_trees; if (deco.proto === "file") { path_trees = [tree]; } else { /* We need an aray of domain labels ordered most-significant-first. */ const domain = [...deco.domain].reverse(); path_trees = search_sequence(tree, domain); } for (const path_tree of path_trees) { for (const match_obj of search_sequence(path_tree, deco.path)) { for (const entry of Object.entries(match_obj)) yield entry; } } } const patterns_tree = { make: () => {}, register: patterns_tree_register, search: patterns_tree_search } /* * EXPORTS_START * EXPORT patterns_tree * EXPORTS_END */