aboutsummaryrefslogtreecommitdiff
/**
 * This file is part of Haketilo.
 *
 * Function: Data structure to query items by URL patterns.
 *
 * Copyright (C) 2021 Wojtek Kosior
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * As additional permission under GNU GPL version 3 section 7, you
 * may distribute forms of that code without the copy of the GNU
 * GPL normally required by section 4, provided you include this
 * license notice and, in case of non-source distribution, a URL
 * through which recipients can access the Corresponding Source.
 * If you modify file(s) with this exception, you may extend this
 * exception to your version of the file(s), but you are not
 * obligated to do so. If you do not wish to do so, delete this
 * exception statement from your version.
 *
 * As a special exception to the GPL, any HTML file which merely
 * makes function calls to this code, and for that purpose
 * includes it by reference shall be deemed a separate work for
 * copyright law purposes. If you modify this code, you may extend
 * this exception to your version of the code, but you are not
 * obligated to do so. If you do not wish to do so, delete this
 * exception statement from your version.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
 * license. Although I request that you do not make use this code in a
 * proprietary program, I am not going to enforce this in court.
 */

/*
 * IMPORTS_START
 * IMPORT deconstruct_url
 * IMPORTS_END
 */

/*
 * This is a javascript rewrite of Python implementation of the algorithm here:
 * https://git.koszko.org/pydrilla/tree/src/pydrilla/pydrilla.py?id=f4edcbe7f4739d6f82a2e1bb180960b003b30862#n205
 */

/* "Pattern Tree" is how we refer to the data structure used for querying
 * Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal
 * is to make it possible for given URL to quickly retrieve all known patterns
 * that match it.
 */
function make_tree_node() {
    return {
	wildcard_matches: [null, null, null],
	literal_match:    null,
	children:         null
    };
}

const is_wildcard = segment => ["*", "**", "***"].lastIndexOf(segment) >= 0;

/*
 * Yields all matches of this segments sequence against the tree that starts at
 * this node. Results are produces in order from greatest to lowest pattern
 * specificity.
 */
function* search_sequence(tree_node, segments)
{
    const nodes = [tree_node];

    for (const segment of segments) {
	const next_node = nodes[nodes.length - 1].children[segment];
	if (next_node === undefined)
	    break;

	nodes.push(next_node);
    }

    const nsegments = segments.length;

    const conds = [
	/* literal pattern match */
	() => nodes.length     == nsegments,
	/* wildcard pattern matches */
	() => nodes.length + 1 == nsegments && segments[nsegments - 1] != "*",
	() => nodes.length + 1 <  nsegments,
	() => nodes.length + 1 != nsegments || segments[nsegments - 1] != "***"
    ];

    while (nodes.length) {
	const node = nodes.pop();
	const items = [node.literal_match, ...node.wildcard_matches];

	for (let i = 0; i < 4; i++) {
	    if (items[i] !== null && conds[i]())
		yield items[i];
	}
    }
}

/*
 * Make item queryable through (this branch of) the Pattern Tree. If there was
 * not yet any item associated with the tree path designated by segments, create
 * a new one using item_instantiator() function. Return all items matching this
 * path (both the ones that existed and the ones just created).
 */
function add_sequence(tree_node, segments, item_instantiator)
{
    let wildcards;

    for (var segment of segments) {
	wildcards = tree_node.wildcard_matches;

	const child = tree_node.children[segment] || make_tree_node();
	tree_node.children[segment] = child;
	tree_node = child;
    }

    if (tree_node.literal_match === null)
	tree_node.literal_match = item_instantiator();

    if (!is_wildcard(segment))
	return [tree_node.literal_match];

    if (wildcards[segment.length - 1] === null)
	wildcards[segment.length - 1] = item_instantiator();

    return [tree_node.literal_match, wildcards[segment.length - 1]];
}

/*
 * Can be used to remove item from (this branch of) the Pattern Tree.
 * item_modifier should be a function that accepts 1 argument, the item stored
 * in the tree, and returns an item that should be used in place of the first
 * one. It is also legal for it to return the same item modifying it first. If
 * it returns `undefined`, it means the item should be deleted from the Tree.
 *
 * item_modifier() will be called with all items in (this branch of) the Pattern
 * Tree that correspond to the path designated by segments. The Tree will be
 * updated accordingly.
 */
function modify_sequence(tree_node, segments, item_modifier)
{
    const nodes = [tree_node];

    for (const segment of segments) {
	const next_node = nodes[nodes.length - 1].children[segment];
	nodes.push(next_node);
    }

    const nsegments = segments.length;
    let i = segments.length - 1;

    if (nsegments > 0 && is_wildcard(segments[i])) {
	const node = nodes[nodes.length - 2];
	const asterisks = segments[i].length;
    }

    ............................

    for (let i = segments.length - 1; i >= 0; i--) {

	if (i
    }

    segments

    const conds = [
	/* literal pattern match */
	() => nodes.length     == nsegments,
	/* wildcard pattern matches */
	() => nodes.length + 1 == nsegments && segments[nsegments - 1] != "*",
	() => nodes.length + 1 <  nsegments,
	() => nodes.length + 1 != nsegments || segments[nsegments - 1] != "***"
    ];

    while (nodes.length) {
	const node = nodes.pop();
	const items = [node.literal_match, ...node.wildcard_matches];

	for (let i = 0; i < 4; i++) {
	    if (items[i] !== null && conds[i]())
		yield items[i];
	}
    }
    .................
}

/*
 * Make item queryable through the Pattern Tree that starts with the protocols
 * dictionary object passed in the first argument.
 */
function pattern_tree_register(patterns_by_proto, pattern, item_name, item)
{
    /*
     * We pass 'false' to disable length limits on URL parts. Length limits are
     * mostly useful in case of iteration over all patterns matching given URL.
     * Here we don't do that.
     */
    const deco = deconstruct_url(pattern, false);

    const tree = patterns_by_proto[deco.proto] || make_tree_node();
    patterns_by_proto[deco.proto] = tree;

    let path_trees;

    if (deco.proto === "file") {
	path_trees = [tree];
    } else {
	/* We need an aray of domain labels ordered most-significant-first. */
	const domain = [...deco.domain].reverse();
	path_trees = add_sequence(tree, domain, make_tree_node);
    }

    for (const path_tree of path_trees) {
        for (const match_obj of add_sequence(path_tree, deco.path, () => {}))
            match_obj[item_name] = item;
    }
}

/*
 * Remove registered item from the Pattern Tree that starts with the protocols
 * dictionary object passed in the first argument. The remaining 2 arguments
 * should be pattern and name that have been earlier passed to
 * pattern_tree_register().
 */
function pattern_tree_deregister(patterns_by_proto, pattern, item_name)
{
    const deco = deconstruct_url(pattern, false);

    const tree = patterns_by_proto[deco.proto] || make_tree_node();
    patterns_by_proto[deco.proto] = tree;

    let path_trees;

    if (deco.proto === "file") {
	path_trees = [tree];
    } else {
	/* We need an aray of domain labels ordered most-significant-first. */
	const domain = [...deco.domain].reverse();
	..........................
	path_trees = add_sequence(tree, domain, make_tree_node);
    }
}

/*
 * Yield registered items (as [item_name, item] arrays) that match url. For
 * cosistent behavior, please don't modify the pattern tree while iterating over
 * the results.
 */
function* pattern_tree_search(patterns_by_proto, url)
{
    const deco = deconstruct_url(url, false);

    tree = patterns_by_proto[deco.proto] || make_tree_node();

    let path_trees;

    if (deco.proto === "file") {
	path_trees = [tree];
    } else {
	/* We need an aray of domain labels ordered most-significant-first. */
	const domain = [...deco.domain].reverse();
	path_trees = search_sequence(tree, domain);
    }

    for (const path_tree of path_trees) {
	for (const match_obj of search_sequence(path_tree, deco.path)) {
	    for (const entry of Object.entries(match_obj))
		yield entry;
	}
    }
}

const patterns_tree = {
    make: () => {},
    register: patterns_tree_register,
    search: patterns_tree_search
}

/*
 * EXPORTS_START
 * EXPORT patterns_tree
 * EXPORTS_END
 */