/**
* This file is part of Haketilo.
*
* Function: Operations on page URL patterns.
*
* Copyright (C) 2021 Wojtek Kosior
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* As additional permission under GNU GPL version 3 section 7, you
* may distribute forms of that code without the copy of the GNU
* GPL normally required by section 4, provided you include this
* license notice and, in case of non-source distribution, a URL
* through which recipients can access the Corresponding Source.
* If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not
* obligated to do so. If you do not wish to do so, delete this
* exception statement from your version.
*
* As a special exception to the GPL, any HTML file which merely
* makes function calls to this code, and for that purpose
* includes it by reference shall be deemed a separate work for
* copyright law purposes. If you modify this code, you may extend
* this exception to your version of the code, but you are not
* obligated to do so. If you do not wish to do so, delete this
* exception statement from your version.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* I, Wojtek Kosior, thereby promise not to sue for violation of this file's
* license. Although I request that you do not make use of this code in a
* proprietary program, I am not going to enforce this in court.
*/
/*
 * Limits that deconstruct_url() applies to a URL's domain and path (when
 * called with use_limits set).
 */
const MAX = {
    /* Maximum number of path segments kept. */
    URL_PATH_LEN:   12,
    /* Maximum length, in characters, of the path string. */
    URL_PATH_CHARS: 255,
    /* Maximum number of domain labels kept. */
    DOMAIN_LEN:     7,
    /* Maximum length, in characters, of the domain string. */
    DOMAIN_CHARS:   100
};

/* Splits "<proto>://<rest>" into the protocol name and everything after it. */
const proto_regex = /^(\w+):\/\/(.*)$/;

/*
 * Building blocks (regex source strings) for the per-protocol regexes below.
 * NOTE(review): domain_re accepts "*", presumably so that wildcard patterns
 * can be parsed with the same regexes as real URLs - confirm.
 */
const user_re = "[^/?#@]+@";
const domain_re = "[.*a-zA-Z0-9-]+";
const path_re = "[^?#]*";
const query_re = "\\??[^#]*";

/*
 * These are matched against the part of the URL that follows "://".
 * The trailing ".*" consumes whatever remains (e.g. a "#fragment").
 *
 * http_regex groups: 1 - domain, 2 - path, 3 - query.
 * file_regex groups: 1 - path (has to start with "/").
 * ftp_regex  groups: 1 - optional "user@", 2 - domain, 3 - path.
 */
const http_regex = new RegExp(`^(${domain_re})(${path_re})(${query_re}).*`);
const file_regex = new RegExp(`^(/${path_re}).*`);
const ftp_regex = new RegExp(`^(${user_re})?(${domain_re})(${path_re}).*`);
/*
 * Run `regex` against `string` and hand back the match array.
 * When there is no match, `error_msg` itself is thrown (as given by the
 * caller, without being wrapped in an Error object).
 */
function match_or_throw(regex, string, error_msg)
{
    const result = regex.exec(string);

    if (result !== null)
        return result;

    throw error_msg;
}
/*
 * Break a URL into the components used when matching it against patterns.
 *
 * Arguments:
 *   url        - string of the form "<proto>://<rest>"; proto has to be one
 *                of "file", "ftp", "http", "https".
 *   use_limits - when true (the default) domain and path get truncated to
 *                the limits in MAX; when false no limit is applied.
 *
 * Returns an object with the properties:
 *   proto            - protocol name as written in the url.
 *   domain           - array of domain labels (lower-cased for http(s) only);
 *                      not set for "file" urls.
 *   path             - array of the non-empty path segments.
 *   query            - query string with its leading "?" (may be ""),
 *                      http(s) only.
 *   trailing_slash   - whether the path, before any truncation, ended in "/".
 *   domain_truncated,
 *   path_truncated   - set to true only when the respective part got cut.
 *
 * Throws a string (not an Error object) when url cannot be parsed or uses an
 * unsupported protocol.
 */
function deconstruct_url(url, use_limits=true)
{
    /* Work on a copy so that MAX itself is never modified. */
    const max = Object.assign({}, MAX);
    if (!use_limits) {
        for (const key in MAX)
            max[key] = Infinity;
    }

    const matcher = (re, str) => match_or_throw(re, str, `bad url '${url}'`);

    const proto_match = matcher(proto_regex, url);
    const deco = {proto: proto_match[1]};

    if (deco.proto === "file") {
        deco.path = matcher(file_regex, proto_match[2])[1];
    } else if (deco.proto === "ftp") {
        /* Group 1 is the optional "user@" part, which is dropped. */
        [deco.domain, deco.path] =
            matcher(ftp_regex, proto_match[2]).slice(2, 4);
    } else if (deco.proto === "http" || deco.proto === "https") {
        [deco.domain, deco.path, deco.query] =
            matcher(http_regex, proto_match[2]).slice(1, 4);
        deco.domain = deco.domain.toLowerCase();
    } else {
        throw `unsupported protocol in url '${url}'`;
    }

    /* Recorded before truncation, so it describes the original path. */
    deco.trailing_slash = deco.path[deco.path.length - 1] === "/";

    /* Character limits only apply to urls that have a domain (not "file"). */
    if (deco.domain) {
        if (deco.domain.length > max.DOMAIN_CHARS) {
            /*
             * Keep the longest suffix that starts right after a "." and is
             * shorter than DOMAIN_CHARS; if there is no such ".", nothing of
             * the domain is kept.
             */
            const idx = deco.domain.indexOf(".", deco.domain.length -
                                            max.DOMAIN_CHARS);
            if (idx === -1)
                deco.domain = [];
            else
                deco.domain = deco.domain.substring(idx + 1);

            deco.domain_truncated = true;
        }

        if (deco.path.length > max.URL_PATH_CHARS) {
            /*
             * Cut at the last "/" located at or before the limit, so that the
             * result really fits in URL_PATH_CHARS. Searching the whole path
             * for its last "/" could leave it longer than the limit.
             */
            const idx = deco.path.lastIndexOf("/", max.URL_PATH_CHARS);
            deco.path = deco.path.substring(0, Math.max(idx, 0));
            deco.path_truncated = true;
        }
    }

    /* domain is already an (empty) array if it got dropped above. */
    if (typeof deco.domain === "string") {
        deco.domain = deco.domain.split(".");
        /* Drop leading labels so that at most DOMAIN_LEN trailing ones stay. */
        const dropped =
              deco.domain.splice(0, deco.domain.length - max.DOMAIN_LEN);
        if (dropped.length > 0)
            deco.domain_truncated = true;
    }

    deco.path = deco.path.split("/").filter(s => s !== "");
    /* Segment count limit, like the character limits, skips "file" urls. */
    if (deco.domain && deco.path.splice(max.URL_PATH_LEN).length > 0)
        deco.path_truncated = true;

    return deco;
}
#EXPORT deconstruct_url
/*
 * Generator of domain pattern strings for a deconstructed URL.
 *
 * `deco.domain` has to be an array of domain labels; `deco.domain_truncated`
 * is consulted as well. For every suffix of the domain that has at least
 * 2 labels, starting with the full domain, the following are yielded in
 * order:
 *   - the domain itself         (full domain only, unless truncated),
 *   - "*."   + suffix           (first label dropped only, unless truncated),
 *   - "**."  + suffix           (2 or more labels dropped),
 *   - "***." + suffix           (always).
 */
function* each_domain_pattern(deco)
{
    for (let slice = 0; slice < deco.domain.length - 1; slice++) {
        const domain_part = deco.domain.slice(slice).join(".");

        if (slice === 0 && !deco.domain_truncated)
            yield domain_part;
        if (slice === 1 && !deco.domain_truncated)
            yield "*." + domain_part;
        if (slice > 1)
            yield "**." + domain_part;
        yield "***." + domain_part;
    }
}
function* each_path_pattern(deco)
{
for (let slice = deco.path.length; slice >= 0; slice--) {
const path_part = ["", ...deco.path.slice(0, slice)].join("/");
con
2021-10-30 | Fix license notices on JS and SH files... | jahoti |
2021-09-13 | rename the extension to "Haketilo" | Wojtek Kosior |
2021-09-02 | enable toggling of global script blocking policy\n\nThis commit also introduc... | Wojtek Kosior |