/**
 * This file is part of Haketilo.
 *
 * Function: Operations on page URL patterns.
 *
 * Copyright (C) 2021 Wojtek Kosior
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * As additional permission under GNU GPL version 3 section 7, you
 * may distribute forms of that code without the copy of the GNU
 * GPL normally required by section 4, provided you include this
 * license notice and, in case of non-source distribution, a URL
 * through which recipients can access the Corresponding Source.
 * If you modify file(s) with this exception, you may extend this
 * exception to your version of the file(s), but you are not
 * obligated to do so. If you do not wish to do so, delete this
 * exception statement from your version.
 *
 * As a special exception to the GPL, any HTML file which merely
 * makes function calls to this code, and for that purpose
 * includes it by reference shall be deemed a separate work for
 * copyright law purposes. If you modify this code, you may extend
 * this exception to your version of the code, but you are not
 * obligated to do so. If you do not wish to do so, delete this
 * exception statement from your version.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 *
 * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
 * license. Although I request that you do not make use this code in a
 * proprietary program, I am not going to enforce this in court.
 */

/*
 * Limits applied by deconstruct_url() when called with use_limits=true:
 *   URL_PATH_LEN   - maximum number of path segments kept,
 *   URL_PATH_CHARS - path length (in characters) above which the last
 *                    segment is cut off,
 *   DOMAIN_LEN     - maximum number of domain labels kept,
 *   DOMAIN_CHARS   - domain length (in characters) above which leading
 *                    labels are cut off.
 */
const MAX = {
    URL_PATH_LEN:   12,
    URL_PATH_CHARS: 255,
    DOMAIN_LEN:     7,
    DOMAIN_CHARS:   100
};

/* Splits "<proto>://<rest>" into its two parts. */
const proto_regex = /^(\w+):\/\/(.*)$/;

const user_re   = "[^/?#@]+@";
const domain_re = "[^/?#]+";
const path_re   = "[^?#]*";
const query_re  = "\\??[^#]*";

/* These are matched against the part of the URL that follows "://". */
const http_regex = new RegExp(`^(${domain_re})(${path_re})(${query_re}).*`);
const file_regex = new RegExp(`^(${path_re}).*`);
const ftp_regex  = new RegExp(`^(${user_re})?(${domain_re})(${path_re}).*`);

/*
 * Split a URL into the pieces needed for pattern generation.
 *
 * url        - the URL string; "file", "ftp", "http" and "https" protocols
 *              are supported.
 * use_limits - when true (default) the limits from MAX are applied; when
 *              false every limit is treated as Infinity.
 *
 * Returns an object with:
 *   proto            - protocol name (without "://"),
 *   domain           - array of domain labels (absent for "file" URLs; empty
 *                      array if the domain exceeded DOMAIN_CHARS and contained
 *                      no "." to cut at),
 *   path             - array of non-empty path segments,
 *   query            - query string including the leading "?" (http(s) only),
 *   trailing_dash    - true if the path ended with "/",
 *   domain_truncated - set to true only if some domain labels were dropped,
 *   path_truncated   - set to true only if some path segments were dropped.
 * Returns undefined when an http(s) or ftp URL has nothing parsable after
 * "://".
 *
 * Throws a string when the URL has no "<proto>://" prefix or when the
 * protocol is not supported.
 */
function deconstruct_url(url, use_limits=true) {
    /*
     * Work on a private copy: writing Infinity into MAX itself would silently
     * disable the limits for every later call.
     */
    const max = Object.assign({}, MAX);
    if (!use_limits) {
        for (const key of Object.keys(max))
            max[key] = Infinity;
    }

    const proto_match = proto_regex.exec(url);
    if (proto_match === null)
        throw `bad url '${url}'`;

    const deco = {proto: proto_match[1]};
    const rest = proto_match[2];

    if (deco.proto === "file") {
        /* file_regex can match the empty string, so exec() never fails here. */
        deco.path = file_regex.exec(rest)[1];
    } else if (deco.proto === "ftp") {
        const ftp_match = ftp_regex.exec(rest);
        if (!ftp_match)
            return undefined;
        /* Group 1 is the optional "user@" part, which is not used. */
        [deco.domain, deco.path] = ftp_match.slice(2, 4);
    } else if (deco.proto === "http" || deco.proto === "https") {
        const http_match = http_regex.exec(rest);
        if (!http_match)
            return undefined;
        [deco.domain, deco.path, deco.query] = http_match.slice(1, 4);
    } else {
        throw `unsupported protocol in url '${url}'`;
    }

    deco.trailing_dash = deco.path[deco.path.length - 1] === "/";

    /* Character-count limits; only applied to URLs that have a domain. */
    if (deco.domain) {
        if (deco.domain.length > max.DOMAIN_CHARS) {
            /* Cut at the first "." that leaves at most DOMAIN_CHARS chars. */
            const idx = deco.domain.indexOf(".", deco.domain.length -
                                            max.DOMAIN_CHARS);
            if (idx === -1)
                deco.domain = [];
            else
                deco.domain = deco.domain.substring(idx + 1);

            deco.domain_truncated = true;
        }

        if (deco.path.length > max.URL_PATH_CHARS) {
            deco.path = deco.path.substring(0, deco.path.lastIndexOf("/"));
            deco.path_truncated = true;
        }
    }

    /* Label-count limit; domain is already an array if it was emptied above. */
    if (typeof deco.domain === "string") {
        deco.domain = deco.domain.split(".");
        const dropped = deco.domain.splice(0, deco.domain.length -
                                           max.DOMAIN_LEN);
        if (dropped.length > 0)
            deco.domain_truncated = true;
    }

    deco.path = deco.path.split("/").filter(s => s !== "");
    /* Segment-count limit; as above, only for URLs that have a domain. */
    if (deco.domain && deco.path.splice(max.URL_PATH_LEN).length > 0)
        deco.path_truncated = true;

    return deco;
}

/*
 * Generate domain patterns for a deconstructed URL, from the most specific
 * (all labels kept) to the least specific (only the last two labels kept).
 *
 * For each suffix of deco.domain with at least two labels:
 *   - the bare suffix is yielded when no label was dropped,
 *   - "*." + suffix is yielded when exactly one label was dropped,
 *   - "**." + suffix is yielded when two or more labels were dropped by this
 *     function,
 *   - "***." + suffix is always yielded.
 * The first two are suppressed when deco.domain_truncated is set.
 * A domain with fewer than two labels yields nothing.
 */
function* each_domain_pattern(deco) {
    for (let slice = 0; slice < deco.domain.length - 1; slice++) {
        const domain_part = deco.domain.slice(slice).join(".");

        if (slice === 0 && !deco.domain_truncated)
            yield domain_part;
        if (slice === 1 && !deco.domain_truncated)
            yield "*." + domain_part;
        if (slice > 1)
            yield "**." + domain_part;
        yield "***." + domain_part;
    }
}

/*
 * Generate path patterns for a deconstructed URL, from the most specific
 * (all segments kept) to the least specific (no segments kept).
 *
 * For each prefix of deco.path:
 *   - the exact path (with and without trailing "/") is yielded when no
 *     segment was dropped and the path was not truncated,
 *   - prefix + "/*" is yielded when exactly one segment was dropped,
 *   - prefix + "/**" is yielded when two or more segments were dropped,
 *   - prefix + "/***" is yielded at every level.
 */
function* each_path_pattern(deco) {
    for (let slice = deco.path.length; slice >= 0; slice--) {
        const path_part = ["", ...deco.path.slice(0, slice)].join("/");

        if (slice === deco.path.length && !deco.path_truncated) {
            if (deco.trailing_dash && path_part !== "")
                yield path_part + "/";
            /* An empty path is not a usable pattern for "file" URLs. */
            if (path_part !== "" || deco.proto !== "file")
                yield path_part;
        }

        /*
         * If the single dropped segment is literally "*" (or "***" below),
         * the wildcard pattern would repeat the exact path yielded above.
         */
        if (slice === deco.path.length - 1 && !deco.path_truncated &&
            deco.path[slice] !== "*")
            yield path_part + "/*";
        if (slice < deco.path.length - 1)
            yield path_part + "/**";
        if (slice !== deco.path.length - 1 || deco.path_truncated ||
            deco.path[slice] !== "***")
            yield path_part + "/***";
    }
}

/*
 * Generate every possible pattern that matches url.
 *
 * Patterns are yielded as strings "<proto>://<domain pattern><path pattern>",
 * iterating over domain patterns in the outer loop and path patterns in the
 * inner one. URLs without a domain ("file") use an empty domain part.
 * If deconstruct_url() cannot parse the URL, an error is logged and nothing
 * is yielded. Exceptions thrown by deconstruct_url() propagate on first
 * iteration.
 */
function* each_url_pattern(url) {
    const deco = deconstruct_url(url);

    if (deco === undefined) {
        console.error("bad url format", url);
        return false;
    }

    const all_domains = deco.domain ? each_domain_pattern(deco) : [""];
    for (const domain of all_domains) {
        for (const path of each_path_pattern(deco))
            yield `${deco.proto}://${domain}${path}`;
    }
}

/*
 * EXPORTS_START
 * EXPORT each_url_pattern
 * EXPORT deconstruct_url
 * EXPORTS_END
 */