/** * This file is part of Haketilo. * * Function: Operations on page URL patterns. * * Copyright (C) 2021 Wojtek Kosior * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * As additional permission under GNU GPL version 3 section 7, you * may distribute forms of that code without the copy of the GNU * GPL normally required by section 4, provided you include this * license notice and, in case of non-source distribution, a URL * through which recipients can access the Corresponding Source. * If you modify file(s) with this exception, you may extend this * exception to your version of the file(s), but you are not * obligated to do so. If you do not wish to do so, delete this * exception statement from your version. * * As a special exception to the GPL, any HTML file which merely * makes function calls to this code, and for that purpose * includes it by reference shall be deemed a separate work for * copyright law purposes. If you modify this code, you may extend * this exception to your version of the code, but you are not * obligated to do so. If you do not wish to do so, delete this * exception statement from your version. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <https://www.gnu.org/licenses/>. * * I, Wojtek Kosior, thereby promise not to sue for violation of this file's * license. Although I request that you do not make use of this code in a * proprietary program, I am not going to enforce this in court. */ const MAX = { URL_PATH_LEN: 12, URL_PATH_CHARS: 255, DOMAIN_LEN: 7, DOMAIN_CHARS: 100 }; const proto_regex = /^(\w+):\/\/(.*)$/; const user_re = "[^/?#@]+@" const domain_re = "[.*a-zA-Z0-9-]+"; const path_re = "[^?#]*"; const query_re = "\\??[^#]*"; const http_regex = new RegExp(`^(${domain_re})(${path_re})(${query_re}).*`); const file_regex = new RegExp(`^(/${path_re}).*`); const ftp_regex = new RegExp(`^(${user_re})?(${domain_re})(${path_re}).*`); function match_or_throw(regex, string, error_msg) { const match = regex.exec(string); if (match === null) throw error_msg; return match; } function deconstruct_url(url, use_limits=true) { const max = Object.assign({}, MAX); if (!use_limits) { for (const key in MAX) max[key] = Infinity; } const matcher = (re, str) => match_or_throw(re, str, `bad url '${url}'`) const proto_match = matcher(proto_regex, url); const deco = {proto: proto_match[1]}; if (deco.proto === "file") { deco.path = matcher(file_regex, proto_match[2])[1]; } else if (deco.proto === "ftp") { [deco.domain, deco.path] = matcher(ftp_regex, proto_match[2]).slice(2, 4); } else if (deco.proto === "http" || deco.proto === "https") { [deco.domain, deco.path, deco.query] = matcher(http_regex, proto_match[2]).slice(1, 4); deco.domain = deco.domain.toLowerCase(); } else { throw `unsupported protocol in url '${url}'`; } deco.trailing_slash = deco.path[deco.path.length - 1] === "/"; if (deco.domain) { if (deco.domain.length > max.DOMAIN_CHARS) { const idx = deco.domain.indexOf(".", deco.domain.length - max.DOMAIN_CHARS); if (idx === -1) deco.domain = []; else deco.domain = deco.domain.substring(idx + 1); deco.domain_truncated = true; } if (deco.path.length > max.URL_PATH_CHARS) { deco.path = deco.path.substring(0, deco.path.lastIndexOf("/")); deco.path_truncated = true; } } if (typeof deco.domain === "string") { deco.domain = deco.domain.split("."); if (deco.domain.splice(0, deco.domain.length - max.DOMAIN_LEN).length > 0) deco.domain_truncated = true; } deco.path = deco.path.split("/").filter(s => s !== ""); if (deco.domain && deco.path.splice(max.URL_PATH_LEN).length > 0) deco.path_truncated = true; return deco; } #EXPORT deconstruct_url function* each_domain_pattern(deco) { for (let slice = 0; slice < deco.domain.length - 1; slice++) { const domain_part = deco.domain.slice(slice).join("."); const domain_wildcards = []; if (slice === 0 && !deco.domain_truncated) yield domain_part; if (slice === 1 && !deco.domain_truncated) yield "*." + domain_part; if (slice > 1) yield "**." + domain_part; yield "***." + domain_part; } } function* each_path_pattern(deco) { for (let slice = deco.path.length; slice >= 0; slice--) { const path_part = ["", ...deco.path.slice(0, slice)].join("/"); const path_wildcards = []; if (slice === deco.path.length && !deco.path_truncated) { if (deco.trailing_slash) yield path_part + "/"; if (slice > 0 || deco.proto !== "file") yield path_part; } if (slice === deco.path.length - 1 && !deco.path_truncated && deco.path[slice] !== "*") yield path_part + "/*"; if (slice < deco.path.length - 1) yield path_part + "/**"; if (slice !== deco.path.length - 1 || deco.path_truncated || deco.path[slice] !== "***") yield path_part + "/***"; } } /* Generate every possible pattern that matches url. */ function* each_url_pattern(url) { const deco = deconstruct_url(url); if (deco === undefined) { console.error("Haketilo: bad url format", url); return false; } const all_domains = deco.domain ? each_domain_pattern(deco) : [""]; for (const domain of all_domains) { for (const path of each_path_pattern(deco)) yield `${deco.proto}://${domain}${path}`; } } #EXPORT each_url_pattern const patterns_doc_url = "https://hydrillabugs.koszko.org/projects/haketilo/wiki/URL_patterns"; #EXPORT patterns_doc_url function reconstruct_url(deco) { const domain = (deco.domain || []).join("."); const path = ["", ...deco.path].join("/"); const trail = deco.trailing_slash ? "/" : ""; return `${deco.proto}://${domain}${path}${trail}`; } #EXPORT reconstruct_url function validate_normalize_url_pattern(url_pattern) { try { return reconstruct_url(deconstruct_url(url_pattern)); } catch(e) { const patterns_doc_link = document.createElement("a"); patterns_doc_link.href = patterns_doc_url; patterns_doc_link.innerText = "here"; const msg = document.createElement("span"); msg.prepend(`'${url_pattern}' is not a valid URL pattern. See `, patterns_doc_link, " for more details."); throw msg; } } #EXPORT validate_normalize_url_pattern