From 5c583de820c0d5f666a830ca1e8205fe7d55e61e Mon Sep 17 00:00:00 2001
From: Wojtek Kosior
Date: Wed, 1 Dec 2021 21:08:03 +0100
Subject: start implementing more efficient querying of URL patterns

---
 common/patterns.js | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

(limited to 'common/patterns.js')

diff --git a/common/patterns.js b/common/patterns.js
index 635b128..054e610 100644
--- a/common/patterns.js
+++ b/common/patterns.js
@@ -17,16 +17,25 @@ const MAX = {
 const proto_regex = /^(\w+):\/\/(.*)$/;
 
 const user_re = "[^/?#@]+@"
-const domain_re = "[.a-zA-Z0-9-]+";
+const domain_re = "[.*a-zA-Z0-9-]+";
 const path_re = "[^?#]*";
 const query_re = "\\??[^#]*";
 
 const http_regex = new RegExp(`^(${domain_re})(${path_re})(${query_re}).*`);
 
-const file_regex = new RegExp(`^(${path_re}).*`);
+const file_regex = new RegExp(`^(/${path_re}).*`);
 
 const ftp_regex = new RegExp(`^(${user_re})?(${domain_re})(${path_re}).*`);
 
+function match_or_throw(regex, string, error_msg)
+{
+    const match = regex.exec(string);
+    if (match === null)
+        throw error_msg;
+
+    return match;
+}
+
 function deconstruct_url(url, use_limits=true)
 {
     const max = MAX;
@@ -35,21 +44,19 @@ function deconstruct_url(url, use_limits=true)
             max[key] = Infinity;
     }
 
-    const proto_match = proto_regex.exec(url);
-    if (proto_match === null)
-        throw `bad url '${url}'`;
+    const matcher = (re, str) => match_or_throw(re, str, `bad url '${url}'`)
 
+    const proto_match = matcher(proto_regex, url);
     const deco = {proto: proto_match[1]};
 
     if (deco.proto === "file") {
-        deco.path = file_regex.exec(proto_match[2])[1];
+        deco.path = matcher(file_regex, proto_match[2])[1];
     } else if (deco.proto === "ftp") {
-        [deco.domain, deco.path] = ftp_regex.exec(proto_match[2]).slice(2, 4);
+        [deco.domain, deco.path] =
+            matcher(ftp_regex, proto_match[2]).slice(2, 4);
     } else if (deco.proto === "http" || deco.proto === "https") {
-        const http_match = http_regex.exec(proto_match[2]);
-        if (!http_match)
-            return undefined;
-        [deco.domain, deco.path, deco.query] = http_match.slice(1, 4);
+        [deco.domain, deco.path, deco.query] =
+            matcher(http_regex, proto_match[2]).slice(1, 4);
         deco.domain = deco.domain.toLowerCase();
     } else {
         throw `unsupported protocol in url '${url}'`;
--
cgit v1.2.3