aboutsummaryrefslogtreecommitdiff
path: root/common/patterns.js
diff options
context:
space:
mode:
Diffstat (limited to 'common/patterns.js')
-rw-r--r--common/patterns.js187
1 files changed, 187 insertions, 0 deletions
diff --git a/common/patterns.js b/common/patterns.js
new file mode 100644
index 0000000..f7da0ff
--- /dev/null
+++ b/common/patterns.js
@@ -0,0 +1,187 @@
+/**
+ * Hydrilla/Lernette operations on page url patterns
+ *
+ * Copyright (C) 2021 Wojtek Kosior
+ * Redistribution terms are gathered in the `copyright' file.
+ */
+
/*
 * Building blocks of the URL regex, kept as plain strings so they can be
 * spliced into the final expression below.
 */
const proto_re    = "[a-zA-Z]*://";  /* scheme (letters only) plus "://"  */
const domain_re   = "[^/?#]+";       /* everything up to '/', '?' or '#'  */
const segments_re = "/[^?#]*";       /* path: a '/' and what follows it   */
const query_re    = "\\?[^#]*";      /* query string with the leading '?' */

/*
 * Capture groups: 1 - protocol, 2 - domain, 3 - path (optional),
 * 4 - query (optional). A fragment, if present, is accepted but not captured.
 */
const url_regex = new RegExp([
    "^",
    `(${proto_re})`,
    `(${domain_re})`,
    `(${segments_re})?`,
    `(${query_re})?`,
    "#?.*$"
].join(""));
+
/*
 * Split url (or a url pattern) into its components using url_regex.
 *
 * Returns undefined when url does not match url_regex. Otherwise returns an
 * object with:
 *   proto              - protocol including the trailing "://"
 *   domain             - array of domain labels (domain split on ".")
 *   path               - array of non-empty path segments, always preceded by
 *                        a single "" element
 *   query              - query string including the leading "?", or undefined
 *   path_trailing_dash - whether the path ends with "/" (undefined when the
 *                        url has no path at all)
 */
function deconstruct_url(url)
{
    const match = url_regex.exec(url);
    if (match === null)
        return undefined;

    const [, proto, domain_str, path_str, query] = match;

    /* "dash" in the property name refers to a trailing slash. */
    const path_trailing_dash = path_str && path_str.endsWith("/");

    /* Empty segments (from "//" or a trailing "/") are dropped. */
    const segments = (path_str || "").split("/").filter(seg => seg !== "");

    return {
        proto,
        domain: domain_str.split("."),
        path: ["", ...segments],
        query,
        path_trailing_dash
    };
}
+
/*
 * Check whether a domain (array of labels) matches a pattern domain (array of
 * labels whose first element may be a wildcard). Labels are compared from the
 * rightmost one towards the left.
 *
 * Wildcards, recognised only as the leftmost pattern label:
 *   "*"   - exactly one label
 *   "**"  - two or more labels
 *   "***" - zero or more labels
 *
 * Be sane: both arguments should be arrays of length >= 2.
 */
function domain_matches(url_domain, pattern_domain)
{
    const url_len = url_domain.length;
    const pattern_len = pattern_domain.length;
    const extra_labels = url_len - pattern_len;

    let offset = 1;
    while (offset <= url_len) {
        const url_label = url_domain[url_len - offset];
        const pattern_label = pattern_domain[pattern_len - offset];

        /* Reached the leftmost pattern label - it decides the outcome. */
        if (offset === pattern_len) {
            switch (pattern_label) {
            case "*":
                return extra_labels === 0;
            case "**":
                return extra_labels > 0;
            case "***":
                return true;
            default:
                return extra_labels === 0 && pattern_label === url_label;
            }
        }

        if (pattern_label !== url_label)
            return false;

        offset++;
    }

    /*
     * All url labels consumed; the only way left to match is a leading "***"
     * standing for zero labels.
     */
    return pattern_len === url_len + 1 && pattern_domain[0] === "***";
}
+
/*
 * Check whether a url path matches a pattern path. Both paths are arrays of
 * segments; the *_trailing_dash arguments tell whether the respective path
 * ended with "/".
 *
 * Wildcards, recognised only as the last pattern segment:
 *   "*"   - exactly one segment
 *   "**"  - two or more segments (or a url segment that is literally "**")
 *   "***" - zero or more segments
 *
 * A pattern with a trailing "/" only matches a url that has one as well; this
 * is enforced for literal matches, not for "*" and "***" wildcard matches.
 */
function path_matches(url_path, url_trailing_dash,
                      pattern_path, pattern_trailing_dash)
{
    const dashes_ok = !(pattern_trailing_dash && !url_trailing_dash);

    if (pattern_path.length === 0)
        return url_path.length === 0 && dashes_ok;

    const length_difference = url_path.length - pattern_path.length;

    for (let i = 0; i < url_path.length; i++) {
        /* Reached the last pattern segment - it decides the outcome. */
        if (pattern_path.length === i + 1) {
            if (pattern_path[i] === "*")
                return length_difference === 0;
            if (pattern_path[i] === "**") {
                return length_difference > 0 ||
                    (url_path[i] === "**" && dashes_ok);
            }
            if (pattern_path[i] === "***")
                return length_difference >= 0;
            return length_difference === 0 &&
                pattern_path[i] === url_path[i] && dashes_ok;
        }

        if (pattern_path[i] !== url_path[i])
            return false;
    }

    /*
     * All url segments consumed and they all agreed with the pattern. The
     * only way left to match is a trailing "***" standing for zero segments
     * (mirrors the leading-"***" case in domain_matches() and the
     * "<full path>/***" patterns emitted by for_each_possible_pattern()).
     */
    return pattern_path.length === url_path.length + 1 &&
        pattern_path[pattern_path.length - 1] === "***";
}
+
/*
 * Tell whether url matches pattern. Both are parsed with deconstruct_url();
 * if either fails to parse, the problem is logged and false is returned.
 * Protocols must be identical; domains and paths are compared with
 * domain_matches() and path_matches(). The query string takes no part in
 * the comparison.
 */
function url_matches(url, pattern)
{
    const [parsed_url, parsed_pattern] =
        [url, pattern].map(str => deconstruct_url(str));

    const unparsable = parsed_url === undefined ||
        parsed_pattern === undefined;
    if (unparsable) {
        console.log(`bad comparison: ${url} and ${pattern}`);
        return false;
    }

    if (parsed_url.proto !== parsed_pattern.proto)
        return false;

    return domain_matches(parsed_url.domain, parsed_pattern.domain) &&
        path_matches(parsed_url.path, parsed_url.path_trailing_dash,
                     parsed_pattern.path, parsed_pattern.path_trailing_dash);
}
+
/*
 * Call callback for every possible pattern that matches url. Return when there
 * are no more patterns or callback returns false.
 *
 * Patterns are built as protocol + domain pattern + path pattern. Domain
 * patterns are produced by dropping 0, 1, 2... leftmost labels and replacing
 * them with a wildcard; path patterns by dropping 0, 1, 2... rightmost
 * segments and replacing them with a wildcard. Wildcard meanings:
 *   "*"   - exactly one label/segment
 *   "**"  - two or more labels/segments
 *   "***" - zero or more labels/segments
 *
 * If url cannot be parsed by deconstruct_url(), the problem is logged and
 * callback is never called.
 */
function for_each_possible_pattern(url, callback)
{
    const deco = deconstruct_url(url);

    if (deco === undefined) {
        console.log("bad url format", url);
        return;
    }

    const {proto, domain, path, path_trailing_dash} = deco;

    for (let d_slice = 0; d_slice < domain.length; d_slice++) {
        /* d_slice is the number of leftmost labels that were dropped. */
        const domain_part = domain.slice(d_slice).join(".");
        const domain_wildcards = [];
        if (d_slice === 0)
            domain_wildcards.push("");
        if (d_slice === 1)
            domain_wildcards.push("*.");
        /* "**" stands for at least two labels, so one dropped is too few. */
        if (d_slice > 1)
            domain_wildcards.push("**.");
        domain_wildcards.push("***.");

        for (const domain_wildcard of domain_wildcards) {
            const domain_pattern = domain_wildcard + domain_part;

            for (let s_slice = path.length; s_slice > 0; s_slice--) {
                const path_part = path.slice(0, s_slice).join("/");
                /* Number of rightmost segments that were dropped. */
                const dropped = path.length - s_slice;
                /* First dropped segment (undefined if none dropped). */
                const next_segment = path[s_slice];

                const path_wildcards = [];
                if (dropped === 0) {
                    if (path_trailing_dash)
                        path_wildcards.push("/");
                    path_wildcards.push("");
                }
                /*
                 * When the single dropped segment is itself spelled like the
                 * wildcard, the resulting pattern equals the literal full
                 * path already produced above - skip it to avoid duplicates.
                 */
                if (dropped === 1 && next_segment !== "*")
                    path_wildcards.push("/*");
                /* "**" stands for at least two segments. */
                if (dropped > 1)
                    path_wildcards.push("/**");
                if (dropped !== 1 || next_segment !== "***")
                    path_wildcards.push("/***");

                for (const path_wildcard of path_wildcards) {
                    const path_pattern = path_part + path_wildcard;

                    const pattern = proto + domain_pattern + path_pattern;

                    if (callback(pattern) === false)
                        return;
                }
            }
        }
    }
}
+
/*
 * Return an array with every pattern for_each_possible_pattern() produces for
 * url, in the order they are generated. The array is empty when no pattern is
 * produced.
 */
function possible_patterns(url)
{
    const patterns = [];

    /*
     * push() has to be called on the array itself: handing over a bare
     * `patterns.push' detaches it from `patterns' and makes the first call
     * throw a TypeError. The braces keep push()'s return value (the new
     * length) from being passed back to the iteration.
     */
    for_each_possible_pattern(url, pattern => { patterns.push(pattern); });

    return patterns;
}
+
+/*
+ * EXPORTS_START
+ * EXPORT url_matches
+ * EXPORT for_each_possible_pattern
+ * EXPORT possible_patterns
+ * EXPORTS_END
+ */