aboutsummaryrefslogtreecommitdiff
path: root/scriptbase_query.c
diff options
context:
space:
mode:
authorWojtek Kosior <koszko@koszko.org>2021-08-07 16:58:11 +0200
committerWojtek Kosior <koszko@koszko.org>2021-08-07 17:01:19 +0200
commit939c0c2e799734d46e3c3b784545f7c0c489c191 (patch)
tree9e308bf45b1c5ad015409198d141fc8cae1dbd95 /scriptbase_query.c
parente3c86f7ff37de0af10b4165216da14bf0f91dc0b (diff)
downloadhydrilla-939c0c2e799734d46e3c3b784545f7c0c489c191.tar.gz
hydrilla-939c0c2e799734d46e3c3b784545f7c0c489c191.zip
migrate to Autotools
Diffstat (limited to 'scriptbase_query.c')
-rw-r--r--scriptbase_query.c278
1 files changed, 0 insertions, 278 deletions
diff --git a/scriptbase_query.c b/scriptbase_query.c
deleted file mode 100644
index fe9a910..0000000
--- a/scriptbase_query.c
+++ /dev/null
@@ -1,278 +0,0 @@
-/**
- * part of Hydrilla
- * Routines for querying in-memory scriptbase, operating on data structures from
- * `scripbase.h'.
- *
- * Copyright (C) 2021 Wojtek Kosior
- * Redistribution terms are gathered in the `copyright' file.
- */
-
-#include <stddef.h>
-#include <regex.h>
-#include <stdbool.h>
-#include <string.h>
-
-#include "hashtable.h"
-#include "string_buf.h"
-
-#include "scriptbase.h"
-
-const struct script *get_script(const char *name, struct scriptbase *base)
-{
- void *val;
-
- if (ht_get_threadsafe(&base->scripts, name, NULL, &val))
- return NULL;
-
- return ((struct script*) val)->filled ? val : NULL;
-}
-
-const struct bag *get_bag(const char *name, struct scriptbase *base)
-{
- void *val;
-
- if (ht_get_threadsafe(&base->bags, name, NULL, &val))
- return NULL;
-
- return ((struct bag*) val)->filled ? val : NULL;
-}
-
-const struct page *get_pattern(const char *pattern, struct scriptbase *base)
-{
- void *val = NULL;
-
- ht_get_threadsafe(&base->pages, pattern, NULL, &val);
-
- return val;
-}
-
-static const char url_regex[] =
- "^"
- "([a-zA-Z]{1,20}://)" /* protocol */
- "([^/?#]{1,253})" /* domain */
- "(/[^?#]*)?" /* path */
- "\\\\?[^#]*" /* query */
- "#?.*" /* target */
- "$";
-
-static regex_t url_regex_comp;
-static bool url_regex_ready;
-
-int init_url_lookup_regex(void)
-{
- int retval;
-
- retval = regcomp(&url_regex_comp, url_regex, REG_EXTENDED);
-
- url_regex_ready = !retval;
-
- return retval;
-}
-
-void destroy_url_lookup_regex(void)
-{
- if (!url_regex_ready) {
- fprintf(stderr, "Attempt to destroy uninitialized regex in " __FILE__ "\n");
- return;
- }
-
- regfree(&url_regex_comp);
-}
-
-#define URL_REGEX_NMATCH 4
-
-#define PROTOCOL_MATCH 1
-#define DOMAIN_MATCH 2
-#define PATH_MATCH 3
-
-static int lookup_url_path(const char *path_begin, const char *path_end,
- struct stringbuf *buf, struct scriptbase *base,
- int (*callback)(struct page*, void*), void *data)
-{
- const char *segment_end = path_begin;
- int segments_dropped = 0;
- int initial_len = buf->buf_filled;
- size_t len_path, previous_segment;
- void *val;
- bool trailing_dash = path_end != path_begin && path_end[-1] == '/';
- char asterisks[] = "/***";
- int trailing_asterisks = 0, i;
- int result;
-
- while (true) {
- do {
- if (path_begin >= path_end)
- goto after_path_normalization;
- } while (*(path_begin++) == '/');
- path_begin -= 2;
-
- segment_end = path_begin + 1;
- while (*segment_end != '/' && ++segment_end < path_end);
-
- if (sb_bytes(buf, path_begin, segment_end - path_begin))
- return -2;
-
- path_begin = segment_end;
- }
-
-after_path_normalization:
-#define TRY_WILDCARD(condition, wildcard) \
- if (condition) { \
- stringbuf_truncate(buf, len_path); \
- if (sb_string(buf, wildcard)) \
- return -2; \
- \
- result = ht_get_threadsafe(&base->pages, buf->buf, \
- NULL, &val); \
- if (!result && callback(val, data)) \
- return 1; \
- }
-
- while (true) {
- len_path = buf->buf_filled;
- previous_segment = len_path;
- while (previous_segment > initial_len &&
- buf->buf[--previous_segment] != '/');
-
- if (!trailing_asterisks) {/* only on first iteration */
- trailing_asterisks = -1;
-
- for (i = 3; i > 0; i--) {
- asterisks[i + 1] = '\0';
-
- if (strncmp(buf->buf + previous_segment,
- asterisks, i + 1))
- continue;
-
- trailing_asterisks = i;
-
- if (i != 3)
- break;
-
- if (buf->buf[previous_segment + i + 1] == '*')
- trailing_asterisks = -1;
-
- break;
- }
- }
-
- TRY_WILDCARD(segments_dropped == 0, "");
- TRY_WILDCARD(segments_dropped == 0 && trailing_dash, "/");
- TRY_WILDCARD(segments_dropped == 1 && trailing_asterisks != 1,
- "/*");
- TRY_WILDCARD(segments_dropped > 1, "/**");
- TRY_WILDCARD(segments_dropped > 0 &&
- (segments_dropped > 1 || trailing_asterisks != 3),
- "/***");
-
- stringbuf_truncate(buf, previous_segment);
-
- if (previous_segment == len_path)
- return 0;
-
- /*
- * We only ever care if this count is 0, 1 or > 1,
- * hence size_t is not necessary.
- */
- if (segments_dropped < 2)
- segments_dropped++;
- }
-
-#undef TRY_WILDCARD
-}
-
-static int lookup_url_domain(const char *domain_begin, const char *domain_end,
- const char *path_begin, const char *path_end,
- struct stringbuf *buf, struct scriptbase *base,
- int (*callback)(struct page*, void*), void *data)
-{
- const char *next_label = domain_begin;
- int labels_dropped = 0;
- int initial_len = buf->buf_filled;
- int result;
-
-#define TRY_WILDCARD(condition, wildcard) \
- if (condition) { \
- stringbuf_truncate(buf, initial_len); \
- if (sb_string(buf, wildcard) || \
- sb_bytes(buf, domain_begin, domain_end - domain_begin)) \
- return -2; \
- \
- result = lookup_url_path(path_begin, path_end, \
- buf, base, callback, data); \
- if (result) \
- return result; \
- }
-
- while (true) {
- domain_begin = next_label;
-
- while (*(next_label++) != '.') {
- if (next_label >= domain_end)
- return 0;
- }
-
- TRY_WILDCARD(labels_dropped == 0, "");
- TRY_WILDCARD(labels_dropped == 1, "*.");
- TRY_WILDCARD(labels_dropped > 0, "**.");
- TRY_WILDCARD(true, "***.");
-
- labels_dropped++;
- }
-
-#undef TRY_WILDCARD
-}
-
-static int lookup_url_proto(const char *proto_begin, const char *proto_end,
- const char *domain_begin, const char *domain_end,
- const char *path_begin, const char *path_end,
- struct stringbuf *buf, struct scriptbase *base,
- int (*callback)(struct page*, void*), void *data)
-{
- if (sb_bytes(buf, proto_begin, proto_end - proto_begin))
- return -2;
-
- return lookup_url_domain(domain_begin, domain_end, path_begin, path_end,
- buf, base, callback, data);
-}
-
-int lookup_url(const char *url, struct scriptbase *base,
- int (*callback)(struct page*, void*), void *data)
-{
- regmatch_t reg_matched[URL_REGEX_NMATCH];
- struct stringbuf buf;
- const char *path_begin, *path_end;
- int retval;
-
- if (!url_regex_ready) {
- fprintf(stderr, "Regex not initialized in " __FILE__ "\n");
- return -3;
- }
-
- printf("matching: %s\n", url);
-
- if (regexec(&url_regex_comp, url,
- URL_REGEX_NMATCH, reg_matched, 0) ||
- reg_matched[DOMAIN_MATCH].rm_so == -1)
- return -1;
-
- stringbuf_init(&buf);
-
- path_begin = url + reg_matched[PATH_MATCH].rm_so;
- path_end = url + reg_matched[PATH_MATCH].rm_eo;
- if (path_begin == url - 1) {
- path_begin = NULL;
- path_end = NULL;
- }
-
- retval = lookup_url_proto(url + reg_matched[PROTOCOL_MATCH].rm_so,
- url + reg_matched[PROTOCOL_MATCH].rm_eo,
- url + reg_matched[DOMAIN_MATCH].rm_so,
- url + reg_matched[DOMAIN_MATCH].rm_eo,
- path_begin, path_end,
- &buf, base, callback, data);
-
- stringbuf_destroy(&buf);
-
- return retval;
-}