diff options
author | Wojtek Kosior <koszko@koszko.org> | 2021-08-07 16:58:11 +0200 |
---|---|---|
committer | Wojtek Kosior <koszko@koszko.org> | 2021-08-07 17:01:19 +0200 |
commit | 939c0c2e799734d46e3c3b784545f7c0c489c191 (patch) | |
tree | 9e308bf45b1c5ad015409198d141fc8cae1dbd95 /scriptbase_query.c | |
parent | e3c86f7ff37de0af10b4165216da14bf0f91dc0b (diff) | |
download | hydrilla-939c0c2e799734d46e3c3b784545f7c0c489c191.tar.gz hydrilla-939c0c2e799734d46e3c3b784545f7c0c489c191.zip |
migrate to Autotools
Diffstat (limited to 'scriptbase_query.c')
-rw-r--r-- | scriptbase_query.c | 278 |
1 files changed, 0 insertions, 278 deletions
diff --git a/scriptbase_query.c b/scriptbase_query.c deleted file mode 100644 index fe9a910..0000000 --- a/scriptbase_query.c +++ /dev/null @@ -1,278 +0,0 @@ -/** - * part of Hydrilla - * Routines for querying in-memory scriptbase, operating on data structures from - * `scripbase.h'. - * - * Copyright (C) 2021 Wojtek Kosior - * Redistribution terms are gathered in the `copyright' file. - */ - -#include <stddef.h> -#include <regex.h> -#include <stdbool.h> -#include <string.h> - -#include "hashtable.h" -#include "string_buf.h" - -#include "scriptbase.h" - -const struct script *get_script(const char *name, struct scriptbase *base) -{ - void *val; - - if (ht_get_threadsafe(&base->scripts, name, NULL, &val)) - return NULL; - - return ((struct script*) val)->filled ? val : NULL; -} - -const struct bag *get_bag(const char *name, struct scriptbase *base) -{ - void *val; - - if (ht_get_threadsafe(&base->bags, name, NULL, &val)) - return NULL; - - return ((struct bag*) val)->filled ? val : NULL; -} - -const struct page *get_pattern(const char *pattern, struct scriptbase *base) -{ - void *val = NULL; - - ht_get_threadsafe(&base->pages, pattern, NULL, &val); - - return val; -} - -static const char url_regex[] = - "^" - "([a-zA-Z]{1,20}://)" /* protocol */ - "([^/?#]{1,253})" /* domain */ - "(/[^?#]*)?" /* path */ - "\\\\?[^#]*" /* query */ - "#?.*" /* target */ - "$"; - -static regex_t url_regex_comp; -static bool url_regex_ready; - -int init_url_lookup_regex(void) -{ - int retval; - - retval = regcomp(&url_regex_comp, url_regex, REG_EXTENDED); - - url_regex_ready = !retval; - - return retval; -} - -void destroy_url_lookup_regex(void) -{ - if (!url_regex_ready) { - fprintf(stderr, "Attempt to destroy uninitialized regex in " __FILE__ "\n"); - return; - } - - regfree(&url_regex_comp); -} - -#define URL_REGEX_NMATCH 4 - -#define PROTOCOL_MATCH 1 -#define DOMAIN_MATCH 2 -#define PATH_MATCH 3 - -static int lookup_url_path(const char *path_begin, const char *path_end, - struct stringbuf *buf, struct scriptbase *base, - int (*callback)(struct page*, void*), void *data) -{ - const char *segment_end = path_begin; - int segments_dropped = 0; - int initial_len = buf->buf_filled; - size_t len_path, previous_segment; - void *val; - bool trailing_dash = path_end != path_begin && path_end[-1] == '/'; - char asterisks[] = "/***"; - int trailing_asterisks = 0, i; - int result; - - while (true) { - do { - if (path_begin >= path_end) - goto after_path_normalization; - } while (*(path_begin++) == '/'); - path_begin -= 2; - - segment_end = path_begin + 1; - while (*segment_end != '/' && ++segment_end < path_end); - - if (sb_bytes(buf, path_begin, segment_end - path_begin)) - return -2; - - path_begin = segment_end; - } - -after_path_normalization: -#define TRY_WILDCARD(condition, wildcard) \ - if (condition) { \ - stringbuf_truncate(buf, len_path); \ - if (sb_string(buf, wildcard)) \ - return -2; \ - \ - result = ht_get_threadsafe(&base->pages, buf->buf, \ - NULL, &val); \ - if (!result && callback(val, data)) \ - return 1; \ - } - - while (true) { - len_path = buf->buf_filled; - previous_segment = len_path; - while (previous_segment > initial_len && - buf->buf[--previous_segment] != '/'); - - if (!trailing_asterisks) {/* only on first iteration */ - trailing_asterisks = -1; - - for (i = 3; i > 0; i--) { - asterisks[i + 1] = '\0'; - - if (strncmp(buf->buf + previous_segment, - asterisks, i + 1)) - continue; - - trailing_asterisks = i; - - if (i != 3) - break; - - if (buf->buf[previous_segment + i + 1] == '*') - trailing_asterisks = -1; - - break; - } - } - - TRY_WILDCARD(segments_dropped == 0, ""); - TRY_WILDCARD(segments_dropped == 0 && trailing_dash, "/"); - TRY_WILDCARD(segments_dropped == 1 && trailing_asterisks != 1, - "/*"); - TRY_WILDCARD(segments_dropped > 1, "/**"); - TRY_WILDCARD(segments_dropped > 0 && - (segments_dropped > 1 || trailing_asterisks != 3), - "/***"); - - stringbuf_truncate(buf, previous_segment); - - if (previous_segment == len_path) - return 0; - - /* - * We only ever care if this count is 0, 1 or > 1, - * hence size_t is not necessary. - */ - if (segments_dropped < 2) - segments_dropped++; - } - -#undef TRY_WILDCARD -} - -static int lookup_url_domain(const char *domain_begin, const char *domain_end, - const char *path_begin, const char *path_end, - struct stringbuf *buf, struct scriptbase *base, - int (*callback)(struct page*, void*), void *data) -{ - const char *next_label = domain_begin; - int labels_dropped = 0; - int initial_len = buf->buf_filled; - int result; - -#define TRY_WILDCARD(condition, wildcard) \ - if (condition) { \ - stringbuf_truncate(buf, initial_len); \ - if (sb_string(buf, wildcard) || \ - sb_bytes(buf, domain_begin, domain_end - domain_begin)) \ - return -2; \ - \ - result = lookup_url_path(path_begin, path_end, \ - buf, base, callback, data); \ - if (result) \ - return result; \ - } - - while (true) { - domain_begin = next_label; - - while (*(next_label++) != '.') { - if (next_label >= domain_end) - return 0; - } - - TRY_WILDCARD(labels_dropped == 0, ""); - TRY_WILDCARD(labels_dropped == 1, "*."); - TRY_WILDCARD(labels_dropped > 0, "**."); - TRY_WILDCARD(true, "***."); - - labels_dropped++; - } - -#undef TRY_WILDCARD -} - -static int lookup_url_proto(const char *proto_begin, const char *proto_end, - const char *domain_begin, const char *domain_end, - const char *path_begin, const char *path_end, - struct stringbuf *buf, struct scriptbase *base, - int (*callback)(struct page*, void*), void *data) -{ - if (sb_bytes(buf, proto_begin, proto_end - proto_begin)) - return -2; - - return lookup_url_domain(domain_begin, domain_end, path_begin, path_end, - buf, base, callback, data); -} - -int lookup_url(const char *url, struct scriptbase *base, - int (*callback)(struct page*, void*), void *data) -{ - regmatch_t reg_matched[URL_REGEX_NMATCH]; - struct stringbuf buf; - const char *path_begin, *path_end; - int retval; - - if (!url_regex_ready) { - fprintf(stderr, "Regex not initialized in " __FILE__ "\n"); - return -3; - } - - printf("matching: %s\n", url); - - if (regexec(&url_regex_comp, url, - URL_REGEX_NMATCH, reg_matched, 0) || - reg_matched[DOMAIN_MATCH].rm_so == -1) - return -1; - - stringbuf_init(&buf); - - path_begin = url + reg_matched[PATH_MATCH].rm_so; - path_end = url + reg_matched[PATH_MATCH].rm_eo; - if (path_begin == url - 1) { - path_begin = NULL; - path_end = NULL; - } - - retval = lookup_url_proto(url + reg_matched[PROTOCOL_MATCH].rm_so, - url + reg_matched[PROTOCOL_MATCH].rm_eo, - url + reg_matched[DOMAIN_MATCH].rm_so, - url + reg_matched[DOMAIN_MATCH].rm_eo, - path_begin, path_end, - &buf, base, callback, data); - - stringbuf_destroy(&buf); - - return retval; -} |