From 9bfea753efa3ec80122d50d72d3ed0172587d331 Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Fri, 10 Sep 2021 15:36:19 +0200 Subject: update&improve URL pattern matching Hydrilla now limits the lengths of domain and path in queried URLs. This is to make DoS attacks harder. --- src/scriptbase_query.c | 107 ++++++++++++++++++++++++++++++------------------- 1 file changed, 66 insertions(+), 41 deletions(-) diff --git a/src/scriptbase_query.c b/src/scriptbase_query.c index fe9a910..b89a966 100644 --- a/src/scriptbase_query.c +++ b/src/scriptbase_query.c @@ -17,6 +17,11 @@ #include "scriptbase.h" +#define MAX_URL_PATH_LEN 12 +#define MAX_URL_PATH_CHARS 255 +#define MAX_DOMAIN_LEN 7 +#define MAX_DOMAIN_CHARS 100 + const struct script *get_script(const char *name, struct scriptbase *base) { void *val; @@ -77,6 +82,7 @@ void destroy_url_lookup_regex(void) } regfree(&url_regex_comp); + url_regex_ready = false; } #define URL_REGEX_NMATCH 4 @@ -89,16 +95,26 @@ static int lookup_url_path(const char *path_begin, const char *path_end, struct stringbuf *buf, struct scriptbase *base, int (*callback)(struct page*, void*), void *data) { + bool path_truncated = false; const char *segment_end = path_begin; + int segments_allowed_left = MAX_URL_PATH_LEN; int segments_dropped = 0; int initial_len = buf->buf_filled; size_t len_path, previous_segment; void *val; bool trailing_dash = path_end != path_begin && path_end[-1] == '/'; - char asterisks[] = "/***"; - int trailing_asterisks = 0, i; + int trailing_asterisks = 0; int result; + if (path_end - path_begin > MAX_URL_PATH_CHARS) { + path_truncated = true; + path_end = path_begin + MAX_URL_PATH_CHARS; + while (*path_end != '/') { + if (--path_end == path_begin) + break; + } + } + while (true) { do { if (path_begin >= path_end) @@ -106,6 +122,11 @@ static int lookup_url_path(const char *path_begin, const char *path_end, } while (*(path_begin++) == '/'); path_begin -= 2; + if (!segments_allowed_left--) { + path_truncated = true; + break; + } + segment_end = path_begin + 1; while (*segment_end != '/' && ++segment_end < path_end); @@ -135,47 +156,29 @@ after_path_normalization: buf->buf[--previous_segment] != '/'); if (!trailing_asterisks) {/* only on first iteration */ - trailing_asterisks = -1; - - for (i = 3; i > 0; i--) { - asterisks[i + 1] = '\0'; - - if (strncmp(buf->buf + previous_segment, - asterisks, i + 1)) - continue; - - trailing_asterisks = i; - - if (i != 3) - break; - - if (buf->buf[previous_segment + i + 1] == '*') - trailing_asterisks = -1; - - break; - } + if (!strcmp(buf->buf + previous_segment, "/*")) + trailing_asterisks = 1; + else if (!strcmp(buf->buf + previous_segment, "/***")) + trailing_asterisks = 3; + else + trailing_asterisks = -1; } - TRY_WILDCARD(segments_dropped == 0, ""); - TRY_WILDCARD(segments_dropped == 0 && trailing_dash, "/"); - TRY_WILDCARD(segments_dropped == 1 && trailing_asterisks != 1, - "/*"); + TRY_WILDCARD(segments_dropped == 0 && !path_truncated, ""); + TRY_WILDCARD(segments_dropped == 0 && trailing_dash && + !path_truncated, "/"); + TRY_WILDCARD(segments_dropped == 1 && !path_truncated && + trailing_asterisks != 1, "/*"); TRY_WILDCARD(segments_dropped > 1, "/**"); - TRY_WILDCARD(segments_dropped > 0 && - (segments_dropped > 1 || trailing_asterisks != 3), - "/***"); + TRY_WILDCARD((segments_dropped != 1 || path_truncated || + trailing_asterisks != 3), "/***"); stringbuf_truncate(buf, previous_segment); if (previous_segment == len_path) return 0; - /* - * We only ever care if this count is 0, 1 or > 1, - * hence size_t is not necessary. - */ - if (segments_dropped < 2) - segments_dropped++; + segments_dropped++; } #undef TRY_WILDCARD @@ -186,7 +189,9 @@ static int lookup_url_domain(const char *domain_begin, const char *domain_end, struct stringbuf *buf, struct scriptbase *base, int (*callback)(struct page*, void*), void *data) { - const char *next_label = domain_begin; + bool domain_truncated = false; + const char *label_start; + int labels_allowed_left = MAX_DOMAIN_LEN; int labels_dropped = 0; int initial_len = buf->buf_filled; int result; @@ -204,17 +209,37 @@ static int lookup_url_domain(const char *domain_begin, const char *domain_end, return result; \ } + if (domain_end - domain_begin > MAX_DOMAIN_CHARS) { + domain_truncated = true; + domain_begin = domain_end - MAX_DOMAIN_CHARS; + while (domain_begin[-1] != '.') { + if (++domain_begin == domain_end) + return 0; + } + } + + for (label_start = domain_end; + label_start > domain_begin; + label_start--) { + if (label_start[-1] == '.' && !--labels_allowed_left) + break; + } + if (label_start != domain_begin) + domain_truncated = true; + else + labels_allowed_left--; + while (true) { - domain_begin = next_label; + domain_begin = label_start; - while (*(next_label++) != '.') { - if (next_label >= domain_end) + while (*(label_start++) != '.') { + if (label_start >= domain_end) return 0; } - TRY_WILDCARD(labels_dropped == 0, ""); - TRY_WILDCARD(labels_dropped == 1, "*."); - TRY_WILDCARD(labels_dropped > 0, "**."); + TRY_WILDCARD(labels_dropped == 0 && !domain_truncated, ""); + TRY_WILDCARD(labels_dropped == 1 && !domain_truncated, "*."); + TRY_WILDCARD(labels_dropped > 1, "**."); TRY_WILDCARD(true, "***."); labels_dropped++; -- cgit v1.2.3