aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Wojtek Kosior <koszko@koszko.org> 2021-09-10 15:36:19 +0200
committer Wojtek Kosior <koszko@koszko.org> 2021-09-10 15:52:30 +0200
commit 9bfea753efa3ec80122d50d72d3ed0172587d331 (patch)
tree a764468e7ebfb3b7cf495986f583e01742588c40
parent 78e4ddc4b34ac51da55e33bb2c75af573d63caff (diff)
download hydrilla-9bfea753efa3ec80122d50d72d3ed0172587d331.tar.gz
hydrilla-9bfea753efa3ec80122d50d72d3ed0172587d331.zip
update & improve URL pattern matching
Hydrilla now limits the lengths of domain and path in queried URLs. This is to make DoS attacks harder.
-rw-r--r-- src/scriptbase_query.c 107
1 files changed, 66 insertions, 41 deletions
diff --git a/src/scriptbase_query.c b/src/scriptbase_query.c
index fe9a910..b89a966 100644
--- a/src/scriptbase_query.c
+++ b/src/scriptbase_query.c
@@ -17,6 +17,11 @@
#include "scriptbase.h"
+#define MAX_URL_PATH_LEN 12
+#define MAX_URL_PATH_CHARS 255
+#define MAX_DOMAIN_LEN 7
+#define MAX_DOMAIN_CHARS 100
+
const struct script *get_script(const char *name, struct scriptbase *base)
{
void *val;
@@ -77,6 +82,7 @@ void destroy_url_lookup_regex(void)
}
regfree(&url_regex_comp);
+ url_regex_ready = false;
}
#define URL_REGEX_NMATCH 4
@@ -89,16 +95,26 @@ static int lookup_url_path(const char *path_begin, const char *path_end,
struct stringbuf *buf, struct scriptbase *base,
int (*callback)(struct page*, void*), void *data)
{
+ bool path_truncated = false;
const char *segment_end = path_begin;
+ int segments_allowed_left = MAX_URL_PATH_LEN;
int segments_dropped = 0;
int initial_len = buf->buf_filled;
size_t len_path, previous_segment;
void *val;
bool trailing_dash = path_end != path_begin && path_end[-1] == '/';
- char asterisks[] = "/***";
- int trailing_asterisks = 0, i;
+ int trailing_asterisks = 0;
int result;
+ if (path_end - path_begin > MAX_URL_PATH_CHARS) {
+ path_truncated = true;
+ path_end = path_begin + MAX_URL_PATH_CHARS;
+ while (*path_end != '/') {
+ if (--path_end == path_begin)
+ break;
+ }
+ }
+
while (true) {
do {
if (path_begin >= path_end)
@@ -106,6 +122,11 @@ static int lookup_url_path(const char *path_begin, const char *path_end,
} while (*(path_begin++) == '/');
path_begin -= 2;
+ if (!segments_allowed_left--) {
+ path_truncated = true;
+ break;
+ }
+
segment_end = path_begin + 1;
while (*segment_end != '/' && ++segment_end < path_end);
@@ -135,47 +156,29 @@ after_path_normalization:
buf->buf[--previous_segment] != '/');
if (!trailing_asterisks) {/* only on first iteration */
- trailing_asterisks = -1;
-
- for (i = 3; i > 0; i--) {
- asterisks[i + 1] = '\0';
-
- if (strncmp(buf->buf + previous_segment,
- asterisks, i + 1))
- continue;
-
- trailing_asterisks = i;
-
- if (i != 3)
- break;
-
- if (buf->buf[previous_segment + i + 1] == '*')
- trailing_asterisks = -1;
-
- break;
- }
+ if (!strcmp(buf->buf + previous_segment, "/*"))
+ trailing_asterisks = 1;
+ else if (!strcmp(buf->buf + previous_segment, "/***"))
+ trailing_asterisks = 3;
+ else
+ trailing_asterisks = -1;
}
- TRY_WILDCARD(segments_dropped == 0, "");
- TRY_WILDCARD(segments_dropped == 0 && trailing_dash, "/");
- TRY_WILDCARD(segments_dropped == 1 && trailing_asterisks != 1,
- "/*");
+ TRY_WILDCARD(segments_dropped == 0 && !path_truncated, "");
+ TRY_WILDCARD(segments_dropped == 0 && trailing_dash &&
+ !path_truncated, "/");
+ TRY_WILDCARD(segments_dropped == 1 && !path_truncated &&
+ trailing_asterisks != 1, "/*");
TRY_WILDCARD(segments_dropped > 1, "/**");
- TRY_WILDCARD(segments_dropped > 0 &&
- (segments_dropped > 1 || trailing_asterisks != 3),
- "/***");
+ TRY_WILDCARD((segments_dropped != 1 || path_truncated ||
+ trailing_asterisks != 3), "/***");
stringbuf_truncate(buf, previous_segment);
if (previous_segment == len_path)
return 0;
- /*
- * We only ever care if this count is 0, 1 or > 1,
- * hence size_t is not necessary.
- */
- if (segments_dropped < 2)
- segments_dropped++;
+ segments_dropped++;
}
#undef TRY_WILDCARD
@@ -186,7 +189,9 @@ static int lookup_url_domain(const char *domain_begin, const char *domain_end,
struct stringbuf *buf, struct scriptbase *base,
int (*callback)(struct page*, void*), void *data)
{
- const char *next_label = domain_begin;
+ bool domain_truncated = false;
+ const char *label_start;
+ int labels_allowed_left = MAX_DOMAIN_LEN;
int labels_dropped = 0;
int initial_len = buf->buf_filled;
int result;
@@ -204,17 +209,37 @@ static int lookup_url_domain(const char *domain_begin, const char *domain_end,
return result; \
}
+ if (domain_end - domain_begin > MAX_DOMAIN_CHARS) {
+ domain_truncated = true;
+ domain_begin = domain_end - MAX_DOMAIN_CHARS;
+ while (domain_begin[-1] != '.') {
+ if (++domain_begin == domain_end)
+ return 0;
+ }
+ }
+
+ for (label_start = domain_end;
+ label_start > domain_begin;
+ label_start--) {
+ if (label_start[-1] == '.' && !--labels_allowed_left)
+ break;
+ }
+ if (label_start != domain_begin)
+ domain_truncated = true;
+ else
+ labels_allowed_left--;
+
while (true) {
- domain_begin = next_label;
+ domain_begin = label_start;
- while (*(next_label++) != '.') {
- if (next_label >= domain_end)
+ while (*(label_start++) != '.') {
+ if (label_start >= domain_end)
return 0;
}
- TRY_WILDCARD(labels_dropped == 0, "");
- TRY_WILDCARD(labels_dropped == 1, "*.");
- TRY_WILDCARD(labels_dropped > 0, "**.");
+ TRY_WILDCARD(labels_dropped == 0 && !domain_truncated, "");
+ TRY_WILDCARD(labels_dropped == 1 && !domain_truncated, "*.");
+ TRY_WILDCARD(labels_dropped > 1, "**.");
TRY_WILDCARD(true, "***.");
labels_dropped++;