diff options
author | W. Kosior <koszko@koszko.org> | 2025-02-12 20:36:29 +0100 |
---|---|---|
committer | W. Kosior <koszko@koszko.org> | 2025-02-12 20:36:29 +0100 |
commit | 1aec87a4966772428ade576895d419c61fd16ef8 (patch) | |
tree | 8bc27d2021633eb6eeb56ddb33284a8a98c391c5 | |
parent | 93f4adf3c4213fc992a489e53fd0253b9b16f379 (diff) | |
download | koszko-org-guix-server-1aec87a4966772428ade576895d419c61fd16ef8.tar.gz koszko-org-guix-server-1aec87a4966772428ade576895d419c61fd16ef8.zip |
Prevent bots from DoS'ing cgit.
-rw-r--r-- | salamina.scm | 29 |
1 files changed, 29 insertions, 0 deletions
diff --git a/salamina.scm b/salamina.scm index 123f584..73278d2 100644 --- a/salamina.scm +++ b/salamina.scm @@ -225,6 +225,35 @@ Redirect permanent / https://git.koszko.org/ </If> + Redirect permanent /pydrilla \ + https://git.koszko.org/haketilo-hydrilla + + RewriteEngine On + + # Let's put some limits on which cgit pages bots can view. + RewriteCond \"%{HTTP_USER_AGENT}\" \"bot\" [NC] + + # Allow crawlers to look at different git branches/tags (`h' param) + # and also allow them to view commits (they might be of some + # interest to search engine users). + RewriteCond \"%{REQUEST_URI}\" \"!^[/][^/]+[/]+commit[/]*$\" [OR] + RewriteCond \"%{QUERY_STRING}\" \ + \"!^(h=[^&]+[&]*)?(id=[^&]+[&]*)?$\" + + # But block other requests with query strings as they might make + # cgit generate something from old Guix commits. This causes too + # high load. + RewriteCond \"%{QUERY_STRING}\" \"!^(h=[^&]+[&]*)?$\" + + RewriteRule \".?\" \"-\" [F] + + # Now, tell bots not to delve deep into the cgit pages graph. Just + # crawling the branches/tags. From there they can follow links to + # the most recent commits but no paging of the listing to see older + # commits, no parent commit navigation, etc. + Header set X-Robots-Tag \"nofollow\" \ + \"expr=%{QUERY_STRING} !~ /^(h=[^&]+[&]*)?$/\" + SetEnv GIT_CONFIG_GLOBAL " ,%httpd-gitconfig " <Directory " ,(file-append git "/libexec/git-core") "> |