aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorW. Kosior <koszko@koszko.org>2025-02-12 20:36:29 +0100
committerW. Kosior <koszko@koszko.org>2025-02-12 20:36:29 +0100
commit1aec87a4966772428ade576895d419c61fd16ef8 (patch)
tree8bc27d2021633eb6eeb56ddb33284a8a98c391c5
parent93f4adf3c4213fc992a489e53fd0253b9b16f379 (diff)
downloadkoszko-org-guix-server-1aec87a4966772428ade576895d419c61fd16ef8.tar.gz
koszko-org-guix-server-1aec87a4966772428ade576895d419c61fd16ef8.zip
Prevent bots from DoS'ing cgit.
-rw-r--r--salamina.scm29
1 files changed, 29 insertions, 0 deletions
diff --git a/salamina.scm b/salamina.scm
index 123f584..73278d2 100644
--- a/salamina.scm
+++ b/salamina.scm
@@ -225,6 +225,35 @@
Redirect permanent / https://git.koszko.org/
</If>
+ Redirect permanent /pydrilla \
+ https://git.koszko.org/haketilo-hydrilla
+
+ RewriteEngine On
+
+ # Let's put some limits on which cgit pages bots can view.
+ RewriteCond \"%{HTTP_USER_AGENT}\" \"bot\" [NC]
+
+ # Allow crawlers to look at different git branches/tags (`h' param)
+ # and also allow them to view commits (they might be of some
+ # interest to search engine users).
+ RewriteCond \"%{REQUEST_URI}\" \"!^[/][^/]+[/]+commit[/]*$\" [OR]
+ RewriteCond \"%{QUERY_STRING}\" \
+ \"!^(h=[^&]+[&]*)?(id=[^&]+[&]*)?$\"
+
+ # But block other requests with query strings as they might make
+ # cgit generate something from old Guix commits, which causes too
+ # much load.
+ RewriteCond \"%{QUERY_STRING}\" \"!^(h=[^&]+[&]*)?$\"
+
+ RewriteRule \".?\" \"-\" [F]
+
+ # Now, tell bots not to delve deep into the cgit pages graph. They
+ # may crawl the branches/tags and from there follow links to the
+ # most recent commits, but there should be no paging of the listing
+ # to see older commits, no parent commit navigation, etc.
+ Header set X-Robots-Tag \"nofollow\" \
+ \"expr=%{QUERY_STRING} !~ /^(h=[^&]+[&]*)?$/\"
+
SetEnv GIT_CONFIG_GLOBAL " ,%httpd-gitconfig "
<Directory " ,(file-append git "/libexec/git-core") ">