From 23debef62104c70600be2b745ec3957538eeac6e Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld"
Date: Tue, 28 May 2013 14:17:00 +0200
Subject: [PATCH] robots.txt: disallow access to snapshots

My dmesg is filled with the oom killer bringing down processes while the
Bingbot downloads every snapshot for every commit of the Linux kernel in
tar.xz format. Sure, I should be running with memory limits, and now I'm
using cgroups, but a more general solution is to prevent crawlers from
wasting resources like that in the first place.

Suggested-by: Natanael Copa
Suggested-by: Julius Plenz
Signed-off-by: Jason A. Donenfeld
---
 Makefile   | 1 +
 robots.txt | 3 +++
 2 files changed, 4 insertions(+)
 create mode 100644 robots.txt

diff --git a/Makefile b/Makefile
index 00b3269..f11b60f 100644
--- a/Makefile
+++ b/Makefile
@@ -78,6 +78,7 @@ install: all
 	$(INSTALL) -m 0644 cgit.css $(DESTDIR)$(CGIT_DATA_PATH)/cgit.css
 	$(INSTALL) -m 0644 cgit.png $(DESTDIR)$(CGIT_DATA_PATH)/cgit.png
 	$(INSTALL) -m 0644 favicon.ico $(DESTDIR)$(CGIT_DATA_PATH)/favicon.ico
+	$(INSTALL) -m 0644 robots.txt $(DESTDIR)$(CGIT_DATA_PATH)/robots.txt
 	$(INSTALL) -m 0755 -d $(DESTDIR)$(filterdir)
 	$(COPYTREE) filters/* $(DESTDIR)$(filterdir)
 
diff --git a/robots.txt b/robots.txt
new file mode 100644
index 0000000..4ce948f
--- /dev/null
+++ b/robots.txt
@@ -0,0 +1,3 @@
+User-agent: *
+Disallow: /*/snapshot/*
+Allow: /