From a404ac80936380af8b4920da49c94a19ad22718b Mon Sep 17 00:00:00 2001
From: Helen Chong <119173961+helenclx@users.noreply.github.com>
Date: Sun, 29 Sep 2024 09:55:21 +0800
Subject: [PATCH] Update list of bad bots to block

Add "Kangaroo Bot" and "Sidetrade indexer bot" to the blocked user agents
in both .htaccess and robots.txt, and move the "iaskspider/2.0" entry
further down each list.

In .htaccess, "Webzio-Extended" moves up the list, so "omgilibot" becomes
the final RewriteCond and therefore carries no OR flag.

Spaces inside the new multi-word .htaccess patterns are backslash-escaped.
mod_rewrite splits directive arguments on whitespace, so an unescaped
space would end the CondPattern early and make Apache reject the file.
---
Review note: the two multi-word RewriteCond patterns were changed to use
escaped spaces, so the post-image blob id on the .htaccess "index" line
below no longer corresponds to the resulting file content.

 .htaccess  | 8 +++++---
 robots.txt | 4 +++-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/.htaccess b/.htaccess
index 5cf35de..845c687 100644
--- a/.htaccess
+++ b/.htaccess
@@ -38,23 +38,25 @@ RewriteCond %{HTTP_USER_AGENT} (Google-Extended) [NC,OR]
 RewriteCond %{HTTP_USER_AGENT} (GoogleOther) [NC,OR]
 RewriteCond %{HTTP_USER_AGENT} (GoogleOther-Image) [NC,OR]
 RewriteCond %{HTTP_USER_AGENT} (GoogleOther-Video) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (iaskspider/2.0) [NC,OR]
 RewriteCond %{HTTP_USER_AGENT} (ICC-Crawler) [NC,OR]
 RewriteCond %{HTTP_USER_AGENT} (ImagesiftBot) [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} (Kangaroo\ Bot) [NC,OR]
 RewriteCond %{HTTP_USER_AGENT} (Meta-ExternalAgent) [NC,OR]
 RewriteCond %{HTTP_USER_AGENT} (Meta-ExternalFetcher) [NC,OR]
 RewriteCond %{HTTP_USER_AGENT} (OAI-SearchBot) [NC,OR]
 RewriteCond %{HTTP_USER_AGENT} (PerplexityBot) [NC,OR]
 RewriteCond %{HTTP_USER_AGENT} (PetalBot) [NC,OR]
 RewriteCond %{HTTP_USER_AGENT} (Scrapy) [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} (Sidetrade\ indexer\ bot) [NC,OR]
 RewriteCond %{HTTP_USER_AGENT} (Timpibot) [NC,OR]
 RewriteCond %{HTTP_USER_AGENT} (VelenPublicWebCrawler) [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} (Webzio-Extended) [NC,OR]
 RewriteCond %{HTTP_USER_AGENT} (YouBot) [NC,OR]
 RewriteCond %{HTTP_USER_AGENT} (anthropic-ai) [NC,OR]
 RewriteCond %{HTTP_USER_AGENT} (cohere-ai) [NC,OR]
 RewriteCond %{HTTP_USER_AGENT} (facebookexternalhit) [NC,OR]
+RewriteCond %{HTTP_USER_AGENT} (iaskspider/2.0) [NC,OR]
 RewriteCond %{HTTP_USER_AGENT} (img2dataset) [NC,OR]
 RewriteCond %{HTTP_USER_AGENT} (omgili) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (omgilibot) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (Webzio-Extended) [NC]
+RewriteCond %{HTTP_USER_AGENT} (omgilibot) [NC]
 RewriteRule .* https://nocommercialuse.org/ [L]
\ No newline at end of file
diff --git a/robots.txt b/robots.txt
index fecdebe..21abf82 100644
--- a/robots.txt
+++ b/robots.txt
@@ -18,15 +18,16 @@ User-agent: Google-Extended
 User-agent: GoogleOther
 User-agent: GoogleOther-Image
 User-agent: GoogleOther-Video
-User-agent: iaskspider/2.0
 User-agent: ICC-Crawler
 User-agent: ImagesiftBot
+User-agent: Kangaroo Bot
 User-agent: Meta-ExternalAgent
 User-agent: Meta-ExternalFetcher
 User-agent: OAI-SearchBot
 User-agent: PerplexityBot
 User-agent: PetalBot
 User-agent: Scrapy
+User-agent: Sidetrade indexer bot
 User-agent: Timpibot
 User-agent: VelenPublicWebCrawler
 User-agent: Webzio-Extended
@@ -34,6 +35,7 @@ User-agent: YouBot
 User-agent: anthropic-ai
 User-agent: cohere-ai
 User-agent: facebookexternalhit
+User-agent: iaskspider/2.0
 User-agent: img2dataset
 User-agent: omgili
 User-agent: omgilibot