From b5eef5793129a3fa4bb22ff08ab54492b9147307 Mon Sep 17 00:00:00 2001
From: Helen Chong <119173961+helenclx@users.noreply.github.com>
Date: Sat, 5 Oct 2024 16:13:37 +0800
Subject: [PATCH] Inline list of bad bots to block

---
Review notes (text between the "---" marker above and the diff below is
dropped when the patch is applied; it is not part of the commit message):

* This copy of the patch had its line breaks collapsed; the line structure
  is restored here. Blank context lines could not be recovered from the
  collapsed text, so the hunk carries one line of context on each side
  instead of the original "-20,43 +20,5" span. The removed and added lines
  themselves are unchanged.
* The added pattern is the former per-bot patterns joined with "|" in the
  same order and still matched with [NC], replacing the [NC,OR] chain.
* RewriteCond patterns are unanchored, so "Ai2Bot-Dolma", "GoogleOther-Image",
  "GoogleOther-Video" and "omgilibot" are already covered by "AI2Bot",
  "GoogleOther" and "omgili". The "." in "iaskspider/2.0" is unescaped and
  matches any character. Both properties carry over from the removed rules.

 .htaccess | 40 +---------------------------------------
 1 file changed, 1 insertion(+), 39 deletions(-)

diff --git a/.htaccess b/.htaccess
index ddfdd56..8bf6ed9 100644
--- a/.htaccess
+++ b/.htaccess
@@ -22,41 +22,3 @@ RewriteRule ^.+$ index.php [L]
 # Block bad bots
-RewriteCond %{HTTP_USER_AGENT} (AI2Bot) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (Ai2Bot-Dolma) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (Amazonbot) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (Applebot-Extended) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (Bytespider) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (CCBot) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (ChatGPT-User) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (Claude-Web) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (ClaudeBot) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (Diffbot) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (FacebookBot) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (FriendlyCrawler) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (GPTBot) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (Google-Extended) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (GoogleOther) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (GoogleOther-Image) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (GoogleOther-Video) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (ICC-Crawler) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (ISSCyberRiskCrawler) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (ImagesiftBot) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (Kangaroo.*Bot) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (Meta-ExternalAgent) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (Meta-ExternalFetcher) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (OAI-SearchBot) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (PerplexityBot) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (PetalBot) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (Scrapy) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (Sidetrade.*indexer.*bot) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (Timpibot) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (VelenPublicWebCrawler) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (Webzio-Extended) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (YouBot) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (anthropic-ai) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (cohere-ai) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (facebookexternalhit) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (iaskspider/2.0) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (img2dataset) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (omgili) [NC,OR]
-RewriteCond %{HTTP_USER_AGENT} (omgilibot) [NC]
+RewriteCond %{HTTP_USER_AGENT} (AI2Bot|Ai2Bot-Dolma|Amazonbot|Applebot-Extended|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|Diffbot|FacebookBot|FriendlyCrawler|GPTBot|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|ICC-Crawler|ISSCyberRiskCrawler|ImagesiftBot|Kangaroo.*Bot|Meta-ExternalAgent|Meta-ExternalFetcher|OAI-SearchBot|PerplexityBot|PetalBot|Scrapy|Sidetrade.*indexer.*bot|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot|anthropic-ai|cohere-ai|facebookexternalhit|iaskspider/2.0|img2dataset|omgili|omgilibot) [NC]
 RewriteRule .* https://nocommercialuse.org/ [L]
\ No newline at end of file