Inline list of bad bots to block

This commit is contained in:
Helen Chong 2024-10-05 16:13:37 +08:00
parent 176d8dfc21
commit b5eef57931
1 changed file with 1 addition and 39 deletions

View File

@@ -20,43 +20,5 @@ RewriteRule ^.+$ index.php [L]
</IfModule> </IfModule>
# Block bad bots # Block bad bots
# Each RewriteCond in this section tests the request's User-Agent header
# against a regular expression. [NC] makes the comparison case-insensitive and
# [OR] joins a condition to the following one with a logical OR, so a match on
# any single pattern is enough for the RewriteRule that closes the section to
# apply. That rule matches every request path (.*) and targets
# https://nocommercialuse.org/; [L] ends rule processing for the current pass.
# The patterns are unanchored, so they match anywhere inside the User-Agent
# string. "Kangaroo.*Bot" and "Sidetrade.*indexer.*bot" allow arbitrary text
# between the words, and the unescaped "." in "iaskspider/2.0" matches any
# single character, not only a literal dot.
# NOTE(review): an absolute URL as the substitution normally makes mod_rewrite
# answer with an external redirect - confirm the status code is the intended one.
RewriteCond %{HTTP_USER_AGENT} (AI2Bot) [NC,OR] RewriteCond %{HTTP_USER_AGENT} (AI2Bot|Ai2Bot-Dolma|Amazonbot|Applebot-Extended|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|Diffbot|FacebookBot|FriendlyCrawler|GPTBot|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|ICC-Crawler|ISSCyberRiskCrawler|ImagesiftBot|Kangaroo.*Bot|Meta-ExternalAgent|Meta-ExternalFetcher|OAI-SearchBot|PerplexityBot|PetalBot|Scrapy|Sidetrade.*indexer.*bot|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot|anthropic-ai|cohere-ai|facebookexternalhit|iaskspider/2.0|img2dataset|omgili|omgilibot) [NC]
RewriteCond %{HTTP_USER_AGENT} (Ai2Bot-Dolma) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (Amazonbot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (Applebot-Extended) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (Bytespider) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (CCBot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (ChatGPT-User) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (Claude-Web) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (ClaudeBot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (Diffbot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (FacebookBot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (FriendlyCrawler) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (GPTBot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (Google-Extended) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (GoogleOther) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (GoogleOther-Image) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (GoogleOther-Video) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (ICC-Crawler) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (ISSCyberRiskCrawler) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (ImagesiftBot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (Kangaroo.*Bot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (Meta-ExternalAgent) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (Meta-ExternalFetcher) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (OAI-SearchBot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (PerplexityBot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (PetalBot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (Scrapy) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (Sidetrade.*indexer.*bot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (Timpibot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (VelenPublicWebCrawler) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (Webzio-Extended) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (YouBot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (anthropic-ai) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (cohere-ai) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (facebookexternalhit) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (iaskspider/2.0) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (img2dataset) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (omgili) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (omgilibot) [NC]
RewriteRule .* https://nocommercialuse.org/ [L] RewriteRule .* https://nocommercialuse.org/ [L]