Inline list of bad bots to block
This commit is contained in:
parent
176d8dfc21
commit
b5eef57931
40
.htaccess
40
.htaccess
|
@ -20,43 +20,5 @@ RewriteRule ^.+$ index.php [L]
|
||||||
</IfModule>
|
</IfModule>
|
||||||
|
|
||||||
# Block bad bots
|
# Block bad bots
|
||||||
RewriteCond %{HTTP_USER_AGENT} (AI2Bot) [NC,OR]
|
# One case-insensitive ([NC]) alternation over the User-Agent header replaces
# the former chain of per-bot conditions. Matching is unanchored, so each name
# matches anywhere in the header. The dot in "iaskspider/2\.0" is escaped so it
# matches a literal "." instead of any character.
RewriteCond %{HTTP_USER_AGENT} (AI2Bot|Ai2Bot-Dolma|Amazonbot|Applebot-Extended|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|Diffbot|FacebookBot|FriendlyCrawler|GPTBot|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|ICC-Crawler|ISSCyberRiskCrawler|ImagesiftBot|Kangaroo.*Bot|Meta-ExternalAgent|Meta-ExternalFetcher|OAI-SearchBot|PerplexityBot|PetalBot|Scrapy|Sidetrade.*indexer.*bot|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot|anthropic-ai|cohere-ai|facebookexternalhit|iaskspider/2\.0|img2dataset|omgili|omgilibot) [NC]
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Ai2Bot-Dolma) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Amazonbot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Applebot-Extended) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Bytespider) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (CCBot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (ChatGPT-User) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Claude-Web) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (ClaudeBot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Diffbot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (FacebookBot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (FriendlyCrawler) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (GPTBot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Google-Extended) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (GoogleOther) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (GoogleOther-Image) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (GoogleOther-Video) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (ICC-Crawler) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (ISSCyberRiskCrawler) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (ImagesiftBot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Kangaroo.*Bot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Meta-ExternalAgent) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Meta-ExternalFetcher) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (OAI-SearchBot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (PerplexityBot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (PetalBot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Scrapy) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Sidetrade.*indexer.*bot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Timpibot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (VelenPublicWebCrawler) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Webzio-Extended) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (YouBot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (anthropic-ai) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (cohere-ai) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (facebookexternalhit) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (iaskspider/2.0) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (img2dataset) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (omgili) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (omgilibot) [NC]
|
|
||||||
RewriteRule .* https://nocommercialuse.org/ [L]
|
# Applies to every request path (.*) whose User-Agent satisfied the preceding
# RewriteCond: substitute the URL below and stop processing further rules ([L]).
# NOTE(review): mod_rewrite presumably issues an external redirect here because
# the target is an absolute URL on another host — confirm the status code sent.
RewriteRule .* https://nocommercialuse.org/ [L]
|
Loading…
Reference in New Issue