Update bad bot block list
This commit is contained in:
parent
fa95ff36ea
commit
72fb410503
|
@ -20,5 +20,5 @@ RewriteRule ^.+$ index.php [L]
|
|||
</IfModule>
|
||||
|
||||
# Block bad bots
|
||||
RewriteCond %{HTTP_USER_AGENT} (AI2Bot|Ai2Bot-Dolma|Amazonbot|Applebot-Extended|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|Diffbot|DuckAssistBot|FacebookBot|FriendlyCrawler|GPTBot|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|ICC-Crawler|ISSCyberRiskCrawler|ImagesiftBot|Kangaroo.*Bot|Meta-ExternalAgent|Meta-ExternalFetcher|OAI-SearchBot|PerplexityBot|PetalBot|Scrapy|Sidetrade.*indexer.*bot|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot|anthropic-ai|cohere-ai|facebookexternalhit|iaskspider/2.0|img2dataset|omgili|omgilibot) [NC]
|
||||
RewriteCond %{HTTP_USER_AGENT} (AI2Bot|Ai2Bot-Dolma|Amazonbot|anthropic-ai|Applebot-Extended|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|cohere-ai|Diffbot|DuckAssistBot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo.*Bot|Meta-ExternalAgent|Meta-ExternalFetcher|OAI-SearchBot|omgili|omgilibot|PanguBot|PerplexityBot|PetalBot|Scrapy|Sidetrade.*indexer.*bot|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot) [NC]
|
||||
RewriteRule .* https://nocommercialuse.org/ [L]
|
18
robots.txt
18
robots.txt
|
@ -4,28 +4,35 @@ Disallow:
|
|||
User-agent: AI2Bot
|
||||
User-agent: Ai2Bot-Dolma
|
||||
User-agent: Amazonbot
|
||||
User-agent: anthropic-ai
|
||||
User-agent: Applebot-Extended
|
||||
User-agent: Bytespider
|
||||
User-agent: CCBot
|
||||
User-agent: ChatGPT-User
|
||||
User-agent: Claude-Web
|
||||
User-agent: ClaudeBot
|
||||
User-agent: cohere-ai
|
||||
User-agent: Diffbot
|
||||
User-agent: DuckAssistBot
|
||||
User-agent: FacebookBot
|
||||
User-agent: FriendlyCrawler
|
||||
User-agent: GPTBot
|
||||
User-agent: Google-Extended
|
||||
User-agent: GoogleOther
|
||||
User-agent: GoogleOther-Image
|
||||
User-agent: GoogleOther-Video
|
||||
User-agent: GPTBot
|
||||
User-agent: iaskspider/2.0
|
||||
User-agent: ICC-Crawler
|
||||
User-agent: ISSCyberRiskCrawler
|
||||
User-agent: ImagesiftBot
|
||||
User-agent: img2dataset
|
||||
User-agent: ISSCyberRiskCrawler
|
||||
User-agent: Kangaroo Bot
|
||||
User-agent: Meta-ExternalAgent
|
||||
User-agent: Meta-ExternalFetcher
|
||||
User-agent: OAI-SearchBot
|
||||
User-agent: omgili
|
||||
User-agent: omgilibot
|
||||
User-agent: PanguBot
|
||||
User-agent: PerplexityBot
|
||||
User-agent: PetalBot
|
||||
User-agent: Scrapy
|
||||
|
@ -34,11 +41,4 @@ User-agent: Timpibot
|
|||
User-agent: VelenPublicWebCrawler
|
||||
User-agent: Webzio-Extended
|
||||
User-agent: YouBot
|
||||
User-agent: anthropic-ai
|
||||
User-agent: cohere-ai
|
||||
User-agent: facebookexternalhit
|
||||
User-agent: iaskspider/2.0
|
||||
User-agent: img2dataset
|
||||
User-agent: omgili
|
||||
User-agent: omgilibot
|
||||
Disallow: /
|
Loading…
Reference in New Issue