Build: (4d88d00) Dynamically generate bad bots to block by fetching remote list
This commit is contained in:
parent
6ab5e55be7
commit
398f9719ba
34
.htaccess
34
.htaccess
|
@@ -10,37 +10,5 @@ Header set Cache-Control "no-cache, public"
|
||||||
RewriteEngine on
|
RewriteEngine on
|
||||||
|
|
||||||
# Block bad bots
|
# Block bad bots
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Amazonbot) [NC,OR]
|
RewriteCond %{HTTP_USER_AGENT} Amazonbot|Applebot-Extended|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|Diffbot|FacebookBot|FriendlyCrawler|GPTBot|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|ICC-Crawler|ImagesiftBot|Meta-ExternalAgent|Meta-ExternalFetcher|OAI-SearchBot|PerplexityBot|PetalBot|Scrapy|Timpibot|VelenPublicWebCrawler|YouBot|anthropic-ai|cohere-ai|facebookexternalhit|img2dataset|omgili|omgilibot [NC]
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Applebot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Applebot-Extended) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Bytespider) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (CCBot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (ChatGPT-User) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Claude-Web) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (ClaudeBot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Diffbot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (FacebookBot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (FriendlyCrawler) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (GPTBot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Google-Extended) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (GoogleOther) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (GoogleOther-Image) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (GoogleOther-Video) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (ICC-Crawler) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (ImagesiftBot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Meta-ExternalAgent) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Meta-ExternalFetcher) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (OAI-SearchBot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (PerplexityBot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (PetalBot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Scrapy) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (Timpibot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (VelenPublicWebCrawler) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (YouBot) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (anthropic-ai) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (cohere-ai) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (facebookexternalhit) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (img2dataset) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (omgili) [NC,OR]
|
|
||||||
RewriteCond %{HTTP_USER_AGENT} (omgilibot) [NC]
|
|
||||||
RewriteRule .* https://nocommercialuse.org/ [L]
|
RewriteRule .* https://nocommercialuse.org/ [L]
|
2
feed.xml
2
feed.xml
|
@@ -4,7 +4,7 @@
|
||||||
<subtitle>Leilukin's personal website.</subtitle>
|
<subtitle>Leilukin's personal website.</subtitle>
|
||||||
<link href="https://leilukin.com/feed.xml" rel="self"/>
|
<link href="https://leilukin.com/feed.xml" rel="self"/>
|
||||||
<link href="https://leilukin.com"/>
|
<link href="https://leilukin.com"/>
|
||||||
<updated>2024-08-20T00:38:56Z</updated>
|
<updated>2024-08-20T01:53:56Z</updated>
|
||||||
<id>https://leilukin.com/</id>
|
<id>https://leilukin.com/</id>
|
||||||
<author>
|
<author>
|
||||||
<name>Leilukin</name>
|
<name>Leilukin</name>
|
||||||
|
|
|
@@ -1,10 +1,7 @@
|
||||||
Sitemap: https://leilukin.com/sitemap/
|
|
||||||
|
|
||||||
User-agent: *
|
User-agent: *
|
||||||
Disallow:
|
Disallow:
|
||||||
|
|
||||||
User-agent: Amazonbot
|
User-agent: Amazonbot
|
||||||
User-agent: Applebot
|
|
||||||
User-agent: Applebot-Extended
|
User-agent: Applebot-Extended
|
||||||
User-agent: Bytespider
|
User-agent: Bytespider
|
||||||
User-agent: CCBot
|
User-agent: CCBot
|
||||||
|
@@ -37,3 +34,5 @@ User-agent: img2dataset
|
||||||
User-agent: omgili
|
User-agent: omgili
|
||||||
User-agent: omgilibot
|
User-agent: omgilibot
|
||||||
Disallow: /
|
Disallow: /
|
||||||
|
|
||||||
|
Sitemap: https://leilukin.com/sitemap/
|
Loading…
Reference in New Issue