From 398f9719ba0a5b7562e0b042aba03dd52bd6a5c6 Mon Sep 17 00:00:00 2001 From: helenclx Date: Tue, 20 Aug 2024 01:53:59 +0000 Subject: [PATCH] Build: (4d88d00) Dynamically generate bad bots to block by fetching remote list --- .htaccess | 34 +--------------------------------- feed.xml | 2 +- robots.txt | 7 +++---- 3 files changed, 5 insertions(+), 38 deletions(-) diff --git a/.htaccess b/.htaccess index 799af670..8e4b2836 100644 --- a/.htaccess +++ b/.htaccess @@ -10,37 +10,5 @@ Header set Cache-Control "no-cache, public" RewriteEngine on # Block bad bots -RewriteCond %{HTTP_USER_AGENT} (Amazonbot) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (Applebot) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (Applebot-Extended) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (Bytespider) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (CCBot) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (ChatGPT-User) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (Claude-Web) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (ClaudeBot) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (Diffbot) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (FacebookBot) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (FriendlyCrawler) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (GPTBot) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (Google-Extended) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (GoogleOther) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (GoogleOther-Image) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (GoogleOther-Video) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (ICC-Crawler) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (ImagesiftBot) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (Meta-ExternalAgent) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (Meta-ExternalFetcher) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (OAI-SearchBot) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (PerplexityBot) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (PetalBot) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (Scrapy) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (Timpibot) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (VelenPublicWebCrawler) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (YouBot) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (anthropic-ai) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (cohere-ai) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (facebookexternalhit) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (img2dataset) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (omgili) [NC,OR] -RewriteCond %{HTTP_USER_AGENT} (omgilibot) [NC] +RewriteCond %{HTTP_USER_AGENT} Amazonbot|Applebot-Extended|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|Diffbot|FacebookBot|FriendlyCrawler|GPTBot|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|ICC-Crawler|ImagesiftBot|Meta-ExternalAgent|Meta-ExternalFetcher|OAI-SearchBot|PerplexityBot|PetalBot|Scrapy|Timpibot|VelenPublicWebCrawler|YouBot|anthropic-ai|cohere-ai|facebookexternalhit|img2dataset|omgili|omgilibot [NC] RewriteRule .* https://nocommercialuse.org/ [L] \ No newline at end of file diff --git a/feed.xml b/feed.xml index 7a9d7c80..9c073bee 100644 --- a/feed.xml +++ b/feed.xml @@ -4,7 +4,7 @@ Leilukin's personal website. - 2024-08-20T00:38:56Z + 2024-08-20T01:53:56Z https://leilukin.com/ Leilukin diff --git a/robots.txt b/robots.txt index 31af8802..ae08300a 100644 --- a/robots.txt +++ b/robots.txt @@ -1,10 +1,7 @@ -Sitemap: https://leilukin.com/sitemap/ - User-agent: * Disallow: User-agent: Amazonbot -User-agent: Applebot User-agent: Applebot-Extended User-agent: Bytespider User-agent: CCBot @@ -36,4 +33,6 @@ User-agent: facebookexternalhit User-agent: img2dataset User-agent: omgili User-agent: omgilibot -Disallow: / \ No newline at end of file +Disallow: / + +Sitemap: https://leilukin.com/sitemap/ \ No newline at end of file