From 111ea627df5a1e6682f2d7a248aa6f87e044909e Mon Sep 17 00:00:00 2001 From: Helen Chong <119173961+helenclx@users.noreply.github.com> Date: Sat, 19 Apr 2025 07:28:55 +0800 Subject: [PATCH] Add imgproxy, NovaAct and Operator --- .htaccess | 4 ++-- robots.txt | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.htaccess b/.htaccess index 89aa91d..6118bd7 100644 --- a/.htaccess +++ b/.htaccess @@ -20,5 +20,5 @@ RewriteRule ^.+$ index.php [L] # Block bad bots -RewriteCond %{HTTP_USER_AGENT} (AI2Bot|Ai2Bot-Dolma|Amazonbot|anthropic-ai|Applebot-Extended|Brightbot.*1.0|Bytespider|ChatGPT-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Diffbot|DuckAssistBot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo.*Bot|Meta-ExternalAgent|Meta-ExternalFetcher|OAI-SearchBot|omgili|omgilibot|PanguBot|PerplexityBot|PetalBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade.*indexer.*bot|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot) [NC] -RewriteRule .* https://nocommercialuse.org/ [L] \ No newline at end of file +RewriteCond %{HTTP_USER_AGENT} (AI2Bot|Ai2Bot-Dolma|Amazonbot|anthropic-ai|Applebot-Extended|Brightbot.*1.0|Bytespider|ChatGPT-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Diffbot|DuckAssistBot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|imgproxy|ISSCyberRiskCrawler|Kangaroo.*Bot|Meta-ExternalAgent|Meta-ExternalFetcher|NovaAct|OAI-SearchBot|omgili|omgilibot|Operator|PanguBot|PerplexityBot|PetalBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade.*indexer.*bot|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot) [NC] +RewriteRule .* https://nocommercialuse.org/ [L] diff --git a/robots.txt b/robots.txt index 87ca0e1..2a0aaeb 100644 --- a/robots.txt +++ b/robots.txt @@ -26,13 +26,16 @@ User-agent: iaskspider/2.0 User-agent: ICC-Crawler User-agent: ImagesiftBot User-agent: img2dataset +User-agent: imgproxy User-agent: ISSCyberRiskCrawler User-agent: Kangaroo Bot User-agent: Meta-ExternalAgent User-agent: Meta-ExternalFetcher +User-agent: NovaAct User-agent: OAI-SearchBot User-agent: omgili User-agent: omgilibot +User-agent: Operator User-agent: PanguBot User-agent: PerplexityBot User-agent: PetalBot @@ -44,4 +47,4 @@ User-agent: Timpibot User-agent: VelenPublicWebCrawler User-agent: Webzio-Extended User-agent: YouBot -Disallow: / \ No newline at end of file +Disallow: /