Refactor bot blocking setup

This commit is contained in:
Leilukin 2025-05-09 18:43:28 +08:00
parent 2e9503a008
commit e0cd4713d3
4 changed files with 71 additions and 33 deletions

View File

@ -19,7 +19,6 @@
},
"dependencies": {
"@11ty/eleventy": "^3.1.0-beta.1",
"@11ty/eleventy-fetch": "^5.1.0",
"@11ty/eleventy-navigation": "^1.0.4",
"@11ty/eleventy-plugin-rss": "^2.0.4",
"@11ty/eleventy-plugin-syntaxhighlight": "^5.0.1",

View File

@ -1,32 +1,71 @@
/*
Modified from Robb Knight's script:
https://rknight.me/blog/blocking-bots-with-nginx/
*/
// User-agent names of the crawlers to block.
// Entries are laid out one initial letter per row purely for readability;
// the strings themselves and their order are unchanged.
const blockedUserAgents = [
  // A
  "AcademicBotRTU", "AI2Bot", "Ai2Bot-Dolma", "aiHitBot", "Amazonbot", "anthropic-ai", "Applebot-Extended",
  // B
  "BLEXBot", "BrandVerity/1.0", "Brightbot 1.0", "Bytespider",
  // C
  "ChatGPT-User", "CheckMarkNetwork/1.0", "Claude-Web", "ClaudeBot",
  "cohere-ai", "cohere-training-data-crawler", "Cotoyogi", "Crawlspace",
  // D
  "Diffbot", "DuckAssistBot",
  // F
  "FacebookBot", "Factset_spyderbot", "FirecrawlAgent", "FriendlyCrawler",
  // G
  "Google-Extended", "GoogleOther", "GoogleOther-Image", "GoogleOther-Video", "GPTBot",
  // I
  "iaskspider/2.0", "ICC-Crawler", "ImagesiftBot", "img2dataset", "imgproxy", "ISSCyberRiskCrawler",
  // K
  "Kangaroo Bot",
  // M (both letter-casings are listed as separate entries)
  "meta-externalagent", "Meta-ExternalAgent", "meta-externalfetcher", "Meta-ExternalFetcher",
  // N
  "NovaAct",
  // O
  "OAI-SearchBot", "omgili", "omgilibot", "Operator",
  // P
  "PanguBot", "Perplexity-User", "PerplexityBot", "PetalBot",
  // S
  "Scrapy", "SemrushBot-OCOB", "SemrushBot-SWA", "Sidetrade indexer bot", "SlySearch",
  // T
  "TikTokSpider", "Timpibot", "TurnitinBot",
  // V
  "VelenPublicWebCrawler",
  // W
  "Webzio-Extended",
  // Y
  "YouBot",
];
import EleventyFetch from "@11ty/eleventy-fetch";
/**
 * Escape regular-expression metacharacters so that a user-agent name is
 * matched literally (e.g. the "." in "BrandVerity/1.0" must not match any
 * character). Whitespace and "-" are left untouched.
 *
 * @param {string} text - Raw user-agent name.
 * @returns {string} The name with regex metacharacters backslash-escaped.
 */
const escapeRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

// robots.txt fragment: one "User-agent: <name>" line per blocked crawler.
const txt = blockedUserAgents.map((bot) => `User-agent: ${bot}`).join("\n");

// Regex alternation ("a|b|c") of the blocked names. Each name is escaped
// first so that only the "|" separators act as regex syntax.
// NOTE(review): presumably interpolated into an .htaccess rewrite condition;
// the consuming template is not visible here — confirm.
const htaccess = blockedUserAgents.map((bot) => escapeRegExp(bot)).join("|");
/**
 * Build the bot-blocking data from the robots.txt published in the
 * ai-robots-txt/ai.robots.txt repository on GitHub.
 *
 * @returns {Promise<{txt: string, htaccess: string}>} `txt` is the fetched
 *   robots.txt text with the excepted bots' "User-agent:" lines removed;
 *   `htaccess` is the remaining user-agent names joined with "|", with
 *   whitespace inside each name replaced by ".*".
 */
export default async function () {
const url = "https://raw.githubusercontent.com/ai-robots-txt/ai.robots.txt/refs/heads/main/robots.txt";
// Fetch the file as plain text. `duration: "1w"` is the Eleventy Fetch cache
// option (presumably a one-week cache lifetime — confirm against the plugin docs).
let txt = await EleventyFetch(url, {
duration: "1w",
type: "text",
});
// Bots that should NOT be blocked even though the upstream list names them.
const botExceptions = ["Applebot", "CCBot"];
// Full robots.txt lines to drop, e.g. "User-agent: Applebot".
const botExceptionsFullStr = botExceptions.map(bot => "User-agent: " + bot)
// Whole-line comparison: only a line that equals an exception exactly is
// removed, so "User-agent: Applebot-Extended" is kept.
txt = txt
.split("\n")
.filter((line) => !botExceptionsFullStr.includes(line))
.join("\n");
// Collect the name from every remaining "User-agent:" line: take the text
// after the first colon, trim it, and turn each whitespace character into
// ".*" so the name can sit inside a regex alternation.
const bots = txt
.split("\n")
.filter((line) => line.startsWith("User-agent:"))
.map((line) => line.split(":")[1].trim().replace(/\s/gi, ".*"));
return {
txt: txt,
htaccess: bots.join('|'),
};
// Alternation pattern with every whitespace character widened to ".*", so
// names containing spaces (e.g. "Kangaroo Bot") stay a single regex token.
const htaccessPattern = htaccess.replace(/\s/gi, ".*");

// Data exposed by this module: the robots.txt lines and the regex pattern.
export default {
  txt,
  htaccess: htaccessPattern,
};

View File

@ -1,7 +1,7 @@
---
title: Colophon
keyword: colophon page
updated: 2025-05-09T09:51:16+0800
updated: 2025-05-09T18:42:58+0800
toc: true
eleventyNavigation:
order: 16
@ -29,7 +29,6 @@ Previously, this website was hosted on [Neocities](https://neocities.org/) until
* Eleventy's official [RSS](https://www.11ty.dev/docs/plugins/rss/) plugin
* Eleventy's official [Navigation](https://www.11ty.dev/docs/plugins/navigation/) plugin
* Eleventy's official [Syntax Highlighting](https://www.11ty.dev/docs/plugins/syntaxhighlight/) plugin
* Eleventy's official [Fetch](https://www.11ty.dev/docs/plugins/fetch/) plugin
* [@zachleat/details-utils](https://www.npmjs.com/package/@zachleat/details-utils)
### Eleventy Community Plugins

View File

@ -6,5 +6,6 @@ User-agent: *
Disallow:
{{ robots.txt }}
Disallow: /
Sitemap: {{ sitemeta.siteUrl }}/sitemap.xml
Sitemap: {{ sitemeta.siteUrl }}/sitemap.xml