Refactor bot blocking setup
This commit is contained in:
parent
2e9503a008
commit
e0cd4713d3
@ -19,7 +19,6 @@
|
|||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@11ty/eleventy": "^3.1.0-beta.1",
|
"@11ty/eleventy": "^3.1.0-beta.1",
|
||||||
"@11ty/eleventy-fetch": "^5.1.0",
|
|
||||||
"@11ty/eleventy-navigation": "^1.0.4",
|
"@11ty/eleventy-navigation": "^1.0.4",
|
||||||
"@11ty/eleventy-plugin-rss": "^2.0.4",
|
"@11ty/eleventy-plugin-rss": "^2.0.4",
|
||||||
"@11ty/eleventy-plugin-syntaxhighlight": "^5.0.1",
|
"@11ty/eleventy-plugin-syntaxhighlight": "^5.0.1",
|
||||||
|
@ -1,32 +1,71 @@
|
|||||||
/*
|
const blockedUserAgents = [
|
||||||
Modified from Robb Knight's script:
|
"AcademicBotRTU",
|
||||||
https://rknight.me/blog/blocking-bots-with-nginx/
|
"AI2Bot",
|
||||||
*/
|
"Ai2Bot-Dolma",
|
||||||
|
"aiHitBot",
|
||||||
|
"Amazonbot",
|
||||||
|
"anthropic-ai",
|
||||||
|
"Applebot-Extended",
|
||||||
|
"BLEXBot",
|
||||||
|
"BrandVerity/1.0",
|
||||||
|
"Brightbot 1.0",
|
||||||
|
"Bytespider",
|
||||||
|
"ChatGPT-User",
|
||||||
|
"CheckMarkNetwork/1.0",
|
||||||
|
"Claude-Web",
|
||||||
|
"ClaudeBot",
|
||||||
|
"cohere-ai",
|
||||||
|
"cohere-training-data-crawler",
|
||||||
|
"Cotoyogi",
|
||||||
|
"Crawlspace",
|
||||||
|
"Diffbot",
|
||||||
|
"DuckAssistBot",
|
||||||
|
"FacebookBot",
|
||||||
|
"Factset_spyderbot",
|
||||||
|
"FirecrawlAgent",
|
||||||
|
"FriendlyCrawler",
|
||||||
|
"Google-Extended",
|
||||||
|
"GoogleOther",
|
||||||
|
"GoogleOther-Image",
|
||||||
|
"GoogleOther-Video",
|
||||||
|
"GPTBot",
|
||||||
|
"iaskspider/2.0",
|
||||||
|
"ICC-Crawler",
|
||||||
|
"ImagesiftBot",
|
||||||
|
"img2dataset",
|
||||||
|
"imgproxy",
|
||||||
|
"ISSCyberRiskCrawler",
|
||||||
|
"Kangaroo Bot",
|
||||||
|
"meta-externalagent",
|
||||||
|
"Meta-ExternalAgent",
|
||||||
|
"meta-externalfetcher",
|
||||||
|
"Meta-ExternalFetcher",
|
||||||
|
"NovaAct",
|
||||||
|
"OAI-SearchBot",
|
||||||
|
"omgili",
|
||||||
|
"omgilibot",
|
||||||
|
"Operator",
|
||||||
|
"PanguBot",
|
||||||
|
"Perplexity-User",
|
||||||
|
"PerplexityBot",
|
||||||
|
"PetalBot",
|
||||||
|
"Scrapy",
|
||||||
|
"SemrushBot-OCOB",
|
||||||
|
"SemrushBot-SWA",
|
||||||
|
"Sidetrade indexer bot",
|
||||||
|
"SlySearch",
|
||||||
|
"TikTokSpider",
|
||||||
|
"Timpibot",
|
||||||
|
"TurnitinBot",
|
||||||
|
"VelenPublicWebCrawler",
|
||||||
|
"Webzio-Extended",
|
||||||
|
"YouBot",
|
||||||
|
];
|
||||||
|
|
||||||
import EleventyFetch from "@11ty/eleventy-fetch";
|
const txt = blockedUserAgents.map((bot) => `User-agent: ${bot}`).join("\n");
|
||||||
|
const htaccess = blockedUserAgents.join("|");
|
||||||
|
|
||||||
export default async function () {
|
export default {
|
||||||
const url = "https://raw.githubusercontent.com/ai-robots-txt/ai.robots.txt/refs/heads/main/robots.txt";
|
txt: txt,
|
||||||
let txt = await EleventyFetch(url, {
|
htaccess: htaccess.replace(/\s/gi, ".*"),
|
||||||
duration: "1w",
|
|
||||||
type: "text",
|
|
||||||
});
|
|
||||||
|
|
||||||
const botExceptions = ["Applebot", "CCBot"];
|
|
||||||
const botExceptionsFullStr = botExceptions.map(bot => "User-agent: " + bot)
|
|
||||||
|
|
||||||
txt = txt
|
|
||||||
.split("\n")
|
|
||||||
.filter((line) => !botExceptionsFullStr.includes(line))
|
|
||||||
.join("\n");
|
|
||||||
|
|
||||||
const bots = txt
|
|
||||||
.split("\n")
|
|
||||||
.filter((line) => line.startsWith("User-agent:"))
|
|
||||||
.map((line) => line.split(":")[1].trim().replace(/\s/gi, ".*"));
|
|
||||||
|
|
||||||
return {
|
|
||||||
txt: txt,
|
|
||||||
htaccess: bots.join('|'),
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
---
|
---
|
||||||
title: Colophon
|
title: Colophon
|
||||||
keyword: colophon page
|
keyword: colophon page
|
||||||
updated: 2025-05-09T09:51:16+0800
|
updated: 2025-05-09T18:42:58+0800
|
||||||
toc: true
|
toc: true
|
||||||
eleventyNavigation:
|
eleventyNavigation:
|
||||||
order: 16
|
order: 16
|
||||||
@ -29,7 +29,6 @@ Previously, this website was hosted on [Neocities](https://neocities.org/) until
|
|||||||
* Eleventy's official [RSS](https://www.11ty.dev/docs/plugins/rss/) plugin
|
* Eleventy's official [RSS](https://www.11ty.dev/docs/plugins/rss/) plugin
|
||||||
* Eleventy's official [Navigation](https://www.11ty.dev/docs/plugins/navigation/) plugin
|
* Eleventy's official [Navigation](https://www.11ty.dev/docs/plugins/navigation/) plugin
|
||||||
* Eleventy's official [Syntax Highlighting](https://www.11ty.dev/docs/plugins/syntaxhighlight/) plugin
|
* Eleventy's official [Syntax Highlighting](https://www.11ty.dev/docs/plugins/syntaxhighlight/) plugin
|
||||||
* Eleventy's official [Fetch](https://www.11ty.dev/docs/plugins/fetch/) plugin
|
|
||||||
* [@zachleat/details-utils](https://www.npmjs.com/package/@zachleat/details-utils)
|
* [@zachleat/details-utils](https://www.npmjs.com/package/@zachleat/details-utils)
|
||||||
|
|
||||||
### Eleventy Community Plugins
|
### Eleventy Community Plugins
|
||||||
|
@ -6,5 +6,6 @@ User-agent: *
|
|||||||
Disallow:
|
Disallow:
|
||||||
|
|
||||||
{{ robots.txt }}
|
{{ robots.txt }}
|
||||||
|
Disallow: /
|
||||||
|
|
||||||
Sitemap: {{ sitemeta.siteUrl }}/sitemap.xml
|
Sitemap: {{ sitemeta.siteUrl }}/sitemap.xml
|
||||||
|
Loading…
x
Reference in New Issue
Block a user