diff --git a/package.json b/package.json index 9b296af4..e87d2308 100644 --- a/package.json +++ b/package.json @@ -19,7 +19,6 @@ }, "dependencies": { "@11ty/eleventy": "^3.1.0-beta.1", - "@11ty/eleventy-fetch": "^5.1.0", "@11ty/eleventy-navigation": "^1.0.4", "@11ty/eleventy-plugin-rss": "^2.0.4", "@11ty/eleventy-plugin-syntaxhighlight": "^5.0.1", diff --git a/src/_data/robots.js b/src/_data/robots.js index aad0565f..08cbf7bf 100644 --- a/src/_data/robots.js +++ b/src/_data/robots.js @@ -1,32 +1,71 @@ -/* - Modified from Robb Knight's script: - https://rknight.me/blog/blocking-bots-with-nginx/ -*/ +const blockedUserAgents = [ + "AcademicBotRTU", + "AI2Bot", + "Ai2Bot-Dolma", + "aiHitBot", + "Amazonbot", + "anthropic-ai", + "Applebot-Extended", + "BLEXBot", + "BrandVerity/1.0", + "Brightbot 1.0", + "Bytespider", + "ChatGPT-User", + "CheckMarkNetwork/1.0", + "Claude-Web", + "ClaudeBot", + "cohere-ai", + "cohere-training-data-crawler", + "Cotoyogi", + "Crawlspace", + "Diffbot", + "DuckAssistBot", + "FacebookBot", + "Factset_spyderbot", + "FirecrawlAgent", + "FriendlyCrawler", + "Google-Extended", + "GoogleOther", + "GoogleOther-Image", + "GoogleOther-Video", + "GPTBot", + "iaskspider/2.0", + "ICC-Crawler", + "ImagesiftBot", + "img2dataset", + "imgproxy", + "ISSCyberRiskCrawler", + "Kangaroo Bot", + "meta-externalagent", + "Meta-ExternalAgent", + "meta-externalfetcher", + "Meta-ExternalFetcher", + "NovaAct", + "OAI-SearchBot", + "omgili", + "omgilibot", + "Operator", + "PanguBot", + "Perplexity-User", + "PerplexityBot", + "PetalBot", + "Scrapy", + "SemrushBot-OCOB", + "SemrushBot-SWA", + "Sidetrade indexer bot", + "SlySearch", + "TikTokSpider", + "Timpibot", + "TurnitinBot", + "VelenPublicWebCrawler", + "Webzio-Extended", + "YouBot", +]; -import EleventyFetch from "@11ty/eleventy-fetch"; +const txt = blockedUserAgents.map((bot) => `User-agent: ${bot}`).join("\n"); +const htaccess = blockedUserAgents.join("|"); -export default async function () { - const url = "https://raw.githubusercontent.com/ai-robots-txt/ai.robots.txt/refs/heads/main/robots.txt"; - let txt = await EleventyFetch(url, { - duration: "1w", - type: "text", - }); - - const botExceptions = ["Applebot", "CCBot"]; - const botExceptionsFullStr = botExceptions.map(bot => "User-agent: " + bot) - - txt = txt - .split("\n") - .filter((line) => !botExceptionsFullStr.includes(line)) - .join("\n"); - - const bots = txt - .split("\n") - .filter((line) => line.startsWith("User-agent:")) - .map((line) => line.split(":")[1].trim().replace(/\s/gi, ".*")); - - return { - txt: txt, - htaccess: bots.join('|'), - }; +export default { + txt: txt, + htaccess: htaccess.replace(/\s/gi, ".*"), } diff --git a/src/pages/statements/colophon.md b/src/pages/statements/colophon.md index c1792d6c..a0062953 100644 --- a/src/pages/statements/colophon.md +++ b/src/pages/statements/colophon.md @@ -1,7 +1,7 @@ --- title: Colophon keyword: colophon page -updated: 2025-05-09T09:51:16+0800 +updated: 2025-05-09T18:42:58+0800 toc: true eleventyNavigation: order: 16 @@ -29,7 +29,6 @@ Previously, this website was hosted on [Neocities](https://neocities.org/) until * Eleventy's official [RSS](https://www.11ty.dev/docs/plugins/rss/) plugin * Eleventy's official [Navigation](https://www.11ty.dev/docs/plugins/navigation/) plugin * Eleventy's official [Syntax Highlighting](https://www.11ty.dev/docs/plugins/syntaxhighlight/) plugin -* Eleventy's official [Fetch](https://www.11ty.dev/docs/plugins/fetch/) plugin * [@zachleat/details-utils](https://www.npmjs.com/package/@zachleat/details-utils) ### Eleventy Community Plugins diff --git a/src/robots.txt.vto b/src/robots.txt.vto index fdb9cd8d..084d308f 100644 --- a/src/robots.txt.vto +++ b/src/robots.txt.vto @@ -6,5 +6,6 @@ User-agent: * Disallow: {{ robots.txt }} +Disallow: / -Sitemap: {{ sitemeta.siteUrl }}/sitemap.xml \ No newline at end of file +Sitemap: {{ sitemeta.siteUrl }}/sitemap.xml