Compare commits

...

6 Commits

Author SHA1 Message Date
Helen Chong 4c944c16d7 Changing browser testing wording 2024-08-20 09:58:15 +08:00
Helen Chong 221c05a87a Add Eleventy's official fetch plugin 2024-08-20 09:57:08 +08:00
Helen Chong 31a68a4f77 Add browser used to test 2024-08-20 09:56:53 +08:00
Helen Chong 4d88d0033b Dynamically generate bad bots to block by fetching remote list 2024-08-20 09:53:37 +08:00
Helen Chong 31631b9fe6 Remove slash from permalink 2024-08-20 09:17:10 +08:00
Helen Chong 483d395416 Install Eleventy Fetch plugin 2024-08-20 09:16:00 +08:00
6 changed files with 41 additions and 71 deletions

BIN
bun.lockb

Binary file not shown.

View File

@ -10,6 +10,7 @@
},
"dependencies": {
"@11ty/eleventy": "^3.0.0-beta.1",
"@11ty/eleventy-fetch": "^4.0.1",
"@11ty/eleventy-navigation": "^0.3.5",
"@11ty/eleventy-plugin-rss": "^2.0.2",
"@11ty/eleventy-plugin-syntaxhighlight": "^5.0.0",

View File

@ -14,37 +14,5 @@ Header set Cache-Control "no-cache, public"
RewriteEngine on
# Block bad bots
RewriteCond %{HTTP_USER_AGENT} (Amazonbot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (Applebot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (Applebot-Extended) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (Bytespider) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (CCBot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (ChatGPT-User) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (Claude-Web) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (ClaudeBot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (Diffbot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (FacebookBot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (FriendlyCrawler) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (GPTBot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (Google-Extended) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (GoogleOther) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (GoogleOther-Image) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (GoogleOther-Video) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (ICC-Crawler) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (ImagesiftBot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (Meta-ExternalAgent) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (Meta-ExternalFetcher) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (OAI-SearchBot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (PerplexityBot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (PetalBot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (Scrapy) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (Timpibot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (VelenPublicWebCrawler) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (YouBot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (anthropic-ai) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (cohere-ai) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (facebookexternalhit) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (img2dataset) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (omgili) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (omgilibot) [NC]
RewriteCond %{HTTP_USER_AGENT} {{ robots.htaccess }} [NC]
RewriteRule .* https://nocommercialuse.org/ [L]

32
src/_data/robots.js Normal file
View File

@ -0,0 +1,32 @@
// by Robb Knight: https://rknight.me/blog/blocking-bots-with-nginx/
import EleventyFetch from "@11ty/eleventy-fetch";
/**
 * Eleventy global data: builds the "bad bots" block lists from the
 * community-maintained ai.robots.txt list.
 *
 * The remote robots.txt is fetched through EleventyFetch as text with a
 * cache duration of "1w". Applebot is deliberately left unblocked, so its
 * `User-agent` line is removed from the list.
 *
 * @returns {Promise<{txt: string, htaccess: string}>}
 *   `txt` is the fetched robots.txt body without the Applebot line, and
 *   `htaccess` is a `|`-separated regex alternation of the remaining bot
 *   names, escaped so it can be interpolated into an Apache `RewriteCond`
 *   pattern.
 * @throws {Error} If the fetched list contains no user agents; an empty
 *   alternation would leave the rewrite condition without a usable pattern.
 */
export default async function robots() {
  const url =
    "https://raw.githubusercontent.com/ai-robots-txt/ai.robots.txt/main/robots.txt";
  const allowedBot = "Applebot";

  const body = await EleventyFetch(url, {
    duration: "1w",
    type: "text",
  });

  // Accept both LF and CRLF line endings, and drop the allowed bot once so
  // that both outputs are derived from the same filtered line list.
  const lines = body
    .split(/\r?\n/)
    .filter((line) => parseUserAgent(line) !== allowedBot);

  // Skip non-"User-agent" lines and empty names: an empty alternative in the
  // regex would match every user agent.
  const bots = lines
    .map(parseUserAgent)
    .filter((name) => name !== null && name !== "");

  if (bots.length === 0) {
    throw new Error(`No user agents found in bot list fetched from ${url}`);
  }

  return {
    txt: lines.join("\n"),
    htaccess: bots.map(escapeForRewriteCond).join("|"),
  };
}

/**
 * Extracts the agent name from a robots.txt `User-agent:` line.
 *
 * @param {string} line - A single line of robots.txt.
 * @returns {string|null} The trimmed agent name, or `null` when the line is
 *   not a `User-agent` record.
 */
function parseUserAgent(line) {
  // Capture everything after the first colon so names containing ":" survive.
  const match = /^user-agent:(.*)$/i.exec(line.trim());
  return match === null ? null : match[1].trim();
}

/**
 * Escapes a bot name for literal use inside a RewriteCond regex.
 *
 * @param {string} name - Raw user agent name.
 * @returns {string} The name with regex metacharacters backslash-escaped and
 *   whitespace replaced by `\s`, because unescaped whitespace would split the
 *   directive's arguments.
 */
function escapeForRewriteCond(name) {
  return name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&").replace(/\s/g, "\\s");
}

View File

@ -1,43 +1,10 @@
---
permalink: /robots.txt
permalink: robots.txt
eleventyExcludeFromCollections: true
---
Sitemap: {{ sitemeta.siteUrl }}/sitemap/
User-agent: *
Disallow:
User-agent: Amazonbot
User-agent: Applebot
User-agent: Applebot-Extended
User-agent: Bytespider
User-agent: CCBot
User-agent: ChatGPT-User
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: Diffbot
User-agent: FacebookBot
User-agent: FriendlyCrawler
User-agent: GPTBot
User-agent: Google-Extended
User-agent: GoogleOther
User-agent: GoogleOther-Image
User-agent: GoogleOther-Video
User-agent: ICC-Crawler
User-agent: ImagesiftBot
User-agent: Meta-ExternalAgent
User-agent: Meta-ExternalFetcher
User-agent: OAI-SearchBot
User-agent: PerplexityBot
User-agent: PetalBot
User-agent: Scrapy
User-agent: Timpibot
User-agent: VelenPublicWebCrawler
User-agent: YouBot
User-agent: anthropic-ai
User-agent: cohere-ai
User-agent: facebookexternalhit
User-agent: img2dataset
User-agent: omgili
User-agent: omgilibot
Disallow: /
{{ robots.txt }}
Sitemap: {{ sitemeta.siteUrl }}/sitemap/

View File

@ -1,7 +1,7 @@
---
title: Colophon
keyword: colophon page
updated: 2024-08-02
updated: 2024-08-20T09:55:39+0800
toc: true
eleventyNavigation:
order: 14
@ -14,6 +14,7 @@ Information about how this website is built.
* Templating languages: Nunjucks and Markdown
* Code written with Visual Studio Code
* JavaScript runtime: [Bun](https://bun.sh)
* Tested browsers: Mozilla Firefox and Lynx
* Source code available on [GitHub](https://github.com/helenclx/leilukin-site) and its [Gitea mirror](https://git.32bit.cafe/Leilukin/leilukin-site)
## Host and Domain
@ -26,6 +27,7 @@ Previously, this website was hosted on [Neocities](https://neocities.org/) until
* Eleventy's official [RSS](https://www.11ty.dev/docs/plugins/rss/) plugin
* Eleventy's official [Navigation](https://www.11ty.dev/docs/plugins/navigation/) plugin
* Eleventy's official [Syntax Highlighting](https://www.11ty.dev/docs/plugins/syntaxhighlight/) plugin
* Eleventy's official [Fetch](https://www.11ty.dev/docs/plugins/fetch/) plugin
* [@zachleat/details-utils](https://www.npmjs.com/package/@zachleat/details-utils)
### Eleventy Community Plugins