leilukin-site/src/_data/robots.js

// User agents of AI crawlers, scrapers, and AI assistants to block.
const blockedUserAgents = [
"AcademicBotRTU",
"AI2Bot",
"Ai2Bot-Dolma",
"aiHitBot",
"Amazonbot",
"anthropic-ai",
"Applebot-Extended",
"BLEXBot",
"BrandVerity/1.0",
"Brightbot 1.0",
"Bytespider",
"ChatGPT-User",
"CheckMarkNetwork/1.0",
"Claude-Web",
"ClaudeBot",
"cohere-ai",
"cohere-training-data-crawler",
"Cotoyogi",
"Crawlspace",
"Diffbot",
"DuckAssistBot",
"FacebookBot",
"Factset_spyderbot",
"FirecrawlAgent",
"FriendlyCrawler",
"Google-Extended",
"GoogleOther",
"GoogleOther-Image",
"GoogleOther-Video",
"GPTBot",
"iaskspider/2.0",
"ICC-Crawler",
"ImagesiftBot",
"img2dataset",
"imgproxy",
"ISSCyberRiskCrawler",
"Kangaroo Bot",
"meta-externalagent",
"Meta-ExternalAgent",
"meta-externalfetcher",
"Meta-ExternalFetcher",
"NovaAct",
"OAI-SearchBot",
"omgili",
"omgilibot",
"Operator",
"PanguBot",
"Perplexity-User",
"PerplexityBot",
"PetalBot",
"Scrapy",
"SemrushBot-OCOB",
"SemrushBot-SWA",
"Sidetrade indexer bot",
"SlySearch",
"TikTokSpider",
"Timpibot",
"TurnitinBot",
"VelenPublicWebCrawler",
"Webzio-Extended",
"YouBot",
];
// robots.txt fragment: one "User-agent:" line per blocked bot.
const txt = blockedUserAgents.map((bot) => `User-agent: ${bot}`).join("\n");
// .htaccess fragment: a regex alternation of the same names, with literal
// dots escaped and whitespace widened to ".*" ("Kangaroo Bot" etc.).
const htaccess = blockedUserAgents
  .join("|")
  .replace(/\./g, "\\.")
  .replace(/\s/g, ".*");
export default {
  txt,
  htaccess,
};
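
For context on how the `txt` value might be consumed: the sketch below is a hypothetical Eleventy template, not part of the file above (it assumes Eleventy 3 with ESM, and the path `src/robots.11ty.js` is invented). Eleventy exposes `src/_data/robots.js` as global data under `robots`, and since `txt` holds only the `User-agent:` lines, the template appends the shared `Disallow: /` rule that closes the group.

// src/robots.11ty.js — hypothetical template, not taken from the repo.
export default class {
  data() {
    // Emit the rendered string as /robots.txt rather than an HTML page.
    return { permalink: "/robots.txt" };
  }
  render(data) {
    // `data.robots` is the default export of src/_data/robots.js.
    // One "Disallow: /" applies to the whole group of User-agent lines.
    return `${data.robots.txt}\nDisallow: /\n`;
  }
}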
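
The `htaccess` value is shaped for an Apache `RewriteCond` pattern. Whether this site actually writes its `.htaccess` through Eleventy is an assumption; the hypothetical template below only illustrates the kind of rule the alternation slots into, denying matching user agents with a 403.

// src/htaccess.11ty.js — hypothetical; mirrors the robots.txt template.
export default class {
  data() {
    return { permalink: "/.htaccess" };
  }
  render(data) {
    // [NC] makes the user-agent match case-insensitive; [F] returns 403.
    return [
      "RewriteEngine On",
      `RewriteCond %{HTTP_USER_AGENT} (${data.robots.htaccess}) [NC]`,
      "RewriteRule ^ - [F]",
    ].join("\n");
  }
}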