add ai.txt and robots.txt created by starbreaker
This commit is contained in:
parent
b5f6248be6
commit
e067b9cd74
82
public/ai.txt
Normal file
82
public/ai.txt
Normal file
@@ -0,0 +1,82 @@
|
||||
# thank you to starbreaker for making his robots.txt
|
||||
# and ai.txt files available for others to use
|
||||
|
||||
# Want my work for your dataset? Fuck you, pay me.
|
||||
|
||||
User-Agent: *
|
||||
|
||||
# Text Permissions
|
||||
Disallow: *.txt
|
||||
Disallow: *.md
|
||||
Disallow: *.markdown
|
||||
Disallow: *.org
|
||||
Disallow: *.rst
|
||||
Disallow: *.pdf
|
||||
Disallow: *.doc
|
||||
Disallow: *.docx
|
||||
Disallow: *.odt
|
||||
Disallow: *.rtf
|
||||
Disallow: *.tex
|
||||
Disallow: *.wks
|
||||
Disallow: *.wpd
|
||||
Disallow: *.wps
|
||||
Disallow: *.htm
|
||||
Disallow: *.html
|
||||
Disallow: *.shtml
|
||||
Disallow: *.xml
|
||||
Disallow: *.rss
|
||||
|
||||
# Images Permissions
|
||||
Disallow: *.bmp
|
||||
Disallow: *.gif
|
||||
Disallow: *.ico
|
||||
Disallow: *.jpeg
|
||||
Disallow: *.jpg
|
||||
Disallow: *.png
|
||||
Disallow: *.svg
|
||||
Disallow: *.tif
|
||||
Disallow: *.tiff
|
||||
Disallow: *.webp
|
||||
|
||||
# Audio Permissions
|
||||
Disallow: *.aac
|
||||
Disallow: *.aiff
|
||||
Disallow: *.amr
|
||||
Disallow: *.flac
|
||||
Disallow: *.m4a
|
||||
Disallow: *.mp3
|
||||
Disallow: *.oga
|
||||
Disallow: *.opus
|
||||
Disallow: *.wav
|
||||
Disallow: *.wma
|
||||
|
||||
# Video Permissions
|
||||
Disallow: *.mp4
|
||||
Disallow: *.webm
|
||||
Disallow: *.ogg
|
||||
Disallow: *.avi
|
||||
Disallow: *.mov
|
||||
Disallow: *.wmv
|
||||
Disallow: *.flv
|
||||
Disallow: *.mkv
|
||||
|
||||
# Code Permissions
|
||||
Disallow: *.py
|
||||
Disallow: *.js
|
||||
Disallow: *.java
|
||||
Disallow: *.c
|
||||
Disallow: *.cpp
|
||||
Disallow: *.cs
|
||||
Disallow: *.h
|
||||
Disallow: *.css
|
||||
Disallow: *.php
|
||||
Disallow: *.swift
|
||||
Disallow: *.go
|
||||
Disallow: *.rb
|
||||
Disallow: *.pl
|
||||
Disallow: *.sh
|
||||
Disallow: *.sql
|
||||
Disallow: *.xsl
|
||||
|
||||
# Catch-all: disallow every path
|
||||
Disallow: /
|
59
public/robots.txt
Normal file
59
public/robots.txt
Normal file
@@ -0,0 +1,59 @@
|
||||
# thank you to starbreaker for making his robots.txt
|
||||
# and ai.txt files available for others to use
|
||||
|
||||
# Operating an AI crawler?
|
||||
# Kill yourself.
|
||||
# Do not pass Go.
|
||||
# Do not collect $200.
|
||||
# Go directly to Hell.
|
||||
|
||||
User-agent: AI2Bot
|
||||
User-agent: Ai2Bot-Dolma
|
||||
User-agent: Amazonbot
|
||||
User-agent: anthropic-ai
|
||||
User-agent: Applebot
|
||||
User-agent: Applebot-Extended
|
||||
User-agent: Bytespider
|
||||
User-agent: CCBot
|
||||
User-agent: ChatGPT-User
|
||||
User-agent: Claude-Web
|
||||
User-agent: ClaudeBot
|
||||
User-agent: cohere-ai
|
||||
User-agent: Diffbot
|
||||
User-agent: DuckAssistBot
|
||||
User-agent: FacebookBot
|
||||
User-agent: FriendlyCrawler
|
||||
User-agent: Google-Extended
|
||||
User-agent: GoogleOther
|
||||
User-agent: GoogleOther-Image
|
||||
User-agent: GoogleOther-Video
|
||||
User-agent: GPTBot
|
||||
User-agent: iaskspider/2.0
|
||||
User-agent: ICC-Crawler
|
||||
User-agent: ImagesiftBot
|
||||
User-agent: img2dataset
|
||||
User-agent: ISSCyberRiskCrawler
|
||||
User-agent: Kangaroo Bot
|
||||
User-agent: Meta-ExternalAgent
|
||||
User-agent: Meta-ExternalFetcher
|
||||
User-agent: OAI-SearchBot
|
||||
User-agent: omgili
|
||||
User-agent: omgilibot
|
||||
User-agent: PanguBot
|
||||
User-agent: PerplexityBot
|
||||
User-agent: PetalBot
|
||||
User-agent: Scrapy
|
||||
User-agent: Sidetrade indexer bot
|
||||
User-agent: Timpibot
|
||||
User-agent: VelenPublicWebCrawler
|
||||
User-agent: Webzio-Extended
|
||||
User-agent: YouBot
|
||||
Disallow: /
|
||||
|
||||
# Elon Musk is a fascist and a white supremacist, and Twitter is his Nazi bar.
|
||||
User-agent: Twitterbot
|
||||
Disallow: /
|
||||
|
||||
User-agent: *
|
||||
Disallow:
|
||||
Crawl-delay: 10
|
Loading…
x
Reference in New Issue
Block a user