User-agent: AI2Bot
Disallow: /

User-agent: Ai2Bot-Dolma
Disallow: /

User-agent: aiHitBot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

# A version suffix is not part of a robots.txt product token; the bare name is
# listed as well so crawlers that match on the token alone are still covered.
User-agent: Brightbot 1.0
User-agent: Brightbot
Disallow: /

User-agent: ChatGPT-User
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: cohere-training-data-crawler
Disallow: /

User-agent: Cotoyogi
Disallow: /

User-agent: Crawlspace
Disallow: /

User-agent: DuckAssistBot
Disallow: /

User-agent: Factset_spyderbot
Disallow: /

User-agent: FirecrawlAgent
Disallow: /

User-agent: FriendlyCrawler
Disallow: /

User-agent: GoogleOther
Disallow: /

User-agent: GoogleOther-Image
Disallow: /

User-agent: GoogleOther-Video
Disallow: /

# A "/version" suffix is not part of a robots.txt product token; the bare name
# is listed as well so crawlers that match on the token alone are still covered.
User-agent: iaskspider/2.0
User-agent: iaskspider
Disallow: /

User-agent: ICC-Crawler
Disallow: /

User-agent: img2dataset
Disallow: /

User-agent: imgproxy
Disallow: /

User-agent: ISSCyberRiskCrawler
Disallow: /

User-agent: Kangaroo Bot
Disallow: /

User-agent: meta-externalagent
Disallow: /

User-agent: Meta-ExternalAgent
Disallow: /

User-agent: meta-externalfetcher
Disallow: /

User-agent: Meta-ExternalFetcher
Disallow: /

User-agent: NovaAct
Disallow: /

User-agent: OAI-SearchBot
Disallow: /

User-agent: Operator
Disallow: /

User-agent: PanguBot
Disallow: /

User-agent: Perplexity-User
Disallow: /

User-agent: PetalBot
Disallow: /

User-agent: Scrapy
Disallow: /

User-agent: SemrushBot-OCOB
Disallow: /

User-agent: SemrushBot-SWA
Disallow: /

User-agent: Sidetrade indexer bot
Disallow: /

User-agent: TikTokSpider
Disallow: /

User-agent: Timpibot
Disallow: /

User-agent: VelenPublicWebCrawler
Disallow: /

User-agent: Webzio-Extended
Disallow: /

# Amazon Bot - enabling Alexa to answer even more questions for customers.
User-agent: Amazonbot
Disallow: /

# Anthropic AI Bot
User-agent: anthropic-ai
Disallow: /

# Apple Bot - collects website data for its Siri and Spotlight services.
User-agent: Applebot
Disallow: /

# Claude Bot run by Anthropic
User-agent: Claude-Web
Disallow: /

# Cohere AI Bot - unconfirmed bot believed to be associated with Cohere's chatbot.
User-agent: cohere-ai
Disallow: /

# Common Crawl's bot - Common Crawl is one of the largest public datasets used to train ChatGPT, Bard, and other large language models.
User-agent: CCBot
Disallow: /

# Diffbot - somewhat dishonest scraping bot used to collect data to train LLMs.
User-agent: Diffbot
Disallow: /

# Google-Extended - controls use of content for Google Gemini (formerly Bard) and Vertex AI. Blocking it does not affect Google Search indexing or Googlebot crawling.
User-agent: Google-Extended
Disallow: /

# ImagesiftBot is billed as a reverse image search tool, but it's associated with The Hive, a company that produces models for image generation.
User-agent: ImagesiftBot
Disallow: /

# YouBot - crawler operated by You.com for its AI search assistant
User-agent: YouBot
Disallow: /

# OMGilibot - They sell data for training LLMs (large language models)
User-agent: omgilibot
Disallow: /

# Omgili (Oh My God I Love It)
User-agent: omgili
Disallow: /

# OpenAI API - bot that OpenAI specifically uses to collect bulk training data from your website for ChatGPT.
User-agent: GPTBot
Disallow: /

# Perplexity AI
User-agent: PerplexityBot
Disallow: /

## Social Media Bots

# Bytespider is a web crawler operated by ByteDance, the Chinese owner of TikTok
User-agent: Bytespider
Disallow: /

# Meta's bot that crawls public web pages to improve language models for their speech recognition technology
User-agent: FacebookBot
Disallow: /

# Twitter's bot used to index the content of any given URL
User-agent: Twitterbot
Disallow: /