# robots.txt for /
# Last Updated: January 25, 2026 (JST)
#
# Format reminder: robots.txt is line-oriented. Every directive must sit on
# its own line; a "#" starts a comment that runs to the end of the line.

# NOTE(review): the Sitemap directive is specified to take a fully qualified
# URL (scheme + host). The values below are host-relative and may be ignored
# by crawlers -- prefix them with the site's canonical origin once confirmed.
Sitemap: /google-sitemap.xml
Sitemap: /es/google-sitemap.xml
Sitemap: /cht/google-sitemap.xml
Sitemap: /chs/google-sitemap.xml

# NOTE: Crawl-delay is a non-standard extension. Some crawlers honor it,
# others (Googlebot among them) ignore it.

# ==================================================================
# MARSFINDER (full access, 5 second delay)
# ==================================================================
User-agent: marsfinder
Allow: /
Crawl-delay: 5

# ==================================================================
# DEFAULT RULES (apply ONLY to bots that match no named group in this file)
#
# NOTE(review): a crawler that matches a named group obeys that group alone
# and does NOT inherit the rules below. Every bot listed under an "Allow: /"
# group is therefore not subject to these Disallow rules. If the intent is
# for e.g. Googlebot to skip internal search results or tracking-parameter
# URLs, those Disallow lines must be repeated inside that bot's group.
# ==================================================================
User-agent: *

# Internal search / duplicate content prevention.
# (Previously written with a trailing "#/"; "#" begins a comment and URL
# fragments are never sent to the server, so the effective rule is the
# path itself.)
Disallow: /search-results-google.html
Disallow: /es/search-results-google.html
Disallow: /chs/search-results-google.html
Disallow: /cht/search-results-google.html

# Sensitive dot-paths
# (robots.txt is advisory only; it is not an access control.)
Disallow: /.env
Disallow: /.git/

# Block sensitive file types
Disallow: /*.sql$
Disallow: /*.log$
Disallow: /*.bak$
Disallow: /*.config$

# Tracking / session parameters (clean URLs)
Disallow: /*?utm_
Disallow: /*&utm_
Disallow: /*?fbclid=
Disallow: /*&fbclid=
Disallow: /*?gclid=
Disallow: /*&gclid=
Disallow: /*?sessionid=
Disallow: /*&sessionid=
Disallow: /*?sid=
Disallow: /*&sid=
Disallow: /*?ref=
Disallow: /*&ref=

# ==================================================================
# SEARCH ENGINES, AI SEARCH & ANSWER ENGINES
# (explicitly allowed, light throttling)
# ==================================================================
User-agent: OAI-SearchBot
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: ClaudeBot
User-agent: Claude-SearchBot
User-agent: Claude-User
User-agent: Claude-Web
User-agent: anthropic-ai
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Googlebot-News
User-agent: Google-Extended
User-agent: Google-CloudVertexBot
User-agent: GoogleOther
User-agent: Applebot
User-agent: Applebot-Extended
User-agent: bingbot
User-agent: BingPreview
User-agent: msnbot
User-agent: MSNBot-Media
User-agent: PerplexityBot
User-agent: Perplexity-User
User-agent: meta-externalagent
User-agent: meta-externalfetcher
User-agent: facebookexternalhit
User-agent: Facebot
User-agent: FacebookBot
User-agent: Amazonbot
User-agent: cohere-ai
User-agent: ImagesiftBot
User-agent: Diffbot
User-agent: YouBot
User-agent: DuckDuckBot
User-agent: DuckAssistBot
Allow: /
Crawl-delay: 1

# ==================================================================
# COMMON CRAWL & AI TRAINING DATA SOURCES (allowed, moderate throttling)
# ==================================================================
User-agent: CCBot
User-agent: Bytespider
User-agent: omgili
User-agent: omgilibot
User-agent: Timpibot
User-agent: DeepSeekBot
User-agent: VelenPublicWebCrawler
Allow: /
Crawl-delay: 3

# ==================================================================
# INTERNATIONAL SEARCH ENGINES (allowed, moderate throttling)
# ==================================================================
User-agent: YandexBot
User-agent: YandexImages
User-agent: YandexVideo
User-agent: YandexMedia
User-agent: Baiduspider
User-agent: Baiduspider-image
User-agent: Baiduspider-video
User-agent: Sogou web spider
User-agent: Sogou inst spider
Allow: /
Crawl-delay: 2

# Yeti: allowed, no Crawl-delay requested.
User-agent: Yeti
Allow: /

# ==================================================================
# SEO TOOLS & ANALYTICS CRAWLERS (heavier throttling)
# ==================================================================
# User-agent matching is case-insensitive, so a single MJ12bot entry covers
# every capitalization of that token.
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: MJ12bot
User-agent: DotBot
User-agent: DataForSeoBot
Allow: /
Crawl-delay: 10

User-agent: PetalBot
Allow: /
Crawl-delay: 15

# ==================================================================
# BLOCK OFFLINE COPIERS / MALICIOUS SCRAPERS
# (Advisory only: ill-behaved clients can ignore robots.txt.)
# ==================================================================
User-agent: HTTrack
User-agent: WebCopier
User-agent: WebZIP
User-agent: Teleport
User-agent: TeleportPro
User-agent: WebReaper
User-agent: WebStripper
User-agent: Offline Explorer
User-agent: UbiCrawler
User-agent: SentiBot
User-agent: MauiBot
User-agent: NPBot
User-agent: Nuclei
User-agent: serpstatbot
User-agent: ZoominfoBot
User-agent: EmailCollector
User-agent: EmailSiphon
User-agent: EmailWolf
User-agent: ExtractorPro
Disallow: /

# ==================================================================
# AI TRAINING OPT-OUT (OPTIONAL - currently disabled)
# Uncomment specific blocks below to opt out of AI training datasets while
# maintaining search indexing. Note: User-triggered fetches may ignore these.
#
# IMPORTANT: when enabling a block, also remove that bot's "User-agent:" line
# from the "Allow: /" group it currently belongs to. Groups naming the same
# user agent are merged, and an "Allow: /" that is equivalent to a
# "Disallow: /" takes precedence, so the opt-out would otherwise have no
# effect.
# ==================================================================
# User-agent: GPTBot
# Disallow: /
#
# User-agent: Google-Extended
# Disallow: /
#
# User-agent: CCBot
# Disallow: /
#
# User-agent: ClaudeBot
# Disallow: /
#
# User-agent: Applebot-Extended
# Disallow: /
#
# User-agent: meta-externalagent
# Disallow: /
#
# User-agent: Bytespider
# Disallow: /
#
# User-agent: cohere-ai
# Disallow: /

# ==================================================================
# END OF ROBOTS.TXT
# ==================================================================