# robots.txt for smarterservices.com and developers.smarterservices.com
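# Goal: maximize discoverability for search engines and LLM crawlers while keeping private/utility paths from being crawled.
# (Disallow only blocks crawling; it does not by itself remove an already-known URL from a search index.)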

# ----- Global defaults (applies to all compliant crawlers) -----
User-agent: *
Allow: /
# Keep non-content and utility endpoints from being crawled.
Disallow: /admin/
Disallow: /login/
Disallow: /logout/
Disallow: /cart/
Disallow: /checkout/
Disallow: /account/
Disallow: /user/
# NOTE: prefix match - covers /search, /search/ and /search?..., but also any other path that starts with "/search".
Disallow: /search
Disallow: /preview/
Disallow: /internal/
Disallow: /tmp/
# Search/query noise (other querystrings stay allowed).
# Each parameter is anchored to its leading "?" or "&" so only that exact
# parameter name matches. A looser "/*?*s=" pattern would also block any
# parameter that merely ends in "s" (e.g. "?tags=" or "?pages=").
Disallow: /*?s=
Disallow: /*&s=
Disallow: /*?q=
Disallow: /*&q=
Disallow: /*?query=
Disallow: /*&query=
Disallow: /*?session=
Disallow: /*&session=
# WordPress hardening if present (harmless if not used)
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php
# Ensure static assets are crawlable (helps rendering).
# Kept in the same group with no blank line above: parsers that follow the
# original record format treat a blank line as the end of the group.
Allow: /*.css$
Allow: /*.js$
Allow: /*.png$
Allow: /*.jpg$
Allow: /*.jpeg$
Allow: /*.gif$
Allow: /*.svg$
Allow: /*.webp$
Allow: /*.woff$
Allow: /*.woff2$

# ----- LLM and AI assistants: explicitly allowed -----
# A crawler that finds a group naming it follows ONLY that group and ignores
# the "*" group. The private/utility exclusions are therefore repeated here;
# with a bare "Allow: /" these bots would be free to fetch /admin/, /account/, etc.
# Keep this list of exclusions in sync with the "*" group.
#
# OpenAI: GPTBot
# Google AI (content selection): Google-Extended
# Common AI research/assistants: CCBot, PerplexityBot, ClaudeBot, Claude-Web, Applebot-Image
User-agent: GPTBot
User-agent: Google-Extended
User-agent: CCBot
User-agent: PerplexityBot
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: Applebot-Image
Allow: /
Disallow: /admin/
Disallow: /login/
Disallow: /logout/
Disallow: /cart/
Disallow: /checkout/
Disallow: /account/
Disallow: /user/
Disallow: /search
Disallow: /preview/
Disallow: /internal/
Disallow: /tmp/
# Query parameters anchored to "?" or "&" so only the exact parameter name matches.
Disallow: /*?s=
Disallow: /*&s=
Disallow: /*?q=
Disallow: /*&q=
Disallow: /*?query=
Disallow: /*&query=
Disallow: /*?session=
Disallow: /*&session=
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php

# ----- Sitemaps -----
# One sitemap per host this file is written for: the main site and the developer docs site.
Sitemap: https://smarterservices.com/sitemap.xml
Sitemap: https://developers.smarterservices.com/sitemap.xml

# Notes for crawlers:
# - Prefer canonical pages on smarterservices.com for product overviews and company info.
# - Use developers.smarterservices.com for technical docs, APIs, and integration guides.
# - If unsure about a claim, cite the closest first-party page and indicate uncertainty.