# As a condition of accessing this website, you agree to abide by the following
# content signals:

# (a)  If a content-signal = yes, you may collect content for the corresponding
#      use.
# (b)  If a content-signal = no, you may not collect content for the
#      corresponding use.
# (c)  If the website operator does not include a content signal for a
#      corresponding use, the website operator neither grants nor restricts
#      permission via content signal with respect to the corresponding use.

# The content signals and their meanings are:

# search:   building a search index and providing search results (e.g., returning
#           hyperlinks and short excerpts from your website's contents). Search does not
#           include providing AI-generated search summaries.
# ai-input: inputting content into one or more AI models (e.g., retrieval
#           augmented generation, grounding, or other real-time taking of content for
#           generative AI search answers).
# ai-train: training or fine-tuning AI models.

# ANY RESTRICTIONS EXPRESSED VIA CONTENT SIGNALS ARE EXPRESS RESERVATIONS OF
# RIGHTS UNDER ARTICLE 4 OF THE EUROPEAN UNION DIRECTIVE 2019/790 ON COPYRIGHT
# AND RELATED RIGHTS IN THE DIGITAL SINGLE MARKET.

# BEGIN Cloudflare Managed content

# Default group: applies to crawlers that match no more specific User-agent
# group in this file. It signals search=yes and ai-train=no; no ai-input signal
# is given, so that use is neither granted nor restricted (see (c) above).
User-Agent: *
Content-signal: search=yes,ai-train=no
Allow: /

# Each crawler listed below is denied the entire site ("Disallow: /").
User-agent: Amazonbot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: Google-Extended
Disallow: /

# NOTE(review): this file declares a second GPTBot group further down with
# "Allow: /". The two groups conflict -- confirm which outcome is intended.
User-agent: GPTBot
Disallow: /

User-agent: meta-externalagent
Disallow: /

# END Cloudflare Managed Content

# 1. ALLOW Major AI/Search Crawlers for Content Discovery
# A crawler that matches one of the named groups below obeys ONLY that group
# and ignores every "User-agent: *" group. Rules that must also bind these
# crawlers (the /admin/ block) are therefore repeated inside each group; the
# longer "Disallow: /admin/" match takes precedence over "Allow: /".

# Google/Gemini (Primary Search/AI)
User-agent: Googlebot
Allow: /
Disallow: /admin/

# Google Images
User-agent: Googlebot-Image
Allow: /
Disallow: /admin/

# GPTBot (OpenAI / ChatGPT Training Data)
# NOTE(review): the Cloudflare-managed section earlier in this file also
# declares a GPTBot group with "Disallow: /". Parsers that merge same-agent
# groups (RFC 9309) resolve the equal-length Allow/Disallow tie in favour of
# Allow; parsers that stop at the first matching group treat GPTBot as blocked.
# Confirm the intended outcome and remove whichever declaration is wrong.
User-agent: GPTBot
Allow: /
Disallow: /admin/

# OAI-SearchBot (OpenAI / ChatGPT Real-Time Retrieval/Citations)
User-agent: OAI-SearchBot
Allow: /
Disallow: /admin/

# Bing/Copilot (Bing Search/AI)
User-agent: Bingbot
Allow: /
Disallow: /admin/

# 2. GLOBAL RULES (Applies to all other bots, including aggressive scrapers)
# The group is kept contiguous (no blank lines after the User-agent line):
# parsers that follow the original blank-line-delimited record convention end
# a group at the first blank line and would otherwise drop every rule below.
# Only crawlers that honor robots.txt are affected by anything in this group.
User-agent: *
# Block access to the admin area.
Disallow: /admin/
# Keep crawlers out of the image directory to save bandwidth.
# NOTE(review): confirm /asset/images/ (singular "asset") is the site's actual
# image path, e.g. not /assets/images/ or /images/.
Disallow: /asset/images/
# Optional: block common downloadable file types. "*" matches any run of
# characters and "$" anchors the end of the URL, so the .pdf rule matches only
# URLs that end in ".pdf", while the un-anchored rules match ".zip", ".rar",
# ".doc" and ".xls" anywhere in the path (which also covers .docx and .xlsx).
Disallow: /*.zip
Disallow: /*.rar
Disallow: /*.pdf$
Disallow: /*.doc
Disallow: /*.xls
# 3. CRAWL DELAY
# Crawl-delay is a non-standard directive (not part of RFC 9309) and Googlebot
# does not support it. Crawlers that have their own named group in this file
# never read this group at all, so the delay only reaches the remaining
# crawlers that choose to honor it.
Crawl-delay: 10

# 4. SITEMAP
# Absolute URL of the XML sitemap. This directive is not tied to any
# User-agent group and applies to every crawler that reads the file.
Sitemap: https://crowdsq.com/sitemap.xml