Last active
March 26, 2025 12:28
-
-
Save huksley/630a079c395fd7b44443ca84cb2d8deb to your computer and use it in GitHub Desktop.
Divert AI bots in nginx: instead of blocking them outright, serve a placeholder page about your website / SaaS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Classify each request by its User-Agent header.
# $blocked_ua is 1 when the header matches any pattern below, otherwise 0.
# The "~*" prefix makes a pattern a case-insensitive regular expression; the
# patterns are unanchored, so they match anywhere inside the header value.
map $http_user_agent $blocked_ua {
    # No pattern matched: treat the client as a regular visitor.
    default                               0;

    "~*AI2Bot"                            1;
    "~*Ai2Bot-Dolma"                      1;
    "~*Amazonbot"                         1;
    "~*anthropic-ai"                      1;
    "~*Applebot"                          1;
    "~*Applebot-Extended"                 1;
    "~*Brightbot"                         1;
    "~*Bytespider"                        1;
    "~*CCBot"                             1;
    "~*ChatGPT-User"                      1;
    "~*Claude-Web"                        1;
    "~*ClaudeBot"                         1;
    "~*cohere-ai"                         1;
    "~*cohere-training-data-crawler"      1;
    "~*Crawlspace"                        1;
    "~*Diffbot"                           1;
    "~*DuckAssistBot"                     1;
    "~*FacebookBot"                       1;
    "~*FriendlyCrawler"                   1;
    "~*Google-Extended"                   1;
    "~*GoogleOther"                       1;
    "~*GoogleOther-Image"                 1;
    "~*GoogleOther-Video"                 1;
    "~*GPTBot"                            1;
    "~*iaskspider"                        1;
    "~*ICC-Crawler"                       1;
    "~*ImagesiftBot"                      1;
    "~*img2dataset"                       1;
    "~*ISSCyberRiskCrawler"               1;
    "~*Kangaroo"                          1;
    "~*Meta-ExternalAgent"                1;
    "~*Meta-ExternalFetcher"              1;
    "~*OAI-SearchBot"                     1;
    "~*omgili"                            1;
    "~*omgilibot"                         1;
    "~*PanguBot"                          1;
    "~*PerplexityBot"                     1;
    "~*PetalBot"                          1;
    "~*Scrapy"                            1;
    "~*SemrushBot-OCOB"                   1;
    "~*SemrushBot-SWA"                    1;
    # Quoted because the pattern contains spaces.
    "~*Sidetrade indexer bot"             1;
    "~*Timpibot"                          1;
    "~*VelenPublicWebCrawler"             1;
    "~*Webzio-Extended"                   1;
    "~*YouBot"                            1;
}
# Virtual host that shows flagged user agents a static placeholder page
# instead of the real site. Relies on $blocked_ua, which the map on
# $http_user_agent sets to 1 for listed bots and 0 for everyone else.
server {
    listen 80;
    server_name example.com;

    # Catch-all location: flagged clients are diverted, others fall through
    # to normal processing of this location.
    location / {
        if ($blocked_ua = 1) {
            # try_files is only valid in server/location context, so it cannot
            # be used inside "if". rewrite is allowed here; "last" restarts
            # location matching with the new URI, and a rewritten request
            # counts as internal, so the "internal" location below accepts it.
            rewrite ^ /blocked.html last;
        }
    }

    # Placeholder page served to flagged clients.
    location = /blocked.html {
        # Ensure this path points to the directory holding blocked.html.
        root /var/www/html/;

        # Reachable only through the internal rewrite above; a direct
        # external request for /blocked.html gets 404.
        internal;

        # Serve the file when it exists; otherwise fall back to 403.
        try_files $uri =403;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
See also the full list of AI bots at https://github.com/ai-robots-txt/ai.robots.txt