Skip to content

Instantly share code, notes, and snippets.

@stanwu
Created April 7, 2026 03:44
Show Gist options
  • Select an option

  • Save stanwu/6b30392ec7412b0ee1496c28104a701e to your computer and use it in GitHub Desktop.

Select an option

Save stanwu/6b30392ec7412b0ee1496c28104a701e to your computer and use it in GitHub Desktop.
Check whether Cloudflare appears to block specific user agents.
#!/usr/bin/env python3
import argparse
import html
import re
import sys
import urllib.error
import urllib.parse
import urllib.request
# Agent profiles: profile name -> User-Agent header value sent for it.
# The profile names double as the accepted values of the --agent option.
USER_AGENTS = {
    "browser_chrome": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"
    ),
    "googlebot": (
        "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
    ),
    "gptbot": "GPTBot/1.2 (+https://openai.com/gptbot)",
    "claudebot": "ClaudeBot/1.0 (+https://www.anthropic.com/claudebot)",
    "perplexitybot": (
        "PerplexityBot/1.0 (+https://www.perplexity.ai/perplexitybot)"
    ),
    "curl": "curl/8.7.1",
}

# Response headers echoed in the per-agent report when they are present,
# printed in exactly this order.
CF_HEADERS = [
    "server",
    "cf-ray",
    "cf-cache-status",
    "cf-mitigated",
    "location",
    "content-type",
]
def looks_like_cloudflare_block(status, body, headers):
    """Return True when a response matches any of this script's block signals.

    Args:
        status: HTTP status code of the response, or None if there was none.
        body: Decoded response body text.
        headers: Mapping-like object with a ``get`` method (response headers).

    Returns:
        bool: True if the status is 403/429/503, a ``cf-mitigated`` header is
        set, the body contains one of the known challenge/block markers, or
        the ``server`` header mentions Cloudflare while the body contains
        "attention required".  All text comparisons are case-insensitive.
    """
    server_name = (headers.get("server") or "").lower()
    lowered_body = body.lower()

    # These status codes count as a suspected block on their own.
    if status in (403, 429, 503):
        return True

    # Any non-empty cf-mitigated header counts as well.
    if headers.get("cf-mitigated"):
        return True

    # Body fragments this script treats as challenge / block page markers.
    body_markers = (
        "cf-chl",
        "challenge-platform",
        "sorry, you have been blocked",
    )
    for marker in body_markers:
        if marker in lowered_body:
            return True

    # The "attention required" text only counts when the server header
    # also names Cloudflare.
    return "cloudflare" in server_name and "attention required" in lowered_body
def title_from_html(body):
    """Return the text of the first ``<title>`` element in *body*.

    The match is case-insensitive and may span several lines.  HTML character
    references (``&amp;``, ``&#39;``, ``&nbsp;`` ...) are decoded, runs of
    whitespace are collapsed to a single space, and the result is stripped.

    Args:
        body: HTML text to search.

    Returns:
        str: The cleaned-up title, or ``""`` when no ``<title>`` element with
        a closing tag is found.
    """
    match = re.search(r"<title[^>]*>(.*?)</title>", body, re.I | re.S)
    if not match:
        return ""
    # Decode entities before collapsing whitespace so that references which
    # decode to whitespace (e.g. &nbsp;) are normalised as well.
    text = html.unescape(match.group(1))
    return re.sub(r"\s+", " ", text).strip()
def _decode_body(raw, headers):
    """Decode *raw* bytes using the charset declared in *headers*, if any.

    Falls back to UTF-8 when no charset is declared or when Python does not
    know the declared codec.  Undecodable bytes are replaced, never raised.
    """
    get_charset = getattr(headers, "get_content_charset", None)
    charset = (get_charset() if callable(get_charset) else None) or "utf-8"
    try:
        return raw.decode(charset, errors="replace")
    except LookupError:
        # The response advertised a codec name Python does not recognise.
        return raw.decode("utf-8", errors="replace")


def fetch(url, user_agent, timeout, max_bytes=32768):
    """Request *url* with the given User-Agent and summarise the response.

    Redirects are followed; ``final_url`` reports where the request ended up.
    Failures while opening or reading are reported in the returned dict
    rather than raised.  Only building the ``Request`` object happens outside
    the ``try``, so an exception from that step propagates.

    Args:
        url: URL to request.
        user_agent: Value sent in the ``User-Agent`` header.
        timeout: Timeout in seconds passed to the opener.
        max_bytes: Upper bound on the number of body bytes read.

    Returns:
        dict with the keys:
            ``ok`` (bool): True only when the request completed without an
                HTTP error status or other exception.
            ``status``: Status code, or None when no response was received.
            ``final_url`` (str): URL of the final response, ``""`` on failure.
            ``headers``: Response headers (object with ``get``), ``{}`` when
                unavailable.
            ``body`` (str): Decoded body, at most *max_bytes* bytes of it.
            ``error`` (str): ``""`` on success, otherwise a short description.
    """
    request = urllib.request.Request(
        url,
        headers={
            "User-Agent": user_agent,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.9",
        },
    )
    opener = urllib.request.build_opener(urllib.request.HTTPRedirectHandler())
    try:
        with opener.open(request, timeout=timeout) as response:
            body = _decode_body(response.read(max_bytes), response.headers)
            return {
                "ok": True,
                "status": response.status,
                "final_url": response.geturl(),
                "headers": response.headers,
                "body": body,
                "error": "",
            }
    except urllib.error.HTTPError as exc:
        # An exception raised inside this handler would NOT be caught by the
        # generic handler below, so guard the body read separately.  The
        # status and headers are still worth reporting if the read fails.
        try:
            with exc:  # closes the underlying response when done
                body = _decode_body(exc.read(max_bytes), exc.headers)
        except Exception:  # noqa: BLE001
            body = ""
        return {
            "ok": False,
            "status": exc.code,
            "final_url": exc.geturl(),
            "headers": exc.headers if exc.headers is not None else {},
            "body": body,
            "error": f"HTTPError {exc.code}",
        }
    except Exception as exc:  # noqa: BLE001
        # Connection, DNS, TLS, timeout, ... -- report instead of crashing so
        # the remaining agent profiles can still be tested.
        return {
            "ok": False,
            "status": None,
            "final_url": "",
            "headers": {},
            "body": "",
            "error": repr(exc),
        }
def print_result(name, result):
    """Print a human-readable report for one agent profile to stdout.

    Args:
        name: Label printed in the ``=== name ===`` banner.
        result: Dict with the keys ``status``, ``final_url``, ``headers``,
            ``body`` and ``error``, as produced by ``fetch``.

    The report always contains the status, final URL and block verdict.  The
    page title, error text, any headers listed in ``CF_HEADERS`` and a body
    snippet (first 240 characters, whitespace collapsed) are added only when
    non-empty.  A blank line terminates the report.
    """
    response_headers = result["headers"]
    body_text = result["body"]
    page_title = title_from_html(body_text)
    suspected = looks_like_cloudflare_block(
        result["status"], body_text, response_headers
    )

    report = [
        f"=== {name} ===",
        f"status: {result['status']}",
        f"final_url: {result['final_url'] or '-'}",
        f"cloudflare_block_suspected: {'yes' if suspected else 'no'}",
    ]
    if page_title:
        report.append(f"title: {page_title}")
    if result["error"]:
        report.append(f"error: {result['error']}")

    for header in CF_HEADERS:
        value = response_headers.get(header)
        if value:
            report.append(f"{header}: {value}")

    if body_text:
        # Truncate first, then collapse whitespace, so the snippet is built
        # from at most 240 characters of the raw body.
        snippet = re.sub(r"\s+", " ", body_text[:240]).strip()
        report.append(f"body_snippet: {snippet}")

    # Trailing empty entry yields the blank separator line between reports.
    report.append("")
    for line in report:
        print(line)
def main():
    """Parse command-line options and report on each selected agent profile.

    Options:
        --url      (required) URL to test; must use the http or https scheme.
        --agent    One profile name from ``USER_AGENTS`` or ``all`` (default).
        --timeout  Per-request timeout in seconds (default 15.0).

    Raises:
        SystemExit: With status 2 when the URL scheme is not http/https
            (argparse also exits on invalid arguments).
    """
    cli = argparse.ArgumentParser(
        description="Check whether Cloudflare appears to block specific user agents."
    )
    cli.add_argument("--url", required=True, help="URL to test")
    cli.add_argument(
        "--agent",
        choices=["all", *USER_AGENTS],
        default="all",
        help="Agent profile to test",
    )
    cli.add_argument(
        "--timeout",
        type=float,
        default=15.0,
        help="Request timeout in seconds",
    )
    options = cli.parse_args()

    target = options.url
    scheme = urllib.parse.urlparse(target).scheme
    if scheme not in ("http", "https"):
        print("error: --url must start with http:// or https://", file=sys.stderr)
        raise SystemExit(2)

    # Either every known profile, or just the single one that was requested.
    if options.agent == "all":
        selected = USER_AGENTS.items()
    else:
        selected = [(options.agent, USER_AGENTS[options.agent])]

    for label, ua_string in selected:
        outcome = fetch(target, ua_string, options.timeout)
        print_result(label, outcome)
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment