重點
- 要反代如 https://scholar.google.com.hk 等的地區域名,而不是 https://scholar.google.com
- 需將客戶端原始 IP 回傳給 Google,否則會 404
- 不要用 http.cache,以免被當成爬蟲而被封 IP
# Caddy v1 site block: serves scholar.example.com by reverse-proxying the
# Google Scholar regional host scholar.google.com.hk, with the Scholar
# user-content host mounted under /usercontent.
# Uses the `proxy` directive and the http.filter plugin's `filter rule`.
scholar.example.com {
    timeouts 5m

    # Main site. /robots.txt is excluded from proxying, and /usercontent is
    # excluded here because the second proxy block handles it.
    proxy / https://scholar.google.com.hk {
        except /robots.txt /usercontent

        # Forward the connecting client's address and User-Agent upstream.
        header_upstream X-Real-IP {remote}
        header_upstream X-Forwarded-For {remote}
        header_upstream User-Agent {>User-Agent}

        # Alternative for deployments fronted by Cloudflare, where the real
        # client address arrives in the CF-Connecting-IP request header:
        # header_upstream X-Real-IP {>CF-Connecting-IP}
        # header_upstream X-Forwarded-For {>CF-Connecting-IP}

        header_upstream Accept-Language zh-HK

        # Request an uncompressed body; presumably required so the filter
        # rules below can search and replace in the response text.
        header_upstream Accept-Encoding identity

        # NOTE: insecure_skip_verify was removed. It disabled verification of
        # the upstream TLS certificate, which would let an on-path attacker
        # impersonate the upstream without being detected.
    }

    # User-content host, mounted under /usercontent; the prefix is stripped
    # before the request is sent upstream.
    proxy /usercontent https://scholar.googleusercontent.com {
        without /usercontent
        except /robots.txt

        # Forward the connecting client's address and User-Agent upstream.
        header_upstream X-Real-IP {remote}
        header_upstream X-Forwarded-For {remote}
        header_upstream User-Agent {>User-Agent}

        # Alternative for deployments fronted by Cloudflare (see above):
        # header_upstream X-Real-IP {>CF-Connecting-IP}
        # header_upstream X-Forwarded-For {>CF-Connecting-IP}

        header_upstream Accept-Language zh-HK
        header_upstream Accept-Encoding identity

        # NOTE: insecure_skip_verify was removed here as well; upstream TLS
        # certificates are verified.
    }

    # Rewrite absolute links to the Scholar host so they point at this site.
    # The optional (\.hk)? suffix is needed because the proxied host is
    # scholar.google.com.hk: without it only the "scholar.google.com" prefix
    # is replaced, yielding the broken host "scholar.example.com.hk".
    filter rule {
        content_type text/.*
        search_pattern scholar\.google\.com(\.hk)?
        replacement scholar.example.com
    }

    # Rewrite links to the user-content host so they go through /usercontent.
    filter rule {
        content_type text/.*
        search_pattern scholar\.googleusercontent\.com
        replacement scholar.example.com/usercontent
    }

    # Obtain the site certificate using the Cloudflare DNS provider.
    tls {
        dns cloudflare
    }
}