Last active
November 9, 2020 20:36
-
-
Save eksiscloud/78455472479abc50d0b09999ef54d7b1 to your computer and use it in GitHub Desktop.
Nginx: another bad bot banning list
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Maps the request's User-Agent header to $bad_bot: 1 when it matches any
# pattern below, 0 otherwise (the default).
#
# Every pattern uses the "~*" prefix, i.e. a case-insensitive regular
# expression, so one spelling covers all capitalisations of a token.
# "(?:\b)" is a word-boundary assertion; a trailing one after a non-word
# character such as "/" or "-" requires a word character to follow.
# Literal dots are escaped so that e.g. "wp\.com" does not match "wp-com".
# The trailing "bad" / "good" / "malicious" / "done" remarks are the
# original author's own classification notes.
map $http_user_agent $bad_bot {
    default 0;

    # libraries etc.
    "~*(?:\b)aiohttp(?:\b)" 1;
    "~*(?:\b)akka-http/(?:\b)" 1; # - done
    "~*(?:\b)Amazon CloudFront(?:\b)" 1; # - done
    "~*(?:\b)Facebot Twitterbot(?:\b)" 1;
    "~*(?:\b)check_http/(?:\b)" 1;
    "~*(?:\b)curl/(?:\b)" 1;
    "~*(?:\b)Java/(?:\b)" 1;
    "~*(?:\b)libwww-perl(?:\b)" 1;
    "~*(?:\b)okhttp(?:\b)" 1; # - done
    "~*(?:\b)PHP/(?:\b)" 1;
    "~*(?:\b)print\((?:\b)" 1;
    "~*(?:\b)python(?:\b)" 1; # case-insensitive, so this also covers "Python"
    "~*(?:\b)Ruby(?:\b)" 1;
    "~*(?:\b)Wget(?:\b)" 1;
    "~*(?:\b)zgrab/(?:\b)" 1;

    # #
    "~*(?:\b)360Spider(?:\b)" 1; # bad - done
    "~*(?:\b)2345Explorer(?:\b)" 1; # malicious - done

    # A
    "~*(?:\b)Acast (?:\b)" 1; # bad - done
    "~*(?:\b)AdAuth(?:\b)" 1; # bad - done
    "~*(?:\b)adidxbot(?:\b)" 1; # good
    "~*(?:\b)admantx-adform(?:\b)" 1; # bad - done
    "~*(?:\b)AdsTxtCrawler(?:\b)" 1; # bad - done
    "~*(?:\b)AffiliateLabz(?:\b)" 1; # good - done
    "~*(?:\b)AHC(?:\b)" 1; # malicious
    "~*(?:\b)AhrefsBot(?:\b)" 1; # good - done
    "~*(?:\b)Anchorage DMP(?:\b)" 1; # bad - done
    "~*(?:\b)Apache-HttpClient(?:\b)" 1; # malicious - done
    "~*(?:\b)ApiTool(?:\b)" 1;
    "~*(?:\b)AspiegelBot(?:\b)" 1;
    "~*(?:\b)atc/(?:\b)" 1;
    "~*(?:\b)AVSearch(?:\b)" 1;
    "~*(?:\b)axios(?:\b)" 1; # bad

    # B
    "~*(?:\b)Baidu(?:\b)" 1;
    "~*(?:\b)Barkrowler(?:\b)" 1;
    "~*(?:\b)BDCbot(?:\b)" 1;
    "~*(?:\b)bidswitchbot(?:\b)" 1; # bad
    "~*(?:\b)BingPreview(?:\b)" 1;
    "~*(?:\b)Blackboard Safeassign(?:\b)" 1;
    "~*(?:\b)BLEXBot(?:\b)" 1;
    "~*(?:\b)Bloglines(?:\b)" 1;
    "~*(?:\b)BorneoBot(?:\b)" 1;
    "~*(?:\b)botify(?:\b)" 1;
    "~*(?:\b)Buck(?:\b)" 1; # bad
    "~*(?:\b)BuiltWith(?:\b)" 1;

    # C
    "~*(?:\b)CarrierWave(?:\b)" 1;
    "~*(?:\b)CatchBot(?:\b)" 1;
    "~*(?:\b)CATExplorador(?:\b)" 1; # bad
    "~*(?:\b)CCBot(?:\b)" 1; # bad
    "~*(?:\b)Centro(?:\b)" 1;
    "~*(?:\b)CheckMarkNetwork(?:\b)" 1;
    "~*(?:\b)checkout-(?:\b)" 1;
    "~*(?:\b)Clarabot(?:\b)" 1;
    "~*(?:\b)Cliqzbot(?:\b)" 1;
    "~*(?:\b)Cloud mapping experiment(?:\b)" 1;
    "~*(?:\b)CMS Crawler(?:\b)" 1;
    "~*(?:\b)coccocbot(?:\b)" 1; # bad, uses ordinary UA at same time
    "~*(?:\b)crawler4j(?:\b)" 1;

    # D
    "~*(?:\b)datagnionbot(?:\b)" 1;
    "~*(?:\b)Datanyze(?:\b)" 1;
    "~*(?:\b)Dataprovider(?:\b)" 1;
    "~*(?:\b)Daum(?:\b)" 1;
    "~*(?:\b)deepcrawl\.com(?:\b)" 1;
    "~*(?:\b)digincore(?:\b)" 1;
    "~*(?:\b)Directo-Indexer(?:\b)" 1;
    "~*(?:\b)Discordbot(?:\b)" 1;
    "~*(?:\b)DisqusAdstxtCrawler(?:\b)" 1;
    "~*(?:\b)Dispatch(?:\b)" 1; # bad
    "~*(?:\b)DomainStatsBot(?:\b)" 1; # bad
    "~*(?:\b)Domnutch(?:\b)" 1;
    "~*(?:\b)DotBot(?:\b)" 1;
    "~*(?:\b)dproxy(?:\b)" 1;

    # E
    "~*(?:\b)eContext(?:\b)" 1;
    "~*(?:\b)EnigmaBot(?:\b)" 1;
    "~*(?:\b)Entale bot(?:\b)" 1; # bad
    "~*(?:\b)Exabot(?:\b)" 1;
    "~*(?:\b)Ezooms(?:\b)" 1;

    # F
    "~*(?:\b)fr-crawler(?:\b)" 1;
    "~*(?:\b)FYEO(?:\b)" 1;
    "~*(?:\b)fyeo-crawler(?:\b)" 1;

    # G
    "~*(?:\b)gobyus(?:\b)" 1;
    "~*(?:\b)Go-http-client(?:\b)" 1;
    "~*(?:\b)^got (?:\b)" 1;
    # "~*(?:\b)GotSiteMonitor(?:\b)" 1;
    "~*(?:\b)GrapeshotCrawler(?:\b)" 1; # bad

    # H
    "~*(?:\b)hackney(?:\b)" 1;
    "~*(?:\b)Hello(?:\b)" 1;
    "~*(?:\b)htInEdin(?:\b)" 1;
    "~*(?:\b)HTTP Banner Detection(?:\b)" 1;
    "~*(?:\b)hubspot(?:\b)" 1;

    # I
    "~*(?:\b)IAB ATQ(?:\b)" 1;
    "~*(?:\b)IAS crawler(?:\b)" 1; # good
    "~*(?:\b)ias-(?:\b)" 1;
    "~*(?:\b)import\.io(?:\b)" 1;
    "~*(?:\b)InfoSeek(?:\b)" 1;
    "~*(?:\b)Incutio(?:\b)" 1;
    "~*(?:\b)INGRID/0\.1(?:\b)" 1;
    "~*(?:\b)Internet-structure-research-project-bot(?:\b)" 1;
    "~*(?:\b)istellabot(?:\b)" 1;

    # J
    "~*(?:\b)Jersey(?:\b)" 1; # bad
    "~*(?:\b)Jetty(?:\b)" 1;
    "~*(?:\b)JobboerseBot(?:\b)" 1;

    # K
    "~*(?:\b)Kinza(?:\b)" 1;
    "~*(?:\b)KOCMOHABT(?:\b)" 1;
    "~*(?:\b)Kraphio(?:\b)" 1;
    "~*(?:\b)Ktor(?:\b)" 1;
    "~*(?:\b)kubectl(?:\b)" 1; # malicious

    # L
    "~*(?:\b)Liana(?:\b)" 1;
    "~*(?:\b)LieBaoFast(?:\b)" 1; # bad
    "~*(?:\b)LightSpeed(?:\b)" 1;
    "~*(?:\b)LightspeedSystemsCrawler(?:\b)" 1;
    "~*(?:\b)linkdexbot(?:\b)" 1;
    "~*(?:\b)LinkedInBot(?:\b)" 1; # bad
    "~*(?:\b)linklooker(?:\b)" 1;
    "~*(?:\b)ltx71(?:\b)" 1; # bad
    "~*(?:\b)Lycos(?:\b)" 1;

    # M
    "~*(?:\b)magpie-crawler(?:\b)" 1;
    "~*(?:\b)Mail\.RU_Bot(?:\b)" 1;
    "~*(?:\b)masscan(?:\b)" 1;
    "~*(?:\b)MauiBot(?:\b)" 1;
    "~*(?:\b)Mb2345Browser(?:\b)" 1; # bad
    "~*(?:\b)MegaIndex\.ru(?:\b)" 1;
    "~*(?:\b)Mercator(?:\b)" 1;
    "~*(?:\b)MicroMessenger(?:\b)" 1;
    "~*(?:\b)MixnodeCache(?:\b)" 1;
    "~*(?:\b)MJ12bot(?:\b)" 1; # ok
    # "~*(?:\b)ms-office(?:\b)" 1;
    "~*(?:\b)MojeekBot(?:\b)" 1;
    "~*(?:\b)MyTuner-ExoPlayerAdapter(?:\b)" 1;

    # N
    "~*(?:\b)NetcraftSurveyAgent(?:\b)" 1; # bad
    "~*(?:\b)NetSeer(?:\b)" 1;
    "~*(?:\b)NetSystemsResearch(?:\b)" 1;
    "~*(?:\b)newspaper(?:\b)" 1;
    "~*(?:\b)Nimbostratus-Bot(?:\b)" 1; # bad
    "~*(?:\b)Nmap\ Scripting\ Engine(?:\b)" 1;
    "~*(?:\b)node-fetch(?:\b)" 1;
    "~*(?:\b)Nutch(?:\b)" 1;

    # O
    "~*(?:\b)oBot(?:\b)" 1;
    "~*(?:\b)oncrawl\.com(?:\b)" 1;
    "~*(?:\b)OwlTail(?:\b)" 1;

    # P
    "~*(?:\b)panscient\.com(?:\b)" 1;
    "~*(?:\b)PaperLiBot(?:\b)" 1;
    "~*(?:\b)PetalBot(?:\b)" 1; # same as AspiegelBot
    "~*(?:\b)PhantomJS(?:\b)" 1;
    "~*(?:\b)Photon/(?:\b)" 1; # Automattic
    "~*(?:\b)Podalong(?:\b)" 1;
    "~*(?:\b)Podchaser-Parser(?:\b)" 1;
    "~*(?:\b)Podimo(?:\b)" 1;
    "~*(?:\b)Poster(?:\b)" 1; # malicious
    "~*(?:\b)proximic(?:\b)" 1; # bad

    # Q
    "~*(?:\b)Qwantify(?:\b)" 1;

    # R
    # No trailing word boundary here: "_" is a word character, so "R6_\b"
    # could never match an agent of the form "R6_<Name>".
    "~*(?:\b)R6_" 1;
    "~*(?:\b)radio\.at(?:\b)" 1;
    "~*(?:\b)radio\.de(?:\b)" 1;
    "~*(?:\b)radio\.es(?:\b)" 1;
    "~*(?:\b)radio\.fr(?:\b)" 1;
    "~*(?:\b)radio\.it(?:\b)" 1;
    "~*(?:\b)radio\.net(?:\b)" 1;
    "~*(?:\b)RawVoice Generator(?:\b)" 1;
    "~*(?:\b)Request-Promise(?:\b)" 1;
    "~*(?:\b)RogerBot(?:\b)" 1;
    "~*(?:\b)Rome Client(?:\b)" 1;
    "~*(?:\b)RSSGet(?:\b)" 1;

    # S
    "~*(?:\b)SafetyNet(?:\b)" 1;
    "~*(?:\b)Scooter(?:\b)" 1;
    "~*(?:\b)Scrapy(?:\b)" 1;
    "~*(?:\b)Screaming Frog SEO Spider(?:\b)" 1;
    "~*(?:\b)SE 2\.X MetaSr 1\.0(?:\b)" 1;
    "~*(?:\b)SearchAtlas(?:\b)" 1;
    "~*(?:\b)Seekport(?:\b)" 1;
    "~*(?:\b)seewithkids\.com(?:\b)" 1;
    "~*(?:\b)SemanticScholarBot(?:\b)" 1;
    "~*(?:\b)SemrushBot/1\.0~bm(?:\b)" 1; # bad
    "~*(?:\b)SemrushBot/6~bl(?:\b)" 1; # good
    "~*(?:\b)SemrushBot-BA(?:\b)" 1;
    "~*(?:\b)SEMrushBot(?:\b)" 1;
    "~*(?:\b)SEOkicks(?:\b)" 1;
    "~*(?:\b)serpstatbot(?:\b)" 1;
    "~*(?:\b)SeznamBot(?:\b)" 1;
    "~*(?:\b)Sidetrade(?:\b)" 1;
    "~*(?:\b)SimplePie(?:\b)" 1;
    "~*(?:\b)SiteBot(?:\b)" 1;
    "~*(?:\b)Slack-ImgProxy(?:\b)" 1;
    "~*(?:\b)Slurp(?:\b)" 1;
    "~*(?:\b)SMTBot(?:\b)" 1;
    "~*(?:\b)Sodes/(?:\b)" 1; # podcaster IP 209.6.245.67
    "~*(?:\b)Sogou(?:\b)" 1;
    "~*(?:\b)socialmediascanner(?:\b)" 1;
    "~*(?:\b)ssearch_bot(?:\b)" 1;
    "~*(?:\b)SSL Labs(?:\b)" 1;
    "~*(?:\b)SurdotlyBot(?:\b)" 1; # bad

    # T
    "~*(?:\b)Talous(?:\b)" 1;
    "~*(?:\b)tamarasdartsoss\.nl(?:\b)" 1;
    "~*(?:\b)tapai(?:\b)" 1;
    "~*(?:\b)TelegramBot(?:\b)" 1;
    "~*(?:\b)temnos\.com(?:\b)" 1;
    "~*(?:\b)Tentacles(?:\b)" 1; # bad
    "~*(?:\b)Test Certificate Info(?:\b)" 1; # malicious
    "~*(?:\b)The Incutio XML-RPC PHP Library(?:\b)" 1; # malicious
    "~*(?:\b)Thumbor(?:\b)" 1;
    "~*(?:\b)TPA/1\.0\.0(?:\b)" 1;
    "~*(?:\b)Trade Desk(?:\b)" 1;
    "~*(?:\b)trendictionbot(?:\b)" 1;
    "~*(?:\b)TrendsmapResolver(?:\b)" 1;
    # Case-insensitive, so this one entry covers both "TTD-content"
    # (noted "bad" by the author) and "TTD-Content" (noted "good").
    "~*(?:\b)TTD-content(?:\b)" 1;
    "~*(?:\b)Typhoeus(?:\b)" 1;
    "~*(?:\b)TweetmemeBot(?:\b)" 1;
    "~*(?:\b)Twingly(?:\b)" 1;

    # U
    "~*(?:\b)UCBrowser(?:\b)" 1;
    "~*(?:\b)UltraSeek(?:\b)" 1;
    "~*(?:\b)um-IC(?:\b)" 1; # bad
    "~*(?:\b)um-LN(?:\b)" 1;
    "~*(?:\b)UniversalFeedParser(?:\b)" 1; # bad

    # V
    "~*(?:\b)VelenPublicWebCrawler(?:\b)" 1;

    # W
    "~*(?:\b)^w3m(?:\b)" 1;
    "~*(?:\b)WebZIP(?:\b)" 1;
    "~*(?:\b)Windows Live Writter(?:\b)" 1; # malicious
    "~*(?:\b)Wordpress\.com(?:\b)" 1;
    "~*(?:\b)wp\.com(?:\b)" 1;

    # X
    "~*(?:\b)XoviBot(?:\b)" 1;

    # Y
    "~*(?:\b)YaBrowser(?:\b)" 1;
    "~*(?:\b)YahooSeeker(?:\b)" 1;
    "~*(?:\b)YaK(?:\b)" 1;
    "~*(?:\b)Yandex(?:\b)" 1; # bad
    "~*(?:\b)YisouSpider(?:\b)" 1;

    # Z
    "~*(?:\b)zh_CN(?:\b)" 1; # malicious
    "~*(?:\b)zh-CN(?:\b)" 1; # malicious; case-insensitive, also covers "zh-cn"
    "~*(?:\b)ZmEu(?:\b)" 1;
}
You can use whatever HTTP error code you like, but 444 makes Nginx terminate the connection right away without sending any response.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Almost up-to-date live-version.
nano /etc/nginx/conf.d/blockbots.conf
and copy and paste the list into it; Nginx will load it automatically through the conf.d include shown below
nano /etc/nginx/nginx.conf
add this in the http section:
# Sizing of nginx's server-name and variable hash tables (http context).
# NOTE(review): presumably raised to avoid "could not build ... hash"
# start-up errors; confirm these values are actually needed on your host.
server_names_hash_bucket_size 64;
server_names_hash_max_size 4096;
variables_hash_max_size 4096;
variables_hash_bucket_size 4096;
You must have this too:
include /etc/nginx/conf.d/*.conf;
nano /etc/nginx/sites-available/some_virtual_host.conf
add this in the server section before the location statements:
# Block bad bots: $bad_bot is set to 1 by the User-Agent map when the
# request's agent matches a listed pattern. 444 is an nginx-specific code
# that closes the connection without sending a response.
if ($bad_bot = 1) {
return 444;
}
nginx -t
systemctl reload nginx