Last active
November 9, 2020 20:36
-
-
Save eksiscloud/78455472479abc50d0b09999ef54d7b1 to your computer and use it in GitHub Desktop.
Nginx: another bad bot banning list
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Bad-bot blocklist keyed on the User-Agent request header.
#
# Sets $bad_bot to 1 when the User-Agent matches any pattern below and to 0
# otherwise. Every pattern uses "~*", i.e. a case-insensitive regular
# expression, so one spelling of a token covers all of its case variants.
# "(?:\b)" is a plain word-boundary assertion placed around each token.
# Literal dots are escaped ("\.") so that they only match a real dot.
#
# The map only computes the flag; rejecting the request is up to the caller,
# for example inside a server block:
#     if ($bad_bot) { return 444; }
#
# The trailing notes (good / bad / malicious / done) are the list author's own
# classification and are kept verbatim.
map $http_user_agent $bad_bot {
    default 0;

    # libraries etc.
    "~*(?:\b)aiohttp(?:\b)" 1;
    "~*(?:\b)akka-http/(?:\b)" 1;  # - done
    "~*(?:\b)Amazon CloudFront(?:\b)" 1;  # - done
    "~*(?:\b)Facebot Twitterbot(?:\b)" 1;
    "~*(?:\b)check_http/(?:\b)" 1;
    "~*(?:\b)curl/(?:\b)" 1;
    "~*(?:\b)Java/(?:\b)" 1;
    "~*(?:\b)libwww-perl(?:\b)" 1;
    "~*(?:\b)okhttp(?:\b)" 1;  # - done
    "~*(?:\b)PHP/(?:\b)" 1;
    "~*(?:\b)print\((?:\b)" 1;
    "~*(?:\b)python(?:\b)" 1;  # case-insensitive: also covers Python
    "~*(?:\b)Ruby(?:\b)" 1;
    "~*(?:\b)Wget(?:\b)" 1;
    "~*(?:\b)zgrab/(?:\b)" 1;

    # #
    "~*(?:\b)360Spider(?:\b)" 1;  # bad - done
    "~*(?:\b)2345Explorer(?:\b)" 1;  # malicious - done

    # A
    "~*(?:\b)Acast (?:\b)" 1;  # bad - done
    "~*(?:\b)AdAuth(?:\b)" 1;  # bad - done
    "~*(?:\b)adidxbot(?:\b)" 1;  # good
    "~*(?:\b)admantx-adform(?:\b)" 1;  # bad - done
    "~*(?:\b)AdsTxtCrawler(?:\b)" 1;  # bad - done
    "~*(?:\b)AffiliateLabz(?:\b)" 1;  # good - done
    "~*(?:\b)AHC(?:\b)" 1;  # malicious
    "~*(?:\b)AhrefsBot(?:\b)" 1;  # good - done
    "~*(?:\b)Anchorage DMP(?:\b)" 1;  # bad - done
    "~*(?:\b)Apache-HttpClient(?:\b)" 1;  # malicious - done
    "~*(?:\b)ApiTool(?:\b)" 1;
    "~*(?:\b)AspiegelBot(?:\b)" 1;
    "~*(?:\b)atc/(?:\b)" 1;
    "~*(?:\b)AVSearch(?:\b)" 1;
    "~*(?:\b)axios(?:\b)" 1;  # bad

    # B
    "~*(?:\b)Baidu(?:\b)" 1;
    "~*(?:\b)Barkrowler(?:\b)" 1;
    "~*(?:\b)BDCbot(?:\b)" 1;
    "~*(?:\b)bidswitchbot(?:\b)" 1;  # bad
    "~*(?:\b)BingPreview(?:\b)" 1;
    "~*(?:\b)Blackboard Safeassign(?:\b)" 1;
    "~*(?:\b)BLEXBot(?:\b)" 1;
    "~*(?:\b)Bloglines(?:\b)" 1;
    "~*(?:\b)BorneoBot(?:\b)" 1;
    "~*(?:\b)botify(?:\b)" 1;
    "~*(?:\b)Buck(?:\b)" 1;  # bad
    "~*(?:\b)BuiltWith(?:\b)" 1;

    # C
    "~*(?:\b)CarrierWave(?:\b)" 1;
    "~*(?:\b)CatchBot(?:\b)" 1;
    "~*(?:\b)CATExplorador(?:\b)" 1;  # bad
    "~*(?:\b)CCBot(?:\b)" 1;  # bad
    "~*(?:\b)Centro(?:\b)" 1;
    "~*(?:\b)CheckMarkNetwork(?:\b)" 1;
    "~*(?:\b)checkout-(?:\b)" 1;
    "~*(?:\b)Clarabot(?:\b)" 1;
    "~*(?:\b)Cliqzbot(?:\b)" 1;
    "~*(?:\b)Cloud mapping experiment(?:\b)" 1;
    "~*(?:\b)CMS Crawler(?:\b)" 1;
    "~*(?:\b)coccocbot(?:\b)" 1;  # bad, uses ordinary UA at same time
    "~*(?:\b)crawler4j(?:\b)" 1;

    # D
    "~*(?:\b)datagnionbot(?:\b)" 1;
    "~*(?:\b)Datanyze(?:\b)" 1;
    "~*(?:\b)Dataprovider(?:\b)" 1;
    "~*(?:\b)Daum(?:\b)" 1;
    "~*(?:\b)deepcrawl\.com(?:\b)" 1;
    "~*(?:\b)digincore(?:\b)" 1;
    "~*(?:\b)Directo-Indexer(?:\b)" 1;
    "~*(?:\b)Discordbot(?:\b)" 1;
    "~*(?:\b)DisqusAdstxtCrawler(?:\b)" 1;
    "~*(?:\b)Dispatch(?:\b)" 1;  # bad
    "~*(?:\b)DomainStatsBot(?:\b)" 1;  # bad
    "~*(?:\b)Domnutch(?:\b)" 1;
    "~*(?:\b)DotBot(?:\b)" 1;
    "~*(?:\b)dproxy(?:\b)" 1;

    # E
    "~*(?:\b)eContext(?:\b)" 1;
    "~*(?:\b)EnigmaBot(?:\b)" 1;
    "~*(?:\b)Entale bot(?:\b)" 1;  # bad
    "~*(?:\b)Exabot(?:\b)" 1;
    "~*(?:\b)Ezooms(?:\b)" 1;

    # F
    "~*(?:\b)fr-crawler(?:\b)" 1;
    "~*(?:\b)FYEO(?:\b)" 1;
    "~*(?:\b)fyeo-crawler(?:\b)" 1;

    # G
    "~*(?:\b)gobyus(?:\b)" 1;
    "~*(?:\b)Go-http-client(?:\b)" 1;
    # Anchored at the start of the header. No trailing word boundary: the
    # agent is followed by a space and "(", where \b can never succeed.
    "~*^got " 1;
    # "~*(?:\b)GotSiteMonitor(?:\b)" 1;
    "~*(?:\b)GrapeshotCrawler(?:\b)" 1;  # bad

    # H
    "~*(?:\b)hackney(?:\b)" 1;
    "~*(?:\b)Hello(?:\b)" 1;
    "~*(?:\b)htInEdin(?:\b)" 1;
    "~*(?:\b)HTTP Banner Detection(?:\b)" 1;
    "~*(?:\b)hubspot(?:\b)" 1;

    # I
    "~*(?:\b)IAB ATQ(?:\b)" 1;
    "~*(?:\b)IAS crawler(?:\b)" 1;  # good
    "~*(?:\b)ias-(?:\b)" 1;
    "~*(?:\b)import\.io(?:\b)" 1;
    "~*(?:\b)InfoSeek(?:\b)" 1;
    "~*(?:\b)Incutio(?:\b)" 1;
    "~*(?:\b)INGRID/0\.1(?:\b)" 1;
    "~*(?:\b)Internet-structure-research-project-bot(?:\b)" 1;
    "~*(?:\b)istellabot(?:\b)" 1;

    # J
    "~*(?:\b)Jersey(?:\b)" 1;  # bad
    "~*(?:\b)Jetty(?:\b)" 1;
    "~*(?:\b)JobboerseBot(?:\b)" 1;

    # K
    "~*(?:\b)Kinza(?:\b)" 1;
    "~*(?:\b)KOCMOHABT(?:\b)" 1;
    "~*(?:\b)Kraphio(?:\b)" 1;
    "~*(?:\b)Ktor(?:\b)" 1;
    "~*(?:\b)kubectl(?:\b)" 1;  # malicious

    # L
    "~*(?:\b)Liana(?:\b)" 1;
    "~*(?:\b)LieBaoFast(?:\b)" 1;  # bad
    "~*(?:\b)LightSpeed(?:\b)" 1;
    "~*(?:\b)LightspeedSystemsCrawler(?:\b)" 1;
    "~*(?:\b)linkdexbot(?:\b)" 1;
    "~*(?:\b)LinkedInBot(?:\b)" 1;  # bad
    "~*(?:\b)linklooker(?:\b)" 1;
    "~*(?:\b)ltx71(?:\b)" 1;  # bad
    "~*(?:\b)Lycos(?:\b)" 1;

    # M
    "~*(?:\b)magpie-crawler(?:\b)" 1;
    "~*(?:\b)Mail\.RU_Bot(?:\b)" 1;
    "~*(?:\b)masscan(?:\b)" 1;
    "~*(?:\b)MauiBot(?:\b)" 1;
    "~*(?:\b)Mb2345Browser(?:\b)" 1;  # bad
    "~*(?:\b)MegaIndex\.ru(?:\b)" 1;
    "~*(?:\b)Mercator(?:\b)" 1;
    "~*(?:\b)MicroMessenger(?:\b)" 1;
    "~*(?:\b)MixnodeCache(?:\b)" 1;
    "~*(?:\b)MJ12bot(?:\b)" 1;  # ok
    # "~*(?:\b)ms-office(?:\b)" 1;
    "~*(?:\b)MojeekBot(?:\b)" 1;
    "~*(?:\b)MyTuner-ExoPlayerAdapter(?:\b)" 1;

    # N
    "~*(?:\b)NetcraftSurveyAgent(?:\b)" 1;  # bad
    "~*(?:\b)NetSeer(?:\b)" 1;
    "~*(?:\b)NetSystemsResearch(?:\b)" 1;
    "~*(?:\b)newspaper(?:\b)" 1;
    "~*(?:\b)Nimbostratus-Bot(?:\b)" 1;  # bad
    "~*(?:\b)Nmap\ Scripting\ Engine(?:\b)" 1;
    "~*(?:\b)node-fetch(?:\b)" 1;
    "~*(?:\b)Nutch(?:\b)" 1;

    # O
    "~*(?:\b)oBot(?:\b)" 1;
    "~*(?:\b)oncrawl\.com(?:\b)" 1;
    "~*(?:\b)OwlTail(?:\b)" 1;

    # P
    "~*(?:\b)panscient\.com(?:\b)" 1;
    "~*(?:\b)PaperLiBot(?:\b)" 1;
    "~*(?:\b)PetalBot(?:\b)" 1;  # same as AspiegelBot
    "~*(?:\b)PhantomJS(?:\b)" 1;
    "~*(?:\b)Photon/(?:\b)" 1;  # Automattic
    "~*(?:\b)Podalong(?:\b)" 1;
    "~*(?:\b)Podchaser-Parser(?:\b)" 1;
    "~*(?:\b)Podimo(?:\b)" 1;
    "~*(?:\b)Poster(?:\b)" 1;  # malicious
    "~*(?:\b)proximic(?:\b)" 1;  # bad

    # Q
    "~*(?:\b)Qwantify(?:\b)" 1;

    # R
    # No trailing word boundary: "_" is itself a word character, so \b could
    # never succeed between "R6_" and the letters that follow it
    # (e.g. R6_FeedFetcher).
    "~*(?:\b)R6_" 1;
    "~*(?:\b)radio\.at(?:\b)" 1;
    "~*(?:\b)radio\.de(?:\b)" 1;
    "~*(?:\b)radio\.es(?:\b)" 1;
    "~*(?:\b)radio\.fr(?:\b)" 1;
    "~*(?:\b)radio\.it(?:\b)" 1;
    "~*(?:\b)radio\.net(?:\b)" 1;
    "~*(?:\b)RawVoice Generator(?:\b)" 1;
    "~*(?:\b)Request-Promise(?:\b)" 1;
    "~*(?:\b)RogerBot(?:\b)" 1;
    "~*(?:\b)Rome Client(?:\b)" 1;
    "~*(?:\b)RSSGet(?:\b)" 1;

    # S
    "~*(?:\b)SafetyNet(?:\b)" 1;
    "~*(?:\b)Scooter(?:\b)" 1;
    "~*(?:\b)Scrapy(?:\b)" 1;
    "~*(?:\b)Screaming Frog SEO Spider(?:\b)" 1;
    "~*(?:\b)SE 2\.X MetaSr 1\.0(?:\b)" 1;
    "~*(?:\b)SearchAtlas(?:\b)" 1;
    "~*(?:\b)Seekport(?:\b)" 1;
    "~*(?:\b)seewithkids\.com(?:\b)" 1;
    "~*(?:\b)SemanticScholarBot(?:\b)" 1;
    "~*(?:\b)SemrushBot/1\.0~bm(?:\b)" 1;  # bad
    "~*(?:\b)SemrushBot/6~bl(?:\b)" 1;  # good
    "~*(?:\b)SemrushBot-BA(?:\b)" 1;
    "~*(?:\b)SEMrushBot(?:\b)" 1;
    "~*(?:\b)SEOkicks(?:\b)" 1;
    "~*(?:\b)serpstatbot(?:\b)" 1;
    "~*(?:\b)SeznamBot(?:\b)" 1;
    "~*(?:\b)Sidetrade(?:\b)" 1;
    "~*(?:\b)SimplePie(?:\b)" 1;
    "~*(?:\b)SiteBot(?:\b)" 1;
    "~*(?:\b)Slack-ImgProxy(?:\b)" 1;
    "~*(?:\b)Slurp(?:\b)" 1;
    "~*(?:\b)SMTBot(?:\b)" 1;
    "~*(?:\b)Sodes/(?:\b)" 1;  # podcaster IP 209.6.245.67
    "~*(?:\b)Sogou(?:\b)" 1;
    "~*(?:\b)socialmediascanner(?:\b)" 1;
    "~*(?:\b)ssearch_bot(?:\b)" 1;
    "~*(?:\b)SSL Labs(?:\b)" 1;
    "~*(?:\b)SurdotlyBot(?:\b)" 1;  # bad

    # T
    "~*(?:\b)Talous(?:\b)" 1;
    "~*(?:\b)tamarasdartsoss\.nl(?:\b)" 1;
    "~*(?:\b)tapai(?:\b)" 1;
    "~*(?:\b)TelegramBot(?:\b)" 1;
    "~*(?:\b)temnos\.com(?:\b)" 1;
    "~*(?:\b)Tentacles(?:\b)" 1;  # bad
    "~*(?:\b)Test Certificate Info(?:\b)" 1;  # malicious
    "~*(?:\b)The Incutio XML-RPC PHP Library(?:\b)" 1;  # malicious
    "~*(?:\b)Thumbor(?:\b)" 1;
    "~*(?:\b)TPA/1\.0\.0(?:\b)" 1;
    "~*(?:\b)Trade Desk(?:\b)" 1;
    "~*(?:\b)trendictionbot(?:\b)" 1;
    "~*(?:\b)TrendsmapResolver(?:\b)" 1;
    "~*(?:\b)TTD-content(?:\b)" 1;  # bad; case-insensitive: also covers TTD-Content
    "~*(?:\b)Typhoeus(?:\b)" 1;
    "~*(?:\b)TweetmemeBot(?:\b)" 1;
    "~*(?:\b)Twingly(?:\b)" 1;

    # U
    "~*(?:\b)UCBrowser(?:\b)" 1;
    "~*(?:\b)UltraSeek(?:\b)" 1;
    "~*(?:\b)um-IC(?:\b)" 1;  # bad
    "~*(?:\b)um-LN(?:\b)" 1;
    "~*(?:\b)UniversalFeedParser(?:\b)" 1;  # bad

    # V
    "~*(?:\b)VelenPublicWebCrawler(?:\b)" 1;

    # W
    "~*(?:\b)^w3m(?:\b)" 1;
    "~*(?:\b)WebZIP(?:\b)" 1;
    "~*(?:\b)Windows Live Writter(?:\b)" 1;  # malicious
    "~*(?:\b)Wordpress\.com(?:\b)" 1;
    "~*(?:\b)wp\.com(?:\b)" 1;

    # X
    "~*(?:\b)XoviBot(?:\b)" 1;

    # Y
    "~*(?:\b)YaBrowser(?:\b)" 1;
    "~*(?:\b)YahooSeeker(?:\b)" 1;
    "~*(?:\b)YaK(?:\b)" 1;
    "~*(?:\b)Yandex(?:\b)" 1;  # bad
    "~*(?:\b)YisouSpider(?:\b)" 1;

    # Z
    "~*(?:\b)zh_CN(?:\b)" 1;  # malicious
    "~*(?:\b)zh-CN(?:\b)" 1;  # malicious; case-insensitive: also covers zh-cn
    "~*(?:\b)ZmEu(?:\b)" 1;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
You can use whatever HTTP error code you like, but 444 will terminate the connection right away without Nginx sending any response.