Created
May 27, 2020 13:54
-
-
Save omrilotan/a8ac4ccf58a804c902a64940ae7dc266 to your computer and use it in GitHub Desktop.
Verify crawlers' identity by reverse DNS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const { reverse } = require('dns');
/**
 * Decide whether an IP address belongs to a known crawler.
 *
 * An address listed in `addresses` is accepted immediately. Any other address
 * is reverse-resolved and accepted when at least one returned hostname
 * matches `pattern`.
 *
 * NOTE(review): only the reverse lookup is performed. Search engines that
 * document this technique usually also recommend a forward lookup of the
 * returned hostname to confirm it maps back to the same IP.
 *
 * @param {string} ip - Address to verify.
 * @returns {Promise<boolean>} Resolves `true` for a recognised crawler and
 *   `false` otherwise; rejects with the error reported by `dns.reverse`.
 */
const check = ip => new Promise(
  (resolve, reject) => {
    // Allow-listed addresses must settle the promise explicitly; a bare
    // `includes(ip) || reverse(...)` short-circuits and leaves it pending.
    if (addresses.includes(ip)) {
      resolve(true);
      return;
    }

    reverse(
      ip,
      (error, hostnames) => error
        ? reject(error)
        : resolve(
          hostnames.some(
            hostname => pattern.test(hostname)
          )
        )
    );
  }
);
// Regular-expression sources, one per search engine. Each one matches the
// END of a hostname: it starts at a literal dot and is anchored with `$`,
// so `x.googlebot.com` matches but `googlebot.com.example.org` does not.
// Backslashes are doubled because these are string literals that are
// compiled with `new RegExp` later.
const BAIDU = '\\.crawl\\.baidu\\.(com|jp)$';
const BING = '\\.search\\.msn\\.com$';
const GOOGLE = '\\.google(bot)?\\.com$';
const YAHOO = '\\.crawl\\.yahoo\\.net$';
const YANDEX = '\\.yandex\\.(ru|net|com)$';
// Crawlers identified by IP address rather than by reverse-DNS hostname.
//
// NOTE(review): the whois commands below print `route` prefixes. The lookup
// in `check` uses `addresses.includes(ip)`, an exact string comparison, so
// prefixes would need expanding or range matching before they could be
// used here. Both lists are currently empty.

// whois -h whois.radb.net -- '-i origin AS32934' | grep ^route
const FACEBOOK = [];

// whois -h whois.radb.net -- '-i origin AS13414' | grep ^route
const TWITTER = [];

// https://duckduckgo.com/duckduckbot
// NOTE(review): confirm these against the page above before relying on them.
const DUCKDUCKGO = [
  '72.94.249.34',
  '72.94.249.35',
  '72.94.249.36',
  '72.94.249.37',
  '72.94.249.38'
];
// One expression that matches the hostname suffix of any supported crawler.
// The per-engine sources are joined with `|`, so a hostname passes when it
// matches any one of them. The `i` flag is set because DNS names are
// case-insensitive.
const pattern = new RegExp(
  [
    BAIDU,
    BING,
    GOOGLE,
    YAHOO,
    YANDEX
  ].join('|'),
  'i'
);
// Single flat list of every allow-listed crawler IP, built from the
// per-service arrays. `check` consults it before doing any DNS lookup.
const addresses = [
  FACEBOOK,
  TWITTER,
  DUCKDUCKGO
].flat();
// Example
// This file loads `dns` with `require`, i.e. it is CommonJS, where a
// top-level `await` is a syntax error; consume the promise with a chain.
// The rejection handler matters: `check` rejects when the reverse lookup
// reports an error.
check('66.249.69.45')
  .then(isCrawler => console.log(isCrawler))
  .catch(error => console.error(error));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment