-
-
Save mitchellkrogza/754bf23fcad11fe0c329c2ddc90e1f85 to your computer and use it in GitHub Desktop.
Get the most up-to-date list of IP addresses for the crawler bots belonging to Google and Bing.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ipaddress | |
import requests | |
import pandas as pd | |
def bot_ip_addresses(*, session=None, timeout: float = 10.0) -> pd.DataFrame:
    """Return every IPv4 address published for the Google and Bing crawlers.

    Each provider's JSON feed is downloaded, every ``ipv4Prefix`` entry found
    under its ``prefixes`` key is expanded into individual addresses, and the
    result is returned with one row per address. Entries that carry no
    ``ipv4Prefix`` are skipped.

    Args:
        session: Optional object exposing ``get(url, timeout=...)``, such as
            a ``requests.Session``. When omitted, the ``requests`` module
            itself is used.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A DataFrame with the columns ``bot_name`` and ``ip_address``; the
        addresses are strings.

    Raises:
        requests.HTTPError: If a feed answers with an error status (raised
            by ``raise_for_status()`` when ``requests`` is used).
        KeyError: If a feed's JSON document has no ``prefixes`` key.
        ValueError: If an ``ipv4Prefix`` value is not a valid IPv4 network.
    """
    bots_urls = {
        'google': 'https://developers.google.com/search/apis/ipranges/googlebot.json',
        'bing': 'https://www.bing.com/toolbox/bingbot.json',
    }
    http = requests if session is None else session

    ip_addresses = []
    for bot, url in bots_urls.items():
        # Without a timeout a stalled server would block the caller forever.
        bot_resp = http.get(url, timeout=timeout)
        # Fail loudly on 4xx/5xx instead of trying to parse an error page.
        bot_resp.raise_for_status()
        for iprange in bot_resp.json()['prefixes']:
            network = iprange.get('ipv4Prefix')
            if not network:
                # Only IPv4 prefixes are expanded into single addresses.
                continue
            ip_addresses.extend(
                (bot, str(ip)) for ip in ipaddress.IPv4Network(network)
            )
    return pd.DataFrame(ip_addresses, columns=['bot_name', 'ip_address'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment