Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save mitchellkrogza/754bf23fcad11fe0c329c2ddc90e1f85 to your computer and use it in GitHub Desktop.
Save mitchellkrogza/754bf23fcad11fe0c329c2ddc90e1f85 to your computer and use it in GitHub Desktop.
Get the most up-to-date list of IPv4 addresses for the crawler bots belonging to Google and Bing.
import ipaddress
import requests
import pandas as pd
def bot_ip_addresses(timeout: float = 10) -> pd.DataFrame:
    """Return every IPv4 address published for Google's and Bing's crawlers.

    Downloads each bot's JSON range file, expands every IPv4 prefix listed
    under its ``prefixes`` key into individual addresses, and collects them
    in a single table. Entries that carry no ``ipv4Prefix`` value are
    skipped.

    Args:
        timeout: Seconds to wait on each HTTP request before giving up.
            Passed straight through to ``requests.get``.

    Returns:
        A ``pandas.DataFrame`` with one row per address and the columns
        ``bot_name`` and ``ip_address`` (the address as a string).

    Raises:
        requests.RequestException: If a request times out, cannot be
            completed, or the server answers with an HTTP error status.
    """
    bots_urls = {
        'google': 'https://developers.google.com/search/apis/ipranges/googlebot.json',
        'bing': 'https://www.bing.com/toolbox/bingbot.json',
    }
    ip_addresses = []
    for bot, url in bots_urls.items():
        # Without a timeout, requests waits indefinitely on a stalled server.
        bot_resp = requests.get(url, timeout=timeout)
        # Fail loudly on 4xx/5xx rather than trying to parse an error page.
        bot_resp.raise_for_status()
        for iprange in bot_resp.json()['prefixes']:
            network = iprange.get('ipv4Prefix')
            if not network:
                continue
            # Iterating the network yields every address in the prefix.
            ip_addresses.extend(
                (bot, str(ip)) for ip in ipaddress.IPv4Network(network)
            )
    return pd.DataFrame(ip_addresses, columns=['bot_name', 'ip_address'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment