Created
August 31, 2010 17:32
-
-
Save somic/559397 to your computer and use it in GitHub Desktop.
Scrape AWS EC2 forums to obtain ranges of public IP addresses for each EC2 region
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
#
# ec2dipr.py - ec2_describe_ipaddress_ranges
#
# Scrapes an AWS forum announcement for EC2 public IP ranges and groups
# them by region.
#
from BeautifulSoup import BeautifulSoup
import re
import urllib2
import socket

try:
    # boto is optional. Both boto imports live inside the try so that a
    # missing install falls through to the hard-coded list below instead of
    # failing at module import time.
    import boto
    import boto.ec2
    REGION_NAMES = [reg.name for reg in boto.ec2.regions()]
except ImportError:
    # hard code if boto is not installed
    REGION_NAMES = ["us-east-1", "us-west-1", "eu-west-1", "ap-southeast-1"]

# Region assigned to a prefix when reverse DNS does not name any region.
DEFAULT_REGION = "us-east-1"

# Forum announcement page that lists the EC2 public IP ranges.
ANN_URL = "https://forums.aws.amazon.com/ann.jspa?annID=1182"
def ec2_describe_ipaddress_ranges():
    """Scrape the AWS announcement page and group EC2 CIDR blocks by region.

    The first 'jive-body' div of the page at ANN_URL is scanned line by line
    for an IPv4 CIDR (at most one per line). For each CIDR, the ".1" address
    of the network's first three octets is reverse-resolved and the resulting
    hostname is searched for one of REGION_NAMES; when the lookup fails or no
    region name appears in the hostname, DEFAULT_REGION is used.

    Returns:
        dict mapping region name -> list of CIDR strings, in page order.

    Raises:
        IndexError: if the page contains no 'jive-body' div.
        urllib2.URLError: if the page cannot be fetched.
    """
    cidr_re = re.compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/\d{1,2})')

    f = urllib2.urlopen(ANN_URL)
    try:
        soup = BeautifulSoup(f.read())
    finally:
        # Release the connection even if reading or parsing fails.
        f.close()
    data = soup.findAll('div', attrs={'class': 'jive-body'})[0]

    ranges = {}
    for line in str(data).split('\n'):
        m = cidr_re.search(line)
        if not m:
            continue
        cidr = m.group(0)

        # Probe address: first three octets of the network plus ".1".
        # Splitting on the dot (rather than chopping a fixed two characters)
        # keeps the address valid when the last octet has several digits.
        network = cidr.split('/')[0]
        ip = network.rsplit('.', 1)[0] + '.1'

        region = DEFAULT_REGION
        try:
            hostname = socket.gethostbyaddr(ip)[0]
        except socket.herror:
            # No reverse DNS entry: keep the default region.
            hostname = ''
        for reg in REGION_NAMES:
            if reg in hostname:
                region = reg
                break

        # workaround for one prefix that breaks the rule
        if cidr == '46.51.224.0/19':
            region = 'ap-northeast-1'

        ranges.setdefault(region, []).append(cidr)
    return ranges
def region_sizes():
    """Sum, per region, the size of its prefixes measured in /24 units.

    Each prefix of length n contributes 2 ** (24 - n) to its region's total.
    Every (region, prefix length) pair is also printed as it is processed.

    Returns:
        dict mapping region name -> accumulated /24 count.
    """
    totals = {}
    for region, prefixes in ec2_describe_ipaddress_ranges().items():
        count = 0
        for prefix in prefixes:
            masklen = int(prefix.rsplit('/', 1)[-1])
            print("%s %s" % (region, masklen))
            count += 2 ** (24 - masklen)
        totals[region] = count
    return totals
if __name__ == '__main__':
    # One output line per region: the region name followed by its CIDR
    # blocks, all separated by single spaces.
    by_region = ec2_describe_ipaddress_ranges()
    for region in by_region:
        print("%s %s" % (region, ' '.join(by_region[region])))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Disclaimer ( I work for Luminati Network),
Luminati is a P2P proxy network with over 40 million exit nodes in every city and country in the world.
Those exit nodes are real devices, so they are not recognized as proxy/bot/tor.
Feel free to contact me at [email protected] or sign up for a free trial at: http://j.mp/Hola-Luminati-p2p