Created
September 13, 2013 18:52
-
-
Save nfisher/6554585 to your computer and use it in GitHub Desktop.
Look up request IPs and categorise them into a distribution by country using the MaxMind DB.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# Usage:
#
#   ip_locations.py LOCATIONS BLOCKS RESERVOIR
#
# Dependencies:
#
# - reservoir sample set (CSV) with an 'ips' column.
# - MaxMind IP Blocks and Location CSVs.
# - python 2.6+ - pandas, numpy, scipy, matplotlib
#
import numbers
import socket
import struct
import sys

import matplotlib.pyplot as plt
import pandas as pd
def _ip_to_int(ip):
    """Return the dotted-quad IPv4 string *ip* as an unsigned 32-bit integer.

    ``socket.inet_aton`` yields the four address bytes in network
    (big-endian) order and ``struct.unpack('!I', ...)`` reads them as a single
    unsigned int.  Unlike the Python-2-only ``.encode('hex')`` round trip,
    this runs unchanged on Python 2 and Python 3.
    """
    return struct.unpack('!I', socket.inet_aton(ip))[0]


# Fail with a usage hint instead of a bare IndexError when arguments are
# missing.
if len(sys.argv) < 4:
    sys.exit('Usage: ip_locations.py LOCATIONS BLOCKS RESERVOIR')

maxmind_locations_filename = sys.argv[1]  # 'GeoIPCity-134-Location.csv'
maxmind_blocks_filename = sys.argv[2]  # 'GeoIPCity-134-Blocks.csv'
reservoir_filename = sys.argv[3]  # 'reservoir_sample.txt'

reservoir = pd.read_csv(reservoir_filename)

# skiprows=1 drops the first line of each MaxMind file so that the second
# line is used as the header row (presumably the first line is a copyright
# notice -- confirm against the actual CSVs).
blocks = pd.read_csv(maxmind_blocks_filename, skiprows=1)
locations = pd.read_csv(maxmind_locations_filename, skiprows=1)

# One row per IP block with its location columns attached (inner join on the
# shared 'locId' column).
location_blocks = pd.merge(blocks, locations, on='locId')

# Integer form of each sampled address, comparable with the
# startIpNum/endIpNum columns.  A column-wise map avoids building a full row
# object per record just to read a single field.
reservoir['ipAsInt'] = reservoir['ips'].map(_ip_to_int)
def country_by_ip(ip):
    """Return the country of the MaxMind block containing *ip*, or 'unknown'.

    ip -- IPv4 address as an integer; it is compared against the
          ``startIpNum`` / ``endIpNum`` columns of the module-level
          ``location_blocks`` frame (both bounds inclusive).

    Returns the ``country`` value of the matching row.  'unknown' is returned
    when *ip* is not an integer, or when the number of blocks containing it
    is not exactly one (no match, or more than one overlapping block).
    """
    # numbers.Integral also accepts Python 2 ``long`` and NumPy integer
    # scalars, which pandas may hand back from an int64 column; a bare
    # ``isinstance(ip, int)`` would send those valid addresses to 'unknown'.
    if not isinstance(ip, numbers.Integral):
        return 'unknown'

    in_block = ((location_blocks['startIpNum'] <= ip)
                & (location_blocks['endIpNum'] >= ip))
    countries = location_blocks.loc[in_block, 'country']
    if len(countries) == 1:
        # .iloc is the supported positional accessor; ``irow`` was removed
        # from pandas.
        return countries.iloc[0]
    return 'unknown'
# Tag every sampled request with the country of the IP block it falls in.
reservoir['country'] = reservoir.apply(
    lambda row: country_by_ip(row['ipAsInt']), axis=1)

# Number of sampled requests per country, in ascending order for the chart.
# Series.sort() (in place) and Series.order() were removed from pandas;
# sort_values() is the supported replacement (pandas >= 0.17).
s = reservoir.groupby('country').size()
s = s.sort_values()

# Horizontal bar chart of the distribution, written next to the script's
# working directory.
s.plot(kind='barh', figsize=(10, 12))
plt.savefig('regional-distrib.png')

# Text report, largest country first.  The single-argument call form of
# print works on both Python 2 and Python 3.
print(s.sort_values(ascending=False))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment