Last active
January 3, 2016 18:09
-
-
Save hrbrmstr/8500539 to your computer and use it in GitHub Desktop.
For an upcoming DDS blog post.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv

import geoip2.database
# The addresses arrive as plain integers, but the city lookup function
# requires a dotted-quad string, so we have to convert them first.
def to_string(ip):
    """Return the dotted-quad string for the integer IPv4 address *ip*.

    The four octets are extracted most-significant first by shifting
    right by 24, 16, 8 and 0 bits and masking each result with 0xFF.
    Bits above the low 32 are ignored by the masking.
    """
    return ".".join(str((ip >> shift) & 0xFF) for shift in (24, 16, 8, 0))
# you'll need to download the city database and point this to it
reader = geoip2.database.Reader('GeoLite2-City.mmdb')

# Seven geo columns follow the dotted-quad IP on every output row; a failed
# lookup writes them all empty so that every row has the same width.
EMPTY_GEO = "," * 7

try:
    # NOTE(review): 'rb' is the mode the Python 2 csv module expects; on
    # Python 3 the input must be opened in text mode with newline=''.
    with open('marx.csv', 'rb') as marx, open('marx-geo.csv', 'w') as f:
        flyreader = csv.reader(marx, delimiter=',', quotechar='"')
        for fly in flyreader:
            # the third column holds the address as an integer
            strIP = to_string(int(fly[2]))
            # NOTE(review): the re-joined input row is followed directly by
            # the IP with no separator. Presumably each input row ends in an
            # empty trailing field -- confirm against marx.csv before
            # changing this, since downstream code reads columns by position.
            prefix = "%s%s" % (','.join(fly), strIP)
            try:  # sometimes the city function coughs up blood
                r = reader.city(strIP)
                geo = (r.country.iso_code,
                       r.country.name,
                       r.subdivisions.most_specific.name,
                       r.subdivisions.most_specific.iso_code,
                       r.postal.code,
                       r.location.latitude,
                       r.location.longitude)
                # attributes the database does not know are None; write them
                # as empty fields rather than the literal text "None"
                f.write("%s,%s\n" % (
                    prefix,
                    ','.join("" if v is None else "%s" % v for v in geo)))
            except Exception:
                # best effort: keep the row, leave the geo columns empty.
                # (Exception rather than a bare except, so Ctrl-C and
                # SystemExit still stop the script.)
                f.write("%s%s\n" % (prefix, EMPTY_GEO))
finally:
    # release the database file even if the loop fails part-way
    reader.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(data.table)
library(bit64)  # loaded so fread() can represent 64-bit integer columns

# data.table is a wicked fast data.frame compatible object
# and fread() is a wicked fast file reader that behaves
# like read.csv(). fread() works out the row count by itself;
# its `nrows` argument is a cap on the rows read, not a speed
# hint, so we leave it out to avoid silently truncating a
# larger file.
# NOTE(review): header=TRUE treats the first line as column names --
# confirm that marx-geo.csv really starts with a header row.
marx <- fread("marx-geo.csv", sep=",", header=TRUE)

# we only need lat/lon columns so we subset the data.table
# on those columns, remove any missing coordinate pairs
# and only retrieve unique pairs (since it's not important
# for this map demo to have duplicate points)
# NOTE(review): columns 14 and 15 are taken by position and are
# presumably latitude/longitude -- verify against the file layout.
latlon <- unique(na.omit(marx[, 14:15, with=FALSE]))
write.csv(latlon, "latlon.csv", row.names=FALSE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment