Skip to content

Instantly share code, notes, and snippets.

@emilepetrone
Created October 12, 2011 17:02
Show Gist options
  • Save emilepetrone/1281833 to your computer and use it in GitHub Desktop.
Save emilepetrone/1281833 to your computer and use it in GitHub Desktop.
import xlrd
import time
from simplegeo import Client, json, APIError
#from multiprocessing.pool import ThreadPool as Pool
from multiprocessing import Pool
def f(z):
    """Look up the neighborhood names SimpleGeo reports for one zip code.

    Reads the module-level ``client`` (created in the ``__main__`` section),
    so that name must exist in the process that runs this function.

    Args:
        z: Zip code; converted with ``str()`` before being sent as the
            address to ``client.context.get_context_by_address``.

    Returns:
        ``(zip_string, names)`` on success, where ``names`` is the list of
        ``"name"`` values of every feature whose first classifier has the
        category ``'Neighborhood'``.
        ``(zip_string, message)`` when the lookup raised ``APIError``;
        ``message`` is ``str()`` of the exception (previously it was always
        the empty string, which hid the cause of the failure).
    """
    z = str(z)
    # Keep the try body down to the one call that can raise APIError.
    try:
        results = client.context.get_context_by_address(z)
    except APIError as e:
        return (z, str(e))

    neighborhoods = []
    for feature in results["features"]:
        # Only the first classifier is inspected, as before. A feature with
        # a missing or empty classifier list is skipped instead of raising
        # and aborting the whole pool.map() run.
        try:
            category = feature["classifiers"][0]["category"]
        except (KeyError, IndexError):
            continue
        if category == 'Neighborhood':
            neighborhoods.append(feature["name"])

    print("%s hoods=%s." % (z, len(neighborhoods)))
    return (z, neighborhoods)
if __name__ == '__main__':
    # Script entry point: read zip codes from the first column of the first
    # sheet of zipcodes.xls, look each one up through f() on a 16-process
    # pool, then pretty-print the results and the elapsed time.
    from pprint import pprint

    book = xlrd.open_workbook("zipcodes.xls")
    sheet = book.sheet_by_index(0)
    rows = sheet.nrows
    print(rows)

    # NOTE(review): API credentials are hard-coded in source. They should be
    # rotated and loaded from the environment or a config file instead.
    # NOTE(review): f() reads this module-level name. Worker processes only
    # see it when they inherit the parent's globals (fork start method);
    # confirm before running on a platform that spawns workers instead.
    client = Client('SbAc87cM7EK2qCvmjymQtXU66MdXGNPr','CNmJaCEGGY3V7Tq3NEJsvNJCdJZjvgfN')

    zipcodes = set()
    # Start at row 1, as the original did (row 0 is presumably a header --
    # confirm). Stop at the sheet's real row count rather than the fixed
    # 65535 limit, which raised IndexError on any shorter sheet.
    for row in range(1, rows):
        value = sheet.cell_value(row, 0)
        try:
            code = int(value)
        except (TypeError, ValueError):
            # Blank or non-numeric cell: nothing to look up.
            continue
        # Restore the leading zeros lost when the code was stored as a number.
        zipcodes.add(str(code).zfill(5))

    print("Found %s codes" % len(zipcodes))
    print(zipcodes)

    start = time.time()
    pool = Pool(16)
    try:
        # Sorted so the order of the results is reproducible between runs.
        result = pool.map(f, sorted(zipcodes))
    finally:
        # Always release the worker processes, even if a lookup failed.
        pool.terminate()
        pool.join()

    pprint(result)
    print('Duration %s' % (time.time() - start,))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment