Last active
July 23, 2018 20:03
-
-
Save paulgb/8430178 to your computer and use it in GitHub Desktop.
Basic geocoding demonstration using Toronto Dinesafe data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Geocode Toronto Dinesafe data.
Uses MapQuest's Nominatim mirror.
'''
import dbm | |
import urllib2 | |
import csv | |
import json | |
import time | |
# On-disk cache of geocoder responses. The 'c' flag opens the database
# and creates it first if it does not exist yet.
cache = dbm.open('geocode_cache', 'c')

# Search URL template for MapQuest's open Nominatim server; the '{}'
# placeholder is filled in with the query string.
# http://developer.mapquest.com/web/products/open/nominatim
API_ENDPOINT = (
    'http://open.mapquestapi.com/nominatim/v1/search.php'
    '?format=json&q={}'
)
def geocode_location(location):
    '''
    Fetch the geodata associated with the given address and return
    the entire response object (loaded from json).

    Raw responses are stored in the module-level ``cache`` keyed by the
    address string, so an address that is already cached is answered
    without a network request. A response is stored only after it has
    parsed as JSON: an unparseable payload raises ValueError from
    ``json.loads`` and is not written to the cache, so it cannot break
    later lookups of the same address.
    '''
    # Imported here so the function does not rely on a change to the
    # file-level import block.
    from contextlib import closing

    # cache hit: no request and no throttling needed
    if location in cache:
        return json.loads(cache[location])

    # construct the URL
    url = API_ENDPOINT.format(urllib2.quote(location))
    print('fetching %s' % url)

    # load the content at the URL; urllib2 responses are not context
    # managers, so closing() releases the connection deterministically
    with closing(urllib2.urlopen(url)) as response:
        result_json = response.read()

    # parse BEFORE caching so a bad payload is never persisted
    result = json.loads(result_json)
    cache[location] = result_json

    # pause to throttle requests
    time.sleep(1)
    return result
if __name__ == '__main__':
    # Read dinesafe.csv, geocode each establishment address and write the
    # rows, extended with 'lat' and 'lon' columns, to dinesafe_geocoded.csv.
    try:
        # Python 2's csv module requires files opened in binary mode;
        # text mode produces blank lines between rows on Windows.
        with open('dinesafe.csv', 'rb') as infile, \
                open('dinesafe_geocoded.csv', 'wb') as outfile:
            # wrap the files with CSV reader/writer objects.
            # the output file has two additional fields, lat and lon
            reader = csv.DictReader(infile)
            writer = csv.DictWriter(outfile, reader.fieldnames + ['lat', 'lon'])
            # write the header row to the output file
            writer.writeheader()

            # iterate over the file by record
            for record in reader:
                # construct the full address
                address = record['establishment_address'] + ', Toronto, ON, Canada'
                # log the address to the console
                print(address)
                try:
                    # Nominatim returns a list of matches; take the first
                    geo_data = geocode_location(address)[0]
                except IndexError:
                    # no matches: keep the row, with empty coordinates
                    record['lat'] = ''
                    record['lon'] = ''
                else:
                    record['lat'] = geo_data['lat']
                    record['lon'] = geo_data['lon']
                writer.writerow(record)
    finally:
        # close the cache database even if the run is interrupted, so the
        # responses fetched so far are not lost
        cache.close()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Convert Toronto Dinesafe data
(available at http://opendata.toronto.ca/public.health/dinesafe/dinesafe.zip)
from XML to CSV.
'''
from xml.dom.minidom import parse | |
from csv import DictWriter | |
# Columns of the output CSV. Each name, upper-cased, is also the tag name
# of the child element looked up inside every <ROW> of the XML input.
fields = [
    'row_id',
    'establishment_id',
    'inspection_id',
    'establishment_name',
    'establishmenttype',
    'establishment_address',
    'establishment_status',
    'minimum_inspections_peryear',
    'infraction_details',
    'inspection_date',
    'severity',
    'action',
    'court_outcome',
    'amount_fined',
]

# Parse the whole XML document, closing the input file as soon as the
# DOM has been built.
with open('dinesafe.xml', 'rb') as xml_file:
    doc = parse(xml_file)

# Python 2's csv module requires a binary-mode file, and the 'with' block
# guarantees the output is flushed and closed.
with open('dinesafe.csv', 'wb') as csv_file:
    writer = DictWriter(csv_file, fields)
    writer.writeheader()

    row_data = doc.getElementsByTagName('ROWDATA')[0]
    for row in row_data.getElementsByTagName('ROW'):
        row_values = dict()
        for field in fields:
            text_element = row.getElementsByTagName(field.upper())[0].firstChild
            # an element with no children yields an empty CSV value
            value = ''
            if text_element is not None:
                # minidom returns unicode text, which Python 2's csv writer
                # cannot write when it contains non-ASCII characters, so
                # encode it to UTF-8 bytes first
                value = text_element.wholeText.strip().encode('utf-8')
            row_values[field] = value
        writer.writerow(row_values)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment