Skip to content

Instantly share code, notes, and snippets.

@paulgb
Last active July 23, 2018 20:03
Show Gist options
  • Save paulgb/8430178 to your computer and use it in GitHub Desktop.
Save paulgb/8430178 to your computer and use it in GitHub Desktop.
Basic geocoding demonstration using Toronto Dinesafe data
'''
Geocode Toronto Dinesafe data.
Uses MapQuest's Nominatim mirror.
'''
import dbm
import urllib2
import csv
import json
import time
# URL template for MapQuest's open Nominatim server; the single '{}' slot
# receives the (already percent-encoded) search query.
# http://developer.mapquest.com/web/products/open/nominatim
API_ENDPOINT = (
    'http://open.mapquestapi.com/nominatim/v1/search.php'
    '?format=json&q={}'
)
# On-disk cache of raw responses. The 'c' flag opens the database for
# reading and writing, creating it first if it does not exist yet.
cache = dbm.open('geocode_cache', 'c')
def geocode_location(location):
    '''
    Fetch the geodata associated with the given address and return
    the entire response object (loaded from json).

    The raw response body is stored in the module-level ``cache`` keyed by
    the address string, so an address that is already cached is answered
    without any network request (and without the throttling pause).

    Any error raised while fetching propagates to the caller; in that
    case nothing is written to the cache for this address.
    '''
    if location not in cache:
        # construct the URL, percent-encoding the address for the query
        url = API_ENDPOINT.format(urllib2.quote(location))
        # load the content at the URL
        print('fetching %s' % url)
        response = urllib2.urlopen(url)
        try:
            result_json = response.read()
        finally:
            # release the connection even if reading the body fails
            response.close()
        # put the content into the cache
        cache[location] = result_json
        # pause to throttle requests (only after a real fetch)
        time.sleep(1)
    # the response is (now) in the cache, so load it
    return json.loads(cache[location])
if __name__ == '__main__':
    # Read dinesafe.csv, geocode each establishment address and write the
    # rows to dinesafe_geocoded.csv with two extra columns, lat and lon.
    #
    # Both files are opened in binary mode: the Python 2 csv module does
    # its own newline handling and requires the 'b' flag on platforms
    # where text mode would otherwise translate line endings.
    with open('dinesafe.csv', 'rb') as infile, \
            open('dinesafe_geocoded.csv', 'wb') as outfile:
        # wrap the files with CSV reader/writer objects.
        # the output file has two additional fields, lat and lon
        reader = csv.DictReader(infile)
        writer = csv.DictWriter(outfile, reader.fieldnames + ['lat', 'lon'])
        # write the header row to the output file
        writer.writeheader()
        # iterate over the file by record
        for record in reader:
            # construct the full address
            address = record['establishment_address']
            address += ', Toronto, ON, Canada'
            # log the address to the console
            print(address)
            try:
                # Nominatim returns a list of matches; take the first
                geo_data = geocode_location(address)[0]
            except IndexError:
                # no matches: keep the row anyway; DictWriter fills the
                # missing lat/lon columns with its default empty string
                pass
            else:
                record['lat'] = geo_data['lat']
                record['lon'] = geo_data['lon']
            writer.writerow(record)
'''
Convert Toronto Dinesafe data
(available at http://opendata.toronto.ca/public.health/dinesafe/dinesafe.zip)
from XML to CSV
'''
from xml.dom.minidom import parse
from csv import DictWriter
# Column names of the output CSV, in order. Each one is also looked up
# (upper-cased) as a child element name inside every <ROW> of the XML.
fields = [
    'row_id',
    'establishment_id',
    'inspection_id',
    'establishment_name',
    'establishmenttype',
    'establishment_address',
    'establishment_status',
    'minimum_inspections_peryear',
    'infraction_details',
    'inspection_date',
    'severity',
    'action',
    'court_outcome',
    'amount_fined',
]

# Parse the whole document up front; the with block closes the input file
# as soon as parsing is done.
with open('dinesafe.xml') as xml_file:
    doc = parse(xml_file)

# Binary mode is required by the Python 2 csv module so that line endings
# are not translated a second time; the with block flushes and closes the
# output deterministically.
with open('dinesafe.csv', 'wb') as csv_file:
    writer = DictWriter(csv_file, fields)
    writer.writeheader()

    row_data = doc.getElementsByTagName('ROWDATA')[0]
    for row in row_data.getElementsByTagName('ROW'):
        row_values = dict()
        for field in fields:
            text_element = row.getElementsByTagName(field.upper())[0].firstChild
            # an element with no child node is written as an empty cell
            value = ''
            if text_element:
                # minidom hands back unicode, but the Python 2 csv writer
                # only handles byte strings (non-ASCII text would raise
                # UnicodeEncodeError), so encode explicitly as UTF-8
                value = text_element.wholeText.strip().encode('utf-8')
            row_values[field] = value
        writer.writerow(row_values)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment