Created
March 30, 2017 07:57
-
-
Save scrapehero/1cb241a9dbe3798e4bdc36644b703dbb to your computer and use it in GitHub Desktop.
Python script to parse unstructured addresses
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from csv import QUOTE_ALL, DictWriter
from json import dump
from pprint import pprint

from requests import get
# Placeholder API key; replace with a real key before running. Intended for
# the `key` query parameter of the Google Geocoding request made by this script.
API_KEY = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
def address_resolver(json):
    """Flatten the first result of a geocoding response into a flat dict.

    Args:
        json: Decoded response body (a dict). Only the first entry of
            ``json['results']`` is read; the input is never modified.

    Returns:
        A dict with the keys ``street``, ``state``, ``city``, ``county``,
        ``country``, ``postal_code``, ``neighborhood``, ``sublocality``,
        ``housenumber``, ``postal_town``, ``subpremise``, ``latitude``,
        ``longitude``, ``location_type``, ``postal_code_suffix`` and
        ``street_number`` (in that order); a value is ``None`` when the
        response does not carry it. An empty dict is returned when the
        response has no results.
    """
    results = json.get('results')
    if not results:
        return {}
    top = results[0]

    # Index each component's long name by every type it is tagged with.
    # A separate dict is used so the caller's response is left untouched;
    # when several components share a type, the last one wins.
    components = {}
    for item in top.get('address_components') or []:
        for category in item.get('types') or []:
            components[category] = item.get('long_name')

    geometry = top.get('geometry') or {}
    location = geometry.get('location') or {}

    # Key order is significant: callers may use it as CSV column order.
    return {
        'street': components.get('route'),
        'state': components.get('administrative_area_level_1'),
        'city': components.get('locality'),
        'county': components.get('administrative_area_level_2'),
        'country': components.get('country'),
        'postal_code': components.get('postal_code'),
        'neighborhood': components.get('neighborhood'),
        'sublocality': components.get('sublocality'),
        'housenumber': components.get('housenumber'),
        'postal_town': components.get('postal_town'),
        'subpremise': components.get('subpremise'),
        'latitude': location.get('lat'),
        'longitude': location.get('lng'),
        'location_type': geometry.get('location_type'),
        'postal_code_suffix': components.get('postal_code_suffix'),
        'street_number': components.get('street_number'),
    }
def get_address_details(address,):
    """Geocode one free-form address and return its parsed components.

    Args:
        address: The unstructured address string to look up.

    Returns:
        The dict produced by ``address_resolver`` for the response, with an
        extra ``'address'`` key holding the original input string.
    """
    url = 'https://maps.googleapis.com/maps/api/geocode/json'
    # Let requests build the query string so characters such as '&' or '#'
    # in the address are percent-encoded, and so the API key is actually sent.
    params = {'address': address, 'key': API_KEY}
    # A timeout keeps a stalled connection from hanging the whole run.
    response = get(url, params=params, timeout=10)
    data = address_resolver(response.json())
    data['address'] = address
    return data
if __name__ == '__main__':
    # Provide the addresses via a CSV file or paste them into the list below.
    # To load them from a CSV (needs `from csv import DictReader`; the column
    # name 'address' is an assumption -- adjust it to match your file):
    #     with open("path/to/csv/file", newline='') as f:
    #         address_to_search = [row['address'] for row in DictReader(f)]
    address_to_search = ['71 Pilgrim Avenue Chevy Chase, MD 20815']
    data = [get_address_details(address) for address in address_to_search]

    # Rows can have different keys (an address with no geocoding result only
    # carries 'address'), so use the ordered union of all keys as the header
    # instead of the first row's keys alone.
    fieldnames = list(dict.fromkeys(key for row in data for key in row))

    # newline='' is what the csv module expects for files it writes to.
    with open("data.csv", 'w', newline='', encoding='utf-8') as csvfile:
        csvwriter = DictWriter(csvfile, fieldnames=fieldnames, quoting=QUOTE_ALL)
        csvwriter.writeheader()
        csvwriter.writerows(data)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I am new to Python. I copied the code above and replaced the API key with my own Google Maps API key and updated the file path, but it still does not seem to work. Could you please provide a sample CSV file to try it with?