|
import csv
import os
import sys

import pandas as pd
# import the geocoding services you'd like to try
from geopy.geocoders import ArcGIS, Bing, Nominatim, OpenCage, GoogleV3, OpenMapQuest

import keys
|
|
|
# Command line: <script> <input.csv> <timeout>
# Fail early with a usage message instead of a bare IndexError.
if len(sys.argv) < 3:
    sys.exit(f'usage: {sys.argv[0]} <input.csv> <timeout>')

in_file = sys.argv[1]

# Prefix only the file name, not the whole path: 'data/in.csv' must become
# 'data/gc_in.csv' rather than 'gc_data/in.csv'. For a bare file name the
# directory part is empty and the result is still 'gc_<name>'.
out_file = os.path.join(os.path.dirname(in_file), 'gc_' + os.path.basename(in_file))

# Timeout handed to every geocoder constructor.
timeout = int(sys.argv[2])
|
|
|
print('creating geocoding objects.')

# One client per service. Every client receives the timeout taken from the
# command line; API keys and the Nominatim user agent come from the local
# `keys` module.
arcgis = ArcGIS(timeout=timeout)
# Previously hard-coded to timeout=100, which ignored the command-line value.
bing = Bing(api_key=keys.bing_api, timeout=timeout)
nominatim = Nominatim(user_agent=keys.n_user, timeout=timeout)
opencage = OpenCage(api_key=keys.oc_api, timeout=timeout)
googlev3 = GoogleV3(api_key=keys.g3_api, domain='maps.googleapis.com', timeout=timeout)
openmapquest = OpenMapQuest(api_key=keys.omq_api, timeout=timeout)

# choose and order your preference for geocoders here
# (bing is constructed above but is not part of the default order)
geocoders = [openmapquest, nominatim, opencage, googlev3, arcgis]
|
|
|
def gc(address, services=None):
    """Geocode one address row, falling back through the geocoders in order.

    Each service is queried with the concatenated address; the first one
    that returns a location wins and the remaining services are skipped.

    Args:
        address: A row (``pandas.Series``) with ``street``, ``city``,
            ``state`` and ``country`` entries. Its ``name`` (the row label)
            is only used in the progress messages.
        services: Optional ordered iterable of objects exposing
            ``geocode(query)``. Defaults to the module-level ``geocoders``.

    Returns:
        A ``pandas.Series`` with ``lat``, ``lng`` and ``time`` entries. When
        no service finds the address, ``lat`` and ``lng`` are the string
        ``'null'``.
    """
    if services is None:
        services = geocoders

    street, city, state, country = (
        str(address[field]) for field in ('street', 'city', 'state', 'country')
    )
    add_concat = f'{street}, {city}, {state} {country}'

    for gcoder in services:
        location = gcoder.geocode(add_concat)
        if location is not None:
            # Stop at the first hit so a later miss cannot overwrite it.
            print(f'geocoded record {address.name}: {street}')
            return pd.Series({
                'lat': location.latitude,
                'lng': location.longitude,
                'time': pd.to_datetime('now'),
            })

    # Reached only when every service came back empty (or none was given).
    print(f'failed to geolocate record {address.name}: {street}')
    return pd.Series({
        'lat': 'null',
        'lng': 'null',
        'time': pd.to_datetime('now'),
    })
|
|
|
# Load the input table; the first line of the CSV holds the column names.
print('opening input.')
reader = pd.read_csv(in_file, header=0)

# Geocode row by row, then attach the resulting columns to the input rows
# by matching on the index.
print('geocoding addresses.')
geocoded = reader.apply(gc, axis=1)
reader = reader.merge(geocoded, left_index=True, right_index=True)

# Write the combined table without the index column.
print(f'writing to {out_file}.')
reader.to_csv(out_file, encoding='utf-8', index=False)

print('done.')
Hello Eric,
Thanks for posting this valuable code (at least for a newbie like me).
I would like to ask if you could add a feature to it.
Imagine that you have thousands of addresses to geolocate: every single failure (a communication problem, for example)
makes you restart from the beginning.
Would it be possible to include a counter so that, when it reaches its limit (which could be a parameter too), the script opens the output file, appends the newly processed set of records, resets the counter, and goes on to the next record?
Thank you in advance!
Best regards
KV