Created
April 19, 2020 09:20
-
-
Save SohanChy/c8b52899e8089188ef4f967acf108f9a to your computer and use it in GitHub Desktop.
Multithreaded Python Script For BULK Fetching GEOCODE data from Google Maps API
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import requests | |
import threading | |
# API key sent as the 'key' parameter of every geocode request.
# Replace the placeholder with a real key before running.
SECRET_API_KEY = "KEY_HERE"

# Path of the input file. Its top-level JSON value must be an array of
# objects, e.g. [{}, {}, {}].
input_arr_json_file = "lookup_upazillas.json"
# Selects the field of a row that is used for the lookup.
def get_address_field(row):
    """Return the address text of *row* used to build the geocode query.

    ``row['label']`` is itself a JSON-encoded string, so it is decoded
    here and its ``'en'`` entry is returned. Adjust this function if your
    input keeps the address somewhere else (you may not need the extra
    decode at all).
    """
    label = json.loads(row['label'])
    return label['en']
# Key of the row value that is printed to the console in the progress
# messages, so a row can be recognised in the output.
identifier_field = 'id'
# Pushes the coordinates to the parent (top) level of a row.
def append_lat_long(lat, long, row_copy):
    """Store *lat* and *long* on *row_copy* as 'latitude' and 'longitude'.

    The mapping is modified in place and nothing is returned.

    NOTE(review): nothing in the visible part of this file calls this
    function; confirm whether a caller exists elsewhere.
    """
    row_copy.update(latitude=lat, longitude=long)
# Attaches the lookup metadata to a row.
def append_meta(row_copy, isResolved, resolved_data):
    """Set ``row_copy['meta']`` to the outcome of a lookup.

    The stored dict has two entries: 'is_resolved' (the *isResolved*
    flag) and 'resolved_data' (whatever the caller passes, unchanged).
    *row_copy* is modified in place; nothing is returned.
    """
    meta = dict(is_resolved=isResolved, resolved_data=resolved_data)
    row_copy['meta'] = meta
# Builds the geocode lookup query for a row.
def get_loc_lookup_string(row):
    """Return the query string for *row*: its address field followed by
    ", Bangladesh"."""
    address = get_address_field(row)
    country = "Bangladesh"
    return "{}, {}".format(address, country)
# Number of lookup threads allowed to run before the script waits for
# the running ones to finish.
thread_throttle = 5
def actual_lookup(resolved_data, new_data, num, total, row, timeout=10):
    """Geocode one row and append the annotated copy to *resolved_data*.

    Args:
        resolved_data: List collecting the finished rows; exactly one
            entry is appended per call.
        new_data: Copy of *row* that receives the 'meta' entry.
        num: Position of the row, used only in the progress messages.
        total: Number of rows, used only in the progress messages.
        row: The input row being looked up.
        timeout: Seconds to wait for the HTTP request before giving up.

    A failed request, a body that is not JSON, or a response without a
    non-empty 'results' list is reported as RESOLVE_FAILED instead of
    raising. An exception here would end the worker thread and drop the
    row from the output without any record of it.
    """
    lookup_string = get_loc_lookup_string(row)
    # The three progress messages share the same arguments.
    progress = (num, total, row[identifier_field], get_address_field(row), lookup_string)
    print("Resolving: {} of {} - {}, {}, {}".format(*progress))

    results = []
    try:
        response = requests.get(
            'https://maps.googleapis.com/maps/api/geocode/json',
            {
                'address': lookup_string,
                'key': SECRET_API_KEY,
            },
            # Without a timeout a stalled connection blocks this thread,
            # and everything waiting on it, forever.
            timeout=timeout,
        ).json()
        if isinstance(response, dict):
            results = response.get('results') or []
    except (requests.RequestException, ValueError) as exc:
        # Print only the exception type: the message of a requests error
        # can contain the full URL, including the API key.
        print("LOOKUP_ERROR: {} - {}".format(row[identifier_field], type(exc).__name__))

    if results:
        print("RESOLVED: {} of {} - {}, {}, {}".format(*progress))
        append_meta(new_data, True, results[0]['geometry']['location'])
    else:
        print("RESOLVE_FAILED: {} of {} - {}, {}, {}".format(*progress))
        append_meta(new_data, False, None)
    resolved_data.append(new_data)
def multi_threaded(data):
    """Look up every row of *data* using batches of worker threads.

    Args:
        data: Sequence of dict rows.

    Returns:
        A list with one annotated copy per row, in the order the lookups
        finished, or ``None`` (after printing an error) when any row is
        not a dict. No thread is started in that case.
    """
    # Validate the whole input before doing any work.
    for row in data:
        if not isinstance(row, dict):
            print('ERROR: Row not a dictionary/object')
            return None

    resolved_data = []
    total = len(data)
    # Number rows from 1 so the progress output reads "1 of N" .. "N of N".
    threads = [
        threading.Thread(
            target=actual_lookup,
            args=(resolved_data, row.copy(), num, total, row),
        )
        for num, row in enumerate(data, start=1)
    ]

    running_threads = []
    for thread in threads:
        if len(running_threads) >= thread_throttle:
            print("throttling threads...")
            # Wait for the whole current batch before starting the next.
            for running in running_threads:
                running.join()
            running_threads = []
        thread.start()
        running_threads.append(thread)

    # Wait for the last batch too; otherwise the caller receives the list
    # while lookups are still appending to it and may write out
    # incomplete results.
    for running in running_threads:
        running.join()

    return resolved_data
def main():
    """Read the input rows, look them up and write 'resolved.json'.

    Nothing is written when the input is rejected, so an earlier result
    file is not overwritten with ``null``.
    """
    # JSON text is UTF-8; do not depend on the platform default encoding,
    # which breaks non-ASCII labels on some systems.
    with open(input_arr_json_file, encoding='utf-8') as json_file:
        data = json.load(json_file)

    resolved_data = multi_threaded(data)
    if resolved_data is None:
        # multi_threaded has already printed the error for the bad row.
        return

    with open('resolved.json', 'w', encoding='utf-8') as outfile:
        json.dump(resolved_data, outfile)
# Run the lookup only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment