-
-
Save quandyfactory/08125fe3050a563d55c3 to your computer and use it in GitHub Desktop.
| #!/usr/bin/env python | |
| # coding: utf-8 | |
| """ | |
| This script downloads your Social Bicycles (SoBi) routes data from the SoBi web | |
| API and saves it as a JSON file. It uses HTTP basic authentication with your SoBi | |
| username (your email address) and password. | |
| The function that requests route data increments the page parameter with each | |
| subsequent request, until there are no more results. | |
| For each route, the script also makes subsequent HTTP requests to the API to look | |
| up the name of the bike you used, matching on the bike_id, and the address of the | |
| hubs where you started and ended your trip (unless you started and/or ended outside | |
| a hub). | |
| The script saves bike names and hub addresses locally after requesting them, so if | |
| a subsequent route uses the same bike_id or hub_id, it is populated from the cache | |
| instead of generating another request to the API. | |
| Finally, the script sums the total distance you travelled, in miles, and calculates | |
| the equivalent distance in kilometres. | |
| The script saves the results in JSON format to a local file. It saves the data | |
| after each route, so that even if it fails for some reason, you will still save | |
| whatever has downloaded so far. | |
| """ | |
| import json | |
| import requests | |
# The API accepts either OAuth or HTTP basic auth; basic auth is used here
# because it is the simpler of the two.
username = 'your@email.address'
password = 'YourPassword123'
auth = requests.auth.HTTPBasicAuth(username, password)

# Location of the JSON file the results are written to.
path = '/path/to/your/saved/file.json'

# API endpoints. The hub and bike URLs take an id through %-formatting.
routes_url = 'https://app.socialbicycles.com/api/routes.json'
hub_url = 'https://app.socialbicycles.com/api/hubs/%s.json'
bike_url = 'https://app.socialbicycles.com/api/bikes/%s.json'

# Everything collected so far, in the shape that gets written to disk.
# 'bikes' and 'hubs' double as lookup caches for names and addresses.
data = {
    'routes': [],
    'bikes': [],
    'hubs': [],
    'total_distance': 0,
    'total_distance_km': 0,
}
def get_data(page=1):
    """Fetch every page of routes from the API and save the results.

    Starting at ``page``, successive pages of ``routes_url`` are requested
    until a page comes back with no items or with a status code other than
    200. Each route is appended to ``data['routes']`` along with the hub
    addresses and bike name looked up for it, and the data is saved after
    every route so that a later failure does not lose what has already been
    downloaded.

    Once paging stops, the total distance in miles and its kilometre
    equivalent are stored in ``data`` and the file is saved one final time.

    Args:
        page: Number of the first page to request (defaults to 1).
    """
    # Walk the pages with a loop instead of recursion: the stack stays flat
    # however many pages there are, and the totals below run exactly once.
    while True:
        thisurl = '%s?page=%s' % (routes_url, page)
        print('Requesting %s...' % (thisurl))
        response = requests.get(thisurl, auth=auth)
        print('Response code: %s' % (response.status_code))
        if response.status_code != 200:
            break

        routes_obj = response.json()
        items = routes_obj['items']
        if not items:
            print('There are no more routes to save.')
            break

        # Verb first, then the count: "There are 3 routes in this page."
        print('There %s %s route%s in this page.' % (
            'is' if len(items) == 1 else 'are',
            len(items),
            '' if len(items) == 1 else 's',
        ))
        for item in items:
            template = {
                'distance': item['distance'],  # distance is miles
                'from_hub_id': item['started_inside_hub_id'],
                'from_hub_address': lookup_hub_address(item['started_inside_hub_id']),
                'to_hub_id': item['finished_inside_hub_id'],
                'to_hub_address': lookup_hub_address(item['finished_inside_hub_id']),
                'start_time': item['start_time'],
                'finish_time': item['finish_time'],
                'first_location_address': item['first_location_address'],
                'bike_id': item['bike_id'],
                'bike_name': lookup_bike_name(item['bike_id']),
            }
            # add this route to the list of routes and save right away
            data['routes'].append(template)
            save_data()

        # move on to the page after the one the API says it just returned
        page = routes_obj['current_page'] + 1

    # now calculate the total distance
    data['total_distance'] = sum_distance(data['routes'])
    # and total distance in kilometres
    data['total_distance_km'] = convert_miles_to_km(data['total_distance'])
    # save the data again
    save_data()
    print('All done.')
def to_float(val):
    """Convert ``val`` to a float, falling back to 0 when that is not possible.

    Args:
        val: The value to convert, typically a numeric string.

    Returns:
        ``float(val)``, or 0 if ``val`` cannot be converted.
    """
    try:
        return float(val)
    except (TypeError, ValueError):
        # ValueError covers non-numeric strings; TypeError covers values such
        # as None, which float() rejects with a different exception.
        return 0
def distance(route):
    """Return the route's 'distance' entry converted with ``to_float``."""
    raw_distance = route["distance"]
    return to_float(raw_distance)
def save_data():
    """Write the module-level ``data`` dict to ``path`` as JSON."""
    with open(path, 'w') as out_file:
        json.dump(data, out_file)
def sum_distance(routes):
    """Return the combined distance of all ``routes``, rounded to 2 places."""
    total = sum(distance(trip) for trip in routes)
    return round(total, 2)
def convert_miles_to_km(miles):
    """Return ``miles`` expressed in kilometres, rounded to 2 decimal places."""
    km_per_mile = 1.60934
    kilometres = miles * km_per_mile
    return round(kilometres, 2)
def lookup_bike_name(bike_id):
    """Return the name of the bike with the given id.

    Names already stored in ``data['bikes']`` are returned without contacting
    the API. Otherwise the bike is requested from the API and, on a 200
    response, its name is stored in ``data['bikes']`` for later calls.

    Returns an empty string when ``bike_id`` is falsy or the API responds
    with any status code other than 200.
    """
    if not bike_id:  # just in case it has a None value
        return ''

    # Serve the name from the local cache when this bike was seen before.
    for known_bike in data['bikes']:
        if known_bike['bike_id'] == bike_id:
            print("Bike name found in data['bikes']")
            return known_bike['bike_name']

    this_bike_url = bike_url % (bike_id)
    print('Requesting %s...' % (this_bike_url))
    reply = requests.get(this_bike_url, auth=auth)
    print('Response code: %s' % (reply.status_code))
    if reply.status_code != 200:  # some other status code
        print('API returned HTTP status code %s.' % (reply.status_code))
        return ''

    name = reply.json()['name']
    print('API response: bike_id %s has name %s.' % (bike_id, name))
    data['bikes'].append({'bike_id': bike_id, 'bike_name': name})
    print("Bike added to data['bikes'].")
    return name
def lookup_hub_address(hub_id):
    """Return the address of the hub with the given id.

    Addresses already stored in ``data['hubs']`` are returned without
    contacting the API. Otherwise the hub is requested from the API and, on a
    200 response, its address is stored in ``data['hubs']`` for later calls.

    Returns an empty string when ``hub_id`` is falsy (the route started or
    ended outside a hub) or the API responds with any status code other than
    200.
    """
    if not hub_id:  # route terminus was outside a hub, nothing to look up
        return ''

    # Serve the address from the local cache when this hub was seen before.
    for known_hub in data['hubs']:
        if known_hub['hub_id'] == hub_id:
            print("Hub address found in data['hubs']")
            return known_hub['hub_address']

    this_hub_url = hub_url % (hub_id)
    print('Requesting %s...' % (this_hub_url))
    reply = requests.get(this_hub_url, auth=auth)
    print('Response code: %s' % (reply.status_code))
    if reply.status_code != 200:  # some other status code
        print('API returned HTTP status code %s.' % (reply.status_code))
        return ''

    address = reply.json()['address']
    print('API response: hub_id %s has address %s.' % (hub_id, address))
    data['hubs'].append({'hub_id': hub_id, 'hub_address': address})
    print("Hub added to data['hubs'].")
    return address
# Start the download only when this file is run as a script, not on import.
if __name__ == '__main__':
    get_data()
Feature ideas:
- Specify username, password and path at runtime
- Multiple output options (JSON, XML, CSV, Excel)
- Abstract screen-printing into a logging function
Quick preliminary code for CSV export:
import csv

# Export the downloaded routes (the module-level ``data`` dict) to CSV.
routes = data['routes']
if routes:  # the column names come from the first route, so skip when empty
    fields = list(routes[0].keys())
    # On Python 3, also pass newline='' to open(), as the csv docs recommend.
    with open('data.csv', 'w') as myfile:
        csv_write = csv.writer(myfile)
        csv_write.writerow(fields)
        for row in routes:
            csv_write.writerow([row[field] for field in fields])
Plus adding the bikes, hubs and totals, of course.
Code to export to xlsx format is pretty similar. Could check to see if openpyxl is installed.
line 69: Do if not items
line 114: You're doing two float conversions per loop, one in is_number and one in the iterator. I'd do:
round(sum(to_float(route['distance']) for route in routes), 2)
and define:
def to_float(val):
try:
return float(val)
except ValueError:
return 0
line 122: Do if not bike_id
line 125: Do if not bike_match
line 141: This will raise an exception if you get to line 137, because bike_name won't be defined
line 145: Do if not hub_id (unless hub_id is allowed to be 0)
line 148: if not hub_match
line 164: This will raise an exception if you get to line 160
I'd also stop doing the recursion and turn that function into a loop, but that's just me :)
Excellent suggestions, thanks for taking the time to share them! I've updated the script to incorporate the fixes (and I also noticed that I misspelled "response" on line 169).
Also you can slightly simplify line 121
Instead of
return round(sum([to_float(route['distance']) for route in routes]), 2)
do
return round(sum(to_float(route['distance']) for route in routes), 2)
Many functions (like sum()) that take a list/iterable as their argument can instead be passed a generator expression, like I showed. See https://www.python.org/dev/peps/pep-0289/#id14 for more details
And in fact, I'd simplify that further to
return round(sum(distance(route) for route in routes), 2)
and elsewhere define
def distance(route):
return to_float(route["distance"])
The More You Know™! Thanks again, I've made the change.
What is the hair-pulling option for authentication?
When I get a chance I'll adapt this into a library rather than just a kludgey standalone script.