Last active
April 21, 2018 04:39
-
-
Save sergiolucero/e88bd6306e9efaf2e3cbf3e468851555 to your computer and use it in GitHub Desktop.
scraping CityBikes (using python3!! pybikes is Python 2!!)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| import sqlite3 | |
| import requests | |
| import time | |
# Root of the CityBikes API; the per-network 'href' values returned by the
# listing below are appended to it to build each network's URL.
base_url = 'http://api.citybik.es'
# Listing endpoint, restricted to the three fields this script reads.
list_tail = '/v2/networks/?fields=id,name,href'

# Retrieve the system list.  A timeout keeps a stalled connection from
# hanging the script forever, and raise_for_status() turns an HTTP error
# into an explicit exception instead of a confusing JSON/KeyError failure.
_listing = requests.get(base_url + list_tail, timeout=30)
_listing.raise_for_status()
nets = _listing.json()['networks']
print('FOUND: %d networks' % len(nets))
def _network_frame(netdata, nix, scraping_time):
    """Build the table of stations (one row per station) for one network.

    Args:
        netdata: the 'network' object of a per-network API response; must
            contain a 'stations' list of dicts and may contain 'location'.
        nix: position of the network in the listing, used only to label
            networks that report no location.
        scraping_time: reference timestamp stored in the 'my_time' column.

    Returns:
        A pandas DataFrame with the station fields plus network-level
        columns (location fields, 'nStations', 'total_bikes', 'my_time',
        'total_slots') repeated on every row.
    """
    stations = netdata['stations']
    # Build the frame in one call from the list of station dicts.  Passing a
    # single station dict to pd.DataFrame (as the earlier per-station append
    # did) fails on all-scalar dicts and otherwise yields one row per key of
    # the nested 'extra' dict, which inflated the bike/slot totals.
    out = pd.DataFrame(stations)

    # The network location is stored too; fall back to a placeholder label
    # when the API response carries none.
    location = netdata.get('location', {'loc': 'unknown_%d' % nix})
    for var, value in location.items():
        # Stations have their own latitude/longitude columns, so the
        # network-level coordinates get a 'system_' prefix to avoid a clash.
        if var in ('latitude', 'longitude'):
            full_var = 'system_{}'.format(var)
        else:
            full_var = var
        out[full_var] = value

    # Nested dicts cannot be bound as SQLite parameters; keep their string
    # form.  The result must be assigned back, apply() is not in-place.
    if 'extra' in out:
        out['extra'] = out['extra'].apply(str)

    out['nStations'] = len(stations)
    # NOTE(review): the fallback value used when a network reports no
    # 'free_bikes' / 'empty_slots' field was not recoverable from the
    # original text; 0 is assumed here -- confirm.
    out['total_bikes'] = out['free_bikes'].sum() if 'free_bikes' in out else 0
    out['my_time'] = scraping_time
    out['total_slots'] = out['empty_slots'].sum() if 'empty_slots' in out else 0
    return out


def scrape_some():
    """Download every network in `nets` and append its stations to SQLite.

    For each entry of the module-level `nets` list the network detail is
    requested from `base_url + net['href']`, converted to a table with one
    row per station, and appended to the 'citybikes' table of 'bikes.db'.
    A progress line is printed for every tenth network.

    Networks are fetched sequentially; the original author noted a full run
    took 222 seconds (April 21st) and that this loop is a candidate for
    parallelisation with Dask.

    Returns:
        The total number of stations seen across all networks.

    Raises:
        requests.RequestException: if a request times out or the server
            answers with an HTTP error status.
    """
    station_sum = 0
    scraping_time = time.ctime()  # one shared reference time for the run
    t0 = time.time()
    # One connection for the whole run, closed even if a request fails.
    conn = sqlite3.connect('bikes.db')
    try:
        for nix, net in enumerate(nets):
            response = requests.get(base_url + net['href'], timeout=30)
            response.raise_for_status()
            netdata = response.json()['network']
            n_stations = len(netdata['stations'])
            station_sum += n_stations
            if nix % 10 == 5:
                print('[%d/%d](%d secs) [%s:%s] has %d stations'
                      % (nix, len(nets), time.time() - t0,
                         net['id'], net['name'], n_stations))
            if not n_stations:
                # Nothing to store; an empty frame would only risk creating
                # the table without the station columns.
                continue
            out = _network_frame(netdata, nix, scraping_time)
            out.to_sql('citybikes', conn, if_exists='append', index=False)
    finally:
        conn.close()
    return station_sum
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment