Skip to content

Instantly share code, notes, and snippets.

@sergiolucero
Last active April 21, 2018 04:39
Show Gist options
  • Save sergiolucero/e88bd6306e9efaf2e3cbf3e468851555 to your computer and use it in GitHub Desktop.
Save sergiolucero/e88bd6306e9efaf2e3cbf3e468851555 to your computer and use it in GitHub Desktop.
Scraping CityBikes using Python 3 (pybikes is Python 2 only).
import pandas as pd
import sqlite3
import requests
import time
# Root of the CityBikes HTTP API; network hrefs returned by the API are appended to it.
base_url = 'http://api.citybik.es'
# Listing endpoint, restricted to the three fields this script reads.
list_tail = '/v2/networks/?fields=id,name,href'
# Retrieve the system list once at import time. requests applies no timeout by
# default, so one is set explicitly to keep a stalled server from hanging the script.
nets = requests.get(base_url + list_tail, timeout=30).json()['networks']
print('FOUND: %d networks' % len(nets))
def scrape_some(*, db_path='bikes.db', table='citybikes', timeout=30):
    """Fetch every network in ``nets`` and append its stations to a SQLite table.

    For each network the detail endpoint (``base_url + net['href']``) is
    requested, its ``stations`` list is turned into a DataFrame with one row
    per station, the network's ``location`` fields and a few per-network
    aggregates are added as constant columns, and the frame is appended to
    ``table`` in the SQLite database at ``db_path``.

    Args:
        db_path: SQLite database file to write to.
        table: Name of the table the rows are appended to.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None. The result of the scrape is the rows written to the database.

    NOTE(review): the reviewed copy of this function had lost its indentation
    and had four lines cut off at the right margin. The nesting below, the
    ``'longitude'`` list entry with its ``else var`` branch, and the ``0``
    fallbacks for the totals are reconstructed -- confirm against the
    original file.
    """
    scraping_time = time.ctime()  # one reference stamp shared by every row of this run
    t0 = time.time()

    # Open a single connection for the whole run and always close it, instead
    # of opening (and leaking) a new one for every network.
    conn = sqlite3.connect(db_path)
    try:
        for nix, net in enumerate(nets):
            url = base_url + net['href']
            netdata = requests.get(url, timeout=timeout).json()['network']
            stations = netdata['stations']
            n_stations = len(stations)

            # Progress report for one network in every ten.
            if nix % 10 == 5:
                print('[%d/%d](%d secs) [%s:%s] has %d stations'
                      % (nix, len(nets), time.time() - t0,
                         net['id'], net['name'], n_stations))

            if not stations:
                # Nothing to store; an empty frame would contribute no rows.
                continue

            # Build the frame in one step: one row per station. Repeated
            # DataFrame.append was quadratic and no longer exists in pandas 2.
            out = pd.DataFrame(stations)

            # Network-level location fields become constant columns. Latitude and
            # longitude are prefixed so they do not overwrite the station's own.
            location = netdata.get('location', {'loc': 'unknown_%d' % nix})
            for var, value in location.items():
                full_var = 'system_{}'.format(var) if var in ('latitude', 'longitude') else var
                out[full_var] = value

            # SQLite cannot bind nested objects, so store 'extra' as text. The
            # converted column must be assigned back to take effect.
            if 'extra' in out:
                out['extra'] = out['extra'].apply(str)

            out['nStations'] = n_stations
            # NOTE(review): fallback value assumed to be 0 -- confirm.
            out['total_bikes'] = out['free_bikes'].sum() if 'free_bikes' in out else 0
            out['my_time'] = scraping_time
            out['total_slots'] = out['empty_slots'].sum() if 'empty_slots' in out else 0

            out.to_sql(table, conn, if_exists='append', index=False)
    finally:
        conn.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment