matmoody · April 30, 2016 17:03
diff --git a/citibike.py b/citibike.py
 import collections
 import matplotlib.pyplot as plt
 import pandas as pd
 import requests
 import sqlite3 as lite
 import time
 from pandas.io.json import json_normalize
 from dateutil.parser import parse 

 %matplotlib inline


 r = requests.get('http://www.citibikenyc.com/stations/json')

 #unique list of keys for each station listing
 key_list = []
 for station in r.json()['stationBeanList']:
    for k in station.keys():
        if k not in key_list:
            key_list.append(k)


 df = json_normalize(r.json()['stationBeanList'])

 con = lite.connect('citi_bike.db')
 cur = con.cursor()

 with con:
    cur.execute('CREATE TABLE citibike_reference (id INT PRIMARY KEY, totalDocks INT, city TEXT, altitude INT, stAddress2 TEXT, longitude NUMERIC, postalCode TEXT, testStation TEXT, stAddress1 TEXT, stationName TEXT, landMark TEXT, latitude NUMERIC, location TEXT )')

 #extract the column from the DataFrame and put them into a list
 station_ids = df['id'].tolist() 

 #add the '_' to the station name and also add the data type for SQLite
 station_ids = ['_' + str(x) + ' INT' for x in station_ids]

 #create the table
 #in this case, we're concatenating the string and joining all the station ids (now with '_' and 'INT' added)
 with con:
    cur.execute("CREATE TABLE available_bikes ( execution_time INT, " +  ", ".join(station_ids) + ");")

 #take the string and parse it into a Python datetime object
 exec_time = parse(r.json()['executionTime'])

 for i in range(60):
    r = requests.get('http://www.citibikenyc.com/stations/json')
    exec_time = parse(r.json()['executionTime']).strftime("%s")

    cur.execute('INSERT INTO available_bikes (execution_time) VALUES (?)', (exec_time,))

    for station in r.json()['stationBeanList']:
        cur.execute("UPDATE available_bikes SET _%d = %d WHERE execution_time = %s" % (station['id'], station['availableBikes'], exec_time))
    con.commit()

    time.sleep(60)

 con.close()

 df = pd.read_sql_query("SELECT * FROM available_bikes ORDER BY execution_time", con, index_col='execution_time')

 hour_change = collections.defaultdict(int)

 for col in df.columns:
    station_vals = df[col].tolist()
    # strip the "_"
    station_id = col[1:]
    station_change = 0
    for k,v in enumerate(station_vals):
        if k < len(station_vals) - 1:
            station_change += abs(station_vals[k] - station_vals[k+1])
    # convert station id back to int
    hour_change[int(station_id)] = station_change
    

 def keywithmaxval(d):
    """Find key with greatest value"""
    return max(d, key=lambda k: d[k])

 # assing max key to max_station
 max_station = keywithmaxval(hour_change)

 #query sqlite for reference information
 cur.execute("SELECT id, stationname, latitude, longitude FROM citibike_reference WHERE id = ?", (max_station,))
 data = cur.fetchone()
 print "The most active station is station id %s at %s latitude: %s longitude: %s" % data
 print "With %d bicycles coming and going in the hour between %s and %s" % (
    hour_change[max_station],
    datetime.datetime.fromtimestamp(int(df.index[0])).strftime('%Y-%m-%dT%H:%M:%S'),
    datetime.datetime.fromtimestamp(int(df.index[-1])).strftime("%Y-%m-%dT%H:%M:%S"),
 )

 # Visually inspect ^ result.
 plt.bar(hour_change.keys(), hour_change.values())
 plt.show()
	import collections
	import matplotlib.pyplot as plt
	import pandas as pd
	import requests
	import sqlite3 as lite
	import time
	from pandas.io.json import json_normalize
	from dateutil.parser import parse

	%matplotlib inline


	r = requests.get('http://www.citibikenyc.com/stations/json')

	#unique list of keys for each station listing
	key_list = []
	for station in r.json()['stationBeanList']:
	for k in station.keys():
	if k not in key_list:
	key_list.append(k)


	df = json_normalize(r.json()['stationBeanList'])

	con = lite.connect('citi_bike.db')
	cur = con.cursor()

	with con:
	cur.execute('CREATE TABLE citibike_reference (id INT PRIMARY KEY, totalDocks INT, city TEXT, altitude INT, stAddress2 TEXT, longitude NUMERIC, postalCode TEXT, testStation TEXT, stAddress1 TEXT, stationName TEXT, landMark TEXT, latitude NUMERIC, location TEXT )')

	#extract the column from the DataFrame and put them into a list
	station_ids = df['id'].tolist()

	#add the '_' to the station name and also add the data type for SQLite
	station_ids = ['_' + str(x) + ' INT' for x in station_ids]

	#create the table
	#in this case, we're concatenating the string and joining all the station ids (now with '_' and 'INT' added)
	with con:
	cur.execute("CREATE TABLE available_bikes ( execution_time INT, " + ", ".join(station_ids) + ");")

	#take the string and parse it into a Python datetime object
	exec_time = parse(r.json()['executionTime'])

	for i in range(60):
	r = requests.get('http://www.citibikenyc.com/stations/json')
	exec_time = parse(r.json()['executionTime']).strftime("%s")

	cur.execute('INSERT INTO available_bikes (execution_time) VALUES (?)', (exec_time,))

	for station in r.json()['stationBeanList']:
	cur.execute("UPDATE available_bikes SET _%d = %d WHERE execution_time = %s" % (station['id'], station['availableBikes'], exec_time))
	con.commit()

	time.sleep(60)

	con.close()

	df = pd.read_sql_query("SELECT * FROM available_bikes ORDER BY execution_time", con, index_col='execution_time')

	hour_change = collections.defaultdict(int)

	for col in df.columns:
	station_vals = df[col].tolist()
	# strip the "_"
	station_id = col[1:]
	station_change = 0
	for k,v in enumerate(station_vals):
	if k < len(station_vals) - 1:
	station_change += abs(station_vals[k] - station_vals[k+1])
	# convert station id back to int
	hour_change[int(station_id)] = station_change


	def keywithmaxval(d):
	"""Find key with greatest value"""
	return max(d, key=lambda k: d[k])

	# assing max key to max_station
	max_station = keywithmaxval(hour_change)

	#query sqlite for reference information
	cur.execute("SELECT id, stationname, latitude, longitude FROM citibike_reference WHERE id = ?", (max_station,))
	data = cur.fetchone()
	print "The most active station is station id %s at %s latitude: %s longitude: %s" % data
	print "With %d bicycles coming and going in the hour between %s and %s" % (
	hour_change[max_station],
	datetime.datetime.fromtimestamp(int(df.index[0])).strftime('%Y-%m-%dT%H:%M:%S'),
	datetime.datetime.fromtimestamp(int(df.index[-1])).strftime("%Y-%m-%dT%H:%M:%S"),
	)

	# Visually inspect ^ result.
	plt.bar(hour_change.keys(), hour_change.values())
	plt.show()