Created
October 26, 2018 00:20
-
-
Save flxai/b6150189d02b472fa02f288f7c7e3dec to your computer and use it in GitHub Desktop.
Download and plot seatfinder data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import os

import matplotlib.pyplot as plt
import pandas as pd
import requests
import seaborn as sns
# Seatfinder endpoint queried for both value series over the last year.
# '%5B0%5D' is the URL-encoded form of '[0]'. The literal is split across
# adjacent string literals for readability only; the resulting value is one
# single URL string.
BASE_URL = (
    'https://seatfinder.bibliothek.kit.edu/kassel/getdata.php'
    '?location%5B0%5D=UBA0EP,UBA1EP,UBA2EP,UBA3EP,UBB0EP,UBB0GP,'
    'UBB1EP,UBB1GP,UBB2EP,UBB2GP,LeoEG,LeoOG'
    '&values%5B0%5D=manualcount,seatestimate'
    '&after%5B0%5D=-1year'
    '&before%5B0%5D=now'
)

# Field separator used for the cached CSV files.
DELIMITER = ';'

# Local cache files; the data is only downloaded when one of them is missing.
FILE_MANUAL_COUNT = 'seatfinder_manual_count.csv'
FILE_SEAT_ESTIMATE = 'seatfinder_seat_estimate.csv'

# Size of the saved figure, in inches.
PLOT_X_INCHES = 20
PLOT_Y_INCHES = 40
def parse_seatfinder_json(snippet):
    """Parse a hierarchical seatfinder JSON snippet into a list of records.

    Args:
        snippet: Mapping whose values are iterables of entry dicts. Every
            entry must provide the keys 'location_name', 'timestamp' (itself
            a mapping with a 'date' key), 'occupied_seats' and 'free_seats'.
            The mapping's keys are not used.

    Returns:
        A list of rows. The first row is the column header; every following
        row holds one entry in iteration order. The location name and date
        are passed through unchanged, the seat counts are converted with
        ``str()``.

    Raises:
        KeyError: If an entry lacks one of the required keys.
    """
    records = [['location_name', 'timestamp', 'occupied_seats', 'free_seats']]
    # Only the per-location entry lists matter, so iterate the values directly.
    for entries in snippet.values():
        records.extend(
            [
                entry['location_name'],
                entry['timestamp']['date'],
                str(entry['occupied_seats']),
                str(entry['free_seats']),
            ]
            for entry in entries
        )
    return records
def save_csv(snippet, file_name):
    """Write the records of a seatfinder JSON snippet to a CSV file.

    The snippet is flattened with ``parse_seatfinder_json`` (header row
    first) and written using ``DELIMITER`` as the field separator, i.e. the
    same separator the file is later read back with.

    Args:
        snippet: Seatfinder JSON mapping accepted by ``parse_seatfinder_json``.
        file_name: Path of the CSV file to create or overwrite.
    """
    rows = parse_seatfinder_json(snippet)
    # newline='' is what the csv module expects for files it writes to; the
    # explicit encoding keeps the output independent of the platform default.
    with open(file_name, 'w', newline='', encoding='utf-8') as csv_file:
        # csv.writer quotes fields that contain the delimiter, which a plain
        # str.join would silently turn into extra columns.
        writer = csv.writer(csv_file, delimiter=DELIMITER, lineterminator='\n')
        writer.writerows(rows)
def download_seatfinder_data():
    """Download the seatfinder data and cache both value series as CSV files.

    Fetches ``BASE_URL``, takes the first element of the JSON response and
    stores its 'manualcount' and 'seatestimate' entries in
    ``FILE_MANUAL_COUNT`` and ``FILE_SEAT_ESTIMATE`` respectively.

    Raises:
        requests.RequestException: If the request fails, times out or the
            server answers with an HTTP error status.
        KeyError: If the response lacks one of the expected series.
    """
    # Without a timeout requests may block indefinitely on a stalled server.
    response = requests.get(BASE_URL, timeout=30)
    # Fail on HTTP errors here instead of trying to decode an error page.
    response.raise_for_status()
    data = response.json()[0]

    # Look up both series before writing anything, so an incomplete response
    # does not leave only one of the two cache files behind.
    manual_count = data['manualcount']
    seat_estimate = data['seatestimate']
    save_csv(manual_count, FILE_MANUAL_COUNT)
    save_csv(seat_estimate, FILE_SEAT_ESTIMATE)
# Download data if any of the cached CSV files is missing
if not all(os.path.isfile(path) for path in (FILE_MANUAL_COUNT, FILE_SEAT_ESTIMATE)):
    download_seatfinder_data()

# Load data
df_manual_count = pd.read_csv(FILE_MANUAL_COUNT, delimiter=DELIMITER, parse_dates=['timestamp'])
df_seat_estimate = pd.read_csv(FILE_SEAT_ESTIMATE, delimiter=DELIMITER, parse_dates=['timestamp'])

# One subplot is drawn per location that appears in the manual counts
locations = df_manual_count.location_name.unique()
if len(locations) == 0:
    # plt.subplots() cannot create zero rows; stop with a readable message.
    raise SystemExit('No locations found in ' + FILE_MANUAL_COUNT)

# Create subplots with shared x axis.
# squeeze=False always yields a 2-D array of Axes; without it a single
# location would return a bare Axes object that cannot be indexed.
fig, axes = plt.subplots(len(locations), sharex=True, sharey=True, squeeze=False)
axes = axes.ravel()
fig.set_size_inches(PLOT_X_INCHES, PLOT_Y_INCHES)
fig.suptitle('Estimated counts')

# Common x limits for all subplots, computed once from the estimate series
dates = pd.to_datetime(df_seat_estimate.timestamp, format='%Y-%m-%d %H:%M:%S.%f')
x_min, x_max = dates.min(), dates.max()

colors = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"]

for i, (location, ax) in enumerate(zip(locations, axes)):
    # Neighbouring palette entries keep line and dots distinguishable
    color1 = colors[i % len(colors)]
    color2 = colors[(i + 1) % len(colors)]

    df_seat_estimate_filtered = df_seat_estimate[df_seat_estimate['location_name'] == location]
    df_manual_count_filtered = df_manual_count[df_manual_count['location_name'] == location]

    # Plot estimated count as connected lines
    sns.lineplot(x='timestamp', y='occupied_seats', color=color1, data=df_seat_estimate_filtered, ax=ax)
    # Plot manual count as dots
    sns.scatterplot(x='timestamp', y='occupied_seats', color=color2, data=df_manual_count_filtered, ax=ax)

    # Both plots draw on the same Axes, so the labels are set once
    ax.set_xlabel('Date/Time')
    ax.set_ylabel('Occupied seats')
    ax.set_xlim(x_min, x_max)

# Uncomment to view interactively
# plt.show()
plt.savefig('seatfinder')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment