Instantly share code, notes, and snippets.
dottyz
/ story_bike_share_analyze_7.py
Created
May 2, 2019 18:39
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plot total daily member trips for January and July, one panel per month.
fig, axes = plt.subplots(1, 2, figsize=(20, 7))
is_member = ridership['User Type'] == 'Member'
for ax, m in zip(axes, ['January', 'July']):
    # Keep only member rows for the current month, then total the trips per date.
    month_members = ridership[(ridership['Month'] == m) & is_member]
    daily_totals = month_members.groupby('Date')['Id'].sum().reset_index()
    sns.pointplot(x='Date', y='Id', data=daily_totals, ax=ax, markers='')
    ax.set_title(m)
    ax.set_ylabel('Total Daily Trips')
    # Re-apply the existing tick labels rotated so the dates do not overlap.
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
dottyz
/ story_bike_share_analyze_6.py
Created
May 2, 2019 18:39
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Line plot of daily trip counts over time, one line per user type.
p = sns.relplot(x='Date', y='Id', hue='User Type', data=ridership, kind='line', height=9, aspect=16/9.)
p.ax.set_ylabel('Total Daily Trips')
# Limit the number of labels shown on the x-axis to only show the first day of each month.
# The label strings are built explicitly: Text.set_text() returns None, so calling it
# inside the comprehension would hand set_xticklabels a list containing None entries.
first_of_month_labels = [
    label.get_text() if label.get_text().endswith('-01') else ''
    for label in p.ax.get_xticklabels()
]
p.ax.set_xticklabels(first_of_month_labels, rotation=45)
dottyz
/ story_bike_share_analyze_5.py
Created
May 2, 2019 18:38
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Count the distinct trip ids for every combination of date fields and user type,
# then order the rows by user type in descending order.
ridership = (
    df.groupby(['Date', 'Quarter', 'Month', 'Day of Week', 'User Type'])['Id']
    .nunique()
    .reset_index()
    .sort_values('User Type', ascending=False)
)
dottyz
/ story_bike_share_analyze_4.py
Created
May 2, 2019 18:38
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 2x2 grid: ax1 is the top row (box plots), ax2 the bottom row (density curves);
# each column shows one metric.
fig, (ax1, ax2) = plt.subplots(2, 2, figsize=(15, 15))
# The set of user types does not change between metrics, so compute it once.
user_types = df['User Type'].unique()
for i, col in enumerate(['Duration', 'Distance']):
    # Graph the distribution plots (seaborn doesn't allow default separation for distplot)
    for user in user_types:
        sns.distplot(df.loc[df['User Type'] == user, col], hist=False, ax=ax2[i])
    # Graph the boxplots
    sns.boxplot(x=col, y='User Type', data=df, ax=ax1[i])
    # Clear the box plot's x-axis label.
    ax1[i].set_xlabel('')
dottyz
/ story_bike_share_analyze_3.py
Created
May 2, 2019 18:37
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Collect each distinct route together with the coordinates of both of its stations.
route_columns = [
    'Route Id',
    'Station Id From', 'Lat From', 'Lon From',
    'Station Id To', 'Lat To', 'Lon To',
]
maps = df[route_columns].drop_duplicates()
# Distance between the two stations; geopy reports kilometres, scaled here by 1000.
maps['Distance'] = maps.apply(
    lambda route: distance.distance(
        (route['Lat From'], route['Lon From']),
        (route['Lat To'], route['Lon To']),
    ).km * 1000,
    axis=1,
)
# Attach the per-route distance to every trip in the main DataFrame.
df = df.merge(maps[['Route Id', 'Distance']], how='left', on='Route Id')
# Drop the route id and every column whose name contains 'From' or 'To'.
kept_columns = [
    name for name in df.columns
    if 'From' not in name and 'To' not in name and name != 'Route Id'
]
df = df[kept_columns]
dottyz
/ story_bike_share_analyze_2.py
Created
May 2, 2019 18:36
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Clean up column names for ease of use: strip the 'trip_' / '_seconds' parts,
# turn underscores into spaces and title-case the result.
df.columns = [
    ' '.join(x.replace('trip_', '').replace('_seconds', '').split('_')).title()
    for x in df.columns
]
df['Start Time'] = pd.to_datetime(df['Start Time'])
# Derive the calendar fields with the vectorized .dt accessor instead of calling
# a Python lambda once per row through apply().
start_time = df['Start Time']
df['Date'] = start_time.dt.strftime('%Y-%m-%d')
# Cast to int64 so the column keeps a plain 64-bit integer dtype.
df['Quarter'] = start_time.dt.quarter.astype('int64')
# NOTE(review): month_type and day_type are defined outside this snippet --
# presumably ordered CategoricalDtype instances; confirm against their definition.
df['Month'] = start_time.dt.strftime('%B').astype(month_type)
df['Day of Week'] = start_time.dt.strftime('%a').astype(day_type)
df['Hour'] = start_time.dt.strftime('%H')
dottyz
/ story_bike_share_analyze_1.py
Created
May 2, 2019 18:36
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime as dt | |
import re | |
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from geopy import distance | |
from pandas.api.types import CategoricalDtype |
dottyz
/ story_bike_share_clean_8.py
Created
May 2, 2019 18:35
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Write the DataFrame to CSV without the index column.
df.to_csv(path_or_buf='./data/bikeshare_ridership.csv', index=False)
dottyz
/ story_bike_share_clean_7.py
Created
May 2, 2019 18:34
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Removing false start trips: keep rows whose trip_duration_seconds is at least 60.
df = df[df['trip_duration_seconds'].ge(60)]
# Removing outliers: first and third quartiles of the remaining durations and the
# interquartile range between them, consumed by the filter that follows.
q1, q3 = (df['trip_duration_seconds'].quantile(q) for q in (0.25, 0.75))
interquartile_range = q3 - q1
df = df[~((df['trip_duration_seconds'] < (q1 - 1.5 * interquartile_range)) \ |
dottyz
/ story_bike_share_clean_6.py
Created
May 2, 2019 18:32
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Draw the density curve (histogram disabled) of trip durations on a single 16x9 axes.
fig, ax = plt.subplots(figsize=(16, 9))
sns.distplot(df['trip_duration_seconds'], hist=False, ax=ax)