Instantly share code, notes, and snippets.
dottyz
/ story_bike_share_analyze_3.py
Created
May 2, 2019 18:37
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a table of the distinct routes together with their start and end coordinates
route_columns = [
    'Route Id',
    'Station Id From', 'Lat From', 'Lon From',
    'Station Id To', 'Lat To', 'Lon To',
]
maps = df[route_columns].drop_duplicates()


def _route_length(route):
    """Return the start-to-end distance of one route row (kilometres scaled by 1000)."""
    start = (route['Lat From'], route['Lon From'])
    end = (route['Lat To'], route['Lon To'])
    return distance.distance(start, end).km * 1000


maps['Distance'] = maps.apply(_route_length, axis=1)

# Attach the computed distance to every trip in the main DataFrame via its route id
df = df.merge(maps[['Route Id', 'Distance']], how='left', on='Route Id')

# Drop 'Route Id' and every column whose name contains 'From' or 'To'
kept_columns = [
    name for name in df.columns
    if not ('From' in name or 'To' in name or name == 'Route Id')
]
df = df[kept_columns]
dottyz
/ story_bike_share_analyze_4.py
Created
May 2, 2019 18:38
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 2x2 grid: the first row (ax1) receives the boxplots, the second row (ax2) the density curves
fig, (ax1, ax2) = plt.subplots(2, 2, figsize=(15, 15))
user_types = df['User Type'].unique()
for i, col in enumerate(['Duration', 'Distance']):
    # Draw one density curve per user type (distplot cannot split by a column on its own)
    for user in user_types:
        user_rows = df[df['User Type'] == user]
        sns.distplot(user_rows[col], hist=False, ax=ax2[i])
    # Boxplot of the same measure, one box per user type
    sns.boxplot(x=col, y='User Type', data=df, ax=ax1[i])
    ax1[i].set_xlabel('')
dottyz
/ story_bike_share_analyze_5.py
Created
May 2, 2019 18:38
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Count the distinct trip ids for every combination of the date fields and user type
date_fields = ['Date', 'Quarter', 'Month', 'Day of Week']
ridership = (
    df.groupby(date_fields + ['User Type'])['Id']
    .nunique()
    .reset_index()
    .sort_values('User Type', ascending=False)
)
dottyz
/ story_bike_share_analyze_6.py
Created
May 2, 2019 18:39
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Line plot of the daily trip counts, one line per user type
p = sns.relplot(x='Date', y='Id', hue='User Type', data=ridership, kind='line', height=9, aspect=16/9.)
p.ax.set_ylabel('Total Daily Trips')
# Limit the labels shown on the x-axis to the first day of each month.
# The labels are built as plain strings: Text.set_text() returns None, so using
# its result as a list element would hand None entries to set_xticklabels.
tick_labels = [
    text if text.endswith('-01') else ''
    for text in (tick.get_text() for tick in p.ax.get_xticklabels())
]
p.ax.set_xticklabels(tick_labels, rotation=45)
dottyz
/ story_bike_share_analyze_7.py
Created
May 2, 2019 18:39
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One panel each for January and July, showing total daily trips by members
fig, axes = plt.subplots(1, 2, figsize=(20, 7))
for ax, m in zip(axes, ['January', 'July']):
    in_month = ridership['Month'] == m
    is_member = ridership['User Type'] == 'Member'
    # Sum the trip counts per date for the selected month and user type
    daily_totals = ridership[in_month & is_member].groupby('Date')['Id'].sum().reset_index()
    sns.pointplot(x='Date', y='Id', data=daily_totals, ax=ax, markers='')
    ax.set_title(m)
    ax.set_ylabel('Total Daily Trips')
    # Re-apply the existing tick labels, rotated by 45 degrees
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
dottyz
/ story_bike_share_analyze_8.py
Created
May 2, 2019 18:40
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fig, axes = plt.subplots(2, 2, figsize=(15, 15))
# Turn the 2x2 grid of axes into a flat sequence so it can be zipped with the quarters
axes = np.array(axes).flatten()
# Month-range title for each quarter, indexed by quarter number minus one
quarter_names = [
    'Jan. - Mar.',
    'Apr. - Jun.',
    'Jul. - Sept.',
    'Oct. - Dec.',
]
quarters = sorted(ridership['Quarter'].unique())
for q, ax in zip(quarters, axes):
    ax.set_title(quarter_names[q - 1])
dottyz
/ story_bike_share_analyze_9.py
Created
May 2, 2019 18:41
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Draw a row of three subplots, one per distinct month found in quarter 3 of `ridership`.
# NOTE(review): indentation was lost in this view; the lines after the `for`
# header presumably form the loop body — confirm against the original file.
fig, axes = plt.subplots(1, 3, figsize=(18, 6)) | |
axes = np.array(axes).flatten() | |
# Pair each distinct quarter-3 month with one of the axes
for m, ax in zip(ridership[ridership['Quarter']==3]['Month'].unique(), axes): | |
ax.set_title(m) | |
# Same fixed y-range on every panel
ax.set_ylim(0, 7000) | |
ax.set_ylabel('Average Daily Trips') | |
# NOTE(review): the barplot call below is cut off in this view; its remaining
# arguments are not visible here.
sns.barplot( | |
x='Day of Week', | |
y='Id', |
dottyz
/ story_bike_share_analyze_10.py
Created
May 2, 2019 18:41
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Distinct trips per (date, hour, user type), then averaged across dates
# for every (hour, user type) pair
trips_per_hour = df.groupby(['Date', 'Hour', 'User Type'])['Id'].nunique()
data = trips_per_hour.groupby(['Hour', 'User Type']).mean().reset_index()

fig, ax = plt.subplots(figsize=(16, 9))
sns.barplot(x='Hour', y='Id', hue='User Type', data=data, ax=ax)
ax.set_ylabel('Average Hourly Trips')
dottyz
/ story_bike_share_analyze_11.py
Last active
May 3, 2019 14:22
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import the weather data, skipping the first 22 rows (descriptions of the weather station)
weather = pd.read_csv('./data/weather.csv', header=22)


def _clean_column(name):
    """Rename 'Date/Time' to 'Date'; otherwise strip any parenthesised unit suffix."""
    if name == 'Date/Time':
        return 'Date'
    return re.sub(r'\([^()]*\)', '', name).strip()


# Remove units contained in the column names (e.g. Celsius, mm, etc.)
weather.columns = [_clean_column(name) for name in weather.columns]

# Distinct trips per date, pivoted so that each user type gets its own column
daily_trips = df.groupby(['Date', 'User Type'])['Id'].nunique().to_frame()
data = daily_trips.pivot_table(index='Date', columns='User Type').reset_index()
data.columns = ['Date', 'Casual Trips', 'Member Trips']

# Keep only the dates present in both the ridership and the weather tables
weather_fields = weather[['Date', 'Mean Temp', 'Total Precip']]
data = data.merge(weather_fields, on='Date', how='inner')
dottyz
/ story_bike_share_analyze_12.py
Created
May 2, 2019 18:43
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fig, ax = plt.subplots(figsize=(16, 9))
# Twin axis so that ridership and temperature can be displayed on the same graph
ax2 = ax.twinx()
# Default seaborn colour palette
palette = sns.color_palette()

trip_columns = ['Casual Trips', 'Member Trips']
for i, user_type in enumerate(trip_columns):
    # One ridership line per user type, each in its own palette colour
    sns.lineplot(x='Date', y=user_type, data=data, ax=ax, color=palette[i], markers='')

# NOTE(review): indentation was lost in the source; this call is assumed to sit
# after the loop because it does not depend on the loop variables — confirm.
sns.pointplot(x='Date', y='Mean Temp', data=data, ax=ax2, color=palette[2], markers='x')