Skip to content

Instantly share code, notes, and snippets.

@saeedesmaili
Last active January 30, 2019 14:50
Show Gist options
  • Save saeedesmaili/50c3992616dc63129146f08257ee20c9 to your computer and use it in GitHub Desktop.
Save saeedesmaili/50c3992616dc63129146f08257ee20c9 to your computer and use it in GitHub Desktop.
import pandas as pd
df = pd.read_csv('rides.csv')

# Hourly
# Plot the mean price per km for each hour of the day in Tehran,
# one line per service (Snapp vs. Tap30).

# Keep only the Tehran rows and parse the timestamps so `.dt` is available.
df_tehran = df.loc[df.city == "Tehran"].copy()
df_tehran['timestamp'] = pd.to_datetime(df_tehran['timestamp'])

# Mean price per (hour, service). The hour key keeps the Series name
# 'timestamp', so after reset_index() the 'timestamp' column holds the hour.
hour_of_day = df_tehran.timestamp.dt.hour
df_tehran_g = (
    df_tehran
    .groupby([hour_of_day, df_tehran.service], as_index=True)
    .agg({'price_per_km': 'mean'})
    .reset_index()
)

# One frame per service, each with its own price column name.
df_snapp = (
    df_tehran_g
    .loc[df_tehran_g.service == "Snapp", ['timestamp', 'price_per_km']]
    .rename(columns={'price_per_km': 'snapp_price'})
)
df_tap30 = (
    df_tehran_g
    .loc[df_tehran_g.service == "Tap30", ['timestamp', 'price_per_km']]
    .rename(columns={'price_per_km': 'tap30_price'})
)

# Inner join on the only shared column ('timestamp', i.e. the hour).
df_tehran_g = df_snapp.merge(df_tap30, on='timestamp')

# Overall Tehran mean per service, shown in the legend entries.
snapp_mean = df_tehran.loc[df_tehran.service == "Snapp", 'price_per_km'].mean()
tap30_mean = df_tehran.loc[df_tehran.service == "Tap30", 'price_per_km'].mean()

trace1 = go.Scatter(
    x=df_tehran_g.timestamp,
    y=df_tehran_g.snapp_price,
    mode='lines+markers',
    name='Snapp - avg: {0:.0f}'.format(round(snapp_mean, 0)),
    line={'color': '#00ADB5', 'width': 2},
)
trace2 = go.Scatter(
    x=df_tehran_g.timestamp,
    y=df_tehran_g.tap30_price,
    mode='lines+markers',
    name='Tap30 - avg: {0:.0f}'.format(round(tap30_mean, 0)),
    line={'color': '#000000', 'width': 2},
)
data = [trace1, trace2]

# NOTE(review): the ride count is the Tehran row count halved -- presumably
# each ride has one row per service; confirm against the CSV schema.
layout = {
    'title': 'Hourly price trend - Tehran<br><span style="font-size: 1.5rem">rides: {:,.0f}</span>'.format(df_tehran.timestamp.count()/2),
    'xaxis': {'title': 'Hour', 'showticklabels': True, 'dtick': 1},
    'yaxis': {'title': 'Price (per km)'},
    'legend': {'orientation': "h", 'x': -.1, 'y': 1.2},
}
fig = {'data': data, 'layout': layout}
iplot(fig)
# weekday
# Plot the mean price per km for each day of the week in Tehran,
# one line per service (Snapp vs. Tap30), with the days ordered Sat -> Fri.

# Filter first, then copy, so only the Tehran rows are duplicated.
df_tehran = df[df.city == "Tehran"].copy()
df_tehran['timestamp'] = pd.to_datetime(df_tehran['timestamp'])

# Mean price per (weekday number, service). `dt.weekday` numbers the days
# 0 (Monday) .. 6 (Sunday); after reset_index() the 'timestamp' column holds
# that weekday number.
df_tehran_g = df_tehran.groupby([df_tehran.timestamp.dt.weekday, df_tehran.service], as_index=True).agg({
    'price_per_km': 'mean'
})
df_tehran_g.reset_index(inplace=True)

# One frame per service, each with its own price column name.
df_snapp = df_tehran_g[df_tehran_g.service == "Snapp"][['timestamp', 'price_per_km']]
df_snapp.columns = ['timestamp', 'snapp_price']
df_tap30 = df_tehran_g[df_tehran_g.service == "Tap30"][['timestamp', 'price_per_km']]
df_tap30.columns = ['timestamp', 'tap30_price']

# Inner join on the weekday number: a day survives only if both services
# have data for it, so the result may hold fewer than seven rows.
df_tehran_g = pd.merge(df_snapp, df_tap30, on='timestamp')

# Sort the rows into the display order given by `sorter` (5 = Sat first).
sorter = [5, 6, 0, 1, 2, 3, 4]
sorterIndex = {day: position for position, day in enumerate(sorter)}
df_tehran_g['rank'] = df_tehran_g['timestamp'].map(sorterIndex)
df_tehran_g.sort_values(by='rank', inplace=True)

# Translate each weekday number to its label by value rather than assigning a
# fixed seven-item list by position: a positional assignment raises ValueError
# as soon as a weekday is missing from the merged frame.
weekday_labels = dict(zip(sorter, ['Sat', 'Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri']))
df_tehran_g['timestamp'] = df_tehran_g['timestamp'].map(weekday_labels)

trace1 = go.Scatter(
    x=df_tehran_g.timestamp,
    y=df_tehran_g.snapp_price,
    mode='lines+markers',
    name='Snapp',
    line=dict(
        color='#00ADB5',
        width=2),
)
trace2 = go.Scatter(
    x=df_tehran_g.timestamp,
    y=df_tehran_g.tap30_price,
    mode='lines+markers',
    name='Tap30',
    line=dict(
        color='#000000',
        width=2),
)
data = [trace1, trace2]

# NOTE(review): the ride count is the Tehran row count halved -- presumably
# each ride has one row per service; confirm against the CSV schema.
layout = dict(
    title='Daily price trend - Tehran<br><span style="font-size: 1rem">rides: {:,.0f}</span>'.format(df_tehran.timestamp.count()/2),
    xaxis=dict(title='Day of the week', showticklabels=True, dtick=1),
    yaxis=dict(title='Price (per km)'),
    legend=dict(orientation="h", x=-.1, y=1.2),
    width=900,
)
fig = dict(data=data, layout=layout)
iplot(fig)
## surged:
# Build a per-city surge-rate table (`df_merge`) and draw two stacked pie
# charts (Snapp on top, Tap30 below) showing normal vs. surged row counts
# across all cities.

# Non-null count of the `counts` column per (city, is_surged, service).
# NOTE(review): relies on `df` already having a `counts` column -- confirm.
df_surged = df.groupby(['city', 'is_surged', 'service'], as_index=False).count()[['city', 'service', 'is_surged', 'counts']]

# Order cities as listed in `sorter`; cities not listed get a NaN rank and sort last.
sorter = ["Tehran", "Karaj", "Tabriz", "Urmia"]
sorterIndex = {city: position for position, city in enumerate(sorter)}
df_surged['rank'] = df_surged['city'].map(sorterIndex)
df_surged.sort_values(by=['rank', 'service'], inplace=True)
del df_surged['rank']

# Total per (city, service), joined back so each row can be expressed as a
# share of its city/service total. `counts_x` is the row's own count and
# `counts_y` the total (default merge suffixes).
df_g2 = df_surged.groupby(['city', 'service'], as_index=False).sum()[['city', 'service', 'counts']]
df_merge = pd.merge(df_surged, df_g2, on=['city', 'service'], how='left')
df_merge['surge_rate'] = df_merge.counts_x / df_merge.counts_y
del df_merge['counts_x']
del df_merge['counts_y']
# Render the share as a percentage string with two decimals.
df_merge['surge_rate'] = df_merge['surge_rate'].map(lambda val: "{0:.2f}%".format(val * 100))

# Totals per (service, is_surged) over all cities -- the pie slices.
df_surged_g = df_surged.groupby(['service', 'is_surged'], as_index=False).sum()

# Derive each slice label from the row itself instead of assigning a fixed
# four-item list by position, which raises ValueError when a service/surge
# combination is absent and mislabels rows if the group order ever differs.
# NOTE(review): assumes `is_surged` is boolean-like (bool or 0/1) -- confirm.
surge_state = df_surged_g['is_surged'].astype(bool).map({False: 'normal', True: 'surged'})
df_surged_g['label'] = df_surged_g['service'] + ' - ' + surge_state

snapp_slices = df_surged_g[df_surged_g.service == "Snapp"]
tap30_slices = df_surged_g[df_surged_g.service == "Tap30"]

fig = {
    'data': [
        {
            'labels': snapp_slices.label.tolist(),
            'values': snapp_slices.counts.tolist(),
            'marker': {'colors': ['#00ADB5',
                                  '#006064',
                                  ]},
            'type': 'pie',
            # The trace name is shown on hover (hoverinfo includes "name"),
            # so it must identify the service.
            'name': 'Snapp',
            'domain': {'y': [.52, 1],
                       'x': [0, 1]},
            'hoverinfo': 'label+percent+name',
        },
        {
            'labels': tap30_slices.label.tolist(),
            'values': tap30_slices.counts.tolist(),
            'type': 'pie',
            'name': 'Tap30',
            'marker': {'colors': ['#6d6d6d',
                                  '#000000',
                                  ]},
            'domain': {'y': [0, .48],
                       'x': [0, 1]},
            'hoverinfo': 'label+percent+name',
        },
    ],
    # The ride count is taken from `df`, the same all-city frame the pies are
    # built from. NOTE(review): halved presumably because each ride has one
    # row per service -- confirm against the CSV schema.
    'layout': {'title': 'Surged Rate<br><span style="font-size: 1rem">rides: {:,.0f}</span>'.format(df.timestamp.count()/2),
               'font': {'size': 18},
               'legend': {'font': {'size': 12}},
               'width': 500,
               'height': 800
               }
}
iplot(fig, filename='pie_chart_subplots')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment