This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Function to plot effectiveness of a player | |
def plot_effectiveness(player): | |
pw = tennis_df[(tennis_df['winner_name'] == player)].groupby(['tourney_year','surface'], as_index=False).agg(['count']) | |
pww = pw['tourney_id'].reset_index() | |
pl = tennis_df[(tennis_df['loser_name'] == player)].groupby(['tourney_year','surface'], as_index=False).agg(['count']) | |
pll = pl['tourney_id'].reset_index() | |
pww.columns = ['tourney_year','surface','wins'] | |
pll.columns = ['tourney_year','surface','loses'] | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Age of Grand Slam champions: keep only finals (round 'F') of tournaments
# whose level is 'G', and drop matches with no recorded winner age.
tennis_df_win = tennis_df.loc[
    tennis_df['tourney_level'].isin(['G']) & (tennis_df['round'] == 'F')
].dropna(subset=['winner_age'])

# One row per final, with the winner columns renamed to generic names.
dfw = tennis_df_win[
    ['tourney_year', 'tourney_name', 'winner_name', 'winner_age']
].rename(columns={'winner_name': 'player', 'winner_age': 'age'})

# Mean age per (year, tournament), then mean of those values per year.
dfs_final = (
    dfw.groupby(['tourney_year', 'tourney_name'])['age']
    .mean()
    .reset_index()
)
dfs_final_2 = dfs_final.groupby(['tourney_year'])['age'].mean().reset_index()

# Single full-width axes for the plot.
fig = plt.figure(figsize=(15, 5))
ax = fig.add_subplot(1, 1, 1)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Matches that ended in a retirement: the score string contains "RET".
# na=False makes a missing score count as "no retirement", which is what
# comparing the raw result with True did.
ret_df = tennis_df.loc[
    tennis_df['score'].str.contains("RET", na=False),
    ['tourney_year', 'tourney_level', 'surface', 'tourney_id', 'winner_name'],
]

# Non-null counts of the remaining columns per (year, surface).
# NOTE(review): this name is `ref_df_f`, not `ret_df_f`; it is kept unchanged
# here -- confirm which spelling the rest of the notebook expects.
ref_df_f = ret_df.groupby(['tourney_year', 'surface'], as_index=False).count()

# Single full-width axes for the plot.
fig = plt.figure(figsize=(15, 5))
ax = fig.add_subplot(1, 1, 1)

# NOTE(review): the style is selected after the figure has been created, and
# newer Matplotlib releases appear to name it 'seaborn-v0_8-colorblind' --
# confirm against the installed version.
plt.style.use('seaborn-colorblind')
ax.set_title('Retirements - Evolution of Retirements by Surface')
ax.set_ylabel('Number of Retirements')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Label the retirement counts (renames the existing frame in place).
ret_df_f.columns = ['tourney_year', 'surface', 'rets']

# Matches whose score is present and does not contain "RET".
# Rows with a missing score compare unequal to False and are left out.
notret_df = tennis_df.loc[
    tennis_df['score'].str.contains("RET").eq(False),
    ['tourney_year', 'surface', 'tourney_id'],
]

# Count of such matches per (year, surface), stored under 'norets'.
notret_counts = notret_df.groupby(
    ['tourney_year', 'surface'], as_index=False
).count()
notret_df_f = notret_counts[['tourney_year', 'surface', 'tourney_id']].rename(
    columns={'tourney_id': 'norets'}
)

# Stack both tables; each one lacks the other's count column, so the gaps
# are filled with 0 before summing per (year, surface).
dfs = (ret_df_f, notret_df_f)
dfs_concat = pd.concat(dfs)
dfs_c = (
    dfs_concat.fillna(0)
    .groupby(['tourney_year', 'surface'])[['rets', 'norets']]
    .sum()
    .reset_index()
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Head-to-head record for every pair of players.
#
# h2h_wl: one row per (winner, loser) with the number of matches and the most
#         recent year in which that winner beat that loser.
# h2h_lw: the same aggregation keyed as (loser, winner), so that a row
#         (player_a, player_b) holds the matches player_a LOST to player_b.
h2h_wl = (
    tennis_df_all.groupby(['winner_name', 'loser_name'])
    .agg({'tourney_id': 'count', 'tourney_year': 'max'})
    .reset_index()
)
h2h_wl.columns = ['player_a', 'player_b', 'total', 'year']

h2h_lw = (
    tennis_df_all.groupby(['loser_name', 'winner_name'])
    .agg({'tourney_id': 'count', 'tourney_year': 'max'})
    .reset_index()
)
h2h_lw.columns = ['player_a', 'player_b', 'total', 'year']

# Default (inner) merge: only pairs where each player has beaten the other at
# least once survive. The `_x` columns come from h2h_wl (player_a's wins over
# player_b) and the `_y` columns from h2h_lw (player_b's wins over player_a).
h2h_f = h2h_wl.merge(h2h_lw, on=['player_a', 'player_b'])
h2h_f['total'] = h2h_f['total_x'] + h2h_f['total_y']

# Put every pair in alphabetical order so (A, B) and (B, A) become the same
# key. Both name arrays are computed from the ORIGINAL columns before either
# column is overwritten: assigning player_a first and then comparing against
# it would leave the same name in both columns for every swapped row.
# total_x / total_y / year_x / year_y are not swapped and keep the
# orientation of the row as it came out of the merge.
a_sorts_first = h2h_f['player_a'] < h2h_f['player_b']
first_player = np.where(a_sorts_first, h2h_f['player_a'], h2h_f['player_b'])
second_player = np.where(a_sorts_first, h2h_f['player_b'], h2h_f['player_a'])
h2h_f['player_a'] = first_player
h2h_f['player_b'] = second_player

# Most recent year in which the two players met, whichever of them won.
h2h_f['year'] = np.where(
    h2h_f['year_x'] > h2h_f['year_y'], h2h_f['year_x'], h2h_f['year_y']
)
NewerOlder