Skip to content

Instantly share code, notes, and snippets.

#we create a dictionary dict_countries similar to the one we used in the scraping section
#but in this case we need to specify the names considered in the link to download the csv e.g. "SP1" (Spanish League)
dict_countries = {
'Spanish La Liga':'SP1', 'Spanish Segunda Division':'SP2',
'German Bundesliga':'D1',
'German Bundesliga 2':'D2',
'Italian Serie A':'I1',
'Italian Serie B':'I2',
'English Premier League':'E0', 'English League 1':'E2', 'English League 2':'E3',
'French Ligue 1': 'F1', 'French Ligue 2':'F2',
#loop through each league
for league in dict_historical_data:
#picking unique team names inside the historical_data team names
all_teams = dict_historical_data[league]['home_team'].unique().tolist()
#matching betfair names (dict_betfair -> dict_home_name_matching, dict_away_name_matching) with historical data (dict_historical_data -> all_teams)
dict_home_name_matching[league][['teams_matched', 'score']] = dict_home_name_matching[league]['home_team'].apply(lambda x:process.extractOne(x, all_teams, scorer=fuzz.token_set_ratio)).apply(pd.Series)
dict_away_name_matching[league][['teams_matched', 'score']] = dict_away_name_matching[league]['away_team'].apply(lambda x:process.extractOne(x, all_teams, scorer=fuzz.token_set_ratio)).apply(pd.Series)
#Replacing "Historical Data" team names (teams_matched) in betfair dataframes
home_teams = pd.merge(dict_betfair[league], dict_home_name_matching[league], on='home_team',
how='left')[['Dates', 'over2.5', 'btts', 'teams
#df_historical_data: 5 years for h2h analysis (it doesn't have 'HST', 'AST', 'HC', 'AC') Only recent seasons have all columns available
df_historical_data = pd.concat(dict_historical_data, ignore_index=True)
df_historical_data = df_historical_data[['date', 'home_team', 'away_team', 'home_goals', 'away_goals', 'season']]
#df_profile: 2 years (current + last season) for real odds and stats calculation (it has 'HST', 'AST', 'HC', 'AC')
#You can add or omit seasons
seasons = ['19', '20']
df_profile = pd.concat(dict_historical_data, ignore_index=True)
df_profile = df_profile[df_profile['season'].isin(seasons)]
#Betpractice method for average goals
df_home = df_profile.groupby('home_team').mean().round(2)
df_home['home_goals_betpractice'] = df_home['home_goals'] + df_home['away_goals']
df_away = df_profile.groupby('away_team').mean().round(2)
df_away['away_goals_betpractice'] = df_away['home_goals'] + df_away['away_goals']
df_stats = pd.concat([
df_home[['home_goals_betpractice', 'home_shots_target', 'home_corners']],
df_away[['away_goals_betpractice', 'away_shots_target', 'away_corners']],
], axis=1)
#inputs
match_date = '2021-02-14'
dict_filter = {'goals':2.5, 'shots_target':9, 'corners':9}
#-----------------------------------
#concatenate all leagues in dict_betfair
df_betfair = pd.concat(dict_betfair, ignore_index=True)
df_betfair = df_betfair[df_betfair['Dates']==match_date]
#split home and away teams
home = df_betfair['home_team'].to_list()
away = df_betfair['away_team'].to_list()
#inputs
dict_filter_real_odds = {'real_odds_over':2, 'real_odds_btts':2} #at least 50% of chances
#-------
#simplifying formula
calculate_over = lambda x, y : calculate_real_odds(x, y, df_profile, 'Over/Under')
calculate_btts = lambda x, y : calculate_real_odds(x, y, df_profile, 'BTTS')
#aplying formula to calculate reald odds
df_filters['real_odds_over'] = df_filters.apply(lambda x:calculate_over(x['home_team'], x['away_team']), axis=1)
df_filters['real_odds_btts'] = df_filters.apply(lambda x:calculate_btts(x['home_team'], x['away_team']), axis=1)
#filtering teams out based on reald odds
def calculate_real_odds(home_team, away_team, df_profile, market):
"""Calculates the reals odds in a match with data based on selected seasons (df_profile)"""
df_profile = df_profile.assign(total_goals=df_profile['home_goals'] + df_profile['away_goals'])
df_profile['Over/Under'] = np.where(df_profile['total_goals']>2, 'Over 2.5', 'Under 2.5')
df_profile['BTTS'] = np.where((df_profile['home_goals']>0) & (df_profile['away_goals']>0), True, False)
dict_markets = {'BTTS':[True, False], 'Over/Under':['Over 2.5', 'Under 2.5']}
df_real_odds = df_profile[(df_profile['home_team']==home_team)|(df_profile['away_team']==away_team)].groupby(market).count()[['total_goals']]
option1 = df_real_odds.loc[dict_markets[market][0], 'total_goals']#over2.5
option2 = df_real_odds.loc[dict_markets[market][1], 'total_goals']#under2.5
percentage_odds_over = option1 / (option1 + option2)
#picking columns
df_system = df_filters[['home_team', 'away_team', 'goals', 'shorts_target', 'corners',
'real_odds_over', 'over2.5', 'real_odds_btts', 'btts']]
#renaming columns
df_system.rename(columns={'shorts_target':'SoT', 'corners':'C', 'goals':'G', 'over2.5':'OVER',
'btts':'BTTS', 'real_odds_over':'RO_OVER', 'real_odds_btts':'RO_BTTS'}, inplace=True)
#extracting over2.5 and btts=True values
df_system = df_system.assign(OVER = df_system['OVER'].str.extract(r'(.+)\n.+').astype(float))
df_system = df_system.assign(BTTS = df_system['BTTS'].str.extract(r'(.+)\n.+').astype(float))
#calculating value of bets
# % of rows missing in each column
for column in df_netflix_2019.columns:
percentage = df_netflix_2019[column].isnull().mean()
print(f'{column}: {round(percentage*100, 2)}%')
#drop column
df_netflix_2019.drop('director', axis=1)