thepycoach’s gists

thepycoach / valuebet_system.py

Last active February 11, 2021 18:55

	#we create a dictionary dict_countries similar to the one we used in the scraping section
	#but in this case we need to specify the names considered in the link to download the csv e.g. "SP1" (Spanish League)
	dict_countries = {
	'Spanish La Liga':'SP1', 'Spanish Segunda Division':'SP2',
	'German Bundesliga':'D1',
	'German Bundesliga 2':'D2',
	'Italian Serie A':'I1',
	'Italian Serie B':'I2',
	'English Premier League':'E0', 'English League 1':'E2', 'English League 2':'E3',
	'French Ligue 1': 'F1', 'French Ligue 2':'F2',

thepycoach / valuebet_system.py

Last active February 11, 2021 19:00

	#loop through each league
	for league in dict_historical_data:
	#picking unique team names inside the historical_data team names
	all_teams = dict_historical_data[league]['home_team'].unique().tolist()
	#matching betfair names (dict_betfair -> dict_home_name_matching, dict_away_name_matching) with historical data (dict_historical_data -> all_teams)
	dict_home_name_matching[league][['teams_matched', 'score']] = dict_home_name_matching[league]['home_team'].apply(lambda x:process.extractOne(x, all_teams, scorer=fuzz.token_set_ratio)).apply(pd.Series)
	dict_away_name_matching[league][['teams_matched', 'score']] = dict_away_name_matching[league]['away_team'].apply(lambda x:process.extractOne(x, all_teams, scorer=fuzz.token_set_ratio)).apply(pd.Series)
	#Replacing "Historical Data" team names (teams_matched) in betfair dataframes
	home_teams = pd.merge(dict_betfair[league], dict_home_name_matching[league], on='home_team',
	how='left')[['Dates', 'over2.5', 'btts', 'teams

thepycoach / valuebet_system.py

Last active February 11, 2021 19:03

	#df_historical_data: 5 years for h2h analysis (it doesn't have 'HST', 'AST', 'HC', 'AC') Only recent seasons have all columns available
	df_historical_data = pd.concat(dict_historical_data, ignore_index=True)
	df_historical_data = df_historical_data[['date', 'home_team', 'away_team', 'home_goals', 'away_goals', 'season']]

	#df_profile: 2 years (current + last season) for real odds and stats calculation (it has 'HST', 'AST', 'HC', 'AC')
	#You can add or omit seasons
	seasons = ['19', '20']

	df_profile = pd.concat(dict_historical_data, ignore_index=True)
	df_profile = df_profile[df_profile['season'].isin(seasons)]

thepycoach / valuebet_system.py

Last active February 11, 2021 19:06

	#Betpractice method for average goals
	df_home = df_profile.groupby('home_team').mean().round(2)
	df_home['home_goals_betpractice'] = df_home['home_goals'] + df_home['away_goals']
	df_away = df_profile.groupby('away_team').mean().round(2)
	df_away['away_goals_betpractice'] = df_away['home_goals'] + df_away['away_goals']

	df_stats = pd.concat([
	df_home[['home_goals_betpractice', 'home_shots_target', 'home_corners']],
	df_away[['away_goals_betpractice', 'away_shots_target', 'away_corners']],
	], axis=1)

thepycoach / valuebet_system.py

Last active February 11, 2021 19:09

	#inputs
	match_date = '2021-02-14'
	dict_filter = {'goals':2.5, 'shots_target':9, 'corners':9}
	#-----------------------------------
	#concatenate all leagues in dict_betfair
	df_betfair = pd.concat(dict_betfair, ignore_index=True)
	df_betfair = df_betfair[df_betfair['Dates']==match_date]
	#split home and away teams
	home = df_betfair['home_team'].to_list()
	away = df_betfair['away_team'].to_list()

thepycoach / valuebet_system.py

Last active February 11, 2021 19:18

	#inputs
	dict_filter_real_odds = {'real_odds_over':2, 'real_odds_btts':2} #at least 50% of chances
	#-------
	#simplifying formula
	calculate_over = lambda x, y : calculate_real_odds(x, y, df_profile, 'Over/Under')
	calculate_btts = lambda x, y : calculate_real_odds(x, y, df_profile, 'BTTS')
	#aplying formula to calculate reald odds
	df_filters['real_odds_over'] = df_filters.apply(lambda x:calculate_over(x['home_team'], x['away_team']), axis=1)
	df_filters['real_odds_btts'] = df_filters.apply(lambda x:calculate_btts(x['home_team'], x['away_team']), axis=1)
	#filtering teams out based on reald odds

thepycoach / valuebet_system.py

Last active February 11, 2021 19:15

	def calculate_real_odds(home_team, away_team, df_profile, market):
	"""Calculates the reals odds in a match with data based on selected seasons (df_profile)"""
	df_profile = df_profile.assign(total_goals=df_profile['home_goals'] + df_profile['away_goals'])
	df_profile['Over/Under'] = np.where(df_profile['total_goals']>2, 'Over 2.5', 'Under 2.5')
	df_profile['BTTS'] = np.where((df_profile['home_goals']>0) & (df_profile['away_goals']>0), True, False)
	dict_markets = {'BTTS':[True, False], 'Over/Under':['Over 2.5', 'Under 2.5']}
	df_real_odds = df_profile[(df_profile['home_team']==home_team)\|(df_profile['away_team']==away_team)].groupby(market).count()[['total_goals']]
	option1 = df_real_odds.loc[dict_markets[market][0], 'total_goals']#over2.5
	option2 = df_real_odds.loc[dict_markets[market][1], 'total_goals']#under2.5
	percentage_odds_over = option1 / (option1 + option2)

thepycoach / valuebet_system.py

Last active February 11, 2021 19:20

	#picking columns
	df_system = df_filters[['home_team', 'away_team', 'goals', 'shorts_target', 'corners',
	'real_odds_over', 'over2.5', 'real_odds_btts', 'btts']]
	#renaming columns
	df_system.rename(columns={'shorts_target':'SoT', 'corners':'C', 'goals':'G', 'over2.5':'OVER',
	'btts':'BTTS', 'real_odds_over':'RO_OVER', 'real_odds_btts':'RO_BTTS'}, inplace=True)
	#extracting over2.5 and btts=True values
	df_system = df_system.assign(OVER = df_system['OVER'].str.extract(r'(.+)\n.+').astype(float))
	df_system = df_system.assign(BTTS = df_system['BTTS'].str.extract(r'(.+)\n.+').astype(float))
	#calculating value of bets

thepycoach / data_cleaning.py

Created March 5, 2021 23:25

	# % of rows missing in each column
	for column in df_netflix_2019.columns:
	percentage = df_netflix_2019[column].isnull().mean()
	print(f'{column}: {round(percentage*100, 2)}%')

thepycoach / data_cleaning.py

Last active March 6, 2021 13:49

	#drop column
	df_netflix_2019.drop('director', axis=1)

Frank thepycoach