Skip to content

Instantly share code, notes, and snippets.

#import libraries
#libraries subsection 1
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
#libraries subsection 2
import pandas as pd
# Initialize your storage (starts empty; it is filled outside this section).
dict_frames = {}
# Main European leagues to scrape: each key is a country label and each value
# is the list of league names to visit for that country.
dict_countries = {
    'german football': ['German Bundesliga', 'German Bundesliga 2'],
    'italian football': ['Italian Serie A', 'Italian Serie B'],
    'spanish football': ['Spanish La Liga', 'Spanish Segunda Division'],
    'english football': ['English Premier League', 'English League 1', 'English League 2'],
    'french football': ['French Ligue 1', 'French Ligue 2'],
    'dutch football': ['Dutch Eredivisie'],
}
# Switch the website language to English.
language_box = WebDriverWait(driver, 5).until(
    EC.presence_of_element_located((By.CLASS_NAME, 'ssc-hlsw'))
)
# Click the language dropdown first, then the English (en_GB) entry inside it.
for language_class in ('ssc-hls', 'ssc-en_GB'):
    language_option = WebDriverWait(language_box, 5).until(
        EC.element_to_be_clickable((By.CLASS_NAME, language_class))
    )
    language_option.click()
# The page reloads after the language switch, so block until a label that only
# exists in English ("Over/Under 2.5 Goals") becomes clickable.
english_marker = (By.XPATH, '//span[contains(text(), "Over/Under 2.5 Goals")]')
WebDriverWait(driver, 10).until(EC.element_to_be_clickable(english_marker))
# Scraping different competitions across the leagues within the dictionary.
# `country` and `league` are read from the surrounding scope: `country` is a
# key of dict_countries and `league` an index into that country's league list.
# find_element(By...) replaces find_element_by_class_name, which newer
# Selenium releases no longer provide.
header = driver.find_element(By.CLASS_NAME, 'updated-competitions')
competition = WebDriverWait(header, 5).until(
    EC.element_to_be_clickable((By.XPATH, './/a[contains(@title, "COMPETITIONS")]'))
)
competition.click()
# The competitions menu lists countries; opening a country reveals its leagues.
competitions_table = WebDriverWait(driver, 5).until(
    EC.presence_of_element_located((By.ID, 'mod-multipickazmenu-1061-container'))
)
country_xpath = './/div[contains(@data-category,"{}")]'.format(country)
country_button = WebDriverWait(competitions_table, 5).until(
    EC.element_to_be_clickable((By.XPATH, country_xpath))
)
country_button.click()
league_xpath = './/a[contains(@data-galabel,"{}")]'.format(dict_countries[country][league])
league_button = WebDriverWait(competitions_table, 5).until(
    EC.element_to_be_clickable((By.XPATH, league_xpath))
)
league_button.click()
# Choose your betting market and initialize store
markets = ['Over/Under 2.5 Goals', 'Both teams to Score?']
dict_odds = {}
# Scraping the betting markets we chose: for each market, open the market
# chooser and click the entry whose text contains the market name.
# `i` is the market's position in `markets`.
for i, market in enumerate(markets):
    dropdown = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CLASS_NAME, 'marketchooser-container'))
    )
    dropdown.click()
    # NOTE(review): the XPath starts with '//' so it matches anywhere in the
    # document, not only inside `dropdown` - confirm this is intended.
    market_xpath = '//*[contains(text(),"{}")]'.format(market)
    chooser = WebDriverWait(dropdown, 5).until(
        EC.element_to_be_clickable((By.XPATH, market_xpath))
    )
    chooser.click()
#initialize storage of data to be scraped
# Build one dataframe per betting market for the league being scraped, each
# indexed by (Teams, Dates) so the two markets can be aligned on that index.
df_over_under = pd.DataFrame({
    'Dates': dict_odds['dates_0'],
    'Teams': dict_odds['teams_0'],
    'over2.5': dict_odds['odds_0'],
}).set_index(['Teams', 'Dates'])
df_btts = pd.DataFrame({
    'Dates': dict_odds['dates_1'],
    'Teams': dict_odds['teams_1'],
    'btts': dict_odds['odds_1'],
}).set_index(['Teams', 'Dates'])
# Join the markets side by side, move the index back into columns, then clean
# the data: missing values become empty strings and the 'SUSPENDED\n' marker
# is stripped wherever it appears.
df_betfair = (
    pd.concat([df_over_under, df_btts], axis=1, sort=True)
    .reset_index()
    .rename(columns={'index': 'Teams'})
    .fillna('')
    .replace('SUSPENDED\n', '', regex=True)
)
# Loop through the dictionary: a new Chrome window is opened for every league
# of every country in dict_countries. `league` is the integer position of the
# league inside dict_countries[country].
for country in dict_countries:
    for league in range(len(dict_countries[country])):
        # Execute chromedriver with edited options.
        # NOTE(review): passing the driver path positionally looks like the
        # Selenium 3 call style - confirm against the installed Selenium version.
        driver = webdriver.Chrome(path, options=options)
        driver.get(web)
        # driver.maximize_window() #when Headless = False
        # option1
        # accept = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="onetrust-accept-btn-handler"]')))
        # option 2
# Use the datetime library to turn the labels "In-Play", "Today" and
# "Tomorrow" (and "<weekday>, <day> <month>" headings) into real dates.
today = datetime.date.today()
tomorrow = today + datetime.timedelta(days=1)
year = today.strftime("%Y")


def _parse_fixture_date(label, today, tomorrow):
    """Return the datetime described by a fixture date label.

    The relative words "In-Play" and "Today" are replaced by today's date and
    "Tomorrow" by tomorrow's date, written as "<weekday>, <day> <month>".
    The part after the comma is then parsed as "<day> <month>" using the
    year of `today`.

    Labels carry no year. A date that would fall before `today` is moved to
    the following year, so a January fixture read in late December is not
    placed in the past.

    Raises IndexError if the label has no comma after substitution and
    ValueError if the "<day> <month>" part cannot be parsed.
    """
    today_text = today.strftime("%A, %d %B")
    substitutions = (
        ('In-Play', today_text),
        ('Today', today_text),
        ('Tomorrow', tomorrow.strftime("%A, %d %B")),
    )
    for word, replacement in substitutions:
        label = re.sub(word, replacement, label)
    day_month = label.split(',')[1].strip()
    parsed = datetime.datetime.strptime(today.strftime("%Y") + ' ' + day_month, '%Y %d %B')
    if parsed.date() < today:
        parsed = parsed.replace(year=parsed.year + 1)
    return parsed


df_betfair['Dates'] = df_betfair['Dates'].apply(
    lambda label: _parse_fixture_date(label, today, tomorrow)
)
#import libraries
import pandas as pd
import numpy as np
from fuzzywuzzy import process, fuzz
import pickle
import re
import datetime
# Load the pickled data scraped in section 1.
# NOTE: unpickling can execute arbitrary code; only load a 'dict_betfair'
# file that was produced by your own scraper run.
with open('dict_betfair', 'rb') as betfair_file:
    dict_betfair = pickle.load(betfair_file)
# Initialize storage (we'll use these dictionaries to match names between
# betfair and historical_data).
dict_home_name_matching = {}
dict_away_name_matching = {}
# For every league, split the 'Teams' text into the two newline-separated
# names (first line -> home_team, second line -> away_team), then store one
# single-column dataframe of distinct home names and one of distinct away names.
for league in dict_betfair:
    league_frame = dict_betfair[league]
    league_frame[['home_team', 'away_team']] = league_frame['Teams'].str.extract(r'(.+)\n(.+)')
    dict_home_name_matching[league] = league_frame.groupby('home_team', as_index=False).count()[['home_team']]
    dict_away_name_matching[league] = league_frame.groupby('away_team', as_index=False).count()[['away_team']]