This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #import libraries | |
| #libraries subsection 1 | |
| from selenium import webdriver | |
| from selenium.webdriver.chrome.options import Options | |
| from selenium.webdriver.common.by import By | |
| from selenium.webdriver.support.ui import WebDriverWait | |
| from selenium.webdriver.support import expected_conditions as EC | |
| import time | |
| #libraries subsection 2 | |
| import pandas as pd |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #Initialize your storage | |
| dict_frames = {} | |
| #choosing the main european leagues | |
| dict_countries = { | |
| 'german football': ['German Bundesliga', 'German Bundesliga 2'], | |
| 'italian football': ['Italian Serie A', 'Italian Serie B'], | |
| 'spanish football': ['Spanish La Liga', 'Spanish Segunda Division'], | |
| 'english football': ['English Premier League', 'English League 1', 'English League 2'], | |
| 'french football': ['French Ligue 1', 'French Ligue 2'], | |
| 'dutch football': ['Dutch Eredivisie'], |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Switch the website language to English.
# `driver` is the Selenium WebDriver created elsewhere in the script.
language_box = WebDriverWait(driver, 5).until(
    EC.presence_of_element_located((By.CLASS_NAME, 'ssc-hlsw'))
)
# First open the language dropdown, then click the English (en_GB) entry.
# Both lookups are scoped to the language box found above.
for css_class in ('ssc-hls', 'ssc-en_GB'):
    language_control = WebDriverWait(language_box, 5).until(
        EC.element_to_be_clickable((By.CLASS_NAME, css_class))
    )
    language_control.click()
# The page reloads after the language switch, so block until an element that
# only exists in the English version (the "Over/Under 2.5 Goals" text) is ready.
english_marker = (By.XPATH, '//span[contains(text(), "Over/Under 2.5 Goals")]')
WebDriverWait(driver, 10).until(EC.element_to_be_clickable(english_marker))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Navigate to one league: open the "COMPETITIONS" menu, pick the country,
# then pick the league.
# Relies on names defined elsewhere in the script: `driver`, `dict_countries`,
# and the loop variables `country` (a key of dict_countries) and `league`
# (an integer index into dict_countries[country]).

# find_element(By.CLASS_NAME, ...) replaces find_element_by_class_name, which
# was removed in Selenium 4.3; this form also works on Selenium 3.
header = driver.find_element(By.CLASS_NAME, 'updated-competitions')
competition = WebDriverWait(header, 5).until(
    EC.element_to_be_clickable((By.XPATH, './/a[contains(@title, "COMPETITIONS")]'))
)
competition.click()

# The competitions menu is rendered after the click, so wait for its container.
competitions_table = WebDriverWait(driver, 5).until(
    EC.presence_of_element_located((By.ID, 'mod-multipickazmenu-1061-container'))
)

# Expand the country section whose data-category contains the country key.
country_xpath = './/div[contains(@data-category,"{}")]'.format(country)
country_button = WebDriverWait(competitions_table, 5).until(
    EC.element_to_be_clickable((By.XPATH, country_xpath))
)
country_button.click()

# Open the league link whose data-galabel contains the league name.
league_xpath = './/a[contains(@data-galabel,"{}")]'.format(dict_countries[country][league])
league_button = WebDriverWait(competitions_table, 5).until(
    EC.element_to_be_clickable((By.XPATH, league_xpath))
)
league_button.click()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #Choose your betting market and initialize store | |
| markets = ['Over/Under 2.5 Goals', 'Both teams to Score?'] | |
| dict_odds = {} | |
| #scraping the betting markets we chose | |
| for i, market in enumerate(markets): | |
| dropdown = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CLASS_NAME, 'marketchooser-container'))) | |
| dropdown.click() | |
| chooser = WebDriverWait(dropdown, 5).until(EC.element_to_be_clickable((By.XPATH, '//*[contains(text(),'+'"'+str(market)+'"'+')]'))) | |
| chooser.click() | |
| #initialize storage of data to be scraped |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build one dataframe per betting market for the league just scraped
# (this runs inside the scraping loop), both indexed by (Teams, Dates).
# `dict_odds` is filled elsewhere; presumably the `_0` / `_1` suffixes follow
# the position of each market in the `markets` list -- verify against the scraper.
index_columns = ['Teams', 'Dates']
over_under_data = {
    'Dates': dict_odds['dates_0'],
    'Teams': dict_odds['teams_0'],
    'over2.5': dict_odds['odds_0'],
}
df_over_under = pd.DataFrame(over_under_data).set_index(index_columns)
btts_data = {
    'Dates': dict_odds['dates_1'],
    'Teams': dict_odds['teams_1'],
    'btts': dict_odds['odds_1'],
}
df_btts = pd.DataFrame(btts_data).set_index(index_columns)

# Join the two markets side by side on the shared index, then turn the index
# back into regular columns.
df_betfair = (
    pd.concat([df_over_under, df_btts], axis=1, sort=True)
    .reset_index()
    .rename(columns={'index': 'Teams'})
)

# Clean up: missing odds become empty strings and the "SUSPENDED" marker line
# is stripped from every cell that contains it.
df_betfair = df_betfair.fillna('').replace('SUSPENDED\n', '', regex=True)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #loop through the dictionary (we're going to open a chrome window for every element of the dictionary) | |
| for country in dict_countries: | |
| for league in range(0, len(dict_countries[country])): | |
| #execute chromedriver with edited options | |
| driver = webdriver.Chrome(path, options=options) | |
| driver.get(web) | |
| # driver.maximize_window() #when Headless = False | |
| # option1 | |
| # accept = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="onetrust-accept-btn-handler"]'))) | |
| # option 2 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Use the datetime library to turn the scraped date labels into datetimes.
# The relative words "In-Play", "Today" and "Tomorrow" are first replaced by a
# "Weekday, DD Month" label, then the "DD Month" part is parsed.
today = datetime.date.today()
tomorrow = today + datetime.timedelta(days=1)
year = today.strftime("%Y")
# Applied in this order, one after the other, to every label.
relative_labels = (
    ('In-Play', today.strftime("%A, %d %B")),
    ('Today', today.strftime("%A, %d %B")),
    ('Tomorrow', tomorrow.strftime("%A, %d %B")),
)


def _betfair_label_to_datetime(label):
    """Return the datetime for one scraped date label.

    The label carries no year, so the current year is tried first. A result
    that falls before today is moved to the following year: without this,
    "Tomorrow" scraped on 31 December (or any January fixture scraped in
    December) would be dated almost a year in the past.
    """
    for word, replacement in relative_labels:
        label = label.replace(word, replacement)
    # Keep only the "DD Month" part that follows the weekday name.
    day_and_month = label.split(',')[1].strip()
    parsed = datetime.datetime.strptime(year + ' ' + day_and_month, '%Y %d %B')
    if parsed.date() < today:
        parsed = parsed.replace(year=parsed.year + 1)
    return parsed


df_betfair['Dates'] = df_betfair['Dates'].apply(_betfair_label_to_datetime)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #import libraries | |
| import pandas as pd | |
| import numpy as np | |
| from fuzzywuzzy import process, fuzz | |
| import pickle | |
| import re | |
| import datetime |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the data scraped in section 1. The loop below treats it as a mapping
# from league to a dataframe that has a 'Teams' column.
# NOTE(security): unpickling can execute arbitrary code, so only load a file
# produced by your own scraper run.
# The `with` block closes the file handle as soon as loading finishes.
with open('dict_betfair', 'rb') as pickled_odds:
    dict_betfair = pickle.load(pickled_odds)

# Initialize storage (these dictionaries are used to match team names between
# betfair and historical_data).
dict_home_name_matching = {}
dict_away_name_matching = {}

# For each league, collect the names of all home and away teams found in the
# scraped fixtures.
for league in dict_betfair:
    df_league = dict_betfair[league]
    # 'Teams' holds both names separated by a newline: first line -> home_team,
    # second line -> away_team. The new columns are added to the stored frame.
    df_league[['home_team', 'away_team']] = df_league['Teams'].str.extract(r'(.+)\n(.+)')
    # groupby + count is used only to get one row per distinct team name.
    dict_home_name_matching[league] = df_league.groupby('home_team', as_index=False).count()[['home_team']]
    dict_away_name_matching[league] = df_league.groupby('away_team', as_index=False).count()[['away_team']]