thepycoach’s gists

thepycoach / scraping_bookie_live.py

Last active April 22, 2021 22:57

	# Configure Chrome before launching it: run without a visible window
	# (headless) and pass an explicit window size.
	options = Options()
	options.headless = True
	options.add_argument('window-size=1920x1080') # window size handed to Chrome; presumably needed because headless = True

	web = 'https://sports.tipico.de/en/live/soccer' # target URL; not used within this excerpt
	path = '/Users/.../chromedriver' #introduce your file's path inside '...'

	# Start Chrome through the chromedriver binary at `path`, applying the options above
	driver = webdriver.Chrome(path, options=options)

thepycoach / scraping_bookie_live.py

Last active April 22, 2021 22:59

	#Make ChromeDriver click a button
	# Targets the element with id "_evidon-accept-button" (presumably the consent banner's accept button - confirm).
	# NOTE(review): this excerpt only locates the element; no click() call is visible here.
	#option 1: wait up to 5 seconds until the button is clickable
	accept = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="_evidon-accept-button"]')))
	#option 2 (disabled alternative): fixed 2-second pause, then a direct lookup
	# time.sleep(2)
	# accept = driver.find_element_by_xpath('//*[@id="_evidon-accept-button"]')

	#Initialize your storage
	teams = []  # team-name text is appended here later in the script
	x12 = []  # presumably holds the 1X2 odds - confirm against the code that fills it

thepycoach / scraping_bookie_live.py

Last active April 22, 2021 23:33

	#update 2
	# Locate the container div holding the events.
	# NOTE(review): this comment originally said live events ('Program_LIVE'), but the
	# XPath below matches a testid containing "Program_UPCOMING" - confirm which is intended.
	box = driver.find_element_by_xpath('//div[contains(@testid, "Program_UPCOMING")]') #updated
	# Look up the sport title inside `box`; find_element_* returns a single element, not a list
	sport_title = box.find_element_by_class_name('SportTitle-styles-sport') #updated

thepycoach / scraping_bookie_live.py

Last active April 22, 2021 23:23

	# update 3 (commented code not necessary anymore; presumably because sport_title is now a single element rather than a list)
	# for sport in sport_title:
	# selecting only football
	# if sport.text == 'Football':
	parent = sport_title.find_element_by_xpath('./..') #immediate parent node
	# update 4 (+3 times .find_element_by_xpath('./..'))
	# Despite its name, `grandparent` ends up four levels above `parent`
	# (five levels above sport_title): each './..' step climbs one ancestor.
	grandparent = parent.find_element_by_xpath('./..').find_element_by_xpath('./..').find_element_by_xpath('./..').find_element_by_xpath('./..')
	#3. empty groups
	try:
	empty_groups = grandparent.find_elements_by_class_name('EventOddGroup-styles-empty-group')

thepycoach / scraping_bookie_live.py

Last active April 22, 2021 23:05

	#Looking for single row events
	single_row_events = grandparent.find_elements_by_class_name('EventRow-styles-event-row')
	#4 Remove empty events from single_row_events
	# `empty_events` may never have been assigned earlier in the script; in that
	# case the NameError is caught and single_row_events is left unfiltered.
	# Only NameError is handled so that any other failure is not silently hidden.
	try:
	    single_row_events = [event for event in single_row_events if event not in empty_events]
	except NameError:
	    pass

thepycoach / scraping_bookie_live.py

Last active April 22, 2021 23:05

	#Getting data
	# For every event row, store its odds-group elements and the text of its team titles.
	for match in single_row_events:
	    #'odd_events': all odds-group elements found in this row, appended as one list per match
	    odds_event = match.find_elements_by_class_name('EventOddGroup-styles-odd-groups')
	    odds_events.append(odds_event)
	    # Team names
	    for team in match.find_elements_by_class_name('EventTeams-styles-titles'):
	        teams.append(team.text)
	#Getting data: the odds
	for odds_event in odds_events:

thepycoach / scraping_bookie_live.py

Last active December 6, 2020 02:29

	import pandas as pd
	import pickle

	#7 Widen the pandas display limits (up to 500 rows and 500 columns, 1000 characters wide)
	pd.set_option('display.max_rows', 500)
	pd.set_option('display.max_columns', 500)
	pd.set_option('display.width', 1000)

	#Storing lists within dictionary
	dict_gambling = {'Teams': teams, 'btts': btts,

thepycoach / scraping_bookie_live.py

Last active November 23, 2021 03:13

	#import libraries
	from selenium import webdriver
	from selenium.webdriver.chrome.options import Options
	from selenium.webdriver.support.ui import Select
	from selenium.webdriver.common.by import By
	from selenium.webdriver.support.ui import WebDriverWait
	from selenium.webdriver.support import expected_conditions as EC
	import time

thepycoach / find_surebet_bookie1_bookie2.py

Last active December 5, 2020 22:11

	#1.#transforming data bookie 1,2 and 3
	def _load_bookie_odds(pickle_path):
	    """Load a pickled odds DataFrame and normalise its cells.

	    Keeps only the 'Teams' and 'btts' columns, then replaces empty cells
	    and cells holding a single decimal number with the placeholder '0\\n0'.

	    Args:
	        pickle_path: Path of the pickle file to read.

	    Returns:
	        The two-column DataFrame after both replacements.
	    """
	    # NOTE: pickle.load can execute arbitrary code; only load files you produced yourself.
	    # The context manager guarantees the file handle is closed after loading.
	    with open(pickle_path, 'rb') as pickle_file:
	        df = pickle.load(pickle_file)
	    df = df[['Teams', 'btts']]
	    df = df.replace(r'', '0\n0', regex=True)#odds with no values
	    df = df.replace(r'^\d+\.\d+$', '0\n0', regex=True)#odds with only one element
	    return df

	df_tipico = _load_bookie_odds('df_tipico')

	df_bwin = _load_bookie_odds('df_bwin')

thepycoach / find_surebet_bookie1_bookie2.py

Last active December 5, 2020 22:13

	#2.String matching
	# Team names of each bookmaker as plain lists, used as candidate pools below
	teams_1 = df_tipico['Teams'].tolist()
	teams_2 = df_bwin['Teams'].tolist()
	teams_3 = df_betfair['Teams'].tolist()

	def _best_match_columns(names, candidates):
	    """Return the best fuzzy match from `candidates` for every entry of `names`.

	    Each name is scored against the candidates with fuzz.token_set_ratio;
	    the extractOne result is expanded into separate columns via pd.Series.
	    """
	    best = names.apply(lambda name: process.extractOne(name, candidates, scorer=fuzz.token_set_ratio))
	    return best.apply(pd.Series)

	#team names and scores matched
	df_tipico[['Teams_matched_bwin', 'Score_bwin']] = _best_match_columns(df_tipico['Teams'], teams_2)
	df_tipico[['Teams_matched_betfair', 'Score_betfair']] = _best_match_columns(df_tipico['Teams'], teams_3)
	df_bwin[['Teams_matched_betfair', 'Score_betfair']] = _best_match_columns(df_bwin['Teams'], teams_3)

Frank thepycoach