Last active
February 11, 2021 18:55
-
-
Save thepycoach/10ea0bf1b5f39e7c8bb845cfb97db47a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# We create a dictionary dict_countries similar to the one we used in the
# scraping section, but here each league name maps to the code used in the
# link to download its csv, e.g. "SP1" (Spanish League).
# Insertion order is kept as-is because the download loop iterates over it.
dict_countries = {
    'Spanish La Liga': 'SP1',
    'Spanish Segunda Division': 'SP2',
    'German Bundesliga': 'D1',
    'German Bundesliga 2': 'D2',
    'Italian Serie A': 'I1',
    'Italian Serie B': 'I2',
    'English Premier League': 'E0',
    'English League 1': 'E2',
    'English League 2': 'E3',
    'French Ligue 1': 'F1',
    'French Ligue 2': 'F2',
    'Dutch Eredivisie': 'N1',
    'Belgian First Division A': 'B1',
    'Portuguese Primeira Liga': 'P1',
    'Turkish Super League': 'T1',
    'Greek Super League': 'G1',
}
# dict_historical_data maps each league name to a single DataFrame holding
# every downloaded season. range(15, 21) covers six seasons: 15/16 through
# 20/21. We'll use it to manage 2 dataframes: df_historical_data and df_profile.
dict_historical_data = {}
# To download all the leagues we loop through the dictionary.
for league, code in dict_countries.items():
    frames = []
    for i in range(15, 21):
        # The season folder is the two-digit start year followed by the
        # two-digit end year, e.g. "1516". Build the URL once so the
        # fallback read below hits exactly the same file.
        url = "http://www.football-data.co.uk/mmz4281/" + str(i) + str(i + 1) + "/" + code + ".csv"
        try:
            df = pd.read_csv(url)
        except UnicodeDecodeError:  # Italian Serie B (0xa0 utf-8)
            # Only a decoding failure warrants a re-read with another
            # encoding; network/HTTP errors are left to propagate instead of
            # being hidden behind a second identical request.
            df = pd.read_csv(url, encoding='unicode_escape')
        # Tag every row with the season's two-digit start year.
        df = df.assign(season=i)
        frames.append(df)
    # Stack all seasons of this league (original row indexes are kept).
    df_frames = pd.concat(frames)
    df_frames = df_frames.rename(columns={
        'Date': 'date',
        'HomeTeam': 'home_team',
        'AwayTeam': 'away_team',
        'FTHG': 'home_goals',
        'FTAG': 'away_goals',
    })
    dict_historical_data[league] = df_frames
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment