Created
February 3, 2016 14:16
-
-
Save ejmurray/b537c7c4d063fa235568 to your computer and use it in GitHub Desktop.
scrape the BBC sport website.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
# author: Ernest | |
# created: 03/02/2016 | |
# http://goo.gl/pXfFe1 | |
"""
Description
Print the match scores and the league table for a given league, scraped from
the BBC Sport website with BeautifulSoup 4 (bs4). Change the league slug in
the URLs to select a different league, for example:
premier-league
championship
spanish-la-liga
"""
from bs4 import BeautifulSoup | |
from urllib.request import urlopen | |
import csv | |
# Results page to scrape; replace the league slug in the path
# (e.g. "championship") to fetch another league's results.
url = 'http://www.bbc.com/sport/football/premier-league/results'

# Use the response as a context manager so the HTTP connection is closed
# as soon as the body has been read, instead of being leaked.
with urlopen(url) as response:
    page = response.read()

# Parse with the stdlib-backed parser so no extra parser package is needed.
soup = BeautifulSoup(page, "html.parser")
def has_class_but_no_id(tag):
    """Report whether *tag* carries an attribute named ``score``.

    The argument only needs to provide a ``has_attr(name)`` method; the
    result of that call is returned unchanged.

    NOTE(review): despite the function's name, neither ``class`` nor ``id``
    is inspected here -- only an attribute literally called ``score``.
    Nothing visible in this script calls the function; confirm the intended
    predicate before relying on it.
    """
    attribute_name = 'score'
    return tag.has_attr(attribute_name)
def _span_text(cell, css_class):
    """Return the joined, stripped text of the first matching span, or None.

    Looks up the first ``<span>`` inside *cell* whose class is *css_class*
    and concatenates its stripped strings; returns ``None`` when *cell* has
    no such span.
    """
    span = cell.find('span', class_=css_class)
    if span is None:
        return None
    return ''.join(span.stripped_strings)


# Write every complete result to webScraper.csv as well as printing it.
# The file is opened in a ``with`` block so it is flushed and closed even if
# scraping fails part-way; previously the handle was never closed and the
# writer was never used, so the CSV was always left empty.
# newline="" is what the csv module requires to avoid doubled line endings.
with open("webScraper.csv", "w", newline="", encoding="utf-8") as csv_file:
    writer = csv.writer(csv_file)
    for match in soup.find_all('td', class_='match-details'):
        home = _span_text(match, 'team-home')
        score = _span_text(match, 'score')
        away = _span_text(match, 'team-away')
        # Skip cells where any of the three parts is missing or empty.
        if home and score and away:
            print(home, score, away)
            writer.writerow([home, score, away])
# League table page; replace the league slug in the path to fetch another
# league's table.
url2 = "http://www.bbc.com/sport/football/spanish-la-liga/table"

# Close the HTTP response as soon as the body has been parsed.
with urlopen(url2) as response:
    soup = BeautifulSoup(response.read(), "html.parser")

tables = soup("table", {"class": "table-stats"})
if not tables:
    # Same exception type as indexing an empty result list, but with a
    # message that says what was actually missing.
    raise IndexError(
        "no <table class='table-stats'> found at {}".format(url2)
    )
table = tables[0]

# The markup may omit <tbody>, in which case ``table.tbody`` is None;
# fall back to searching the table itself for rows.
body = table.tbody if table.tbody is not None else table

for row in body("tr"):
    tds = row("td")
    # Skip rows (e.g. header or spacer rows) that lack the cells and child
    # nodes indexed below, instead of crashing with IndexError.
    if len(tds) < 3 or len(tds[1].contents) < 3:
        continue
    # NOTE(review): presumably contents[2] of the second cell is the team
    # name and the third cell is a table statistic -- confirm against the
    # live page markup.
    print(tds[1].contents[2].string, tds[2].string)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment