Skip to content

Instantly share code, notes, and snippets.

@ejmurray
Created June 19, 2015 12:47
Show Gist options
  • Save ejmurray/9f6cbde4584cd1272932 to your computer and use it in GitHub Desktop.
Save ejmurray/9f6cbde4584cd1272932 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
__author__ = 'Ernest'
# Scrape http://www.bbc.co.uk/sport/football/fixtures for upcoming matches.
from bs4 import BeautifulSoup
from urllib2 import urlopen
# BBC Sport football fixtures page; get_category_links also uses this as the
# base from which it builds the link URLs it returns.
BASE_URL = "http://www.bbc.co.uk/sport/football/fixtures"
def get_category_links(section_url):
    """Return the URLs linked from the fixtures table of a page.

    Downloads ``section_url``, locates the ``div`` whose class is
    ``fixtures-table full-table-medium`` and collects the ``href`` of the
    first anchor inside each ``dd`` element found there.

    Args:
        section_url: URL of the page to download and parse.

    Returns:
        A list of URL strings, each href resolved against ``BASE_URL``.
        The list is empty when the page contains no fixtures table.
    """
    # Imported locally so this function is self-contained. The fallback keeps
    # it working on Python 2 (implied by the file's urllib2 import) and 3.
    from contextlib import closing
    try:
        from urlparse import urljoin
    except ImportError:
        from urllib.parse import urljoin

    # closing() releases the HTTP response even if read() raises.
    with closing(urlopen(section_url)) as response:
        html = response.read()

    soup = BeautifulSoup(html, "lxml")
    fixtures = soup.find("div", "fixtures-table full-table-medium")
    if fixtures is None:
        # No fixtures container on the page: there is nothing to link to.
        return []

    category_links = []
    for dd in fixtures.find_all("dd"):
        anchor = dd.a
        # Skip entries without a usable link instead of failing on None.
        if anchor is None or not anchor.get("href"):
            continue
        # urljoin resolves root-relative and absolute hrefs the way a browser
        # would; plain string concatenation would duplicate the URL path.
        category_links.append(urljoin(BASE_URL, anchor["href"]))
    return category_links
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment