Skip to content

Instantly share code, notes, and snippets.

@chrishanretty
Created July 2, 2013 07:39
Show Gist options
  • Select an option

  • Save chrishanretty/5907417 to your computer and use it in GitHub Desktop.

Select an option

Save chrishanretty/5907417 to your computer and use it in GitHub Desktop.
ATP rankings scraper
import re
import codecs
import urllib.request
from bs4 import BeautifulSoup
from bs4 import SoupStrainer
# Scrape the ATP singles rankings for 24.06.2013 into 'atp_ranks3.csv'.
#
# Every table row becomes one output line.  Each field is followed by a tab
# (so lines carry a trailing tab) and each line ends with '\n'.  The first
# cell of a row is split into three fields: rank, player name and the
# parenthesised three-letter country code; every other cell is written as
# its stripped text with embedded newlines turned into spaces.

# Only the ranking table is parsed; the rest of each page is skipped.
ranking_table = SoupStrainer('table', {'class': 'bioTableAlt stripeMe'})

# Three capital letters in parentheses, e.g. "(ESP)".  Compiled once because
# it is applied to every ranking row.
nationality_pattern = re.compile(r'\([A-Z]{3}\)')

# newline='' stops the text layer from translating '\n', so the bytes written
# are the same on every platform.  The 'with' block guarantees the file is
# closed even when a request or a parse step raises.
with open('atp_ranks3.csv', 'w', encoding='utf-8', newline='') as outfile:
    # The 'r' query parameter takes the values 1, 101, ..., 1401
    # (presumably the first rank shown on each 100-player page).
    for strata in range(1, 1501, 100):
        start = ('http://www.atpworldtour.com/Rankings/Singles.aspx'
                 '?d=24.06.2013&r=' + str(strata) + '&c=#')
        print(start)

        # Close the HTTP response as soon as the page has been parsed.
        # NOTE(review): no parser is named, so BeautifulSoup picks the best
        # one installed; pin one explicitly if reproducibility matters.
        with urllib.request.urlopen(start) as response:
            soup = BeautifulSoup(response, parse_only=ranking_table)

        # The first <tr> is skipped (presumably the header row).
        for therow in soup.find_all('tr')[1:]:
            fields = []
            for thecell in therow.find_all('td'):
                # Tag.has_key() is deprecated/removed in BeautifulSoup 4;
                # .get() returns None when the attribute is absent.
                cell_classes = thecell.get('class') or []
                if cell_classes[:1] == ['first']:
                    rank_tag = thecell.find('span')
                    name_tag = thecell.find('a')
                    country = nationality_pattern.search(thecell.get_text())
                    # A missing piece yields an empty field instead of an
                    # AttributeError, which keeps the columns aligned.
                    fields.append(
                        rank_tag.get_text() if rank_tag is not None else '')
                    fields.append(
                        name_tag.get_text() if name_tag is not None else '')
                    fields.append(
                        country.group() if country is not None else '')
                else:
                    cellcontents = thecell.get_text().strip()
                    fields.append(cellcontents.replace('\n', ' '))
            # Same layout as before: a tab after every field, then newline.
            outfile.write(''.join(field + '\t' for field in fields) + '\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment