Created
March 1, 2009 15:17
-
-
Save sykloid/72351 to your computer and use it in GitHub Desktop.
A screenscraper for the Australian Open scores page
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python
# A Screenscraper for the Australian Open scores page.
# P.C. Shyamshankar
# January 2009
#
# Can handle ongoing as well as completed matches, but you'll have to change the
# URLs yourself.
#
# Note: Match numbers do not stay fixed! As matches finish and are pulled out
# of the list, they affect the numbers of all matches below them (-1) and, of
# course, their own.
#
# Important matches are _usually_ at the top, and stay there for some time, so
# they should work out in most cases.
from sys import argv
from urllib import urlopen

from BeautifulSoup import BeautifulSoup

# Scores pages.  The script reads IN_PROGRESS_URL; to list finished matches
# instead, pass COMPLETED_URL to urlopen() below.
IN_PROGRESS_URL = 'http://www.australianopen.com/en_AU/scores/index2.html'
COMPLETED_URL = 'http://www.australianopen.com/en_AU/scores/cmatch/12ms.html'

# NOTE(review): the text being replaced reached this review as an ordinary
# space, which would make the title/name replacements no-ops and would turn
# every padding space of a formatted row into "   -".  It is presumed to have
# been a non-breaking space, so both the undecoded entity and the decoded
# character are handled -- confirm against the upstream script.
NBSP_FORMS = (u'&nbsp;', u'\xa0')

# Within a match table the second player's cells are read this many positions
# after the first player's (name 1/10, game score 3/12, sets 4-8/13-17).
ROW_STRIDE = 9

# Row layouts: name, marker column, then the score columns.
COMPLETED_TEMPLATE = "%40s %1s %4s %4s %4s %4s %4s"
PROGRESS_TEMPLATE = "%40s %1s %4s %4s %4s %4s %4s %4s"


def _replace_nbsp(text, replacement):
    """Return text with every non-breaking-space form swapped for replacement."""
    for form in NBSP_FORMS:
        text = text.replace(form, replacement)
    return text


def _set_score(cell):
    """Return one set's score as text.

    cell is the contents list of a score cell: its first child is the score
    and its second child, when it has contents of its own, is appended in
    parentheses (presumably the tie-break score -- TODO confirm).

    Raises IndexError, AttributeError or TypeError when the cell does not have
    that shape; the caller treats that as "no more sets".
    """
    score = cell[0]
    extra = cell[1]
    suffix = '(' + extra.contents[0] + ')' if extra.contents else ''
    return score + suffix


def _format_row(template, name, marker, scores, columns):
    """Format one player's row, padding unplayed score columns with "-".

    columns is the number of score columns in template; blank cells (a lone
    non-breaking space) are shown as a right-aligned "-" as well.
    """
    padding = ["-"] * (columns - len(scores))
    row = template % ((name, marker) + tuple(scores) + tuple(padding))
    return _replace_nbsp(row, u'%4s' % u'-')


# Optional first argument: index of the single match to show (-1 = all).
try:
    match_num = int(argv[1])
except (IndexError, ValueError):
    # No argument given, or it is not a number: show every match.
    match_num = -1

url = urlopen(IN_PROGRESS_URL)
try:
    soup = BeautifulSoup(url)
finally:
    # The page has been parsed by now; do not leak the connection.
    url.close()

# Each match is one table carrying a "summary" attribute.
tables = soup.findAll('table', attrs={'summary': True})

# print(...) below always takes a single argument so that it parses both as
# the Python 2 print statement this script was written for and as a call.
for index, table in enumerate(tables):
    if match_num not in (-1, index):
        continue

    cells = [td.contents for td in table.findAll('td', attrs={'valign': True})]
    players = [link.contents[0] for link in table.findAll('a')]

    title = cells[0][0].find('td', attrs={'class': 'medBold'}).contents[0]
    title = _replace_nbsp(title, ' ')

    # A first image whose alt text is "Winner" marks a finished match.
    img = table.findAll('img')[0]
    completed = img.get('alt') == u'Winner'

    if completed:
        print("Match (%d) completed : %s" % (index, title))
        template = COMPLETED_TEMPLATE
        score_columns = 5
        print(template % ("Player", "W", "S1", "S2", "S3", "S4", "S5"))
    else:
        print("Match (%d) in progress : %s" % (index, title))
        template = PROGRESS_TEMPLATE
        score_columns = 6
        print(template % ("Player", "S", "G", "S1", "S2", "S3", "S4", "S5"))

    # Index of the player who gets the "*" marker (the "W" column of a
    # finished match, the "S" column of a running one): the first player when
    # cells[2] has content, otherwise the second.
    serving = 0 if cells[2] else 1

    if u'Doubles' in title:
        if completed:
            # The marked pair's names are wrapped in one more tag; unwrap them.
            first = serving * 2
            players[first] = players[first].contents[0]
            players[first + 1] = players[first + 1].contents[0]

        # Four linked names -> two "A/B" pair names in players[0] and [1].
        players[0] = players[0] + '/' + players[1]
        players[1] = players[2] + '/' + players[3]
    else:
        # Singles: rebuild each name from the two pieces in the name cell.
        players = []
        for name_cell in (cells[1], cells[1 + ROW_STRIDE]):
            players.append(
                [name_cell[1].contents[0], _replace_nbsp(name_cell[2], u' ')]
            )

        if completed:
            # The marked player's name is wrapped in one more tag.
            other = 1 - serving
            players[serving] = (
                players[serving][1] + ' ' + players[serving][0].contents[0]
            )
            players[other] = players[other][1] + ' ' + players[other][0]
        else:
            for i in 0, 1:
                players[i] = players[i][1] + ' ' + players[i][0]

    player0 = []
    player1 = []

    if not completed:
        # Current game score ("G" column).  The marked player's value is
        # nested one level deeper than the other player's.
        if serving == 0:
            player0.append(cells[3][1].contents[0])
            player1.append(cells[ROW_STRIDE + 3][0])
        else:
            player0.append(cells[3][0])
            player1.append(cells[ROW_STRIDE + 3][1].contents[0])

    # Up to five sets; the first cell that does not look like a score ends
    # the list and the remaining columns are padded with "-".
    for i in range(4, 9):
        try:
            player0.append(_set_score(cells[i]))
            player1.append(_set_score(cells[ROW_STRIDE + i]))
        except (IndexError, AttributeError, TypeError):
            break

    print(_format_row(template, players[0], "*" if serving == 0 else " ",
                      player0, score_columns))
    print(_format_row(template, players[1], "*" if serving == 1 else " ",
                      player1, score_columns))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment