-
-
Save robwiss/6902679 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"from bs4 import BeautifulSoup\n", | |
"from urllib2 import urlopen\n", | |
"\n", | |
"soup = BeautifulSoup(urlopen('http://games.espn.go.com/ffl/leaguerosters?leagueId=280610'))\n", | |
"\n", | |
"# interested in tables of player data\n", | |
"# Available data for team is: team name, team record\n", | |
"# Available data per player is: roster pos, name, NFL team, field pos, status,\n", | |
"# Draft/Free Agent\n", | |
"# (Defenses have no NFL team field)\n", | |
"#\n", | |
"# Format of data structure to hold scraped data:\n", | |
"# dictionary where keys are team names, values are lists representing rosters\n", | |
"# roster lists of the form [ROSTER POS, NAME, NFL TEAM (can be ''), FIELD POS,\n", | |
"# STATUS, DRAFTED]\n", | |
"# Second dictionary to hold records:\n", | |
"# dictionary where keys are team name, values are tuples of win/loss\n", | |
"\n", | |
"rosters = {}\n", | |
"records = {}\n", | |
"\n", | |
"# each FF player's roster is stored in a table tag with a class of\n", | |
"# 'playerTableTable'\n", | |
"# The table is further broken down into <tr>'s with a few different classes:\n", | |
"# - 'playerTableBgRowHead' : contains the FF team name and record\n", | |
"# - 'playerTableBgRowSubhead' : contains the column headers\n", | |
"# - 'pncPlayerRow' : contains the roster entries\n", | |
"for playerTable in soup.findAll('table', attrs={ 'class' : 'playerTableTable'}):\n", | |
" team_row = playerTable.find('tr', attrs={ 'class' : 'playerTableBgRowHead' })\n", | |
" team_name = team_row.a.text\n", | |
" wins, losses = [\n", | |
" int(x)\n", | |
" for x in team_row.th.text[len(team_row.a.text)+1:][1:-1].split('-')\n", | |
" ]\n", | |
" rosters[team_name] = []\n", | |
" records[team_name] = (wins, losses)\n", | |
" for entry in playerTable.findAll('tr', attrs={ 'class' : 'pncPlayerRow' }):\n", | |
" position = entry.find('td', attrs={'class' : 'playerSlot'}).text\n", | |
" drafted = entry.find('td', attrs={'style' : 'null'}).text == 'Draft'\n", | |
" # get the playertablePlayerName element\n", | |
" player_elem = entry.find('td', attrs = { 'class' : 'playertablePlayerName' })\n", | |
" text = player_elem.text\n", | |
" # unpack name and team name\n", | |
" player_name = player_elem.a.text\n", | |
" # unpack status (will be in a span)\n", | |
" status = ''\n", | |
" if player_elem.span is not None:\n", | |
" status = player_elem.span.text\n", | |
" # strip the player name and status from the text\n", | |
" text = text.rstrip(u'\\xa0{}'.format(status))\n", | |
" text = text.lstrip(player_name).lstrip('*').lstrip(', ')\n", | |
"\n", | |
" # all that is left is nbsp separated team name and field position\n", | |
" if len(text.split(u'\\xa0')) > 1:\n", | |
" nfl_team_name, field_pos = text.split(u'\\xa0')\n", | |
" else:\n", | |
" field_pos = text\n", | |
"\n", | |
" # append the entry to the roster\n", | |
" rosters[team_name].append((position, player_name, nfl_team_name, field_pos, status, drafted))\n", | |
"\n", | |
"for league_team in rosters.iterkeys():\n", | |
" print '{} {}'.format(league_team, records[league_team])\n", | |
" for entry in rosters[league_team]:\n", | |
" print ' {}'.format(entry)\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"FUCK JOKER ZOMBIES (2, 3)\n", | |
" (u'QB', u'Robert Griffin III', u'Wsh', u'QB', '', True)\n", | |
" (u'RB', u'Arian Foster', u'Hou', u'RB', '', True)\n", | |
" (u'RB', u'Ray Rice', u'Bal', u'RB', '', True)\n", | |
" (u'RB/WR', u'Marques Colston', u'NO', u'WR', '', True)\n", | |
" (u'WR', u'Demaryius Thomas', u'Den', u'WR', '', True)\n", | |
" (u'WR', u'Reggie Wayne', u'Ind', u'WR', '', True)\n", | |
" (u'TE', u'Owen Daniels', u'Hou', u'TE', u'O', True)\n", | |
" (u'D/ST', u'Texans D/ST', u'', u'D/ST', '', False)\n", | |
" (u'K', u'Randy Bullock', u'Hou', u'K', '', True)\n", | |
" (u'Bench', u'Antonio Brown', u'Pit', u'WR', '', True)\n", | |
" (u'Bench', u'Steve Johnson', u'Buf', u'WR', u'Q', True)\n", | |
" (u'Bench', u'Rashard Mendenhall', u'Ari', u'RB', '', True)\n", | |
" (u'Bench', u'Sam Bradford', u'StL', u'QB', '', True)\n", | |
" (u'Bench', u'Bilal Powell', u'NYJ', u'RB', '', False)\n", | |
" (u'Bench', u'Coby Fleener', u'Ind', u'TE', '', False)\n", | |
"TEAM HEIM (3, 2)\n", | |
" (u'QB', u'Drew Brees', u'NO', u'QB', '', True)\n", | |
" (u'RB', u'Jamaal Charles', u'KC', u'RB', u'P', True)\n", | |
" (u'RB', u'Danny Woodhead', u'SD', u'RB', '', True)\n", | |
" (u'RB/WR', u'Darren Sproles', u'NO', u'RB', '', True)\n", | |
" (u'WR', u'Julio Jones', u'Atl', u'WR', u'Q', True)\n", | |
" (u'WR', u'Torrey Smith', u'Bal', u'WR', '', True)\n", | |
" (u'TE', u'Jared Cook', u'StL', u'TE', '', True)\n", | |
" (u'D/ST', u'Bears D/ST', u'', u'D/ST', '', True)\n", | |
" (u'K', u'Dan Bailey', u'Dal', u'K', '', True)\n", | |
" (u'Bench', u'Vernon Davis', u'SF', u'TE', u'P', True)\n", | |
" (u'Bench', u'Chris Johnson', u'Ten', u'RB', '', True)\n", | |
" (u'Bench', u'Eli Manning', u'NYG', u'QB', '', True)\n", | |
" (u'Bench', u'Michael Floyd', u'Ari', u'WR', '', True)\n", | |
" (u'Bench', u'Michael Bush', u'Chi', u'RB', '', True)\n", | |
" (u'Bench', u'James Jones', u'GB', u'WR', '', False)\n", | |
"TITTY CITY TOBLERONES (3, 2)\n", | |
" (u'QB', u'Peyton Manning', u'Den', u'QB', '', True)\n", | |
" (u'RB', u'Frank Gore', u'SF', u'RB', '', True)\n", | |
" (u'RB', u'Bernard Pierce', u'Bal', u'RB', '', True)\n", | |
" (u'RB/WR', u'BenJarvus Green-Ellis', u'Cin', u'RB', '', False)\n", | |
" (u'WR', u'Randall Cobb', u'GB', u'WR', '', True)\n", | |
" (u'WR', u'Denarius Moore', u'Oak', u'WR', '', False)\n", | |
" (u'TE', u'Antonio Gates', u'SD', u'TE', '', False)\n", | |
" (u'D/ST', u'Rams D/ST', u'', u'D/ST', '', True)\n", | |
" (u'K', u'Blair Walsh', u'Min', u'K', '', True)\n", | |
" (u'Bench', u'Maurice Jones-Drew', u'Jac', u'RB', '', True)\n", | |
" (u'Bench', u'Steve Smith', u'Car', u'WR', '', True)\n", | |
" (u'Bench', u'Pierre Garcon', u'Wsh', u'WR', '', True)\n", | |
" (u'Bench', u'Jermichael Finley', u'GB', u'TE', '', True)\n", | |
" (u'Bench', u'Alex Smith', u'KC', u'QB', '', False)\n", | |
" (u'Bench', u'Cardinals D/ST', u'', u'D/ST', '', False)\n", | |
"JOKER ZOMBIES RULE (3, 2)\n", | |
" (u'QB', u'Tony Romo', u'Dal', u'QB', '', True)\n", | |
" (u'RB', u'Matt Forte', u'Chi', u'RB', '', True)\n", | |
" (u'RB', u'DeMarco Murray', u'Dal', u'RB', '', True)\n", | |
" (u'RB/WR', u'Reggie Bush', u'Det', u'RB', '', True)\n", | |
" (u'WR', u'Calvin Johnson', u'Det', u'WR', u'Q', True)\n", | |
" (u'WR', u'Josh Gordon', u'Cle', u'WR', '', True)\n", | |
" (u'TE', u'Jimmy Graham', u'NO', u'TE', '', True)\n", | |
" (u'D/ST', u'49ers D/ST', u'', u'D/ST', '', True)\n", | |
" (u'K', u'Phil Dawson', u'SF', u'K', '', True)\n", | |
" (u'Bench', u'Cam Newton', u'Car', u'QB', '', True)\n", | |
" (u'Bench', u'Darren McFadden', u'Oak', u'RB', u'Q', True)\n", | |
" (u'Bench', u'Cecil Shorts', u'Jac', u'WR', '', True)\n", | |
" (u'Bench', u'Jordan Cameron', u'Cle', u'TE', '', True)\n", | |
" (u'Bench', u'Giovani Bernard', u'Cin', u'RB', '', True)\n", | |
" (u'Bench', u'Philip Rivers', u'SD', u'QB', '', False)\n", | |
"DEEP INSIDE OF YOU (0, 5)\n", | |
" (u'QB', u'Jay Cutler', u'Chi', u'QB', '', True)\n", | |
" (u'RB', u'Marshawn Lynch', u'Sea', u'RB', '', True)\n", | |
" (u'RB', u\"Le'Veon Bell\", u'Pit', u'RB', '', False)\n", | |
" (u'RB/WR', u'Hakeem Nicks', u'NYG', u'WR', '', True)\n", | |
" (u'WR', u'A.J. Green', u'Cin', u'WR', '', True)\n", | |
" (u'WR', u'Alshon Jeffery', u'Chi', u'WR', u'Q', False)\n", | |
" (u'TE', u'Jordan Reed', u'Wsh', u'TE', u'P', False)\n", | |
" (u'D/ST', u'Browns D/ST', u'', u'D/ST', '', False)\n", | |
" (u'K', u'Alex Henery', u'Phi', u'K', '', False)\n", | |
" (u'Bench', u'Mike Wallace', u'Mia', u'WR', '', True)\n", | |
" (u'Bench', u'Daryl Richardson', u'StL', u'RB', '', True)\n", | |
" (u'Bench', u'Rueben Randle', u'NYG', u'WR', '', False)\n", | |
" (u'Bench', u'Matt Schaub', u'Hou', u'QB', '', False)\n", | |
" (u'Bench', u'Charles Clay', u'Mia', u'TE', '', False)\n", | |
" (u'Bench', u'Marlon Brown', u'Bal', u'WR', u'Q', False)\n", | |
"STUPID SEXY FLANDERS (1, 4)\n", | |
" (u'QB', u'Russell Wilson', u'Sea', u'QB', '', True)\n", | |
" (u'RB', u'Doug Martin', u'TB', u'RB', '', True)\n", | |
" (u'RB', u'Isaiah Pead', u'StL', u'RB', '', False)\n", | |
" (u'RB/WR', u'Golden Tate', u'Sea', u'WR', '', True)\n", | |
" (u'WR', u'Andre Johnson', u'Hou', u'WR', '', True)\n", | |
" (u'WR', u'Dwayne Bowe', u'KC', u'WR', '', True)\n", | |
" (u'TE', u'Jason Witten', u'Dal', u'TE', '', True)\n", | |
" (u'D/ST', u'Seahawks D/ST', u'', u'D/ST', '', True)\n", | |
" (u'K', u'Greg Zuerlein', u'StL', u'K', '', True)\n", | |
" (u'Bench', u'Colin Kaepernick', u'SF', u'QB', '', True)\n", | |
" (u'Bench', u'Percy Harvin', u'Sea', u'WR', u'O', True)\n", | |
" (u'Bench', u'Lamar Miller', u'Mia', u'RB', '', True)\n", | |
" (u'Bench', u'Ben Roethlisberger', u'Pit', u'QB', u'P', True)\n", | |
" (u'Bench', u'Montee Ball', u'Den', u'RB', '', True)\n", | |
" (u'Bench', u'Jermaine Gresham', u'Cin', u'TE', '', True)\n", | |
"BEGGING FOR A PEGGING (3, 2)\n", | |
" (u'QB', u'Aaron Rodgers', u'GB', u'QB', '', True)\n", | |
" (u'RB', u'Johnathan Franklin', u'GB', u'RB', '', False)\n", | |
" (u'RB', u'LeGarrette Blount', u'NE', u'RB', '', False)\n", | |
" (u'RB/WR', u'DeSean Jackson', u'Phi', u'WR', '', True)\n", | |
" (u'WR', u'Dez Bryant', u'Dal', u'WR', '', True)\n", | |
" (u'WR', u'Victor Cruz', u'NYG', u'WR', '', True)\n", | |
" (u'TE', u'Julius Thomas', u'Den', u'TE', '', True)\n", | |
" (u'D/ST', u'Colts D/ST', u'', u'D/ST', '', False)\n", | |
" (u'K', u'Steven Hauschka', u'Sea', u'K', '', False)\n", | |
" (u'Bench', u'Alfred Morris', u'Wsh', u'RB', u'Q', True)\n", | |
" (u'Bench', u'Tony Gonzalez', u'Atl', u'TE', '', True)\n", | |
" (u'Bench', u'Julian Edelman', u'NE', u'WR', '', False)\n", | |
" (u'Bench', u'Eddie Royal', u'SD', u'WR', '', False)\n", | |
" (u'Bench', u'Willis McGahee', u'Cle', u'RB', '', False)\n", | |
" (u'Bench', u'Zac Stacy', u'StL', u'RB', u'P', False)\n", | |
"SECTION V DEVIANTS (3, 2)\n", | |
" (u'QB', u'Matt Ryan', u'Atl', u'QB', '', True)\n", | |
" (u'RB', u'DeAngelo Williams', u'Car', u'RB', '', True)\n", | |
" (u'RB', u'Jacquizz Rodgers', u'Atl', u'RB', '', False)\n", | |
" (u'RB/WR', u'Anquan Boldin', u'SF', u'WR', '', True)\n", | |
" (u'WR', u'Wes Welker', u'Den', u'WR', '', True)\n", | |
" (u'WR', u'Danny Amendola', u'NE', u'WR', u'P', True)\n", | |
" (u'TE', u'Heath Miller', u'Pit', u'TE', '', False)\n", | |
" (u'D/ST', u'Bengals D/ST', u'', u'D/ST', '', True)\n", | |
" (u'K', u'Matt Bryant', u'Atl', u'K', '', True)\n", | |
" (u'Bench', u'Tom Brady', u'NE', u'QB', '', True)\n", | |
" (u'Bench', u'Steven Jackson', u'Atl', u'RB', u'O', True)\n", | |
" (u'Bench', u'Ted Ginn', u'Car', u'WR', '', True)\n", | |
" (u'Bench', u'Robert Meachem', u'NO', u'WR', '', False)\n", | |
" (u'Bench', u'Kenbrell Thompkins', u'NE', u'WR', '', False)\n", | |
" (u'Bench', u'Jason Snelling', u'Atl', u'RB', u'Q', False)\n", | |
"OJ'S SLASHERS AND DASHERS (5, 0)\n", | |
" (u'QB', u'Andrew Luck', u'Ind', u'QB', '', True)\n", | |
" (u'RB', u'Eddie Lacy', u'GB', u'RB', '', True)\n", | |
" (u'RB', u'Knowshon Moreno', u'Den', u'RB', '', True)\n", | |
" (u'RB/WR', u'Trent Richardson', u'Ind', u'RB', '', True)\n", | |
" (u'WR', u'Brandon Marshall', u'Chi', u'WR', '', True)\n", | |
" (u'WR', u'Jordy Nelson', u'GB', u'WR', '', True)\n", | |
" (u'TE', u'Greg Olsen', u'Car', u'TE', u'Q', True)\n", | |
" (u'D/ST', u'Broncos D/ST', u'', u'D/ST', '', True)\n", | |
" (u'K', u'Garrett Hartley', u'NO', u'K', '', True)\n", | |
" (u'Bench', u'Eric Decker', u'Den', u'WR', '', True)\n", | |
" (u'Bench', u'Ben Tate', u'Hou', u'RB', '', True)\n", | |
" (u'Bench', u'T.Y. Hilton', u'Ind', u'WR', '', True)\n", | |
" (u'Bench', u'Quinton Patton', u'SF', u'WR', u'O', True)\n", | |
" (u'Bench', u'Joique Bell', u'Det', u'RB', '', False)\n", | |
" (u'Bench', u'Scott Chandler', u'Buf', u'TE', '', False)\n", | |
"SIXT-NINE'IN AND PREGNANT (2, 3)\n", | |
" (u'QB', u'Matthew Stafford', u'Det', u'QB', '', True)\n", | |
" (u'RB', u'Adrian Peterson', u'Min', u'RB', '', True)\n", | |
" (u'RB', u'LeSean McCoy', u'Phi', u'RB', '', True)\n", | |
" (u'RB/WR', u'Fred Jackson', u'Buf', u'RB', '', True)\n", | |
" (u'WR', u'Vincent Jackson', u'TB', u'WR', '', True)\n", | |
" (u'WR', u'Terrance Williams', u'Dal', u'WR', '', False)\n", | |
" (u'TE', u'Rob Gronkowski', u'NE', u'TE', u'Q', True)\n", | |
" (u'D/ST', u'Chiefs D/ST', u'', u'D/ST', '', False)\n", | |
" (u'K', u'Matt Prater', u'Den', u'K', '', True)\n", | |
" (u'Bench', u'C.J. Spiller', u'Buf', u'RB', u'P', True)\n", | |
" (u'Bench', u'Larry Fitzgerald', u'Ari', u'WR', '', True)\n", | |
" (u'Bench', u'Stephen Gostkowski', u'NE', u'K', '', True)\n", | |
" (u'Bench', u'Michael Vick', u'Phi', u'QB', u'Q', True)\n", | |
" (u'Bench', u'Brian Hartline', u'Mia', u'WR', '', False)\n", | |
" (u'Bench', u'Martellus Bennett', u'Chi', u'TE', u'Q', False)\n" | |
] | |
} | |
], | |
"prompt_number": 9 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Exploratory snippet: fetch the ESPN fantasy-football league roster page and
# pick out the header row of the first team's roster table.
from contextlib import closing
from urllib2 import urlopen

from bs4 import BeautifulSoup

# closing() guarantees the HTTP response is released once the page has been
# parsed, even if parsing raises.
# NOTE(review): no parser is named, so bs4 chooses among whatever parsers are
# installed -- consider pinning one if results must be reproducible.
with closing(urlopen('http://games.espn.go.com/ffl/leaguerosters?leagueId=280610')) as response:
    soup = BeautifulSoup(response)

# Each roster lives in a <table class="playerTableTable">.
playerTables = soup.findAll('table', attrs={ 'class' : 'playerTableTable' })
if not playerTables:
    # Same exception type that indexing an empty result would raise, but with
    # a message that says what was actually missing from the page.
    raise IndexError("no <table class='playerTableTable'> elements found in the roster page")

playerTable = playerTables[0]

# The 'playerTableBgRowHead' row is the table's header row (team name/record).
team_row = playerTable.find('tr', attrs={ 'class' : 'playerTableBgRowHead' })
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment