Skip to content

Instantly share code, notes, and snippets.

@gabecano4308
Created December 29, 2020 23:32
Show Gist options
  • Save gabecano4308/4cb2720ba8345e8eef06ad9fb540aa79 to your computer and use it in GitHub Desktop.
Save gabecano4308/4cb2720ba8345e8eef06ad9fb540aa79 to your computer and use it in GitHub Desktop.
for part 1
# Build one record per player (name, height, weight, position) by following
# the player link in each table row to that player's own profile page.

PLAYER_BASE_URL = 'https://www.basketball-reference.com'

# The '(.*)' group captures the text between two known substrings, i.e. the
# text written on either side of '(.*)' in each pattern. Compiled once here
# rather than re-specified on every loop iteration; raw strings keep the
# escapes readable.
WEIGHT_RE = re.compile(r'"weight">(.*)lb</span>')
POSITION_RE = re.compile(r'Position:\n </strong>\n (.*)\n\n')
HEIGHT_RE = re.compile(r'"height">(.*)</span>,\xa0<span itemprop="weight')


def _group_or_none(match):
    """Return the stripped first capture group, or None if nothing matched."""
    return match.group(1).strip() if match else None


height_weight_position = []

# The first <tr> is skipped, as in the original (presumably the header row).
for row in wiz_per_game.find_all('tr')[1:]:
    link = row.find('a')
    if link is None or not link.get('href'):
        # No player link in this row, so there is no profile page to fetch.
        continue

    player = {}

    # Parsing html data from each player's specific web page.
    # lstrip('/') avoids a doubled slash when the href is root-relative.
    player_url = PLAYER_BASE_URL + '/' + link.attrs['href'].lstrip('/')
    # A timeout keeps one unresponsive request from hanging the whole scrape.
    player_rest = requests.get(player_url, timeout=30)
    player_soup = BeautifulSoup(player_rest.content, 'lxml')
    player_info = player_soup.find(
        name='div', attrs={'itemtype': 'https://schema.org/Person'}
    )

    # Adding name for clarity
    player['Name'] = link.text.strip()

    # If the profile block is absent (e.g. an error page came back), search an
    # empty string so every field is recorded as missing instead of crashing.
    s = str(player_info.find_all('p')) if player_info is not None else ''

    weight = WEIGHT_RE.search(s)
    position = POSITION_RE.search(s)
    height = HEIGHT_RE.search(s)

    # A field that cannot be found is stored as None rather than raising
    # AttributeError on a failed match and discarding all players so far.
    player['Height'] = _group_or_none(height)
    player['Weight (Lbs)'] = _group_or_none(weight)
    player['Position'] = _group_or_none(position)

    height_weight_position.append(player)

pd.DataFrame(height_weight_position)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment