Created
December 29, 2020 23:32
-
-
Save gabecano4308/4cb2720ba8345e8eef06ad9fb540aa79 to your computer and use it in GitHub Desktop.
for part 1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scrape height, weight and position for every player linked from the
# per-game table (`wiz_per_game`) by visiting each player's profile page.
# The result is a list of dicts, one per player, shown as a DataFrame.

# No trailing slash: the hrefs are joined below with exactly one '/'.
_PROFILE_BASE_URL = 'https://www.basketball-reference.com'

# Patterns are compiled once instead of on every loop iteration.
# Each one captures the text found between two known substrings of the
# profile markup. Non-greedy groups stop at the first closing marker.
_WEIGHT_RE = re.compile(r'"weight">(.*?)lb</span>')
_HEIGHT_RE = re.compile(r'"height">(.*?)</span>,\xa0<span itemprop="weight')
# Whitespace around the closing </strong> tag is matched loosely so that
# changes in the page's indentation do not break the extraction; the
# captured group is the rest of the line that follows the label.
_POSITION_RE = re.compile(r'Position:\s*</strong>\s*(.*)')


def _first_group(pattern, text):
    """Return the stripped first capture group of `pattern` in `text`.

    Returns None when the pattern does not match, so a single profile with
    unexpected markup yields a missing value instead of an AttributeError.
    """
    match = pattern.search(text)
    return match.group(1).strip() if match else None


height_weight_position = []

# The first <tr> is skipped, as in the original (presumably the header row).
for row in wiz_per_game.find_all('tr')[1:]:
    link = row.find('a')
    # Rows without a player link cannot be resolved to a profile page.
    if link is None or not link.get('href'):
        continue

    # Parsing html data from each player's specific web page
    player_url = _PROFILE_BASE_URL + '/' + link['href'].lstrip('/')
    player_rest = requests.get(player_url, timeout=30)
    # Fail loudly on HTTP errors (e.g. rate limiting) rather than trying to
    # parse an error page.
    player_rest.raise_for_status()
    player_soup = BeautifulSoup(player_rest.content, 'lxml')
    player_info = player_soup.find(
        name='div', attrs={'itemtype': 'https://schema.org/Person'}
    )

    # The regexes run over the stringified <p> tags of the profile header.
    # An absent header gives an empty string, so every field becomes None.
    s = str(player_info.find_all('p')) if player_info is not None else ''

    player = {
        # Adding name for clarity
        'Name': link.text.strip(),
        'Height': _first_group(_HEIGHT_RE, s),
        'Weight (Lbs)': _first_group(_WEIGHT_RE, s),
        'Position': _first_group(_POSITION_RE, s),
    }
    height_weight_position.append(player)

pd.DataFrame(height_weight_position)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment