gabecano4308 · December 29, 2020 23:32
diff --git a/bball ref -- ht_wt_pos b/bball ref -- ht_wt_pos
 # Build one record (name, height, weight, position) per roster row by
 # visiting the profile page linked from each row of the per-game table.
 height_weight_position = []

 # Patterns are compiled once, outside the loop. Each '(.*)' captures the text
 # found between the two known substrings written on either side of it.
 # Insertion order here fixes the column order of the resulting DataFrame.
 profile_patterns = {
     'Height': re.compile(r'"height">(.*)</span>,\xa0<span itemprop="weight'),
     'Weight (Lbs)': re.compile(r'"weight">(.*)lb</span>'),
     'Position': re.compile(r'Position:\n  </strong>\n (.*)\n\n'),
 }

 # [1:] skips the first <tr> of the table.
 for row in wiz_per_game.find_all('tr')[1:]:

     # A row without a link has no profile page to visit; skip it instead of
     # failing on a None lookup.
     player_link = row.find('a')
     if player_link is None:
         continue

     # Adding name for clarity
     player = {'Name': player_link.text.strip()}

     # Parsing html data from each player's specific web page.
     # The base URL already ends in '/', so a leading '/' on the href is
     # dropped to avoid requesting a '//' path.
     player_url = ('https://www.basketball-reference.com/' + player_link.attrs['href'].lstrip('/'))
     # A timeout keeps one stalled connection from hanging the whole scrape.
     player_rest = requests.get(player_url, timeout=30)
     # Fail loudly on an HTTP error rather than parsing an error page.
     player_rest.raise_for_status()
     player_soup = BeautifulSoup(player_rest.content, 'lxml')
     player_info = player_soup.find(name='div', attrs={'itemtype': 'https://schema.org/Person'})

     # Using RegEx to extract height, weight, and position from the stringified
     # <p> tags of the profile block. An empty string is searched when the
     # block is absent, so every field simply comes back as None.
     s = str(player_info.find_all('p')) if player_info is not None else ''
     for field, pattern in profile_patterns.items():
         found = pattern.search(s)
         # A field missing from this profile is recorded as None instead of
         # aborting the run with an AttributeError on a failed match.
         player[field] = found.group(1).strip() if found else None

     height_weight_position.append(player)

 pd.DataFrame(height_weight_position)
	# Build one record (name, height, weight, position) per roster row by
	# visiting the profile page linked from each row of the per-game table.
	height_weight_position = []

	# Patterns are compiled once, outside the loop. Each '(.*)' captures the text
	# found between the two known substrings written on either side of it.
	# Insertion order here fixes the column order of the resulting DataFrame.
	profile_patterns = {
		'Height': re.compile(r'"height">(.*)</span>,\xa0<span itemprop="weight'),
		'Weight (Lbs)': re.compile(r'"weight">(.*)lb</span>'),
		'Position': re.compile(r'Position:\n  </strong>\n (.*)\n\n'),
	}

	# [1:] skips the first <tr> of the table.
	for row in wiz_per_game.find_all('tr')[1:]:

		# A row without a link has no profile page to visit; skip it instead of
		# failing on a None lookup.
		player_link = row.find('a')
		if player_link is None:
			continue

		# Adding name for clarity
		player = {'Name': player_link.text.strip()}

		# Parsing html data from each player's specific web page.
		# The base URL already ends in '/', so a leading '/' on the href is
		# dropped to avoid requesting a '//' path.
		player_url = ('https://www.basketball-reference.com/' + player_link.attrs['href'].lstrip('/'))
		# A timeout keeps one stalled connection from hanging the whole scrape.
		player_rest = requests.get(player_url, timeout=30)
		# Fail loudly on an HTTP error rather than parsing an error page.
		player_rest.raise_for_status()
		player_soup = BeautifulSoup(player_rest.content, 'lxml')
		player_info = player_soup.find(name='div', attrs={'itemtype': 'https://schema.org/Person'})

		# Using RegEx to extract height, weight, and position from the stringified
		# <p> tags of the profile block. An empty string is searched when the
		# block is absent, so every field simply comes back as None.
		s = str(player_info.find_all('p')) if player_info is not None else ''
		for field, pattern in profile_patterns.items():
			found = pattern.search(s)
			# A field missing from this profile is recorded as None instead of
			# aborting the run with an AttributeError on a failed match.
			player[field] = found.group(1).strip() if found else None

		height_weight_position.append(player)

	pd.DataFrame(height_weight_position)