Skip to content

Instantly share code, notes, and snippets.

@gabecano4308
Last active December 31, 2020 18:11
Show Gist options
  • Save gabecano4308/4c418f1c8007edfd0d22b25ca2157f44 to your computer and use it in GitHub Desktop.
Save gabecano4308/4c418f1c8007edfd0d22b25ca2157f44 to your computer and use it in GitHub Desktop.
for part 1
# Build one record per player row of the per-game table: the player's name
# plus the Twitter handle found on that player's own page, or 'Not Listed'
# when the page shows no Twitter link.
twitter_handle = []

for row in wiz_per_game.find_all('tr')[1:]:
    # A row with no hyperlink cannot be resolved to a player page, so skip
    # it instead of failing on a missing anchor.
    player_anchor = row.find('a')
    if player_anchor is None or not player_anchor.get('href'):
        continue

    # Adding player's name for clarity.
    player = {'Name': player_anchor.text.strip()}

    # The row's first hyperlink is the player's url ending; strip its leading
    # slash so appending it to the base url does not produce a double slash.
    player_url = ('https://www.basketball-reference.com/'
                  + player_anchor['href'].lstrip('/'))

    # Fetch the player's webpage. The timeout keeps a stalled connection from
    # hanging the whole loop, and a failed request raises here with a clear
    # HTTP error rather than being silently recorded as 'Not Listed'.
    player_rest = requests.get(player_url, timeout=10)
    player_rest.raise_for_status()

    # Narrow the page to its top section (the schema.org Person block).
    player_soup = BeautifulSoup(player_rest.content, 'lxml')
    player_info = player_soup.find(
        name='div', attrs={'itemtype': 'https://schema.org/Person'})

    # All hyperlink targets from the top section; empty if the section is
    # missing from the page.
    if player_info is None:
        player_links = []
    else:
        player_links = [link.get('href') for link in player_info.find_all('a')]

    # Search every link for the Twitter one instead of assuming it is always
    # second in the list; anchors without an href are ignored.
    twitter_url = next(
        (href for href in player_links if href and 'twitter' in href), None)

    if twitter_url is None:
        player['Twitter Handle'] = 'Not Listed'
    else:
        player['Twitter Handle'] = twitter_url.replace(
            'https://twitter.com/', '')

    twitter_handle.append(player)

pd.DataFrame(twitter_handle)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment