Created
November 11, 2019 04:09
-
-
Save octoparse/30dcdbdb54c3098c286c4bd5e76f25e4 to your computer and use it in GitHub Desktop.
Scraping fantasy football projections
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import re | |
import requests | |
def get_html_data(url, timeout=30):
    """Download *url* and return its body parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without a timeout ``requests.get`` can block forever on a
            stalled connection.

    Returns:
        A ``BeautifulSoup`` object built from the raw response bytes with
        the ``html5lib`` parser.
    """
    response = requests.get(url, timeout=timeout)
    # Hand the raw bytes to BeautifulSoup (not ``response.text``) so that
    # the parser performs its own encoding detection.
    return BeautifulSoup(response.content, "html5lib")
def scrape(return_list=None):
    """Scrape the projected-stats table from fantasy.nfl.com.

    Requests the projections page (``position=1``, ``statSeason=2019``,
    ``statWeek=10``) at offsets 1, 26, 51, 76, 101 and 126, and reads one
    record per ``<tr>`` whose class matches ``player-.+``.

    Args:
        return_list: Optional list to collect results into. When given
            (even if empty), each player is appended as one 15-item tuple
            ``(name, stat, ..., projected_points)``. When ``None``, each
            player is printed as a comma-separated line instead.

    Returns:
        None. Results are delivered through ``return_list`` or stdout.

    Raises:
        AttributeError: If a matched row lacks the name link or one of the
            expected stat cells (``find`` returns ``None``).
    """
    url = 'https://fantasy.nfl.com/research/projections?offset={0}&position=1&sort=projectedPts&statCategory=projectedStats&statSeason=2019&statType=seasonProjectedStats&statWeek=10'
    row_class = re.compile('player-.+')
    name_class = re.compile('playerCard playerName playerNameFull playerNameId-.+')
    # Class attribute of each stat cell, in output column order. Note that
    # stat_32 is intentionally emitted before stat_30, as in the original.
    stat_classes = (
        'stat stat_1 numeric',
        'stat stat_5 numeric',
        'stat stat_6 numeric',
        'stat stat_7 numeric',
        'stat stat_14 numeric',
        'stat stat_15 numeric',
        'stat stat_20 numeric',
        'stat stat_21 numeric',
        'stat stat_22 numeric',
        'stat stat_28 numeric',
        'stat stat_29 numeric',
        'stat stat_32 numeric',
        'stat stat_30 numeric',
        'stat projected numeric sorted last',
    )
    line_format = '{}, {},{},{},{},{},{},{},{},{},{},{},{},{},{}'

    # Presumably the site serves 25 players per page, hence the step of 25.
    for offset in range(1, 139, 25):
        soup = get_html_data(url.format(offset))
        for tr in soup.find_all('tr', {'class': row_class}):
            name = tr.find('a', {'class': name_class}).text
            stats = tuple(
                tr.find('td', {'class': stat_class}).text
                for stat_class in stat_classes
            )
            row = (name,) + stats
            # Compare against None so that a caller-supplied empty list is
            # still filled rather than silently ignored.
            if return_list is not None:
                # list.append takes exactly one argument: store the record
                # as a single tuple.
                return_list.append(row)
            else:
                # Parenthesised single-argument form runs on Python 2 and 3.
                print(line_format.format(*row))
def main():
    """Run the scraper with its default arguments and pass on its result."""
    outcome = scrape()
    return outcome
# Start scraping only when this file is executed directly, so that importing
# it from another module triggers no network requests.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Nice information, Octoparse. Can you share a detailed guide about what web scraping is and what the best tools for web scraping are?