Last active
November 15, 2020 15:17
-
-
Save jthomasmock/c0920f748d7aefa8a346508079de5b1c to your computer and use it in GitHub Desktop.
Scraping Fantasy projections from fantasypros.com
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(tidyverse) | |
| library(glue) | |
| library(rvest) | |
| # Function to scrape the top avg cap salary by player ---- | |
| scrape_projections <- function(position, week) { | |
| if (position == "qb") { | |
| naming <- c("", "PASS", "PASS", 'PASS', "PASS", "PASS", "RUSH", "RUSH", "RUSH", "", "") | |
| } else if (position == "rb") { | |
| naming <- c("", "RUSH", "RUSH", "RUSH", "REC", "REC","REC", "", "") | |
| } else if (position == "wr") { | |
| naming <- c("", "REC", "REC","REC", "RUSH", "RUSH", "RUSH", "", "") | |
| } else { | |
| naming <- c("", "REC", "REC","REC", "", "") | |
| } | |
| # Be nice | |
| Sys.sleep(1) | |
| message(glue::glue("Scraping projected stats for {position} wk: {week}!")) | |
| url <- glue::glue("https://www.fantasypros.com/nfl/projections/{position}.php?week=draft") | |
| url %>% | |
| read_html() %>% | |
| html_table() %>% | |
| .[[1]] %>% | |
| filter(X1 != "") %>% | |
| set_names(nm = .[1,]) %>% | |
| .[2:length(.$Player),] %>% | |
| set_names(., paste({naming}, names(.), sep = "_")) %>% | |
| rename("Player" = "_Player", "FL" = "_FL", "FPTS" = "_FPTS") %>% | |
| mutate(position = toupper({position}), | |
| week = week) %>% | |
| mutate(team = str_sub(Player, -3), | |
| team = str_extract(team, "[[:upper:]]+"), | |
| Player = str_remove(Player, team), | |
| Player = str_trim(Player)) %>% | |
| select(Player, team, position, week, everything()) %>% | |
| rename_all(tolower) | |
| } | |
| # define inputs | |
| position <- c("qb", "rb", "wr", "te") | |
| week <- c("draft", as.character(1:16)) | |
| # crossing for all combos of inputs | |
| crossing(position, week) | |
| # scrape the data | |
| df_output <- crossing(position = position, week = week) %>% | |
| pmap_dfr(scrape_projections) %>% | |
| mutate_at(vars(pass_yds, rush_yds, rec_yds), str_remove, ",") %>% | |
| mutate_at(vars(pass_att:rec_tds), as.numeric) %>% | |
| mutate_at(vars(team:position), as.factor) %>% | |
| as_tibble() | |
| # Just yearly total projections | |
| df_total <- filter(df_output, week == "draft") %>% | |
| select(-week) | |
| # Just weekly projections | |
| df_weekly <- filter(df_output, week != "draft") %>% | |
| mutate(week = as.integer(week)) %>% | |
| arrange(week) |
Author
This is great, Tom! You rock! I tried running this but kept getting an error that "Column model must be length 1 (a summary value), not 11" - I'm not as familiar with the lm() function so wondering if there's a quick fix. Last question, I promise!
Author
I bet you're not using the dev version of dplyr! 🤦 That's my bad!
The summarize workflow + rowwise() is a replacement for purrr::map() in this example for dplyr 1.0.
That's not on CRAN yet, so I'd check out the following:
model_all <- df_total %>%
rowwise() %>%
mutate(tds = sum(rush_tds, pass_tds, rec_tds, na.rm = TRUE),
opp = sum(pass_att, rush_att, rec_rec, na.rm = TRUE)) %>%
group_by(position) %>%
top_n(32, fpts) %>%
nest() %>%
ungroup() %>%
mutate(model = map(data, ~ lm(fpts ~ opp, data = .x)), # rather than rowwise + summarize
glanced = map(model, broom::glance)) %>% # we do mutate + map()
unnest(glanced) %>%
select(position, r.squared) %>%
mutate(r.squared = round(r.squared, 3),
label = paste(position, r.squared, sep = " R^2: "))
model_all
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Ok - one more freebie @samhoppen: