-
-
Save jthomasmock/c0920f748d7aefa8a346508079de5b1c to your computer and use it in GitHub Desktop.
| library(tidyverse) | |
| library(glue) | |
| library(rvest) | |
| # Function to scrape the top avg cap salary by player ---- | |
| scrape_projections <- function(position, week) { | |
| if (position == "qb") { | |
| naming <- c("", "PASS", "PASS", 'PASS', "PASS", "PASS", "RUSH", "RUSH", "RUSH", "", "") | |
| } else if (position == "rb") { | |
| naming <- c("", "RUSH", "RUSH", "RUSH", "REC", "REC","REC", "", "") | |
| } else if (position == "wr") { | |
| naming <- c("", "REC", "REC","REC", "RUSH", "RUSH", "RUSH", "", "") | |
| } else { | |
| naming <- c("", "REC", "REC","REC", "", "") | |
| } | |
| # Be nice | |
| Sys.sleep(1) | |
| message(glue::glue("Scraping projected stats for {position} wk: {week}!")) | |
| url <- glue::glue("https://www.fantasypros.com/nfl/projections/{position}.php?week=draft") | |
| url %>% | |
| read_html() %>% | |
| html_table() %>% | |
| .[[1]] %>% | |
| filter(X1 != "") %>% | |
| set_names(nm = .[1,]) %>% | |
| .[2:length(.$Player),] %>% | |
| set_names(., paste({naming}, names(.), sep = "_")) %>% | |
| rename("Player" = "_Player", "FL" = "_FL", "FPTS" = "_FPTS") %>% | |
| mutate(position = toupper({position}), | |
| week = week) %>% | |
| mutate(team = str_sub(Player, -3), | |
| team = str_extract(team, "[[:upper:]]+"), | |
| Player = str_remove(Player, team), | |
| Player = str_trim(Player)) %>% | |
| select(Player, team, position, week, everything()) %>% | |
| rename_all(tolower) | |
| } | |
| # define inputs | |
| position <- c("qb", "rb", "wr", "te") | |
| week <- c("draft", as.character(1:16)) | |
| # crossing for all combos of inputs | |
| crossing(position, week) | |
| # scrape the data | |
| df_output <- crossing(position = position, week = week) %>% | |
| pmap_dfr(scrape_projections) %>% | |
| mutate_at(vars(pass_yds, rush_yds, rec_yds), str_remove, ",") %>% | |
| mutate_at(vars(pass_att:rec_tds), as.numeric) %>% | |
| mutate_at(vars(team:position), as.factor) %>% | |
| as_tibble() | |
| # Just yearly total projections | |
| df_total <- filter(df_output, week == "draft") %>% | |
| select(-week) | |
| # Just weekly projections | |
| df_weekly <- filter(df_output, week != "draft") %>% | |
| mutate(week = as.integer(week)) %>% | |
| arrange(week) |
Whoa! Blast from the past on this one!
For this example, naming is coming from the naming list defined up top.
naming <- list(
qb_col = c("", "PASS", "PASS", 'PASS', "PASS", "PASS", "RUSH", "RUSH", "RUSH", "", ""),
rb_col = c("", "RUSH", "RUSH", "RUSH", "REC", "REC","REC", "", ""),
wr_col = c("", "REC", "REC","REC", "RUSH", "RUSH", "RUSH", "", ""),
te_col = c("", "REC", "REC","REC", "", "")
)
I'd also recommend changing the final output to:
# Final dataframe
df_output <- map2_dfr(position, naming, scrape_projections) %>%
mutate_at(vars(pass_yds, rush_yds, rec_yds), str_remove, ",") %>%
mutate_at(vars(pass_att:rec_tds), as.numeric) %>%
mutate_at(vars(team:position), as.factor) %>%
as_tibble()
I've changed this up top as well. This prevents yds columns from turning to NA when converting to a numeric/double column.
@samhoppen I updated the above scraping script to better reflect a more robust workflow and also get by week for example.
I'd also recommend checking out: Jesse Piburn's fantasypros R package.
Haha I was just googling fantasypros scraper because I'm not quite good enough to build one yet and stumbled upon this one! I ended up finding Jesse's package too, which is great! Thanks for updating it - much appreciated!
Ok - one more freebie @samhoppen:
# scrape the data
df_output <- crossing(position = position, week = week) %>%
pmap_dfr(scrape_projections) %>%
mutate_at(vars(pass_yds, rush_yds, rec_yds), str_remove, ",") %>%
mutate_at(vars(pass_att:rec_tds), as.numeric) %>%
mutate_at(vars(team:position), as.factor) %>%
as_tibble()
# Just yearly total projections
df_total <- filter(df_output, week == "draft") %>%
select(-week)
model_all <- df_total %>%
rowwise() %>%
mutate(tds = sum(rush_tds, pass_tds, rec_tds, na.rm = TRUE),
opp = sum(pass_att, rush_att, rec_rec, na.rm = TRUE)) %>%
ungroup() %>%
group_by(position) %>%
top_n(32, fpts) %>%
nest() %>%
rowwise() %>%
summarize(model = lm(fpts ~ opp, data) %>% glance()) %>%
unpack(model) %>%
ungroup() %>%
select(position, r.squared) %>%
mutate(r.squared = round(r.squared, 3),
label = paste(position, r.squared, sep = " R^2: "))
model_all
plot_all <- df_total %>%
rowwise() %>%
mutate(tds = sum(rush_tds, pass_tds, rec_tds, na.rm = TRUE),
opp = sum(pass_att, rush_att, rec_rec, na.rm = TRUE)) %>%
ungroup() %>%
group_by(position) %>%
top_n(32, fpts) %>%
ungroup() %>%
left_join(model_all, by = c("position")) %>%
ggplot(aes(x = opp, y = fpts)) +
geom_point(aes(size = tds)) +
geom_smooth(method = "lm") +
facet_wrap(~label, scales = "free")
plot_all
This is great, Tom! You rock! I tried running this but kept getting an error that "Column model must be length 1 (a summary value), not 11" - I'm not as familiar with the lm() function so wondering if there's a quick fix. Last question, I promise!
I bet you're not using the dev version of dplyr! 🤦 That's my bad!
The summarize workflow + rowwise() is a replacement for purrr::map() in this example for dplyr 1.0.
That's not on CRAN yet, so I'd check out the following:
model_all <- df_total %>%
rowwise() %>%
mutate(tds = sum(rush_tds, pass_tds, rec_tds, na.rm = TRUE),
opp = sum(pass_att, rush_att, rec_rec, na.rm = TRUE)) %>%
group_by(position) %>%
top_n(32, fpts) %>%
nest() %>%
ungroup() %>%
mutate(model = map(data, ~ lm(fpts ~ opp, data = .x)), # rather than rowwise + summarize
glanced = map(model, broom::glance)) %>% # we do mutate + map()
unnest(glanced) %>%
select(position, r.squared) %>%
mutate(r.squared = round(r.squared, 3),
label = paste(position, r.squared, sep = " R^2: "))
model_all
Hey Tom, this is great! Quick question - what is supposed to go in the "naming" field when using the scrape_projections function?