randomgambit · February 20, 2018 21:46
diff --git a/scrape_nfl.R b/scrape_nfl.R
 # Replicating https://t.co/Jq1QfFGpjA

 library(rvest)
 library(stringr)
 library(dplyr)
 library(tidyr)
 library(purrr)
 library(lubridate)

 #' Download and tidy one game log table
 #'
 #' Fetches the page at `url` (a path relative to the
 #' pro-football-reference.com root), takes the first `table#game_logs`
 #' table on it, and cleans it: lower-cases the column names, drops rows
 #' whose `year` is the literal "Year", removes the first "*" from `game`,
 #' splits `game` into `away` and `home` on " @ ", converts the columns
 #' `vpts` through `hpyds` to integer, and parses `year` with `ymd()`.
 #'
 #' @param url Site-relative path, e.g. an `href` scraped from the
 #'   officials index page.
 #' @return The cleaned game log as a data frame.
 get_and_clean_table <- function(url) {
   tables <- paste0("http://www.pro-football-reference.com", url) %>%
     read_html() %>%
     html_nodes("table#game_logs") %>%
     html_table()

   # Fail with a readable message instead of an obscure set_names() error
   # when the page carries no game log table.
   if (length(tables) == 0) {
     stop("No table#game_logs found at ", url, call. = FALSE)
   }

   tables %>%
     first() %>%
     set_names(tolower(names(.))) %>%
     # Presumably drops header rows repeated inside the table body.
     filter(year != "Year") %>%
     mutate(game = str_replace(game, "\\*", "")) %>%
     separate(game, c("away", "home"), sep = " @ ") %>%
     # across() replaces the deprecated mutate_each(funs(...)) form.
     mutate(across(vpts:hpyds, as.integer)) %>%
     mutate(year = ymd(year))
 }

 ## IO
 # Scrape the officials index: one row per link inside a table, holding the
 # link text and its href. Each href is then fetched and cleaned by
 # get_and_clean_table(), and the per-link tables are unnested into one
 # long data frame.
 officials <- read_html("http://www.pro-football-reference.com/officials/") %>%
   html_nodes("table a") %>%
   {tibble(name = html_text(.), url = html_attr(., "href"))} %>%
   mutate(data = map(url, get_and_clean_table)) %>%
   unnest(data)

 # Write the whole data frame once. The previous walk(write_csv, ...) call
 # iterated over the columns and passed each vector to write_csv(), which
 # is also a readr function that this script never loads.
 write.csv(officials, "officials_data.csv", row.names = FALSE)
	# Replicating https://t.co/Jq1QfFGpjA

	library(rvest)
	library(stringr)
	library(dplyr)
	library(tidyr)
	library(purrr)
	library(lubridate)

	#' Download and tidy one game log table
	#'
	#' Fetches the page at `url` (a path relative to the
	#' pro-football-reference.com root), takes the first `table#game_logs`
	#' table on it, and cleans it: lower-cases the column names, drops rows
	#' whose `year` is the literal "Year", removes the first "*" from `game`,
	#' splits `game` into `away` and `home` on " @ ", converts the columns
	#' `vpts` through `hpyds` to integer, and parses `year` with `ymd()`.
	#'
	#' @param url Site-relative path, e.g. an `href` scraped from the
	#'   officials index page.
	#' @return The cleaned game log as a data frame.
	get_and_clean_table <- function(url) {
	  tables <- paste0("http://www.pro-football-reference.com", url) %>%
	    read_html() %>%
	    html_nodes("table#game_logs") %>%
	    html_table()

	  # Fail with a readable message instead of an obscure set_names() error
	  # when the page carries no game log table.
	  if (length(tables) == 0) {
	    stop("No table#game_logs found at ", url, call. = FALSE)
	  }

	  tables %>%
	    first() %>%
	    set_names(tolower(names(.))) %>%
	    # Presumably drops header rows repeated inside the table body.
	    filter(year != "Year") %>%
	    mutate(game = str_replace(game, "\\*", "")) %>%
	    separate(game, c("away", "home"), sep = " @ ") %>%
	    # across() replaces the deprecated mutate_each(funs(...)) form.
	    mutate(across(vpts:hpyds, as.integer)) %>%
	    mutate(year = ymd(year))
	}

	## IO
	# Scrape the officials index: one row per link inside a table, holding the
	# link text and its href. Each href is then fetched and cleaned by
	# get_and_clean_table(), and the per-link tables are unnested into one
	# long data frame.
	officials <- read_html("http://www.pro-football-reference.com/officials/") %>%
	  html_nodes("table a") %>%
	  {tibble(name = html_text(.), url = html_attr(., "href"))} %>%
	  mutate(data = map(url, get_and_clean_table)) %>%
	  unnest(data)

	# Write the whole data frame once. The previous walk(write_csv, ...) call
	# iterated over the columns and passed each vector to write_csv(), which
	# is also a readr function that this script never loads.
	write.csv(officials, "officials_data.csv", row.names = FALSE)