stephenturner · January 23, 2016 13:42
diff --git a/scrape-brewtoad.r b/scrape-brewtoad.r
 library(rvest)
 library(dplyr)
 library(readr)

 # Scrape the Brewtoad recipe listing (table view) one page at a time,
 # combine the pages into a single tibble, tidy it, and write it to CSV.

 # How many pages do you want to harvest (currently over 10,000 available)
 npages <- 5
 # Fail fast on an unusable page count: with zero pages there would be no
 # Rating/ABV columns for the clean-up steps below to work on.
 stopifnot(is.numeric(npages), length(npages) == 1, npages >= 1)

 # Preallocate one slot per page rather than creating the list inside the loop
 d <- vector("list", npages)
 for (i in seq_len(npages)) {
   # Report progress
   message(paste0("Harvesting page ", i, "/", npages))
   # The URL for the table
   url <- paste0(
     "https://www.brewtoad.com/recipes?page=", i,
     "&sort=created_at&view_as_table=true"
   )
   # Harvest the data: html_table() is applied to the whole parsed page and
   # as.data.frame() flattens its result into one data frame
   # (assumes each page holds a single recipe table - TODO confirm)
   d[[i]] <- url %>%
     read_html() %>%
     html_table() %>%
     as.data.frame() %>%
     as_tibble()
 }
 rm(npages, i, url)

 # bind_rows on the list to stack the per-page tables into one tibble
 d <- d %>%
   bind_rows() %>%
   # Remove the rating column - sparse, most are zero
   select(-Rating) %>%
   # remove the (literal) percent sign and turn abv into a numeric value
   mutate(ABV = as.numeric(gsub("%", "", ABV, fixed = TRUE))) %>%
   # remove dupes
   distinct()
 # Write to file
 write_csv(d, "brewtoad.csv")
	library(rvest)
	library(dplyr)
	library(readr)

	# Scrape the Brewtoad recipe listing (table view) one page at a time,
	# combine the pages into a single tibble, tidy it, and write it to CSV.

	# How many pages do you want to harvest (currently over 10,000 available)
	npages <- 5
	# Fail fast on an unusable page count: with zero pages there would be no
	# Rating/ABV columns for the clean-up steps below to work on.
	stopifnot(is.numeric(npages), length(npages) == 1, npages >= 1)

	# Preallocate one slot per page rather than creating the list inside the loop
	d <- vector("list", npages)
	for (i in seq_len(npages)) {
	  # Report progress
	  message(paste0("Harvesting page ", i, "/", npages))
	  # The URL for the table
	  url <- paste0(
	    "https://www.brewtoad.com/recipes?page=", i,
	    "&sort=created_at&view_as_table=true"
	  )
	  # Harvest the data: html_table() is applied to the whole parsed page and
	  # as.data.frame() flattens its result into one data frame
	  # (assumes each page holds a single recipe table - TODO confirm)
	  d[[i]] <- url %>%
	    read_html() %>%
	    html_table() %>%
	    as.data.frame() %>%
	    as_tibble()
	}
	rm(npages, i, url)

	# bind_rows on the list to stack the per-page tables into one tibble
	d <- d %>%
	  bind_rows() %>%
	  # Remove the rating column - sparse, most are zero
	  select(-Rating) %>%
	  # remove the (literal) percent sign and turn abv into a numeric value
	  mutate(ABV = as.numeric(gsub("%", "", ABV, fixed = TRUE))) %>%
	  # remove dupes
	  distinct()
	# Write to file
	write_csv(d, "brewtoad.csv")