kelly-sovacool · March 31, 2020 16:59
diff --git a/bob_ross.R b/bob_ross.R
 library(tidyverse)

 # Download the FiveThirtyEight Bob Ross elements-by-episode table.
 # EPISODE and TITLE are read as character; every other column is read as
 # double. The column spec is given by name with a default, so it does not
 # depend on the exact number of feature columns in the upstream file.
 ross_data <- read_csv(
   "https://raw.githubusercontent.com/fivethirtyeight/data/master/bob-ross/elements-by-episode.csv",
   col_types = cols(
     EPISODE = col_character(),
     TITLE = col_character(),
     .default = col_double()
   )
 ) %>%
   # strip the literal double-quote characters embedded in the titles
   mutate(TITLE = gsub("\\\"", '', TITLE))

 # Title words used as example themes in Part 3
 themes <- c('MOUNTAIN', 'WINTER', 'AUTUMN', 'LAKE', 'CABIN')

 #############################################################
 #  Part 1 - fill in the blanks on each line of the function
 #############################################################

 # function sums the features (columns) by the descriptors in the title

 # Sum every feature column over the episodes whose TITLE matches `descriptor`.
 #
 # descriptor: expected to be a single string; grepl() treats it as a regular
 #   expression, so it also matches titles that merely contain it as a
 #   substring.
 # Returns a tibble with columns `feature`, `abundance` and `theme`
 #   (theme = descriptor), one row per feature column, sorted by decreasing
 #   abundance.
 # Reads the global `ross_data`.
 get_feature_abundance_by_theme <- function(descriptor){
   ross_data %>%
     filter(grepl(descriptor, TITLE)) %>%
     select(-EPISODE, -TITLE) %>%
     # pivot_longer() is the current replacement for the superseded gather()
     pivot_longer(everything(), names_to = "feature", values_to = "presence") %>%
     group_by(feature) %>%
     summarize(abundance = sum(presence)) %>%
     mutate(theme = descriptor) %>%
     arrange(desc(abundance))
 }

 #############################################################
 #  Part 2 - comment the function
 #############################################################

 # For one title descriptor, total each feature column across the matching
 # episodes of the global `ross_data`.
 #
 # descriptor: expected to be a single string, used as a regular expression
 #   against TITLE.
 # Returns a tibble (feature, abundance, theme) ordered from the most to the
 #   least abundant feature.
 get_feature_abundance_by_theme <- function(descriptor){
   ross_data %>%
     # keep only the episodes whose title matches the descriptor
     filter(grepl(descriptor, TITLE)) %>%
     # drop the identifier columns so that only feature columns remain
     select(-EPISODE, -TITLE) %>%
     # reshape to long form: one row per (episode, feature) pair
     # (pivot_longer() replaces the superseded gather())
     pivot_longer(everything(), names_to = "feature", values_to = "presence") %>%
     # add up the presence values of each feature
     group_by(feature) %>%
     summarise(abundance = sum(presence)) %>%
     # record which descriptor these totals belong to
     mutate(theme = descriptor) %>%
     # most abundant features first
     arrange(desc(abundance))
 }

 #############################################################
 #  Part 3 - Run this function on each of the themes and output a single dataframe
 #############################################################

 # Using a for loop


 # Using an apply
 # lapply() returns one tibble per theme; bind_rows() stacks the whole list in
 # a single call instead of rbind()-ing the pieces pairwise
 lapply(themes, get_feature_abundance_by_theme) %>%
   bind_rows()

 # Using a map function
 # apply the function to each theme and row-bind the results into one data frame
 themes %>%
   map_df(get_feature_abundance_by_theme)


 #############################################################
 #  Part 4 - Run this function on all themes
 #############################################################

 # you can use the following code to pick the unique themes
 # Collect the distinct words that occur in the episode titles.
 # unique() has to run on the word vector itself: applied to the whole data
 # frame it compares entire rows (which still carry EPISODE and the feature
 # columns), so repeated words were never removed.
 all_themes <- ross_data %>%
   # split each title into up to five word columns
   separate(TITLE, c('word1', 'word2', 'word3', 'word4', 'word5')) %>%
   gather(place, word, contains('word')) %>%
   # drop padding from short titles and any empty pieces; an empty pattern
   # would match every title in grepl()
   filter(!is.na(word), word != "") %>%
   filter(!(word %in% c('A', 'THE', 'IN'))) %>%
   pull(word) %>%
   unique()

 # this takes a little while, 1-2 mins
 # apply the function to every word in all_themes and row-bind the results
 all_themes %>%
   map_df(get_feature_abundance_by_theme)
	library(tidyverse)

	# Download the FiveThirtyEight Bob Ross elements-by-episode table.
	# EPISODE and TITLE are read as character; every other column is read as
	# double. The column spec is given by name with a default, so it does not
	# depend on the exact number of feature columns in the upstream file.
	ross_data <- read_csv(
	  "https://raw.githubusercontent.com/fivethirtyeight/data/master/bob-ross/elements-by-episode.csv",
	  col_types = cols(
	    EPISODE = col_character(),
	    TITLE = col_character(),
	    .default = col_double()
	  )
	) %>%
	  # strip the literal double-quote characters embedded in the titles
	  mutate(TITLE = gsub("\\\"", '', TITLE))

	# Title words used as example themes in Part 3
	themes <- c('MOUNTAIN', 'WINTER', 'AUTUMN', 'LAKE', 'CABIN')

	#############################################################
	# Part 1 - fill in the blanks on each line of the function
	#############################################################

	# function sums the features (columns) by the descriptors in the title

	# Sum every feature column over the episodes whose TITLE matches `descriptor`.
	#
	# descriptor: expected to be a single string; grepl() treats it as a regular
	#   expression, so it also matches titles that merely contain it as a
	#   substring.
	# Returns a tibble with columns `feature`, `abundance` and `theme`
	#   (theme = descriptor), one row per feature column, sorted by decreasing
	#   abundance.
	# Reads the global `ross_data`.
	get_feature_abundance_by_theme <- function(descriptor){
	  ross_data %>%
	    filter(grepl(descriptor, TITLE)) %>%
	    select(-EPISODE, -TITLE) %>%
	    # pivot_longer() is the current replacement for the superseded gather()
	    pivot_longer(everything(), names_to = "feature", values_to = "presence") %>%
	    group_by(feature) %>%
	    summarize(abundance = sum(presence)) %>%
	    mutate(theme = descriptor) %>%
	    arrange(desc(abundance))
	}

	#############################################################
	# Part 2 - comment the function
	#############################################################

	# For one title descriptor, total each feature column across the matching
	# episodes of the global `ross_data`.
	#
	# descriptor: expected to be a single string, used as a regular expression
	#   against TITLE.
	# Returns a tibble (feature, abundance, theme) ordered from the most to the
	#   least abundant feature.
	get_feature_abundance_by_theme <- function(descriptor){
	  ross_data %>%
	    # keep only the episodes whose title matches the descriptor
	    filter(grepl(descriptor, TITLE)) %>%
	    # drop the identifier columns so that only feature columns remain
	    select(-EPISODE, -TITLE) %>%
	    # reshape to long form: one row per (episode, feature) pair
	    # (pivot_longer() replaces the superseded gather())
	    pivot_longer(everything(), names_to = "feature", values_to = "presence") %>%
	    # add up the presence values of each feature
	    group_by(feature) %>%
	    summarise(abundance = sum(presence)) %>%
	    # record which descriptor these totals belong to
	    mutate(theme = descriptor) %>%
	    # most abundant features first
	    arrange(desc(abundance))
	}

	#############################################################
	# Part 3 - Run this function on each of the themes and output a single dataframe
	#############################################################

	# Using a for loop


	# Using an apply
	# lapply() returns one tibble per theme; bind_rows() stacks the whole list in
	# a single call instead of rbind()-ing the pieces pairwise
	lapply(themes, get_feature_abundance_by_theme) %>%
	  bind_rows()

	# Using a map function
	# apply the function to each theme and row-bind the results into one data frame
	themes %>%
	  map_df(get_feature_abundance_by_theme)


	#############################################################
	# Part 4 - Run this function on all themes
	#############################################################

	# you can use the following code to pick the unique themes
	# Collect the distinct words that occur in the episode titles.
	# unique() has to run on the word vector itself: applied to the whole data
	# frame it compares entire rows (which still carry EPISODE and the feature
	# columns), so repeated words were never removed.
	all_themes <- ross_data %>%
	  # split each title into up to five word columns
	  separate(TITLE, c('word1', 'word2', 'word3', 'word4', 'word5')) %>%
	  gather(place, word, contains('word')) %>%
	  # drop padding from short titles and any empty pieces; an empty pattern
	  # would match every title in grepl()
	  filter(!is.na(word), word != "") %>%
	  filter(!(word %in% c('A', 'THE', 'IN'))) %>%
	  pull(word) %>%
	  unique()

	# this takes a little while, 1-2 mins
	# apply the function to every word in all_themes and row-bind the results
	all_themes %>%
	  map_df(get_feature_abundance_by_theme)