Created
March 31, 2020 16:59
-
-
Save kelly-sovacool/ecf79bb9aa4a35fc7fd94cd5e2e37896 to your computer and use it in GitHub Desktop.
Joy of Coding: Nick's code club 2020-03-30 w/ Will & Katie
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse)

# Bob Ross elements-by-episode table published by FiveThirtyEight.
# The compact col_types string reads the first two columns as character
# and every remaining column as double.
ross_data <- mutate(
  read_csv(
    "https://raw.githubusercontent.com/fivethirtyeight/data/master/bob-ross/elements-by-episode.csv",
    col_types = "ccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd"
  ),
  # Remove any literal double-quote characters found in TITLE.
  TITLE = gsub("\\\"", "", TITLE)
)

# Title keywords used as the example themes in Part 3.
themes <- c("MOUNTAIN", "WINTER", "AUTUMN", "LAKE", "CABIN")
############################################################# | |
# Part 1 - fill in the blanks on each line of the function
############################################################# | |
# function sums the features (columns) by the descriptors in the title | |
#' Sum each feature across the episodes whose title matches a theme
#'
#' @param descriptor Pattern handed to `grepl()` and matched against `TITLE`.
#'   It is interpreted as a regular expression and matches anywhere in the
#'   title, so "LAKE" also selects titles that merely contain that text.
#' @param data Episode table holding `EPISODE`, `TITLE` and one numeric
#'   column per feature. Defaults to the global `ross_data`, so existing
#'   one-argument calls keep working.
#' @return A tibble with columns `feature`, `abundance` (column sum over the
#'   matching episodes) and `theme` (the descriptor), sorted by decreasing
#'   abundance.
get_feature_abundance_by_theme <- function(descriptor, data = ross_data) {
  data %>%
    filter(grepl(descriptor, TITLE)) %>%
    select(-EPISODE, -TITLE) %>%
    # pivot_longer() replaces the superseded gather(): one row per
    # (episode, feature) pair.
    pivot_longer(everything(), names_to = "feature", values_to = "presence") %>%
    group_by(feature) %>%
    summarise(abundance = sum(presence)) %>%
    mutate(theme = descriptor) %>%
    arrange(desc(abundance))
}
############################################################# | |
# Part 2 - comment the function
############################################################# | |
#' Count how often each feature appears in episodes matching a theme
#'
#' @param descriptor Regular-expression pattern matched against `TITLE` with
#'   `grepl()`; a match anywhere in the title selects the episode.
#' @param data Episode table with `EPISODE`, `TITLE` and numeric feature
#'   columns. Defaults to the global `ross_data`.
#' @return A tibble with one row per feature and the columns `feature`,
#'   `abundance` and `theme`, ordered from most to least abundant.
get_feature_abundance_by_theme <- function(descriptor, data = ross_data) {
  data %>%
    # keep only the episodes whose title matches the descriptor
    filter(grepl(descriptor, TITLE)) %>%
    # drop the identifier columns so that only feature columns remain
    select(-EPISODE, -TITLE) %>%
    # reshape to long form: one row per (episode, feature) with its value
    pivot_longer(everything(), names_to = "feature", values_to = "presence") %>%
    # total each feature over the selected episodes
    group_by(feature) %>%
    summarise(abundance = sum(presence)) %>%
    # record which theme produced these totals
    mutate(theme = descriptor) %>%
    # most abundant features first
    arrange(desc(abundance))
}
############################################################# | |
# Part 3 - Run this function on each of the themes and output a single dataframe | |
############################################################# | |
# Using a for loop | |
# Using an apply | |
# Apply the function to every theme, then stack the per-theme tibbles in a
# single call. bind_rows() avoids the repeated pairwise copies made by
# reduce(rbind) and returns an empty tibble when the list is empty.
lapply(themes, get_feature_abundance_by_theme) %>%
  bind_rows()
# Using a map function | |
# Map over the themes and row-bind the per-theme results into one data frame.
themes %>%
  map_df(get_feature_abundance_by_theme)
############################################################# | |
# Part 4 - Run this function on all themes | |
############################################################# | |
# you can use the following code to pick the unique themes | |
# Collect the distinct title words to use as themes.
# Only the first five words of each title are considered: extra = "drop" and
# fill = "right" spell out separate()'s default handling of longer/shorter
# titles without emitting a warning for each of them.
all_themes <- ross_data %>%
  separate(
    TITLE,
    c("word1", "word2", "word3", "word4", "word5"),
    extra = "drop",
    fill = "right"
  ) %>%
  pivot_longer(
    contains("word"),
    names_to = "place",
    values_to = "word",
    values_drop_na = TRUE
  ) %>%
  # an empty pattern would match every title in grepl(), so discard it
  filter(word != "") %>%
  filter(!(word %in% c("A", "THE", "IN"))) %>%
  pull(word) %>%
  # de-duplicate the words themselves; unique() on the whole data frame
  # compared entire rows and therefore left repeated words in place
  unique()
# this takes a little while, 1-2 mins | |
# Compute the feature abundances for every title word and row-bind them.
all_themes %>%
  map_df(get_feature_abundance_by_theme)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment