trinker · February 16, 2018 05:51 · trinker · Feb 16, 2018 · trinker · Feb 16, 2018
diff --git a/likert_odd.R b/likert_odd.R
 ###############################
 ## Plotting Likert Type Data ##
 ###############################
 ##------------------------------------------------------------------------
 ## Note: Plotting horizontal stacked bar plots in ggplot2 with Likert type
 ## data is a non-trivial task.  Stacking is not well defined for mixed
 ## negative/positive values on a bar.  This requires splitting the data
 ## set into two different parts (positive/negative), plotting each side
 ## separately, and filling the colors manually.  This script adds complexity
 ## for neutral scales.
 ##------------------------------------------------------------------------


 ##=============
 ## Dependencies
 ##=============
 pacman::p_load(wakefield, tidyverse, numform)

 ##=========================
 ## generate random data set
 ##=========================
 lvls <- c(
        "Strongly Agree", "Agree", "Somewhat Agree", 
        "Neutral", "Somewhat Disagree", "Disagree", "Strongly Disagree"
    )

 set.seed(10)
 dat <- wakefield::r_data_frame(200, 
        is_first_gen = r_sample_binary(x = 0:1),
        `Q1. It´s not known, if climate change is real` = likert_7(),
        `Q2. In my opinion, the risks of climate change are exaggerated by activists` = likert_7(),
        `Q3. Climate change is not as dangerous as it is claimed` = likert_7(),
        `Q4. I'm convinced that we can handle climate change` = likert_7(),
        `Q5. I'm not ok with this` = likert_7()
    ) %>%
    tidyr::gather(Question, Response, -is_first_gen)  %>%
    dplyr::mutate( 
        Response = factor(sample(Response), levels = rev(lvls)),
        Level = as.integer(Response)
    )


 ##===================
 ## Summarize the Data
 ##===================
 ## Summarize the data into counts and proportion by group var (question)
 sdat <- dat %>%
    dplyr::count(Question, Response, Level, is_first_gen) %>%
    dplyr::ungroup() %>%
    dplyr::group_by(Question, is_first_gen) %>%
    dplyr::mutate(
        prop = n/sum(n),
        Status = case_when(is_first_gen == 0 ~'Peers', TRUE ~ 'First Generation') %>%
            factor(levels = c('Peers', 'First Generation'))
    ) %>%
    ungroup()

 ##===============================
 ## Make the levels and colors key
 ##===============================
 high <- '#03A89E' #'darkred'
 low <- '#CD7F32' #'blue'
 mid <- 'grey90'

 levels_key <- data_frame(
    Response = factor(levels(sdat[['Response']]), levels = levels(sdat[['Response']])),
    Level = as.integer(Response)
 ) %>%
    arrange(Level)

 levels_key$type <- c(
    rep('negative', floor(nrow(levels_key)/2)), 
    rep('neutral', nrow(levels_key) %% 2), 
    rep('positive', floor(nrow(levels_key)/2))
 )

 half_n_levels <- floor(nrow(levels_key)/2) + 1
 levels_key$colors <- c(
    colorRampPalette(c(low, mid))(half_n_levels)[-half_n_levels],
    rep(mid, nrow(levels_key) %% 2),
    colorRampPalette(c(mid, high))(half_n_levels)[-1]
 )

 levels_key$colors <- factor(levels_key$colors, levels = levels_key$colors)


 ## Add the levels info back onto the Response Key
 sdat <- sdat %>%
    dplyr::left_join(levels_key, by = c('Response', 'Level'))
    

 ## Split the data apart into negative and positive response types
 positive <- sdat %>% 
    dplyr::filter(type %in% c('positive', 'neutral')) %>%
    mutate(
        colors = factor(colors, levels = rev(levels(colors))),
        prop = case_when(type == 'neutral' ~ prop/2, TRUE ~ prop)
    ) 

 negative <- sdat %>% 
    dplyr::filter(type %in% c('neutral', 'negative')) %>%  
    dplyr::mutate(
        prop = case_when(type == 'neutral' ~ prop/2, TRUE ~ prop),
        prop = -1 * prop
    )

 ## calculate negative & positive responses data
 prop_dat_prime <- sdat %>%
    #dplyr::filter(type != 'neutral') %>%
    group_by(Question, Status, type) %>%
    summarize(
        n = sum(n),
    ) %>%
    ungroup() %>%
    group_by(Question, Status) %>%
    mutate(
        prop = n/sum(n),
    ) %>%
    ungroup() %>%
    group_by(type) %>%
    mutate(
        label = numform::f_prop2percent(prop, 0)
    ) %>%
    split(.$type)
    
 prop_dat <- lapply(c('positive', 'negative'), function(x){

    y <- left_join(
        prop_dat_prime[[x]],
        prop_dat_prime[['neutral']] %>%
            ungroup() %>%
            mutate(prop2 = prop/2) %>%
            select(-c(type, label, n, prop)),
        by = c('Question', 'Status')
    ) %>%
    mutate(prop = prop + prop2) %>%
    select(-prop2)         

 })

 prop_dat <- bind_rows(prop_dat) %>%
    group_by(Question, Status, type, label) %>%
    summarize(
        n = sum(n),
    ) %>%
    group_by(Question, Status) %>%
    mutate(
        prop = n/sum(n),
        proploc = case_when(type == 'negative' ~ -1 * prop, TRUE ~ prop)
    ) %>%
    ungroup() %>%
    group_by(type) %>%
    mutate(
        textloc = 1.13 * max(prop) * sign(proploc)
    ) %>%
    split(.$type) 

 prop_dat$neutral <- prop_dat_prime$neutral %>%
    mutate(textloc = 0)


 ##=====================================
 ## Plot the horizontal stacked bar plot
 ##=====================================
 ggplot() + 
    geom_bar(
        data = positive, 
        aes(x = Status, y = prop, fill = colors),
        position = "stack", 
        stat = "identity"
    )  + 
    geom_bar(
        data = negative, 
        aes(x = Status, y = prop, fill = colors),
        position = "stack", 
        stat = "identity"
    ) +
    coord_flip()  +
    geom_hline(yintercept = 0, color = 'white', size = 1) +
    guides(fill = guide_legend(reverse = TRUE)) + 
    scale_fill_identity(
        labels = levels_key$Response, 
        breaks = levels_key$colors, 
        guide = "legend", 
        name = ''
    ) +
    facet_wrap(~ Question, ncol = 1)  +
    geom_text(
        data = prop_dat$negative, 
        aes(label = label, x = Status,  y = textloc), 
        hjust = 0, color = 'grey60'
    ) +
    geom_text(
        data = prop_dat$positive,
        aes(label = label, x = Status,  y = textloc), 
        hjust = 1, color = 'grey60'
    )  +
    geom_text(
        data = prop_dat$neutral,
        aes(label = label, x = Status,  y = textloc), 
        hjust = .5, color = 'grey60'
    ) +
    scale_y_continuous(expand = c(0, 0), limits = 1.02 * range(bind_rows(prop_dat)$textloc)) +
    theme_bw() +
    theme(
        panel.grid = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        strip.background = element_blank(),
        strip.text = element_text(hjust = 0, face = 'bold', size = 11),
        panel.border = element_rect(color = 'gray90', linetype = "dashed", fill = NA),
        plot.title = element_text(color = '#734A12'),
        plot.subtitle = element_text(color = '#734A12'),
        plot.caption = element_text(color = 'grey40')
    ) +
    labs(
        x = NULL, y = NULL, 
        title = 'Question Answer by First Generation Status',
        subtitle = 'Some other long winded explanation that might make one seem smarter',
        caption = 'Note: Captions seem more academic'
    )
	###############################
	## Plotting Likert Type Data ##
	###############################
	##------------------------------------------------------------------------
	## Note: Plotting horizontal stacked bar plots in ggplot2 with Likert type
	## data is a non-trivial task. Stacking is not well defined for mixed
	## negative/positive values on a bar. This requires splitting the data
	## set into two different parts (positive/negative), plotting each side
	## separately, and filling the colors manually. This script adds complexity
	## for neutral scales.
	##------------------------------------------------------------------------


	##=============
	## Dependencies
	##=============
	pacman::p_load(wakefield, tidyverse, numform)

	##=========================
	## generate random data set
	##=========================
	lvls <- c(
	"Strongly Agree", "Agree", "Somewhat Agree",
	"Neutral", "Somewhat Disagree", "Disagree", "Strongly Disagree"
	)

	set.seed(10)
	dat <- wakefield::r_data_frame(200,
	is_first_gen = r_sample_binary(x = 0:1),
	`Q1. It´s not known, if climate change is real` = likert_7(),
	`Q2. In my opinion, the risks of climate change are exaggerated by activists` = likert_7(),
	`Q3. Climate change is not as dangerous as it is claimed` = likert_7(),
	`Q4. I'm convinced that we can handle climate change` = likert_7(),
	`Q5. I'm not ok with this` = likert_7()
	) %>%
	tidyr::gather(Question, Response, -is_first_gen) %>%
	dplyr::mutate(
	Response = factor(sample(Response), levels = rev(lvls)),
	Level = as.integer(Response)
	)


	##===================
	## Summarize the Data
	##===================
	## Summarize the data into counts and proportion by group var (question)
	sdat <- dat %>%
	dplyr::count(Question, Response, Level, is_first_gen) %>%
	dplyr::ungroup() %>%
	dplyr::group_by(Question, is_first_gen) %>%
	dplyr::mutate(
	prop = n/sum(n),
	Status = case_when(is_first_gen == 0 ~'Peers', TRUE ~ 'First Generation') %>%
	factor(levels = c('Peers', 'First Generation'))
	) %>%
	ungroup()

	##===============================
	## Make the levels and colors key
	##===============================
	high <- '#03A89E' #'darkred'
	low <- '#CD7F32' #'blue'
	mid <- 'grey90'

	levels_key <- data_frame(
	Response = factor(levels(sdat[['Response']]), levels = levels(sdat[['Response']])),
	Level = as.integer(Response)
	) %>%
	arrange(Level)

	levels_key$type <- c(
	rep('negative', floor(nrow(levels_key)/2)),
	rep('neutral', nrow(levels_key) %% 2),
	rep('positive', floor(nrow(levels_key)/2))
	)

	half_n_levels <- floor(nrow(levels_key)/2) + 1
	levels_key$colors <- c(
	colorRampPalette(c(low, mid))(half_n_levels)[-half_n_levels],
	rep(mid, nrow(levels_key) %% 2),
	colorRampPalette(c(mid, high))(half_n_levels)[-1]
	)

	levels_key$colors <- factor(levels_key$colors, levels = levels_key$colors)


	## Add the levels info back onto the Response Key
	sdat <- sdat %>%
	dplyr::left_join(levels_key, by = c('Response', 'Level'))


	## Split the data apart into negative and positive response types
	positive <- sdat %>%
	dplyr::filter(type %in% c('positive', 'neutral')) %>%
	mutate(
	colors = factor(colors, levels = rev(levels(colors))),
	prop = case_when(type == 'neutral' ~ prop/2, TRUE ~ prop)
	)

	negative <- sdat %>%
	dplyr::filter(type %in% c('neutral', 'negative')) %>%
	dplyr::mutate(
	prop = case_when(type == 'neutral' ~ prop/2, TRUE ~ prop),
	prop = -1 * prop
	)

	## calculate negative & positive responses data
	prop_dat_prime <- sdat %>%
	#dplyr::filter(type != 'neutral') %>%
	group_by(Question, Status, type) %>%
	summarize(
	n = sum(n),
	) %>%
	ungroup() %>%
	group_by(Question, Status) %>%
	mutate(
	prop = n/sum(n),
	) %>%
	ungroup() %>%
	group_by(type) %>%
	mutate(
	label = numform::f_prop2percent(prop, 0)
	) %>%
	split(.$type)

	prop_dat <- lapply(c('positive', 'negative'), function(x){

	y <- left_join(
	prop_dat_prime[[x]],
	prop_dat_prime[['neutral']] %>%
	ungroup() %>%
	mutate(prop2 = prop/2) %>%
	select(-c(type, label, n, prop)),
	by = c('Question', 'Status')
	) %>%
	mutate(prop = prop + prop2) %>%
	select(-prop2)

	})

	prop_dat <- bind_rows(prop_dat) %>%
	group_by(Question, Status, type, label) %>%
	summarize(
	n = sum(n),
	) %>%
	group_by(Question, Status) %>%
	mutate(
	prop = n/sum(n),
	proploc = case_when(type == 'negative' ~ -1 * prop, TRUE ~ prop)
	) %>%
	ungroup() %>%
	group_by(type) %>%
	mutate(
	textloc = 1.13 * max(prop) * sign(proploc)
	) %>%
	split(.$type)

	prop_dat$neutral <- prop_dat_prime$neutral %>%
	mutate(textloc = 0)


	##=====================================
	## Plot the horizontal stacked bar plot
	##=====================================
	ggplot() +
	geom_bar(
	data = positive,
	aes(x = Status, y = prop, fill = colors),
	position = "stack",
	stat = "identity"
	) +
	geom_bar(
	data = negative,
	aes(x = Status, y = prop, fill = colors),
	position = "stack",
	stat = "identity"
	) +
	coord_flip() +
	geom_hline(yintercept = 0, color = 'white', size = 1) +
	guides(fill = guide_legend(reverse = TRUE)) +
	scale_fill_identity(
	labels = levels_key$Response,
	breaks = levels_key$colors,
	guide = "legend",
	name = ''
	) +
	facet_wrap(~ Question, ncol = 1) +
	geom_text(
	data = prop_dat$negative,
	aes(label = label, x = Status, y = textloc),
	hjust = 0, color = 'grey60'
	) +
	geom_text(
	data = prop_dat$positive,
	aes(label = label, x = Status, y = textloc),
	hjust = 1, color = 'grey60'
	) +
	geom_text(
	data = prop_dat$neutral,
	aes(label = label, x = Status, y = textloc),
	hjust = .5, color = 'grey60'
	) +
	scale_y_continuous(expand = c(0, 0), limits = 1.02 * range(bind_rows(prop_dat)$textloc)) +
	theme_bw() +
	theme(
	panel.grid = element_blank(),
	axis.text.x = element_blank(),
	axis.ticks = element_blank(),
	strip.background = element_blank(),
	strip.text = element_text(hjust = 0, face = 'bold', size = 11),
	panel.border = element_rect(color = 'gray90', linetype = "dashed", fill = NA),
	plot.title = element_text(color = '#734A12'),
	plot.subtitle = element_text(color = '#734A12'),
	plot.caption = element_text(color = 'grey40')
	) +
	labs(
	x = NULL, y = NULL,
	title = 'Question Answer by First Generation Status',
	subtitle = 'Some other long winded explanation that might make one seem smarter',
	caption = 'Note: Captions seem more academic'
	)