Created
February 16, 2018 05:51
-
-
Save trinker/0260a9dfdd9531f9b90d9fad2f7b4b12 to your computer and use it in GitHub Desktop.
Likert ggplot2 Odd Number of Responses
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################### | |
## Plotting Likert Type Data ## | |
############################### | |
##------------------------------------------------------------------------ | |
## Note: Plotting horizontal stacked bar plots in ggplot2 with Likert type | |
## data is a non-trivial task. Stacking is not well defined for mixed | |
## negative/positive values on a bar. This requires splitting the data | |
## set into two different parts (positive/negative), plotting each side | |
## separately, and filling the colors manually. This script adds complexity | |
## for neutral scales. | |
##------------------------------------------------------------------------ | |
##============= | |
## Dependencies | |
##============= | |
pacman::p_load(wakefield, tidyverse, numform) | |
##========================= | |
## generate random data set | |
##========================= | |
lvls <- c( | |
"Strongly Agree", "Agree", "Somewhat Agree", | |
"Neutral", "Somewhat Disagree", "Disagree", "Strongly Disagree" | |
) | |
set.seed(10) | |
dat <- wakefield::r_data_frame(200, | |
is_first_gen = r_sample_binary(x = 0:1), | |
`Q1. It´s not known, if climate change is real` = likert_7(), | |
`Q2. In my opinion, the risks of climate change are exaggerated by activists` = likert_7(), | |
`Q3. Climate change is not as dangerous as it is claimed` = likert_7(), | |
`Q4. I'm convinced that we can handle climate change` = likert_7(), | |
`Q5. I'm not ok with this` = likert_7() | |
) %>% | |
tidyr::gather(Question, Response, -is_first_gen) %>% | |
dplyr::mutate( | |
Response = factor(sample(Response), levels = rev(lvls)), | |
Level = as.integer(Response) | |
) | |
##=================== | |
## Summarize the Data | |
##=================== | |
## Summarize the data into counts and proportion by group var (question) | |
sdat <- dat %>% | |
dplyr::count(Question, Response, Level, is_first_gen) %>% | |
dplyr::ungroup() %>% | |
dplyr::group_by(Question, is_first_gen) %>% | |
dplyr::mutate( | |
prop = n/sum(n), | |
Status = case_when(is_first_gen == 0 ~'Peers', TRUE ~ 'First Generation') %>% | |
factor(levels = c('Peers', 'First Generation')) | |
) %>% | |
ungroup() | |
##=============================== | |
## Make the levels and colors key | |
##=============================== | |
high <- '#03A89E' #'darkred' | |
low <- '#CD7F32' #'blue' | |
mid <- 'grey90' | |
levels_key <- data_frame( | |
Response = factor(levels(sdat[['Response']]), levels = levels(sdat[['Response']])), | |
Level = as.integer(Response) | |
) %>% | |
arrange(Level) | |
levels_key$type <- c( | |
rep('negative', floor(nrow(levels_key)/2)), | |
rep('neutral', nrow(levels_key) %% 2), | |
rep('positive', floor(nrow(levels_key)/2)) | |
) | |
half_n_levels <- floor(nrow(levels_key)/2) + 1 | |
levels_key$colors <- c( | |
colorRampPalette(c(low, mid))(half_n_levels)[-half_n_levels], | |
rep(mid, nrow(levels_key) %% 2), | |
colorRampPalette(c(mid, high))(half_n_levels)[-1] | |
) | |
levels_key$colors <- factor(levels_key$colors, levels = levels_key$colors) | |
## Add the levels info back onto the Response Key | |
sdat <- sdat %>% | |
dplyr::left_join(levels_key, by = c('Response', 'Level')) | |
## Split the data apart into negative and positive response types | |
positive <- sdat %>% | |
dplyr::filter(type %in% c('positive', 'neutral')) %>% | |
mutate( | |
colors = factor(colors, levels = rev(levels(colors))), | |
prop = case_when(type == 'neutral' ~ prop/2, TRUE ~ prop) | |
) | |
negative <- sdat %>% | |
dplyr::filter(type %in% c('neutral', 'negative')) %>% | |
dplyr::mutate( | |
prop = case_when(type == 'neutral' ~ prop/2, TRUE ~ prop), | |
prop = -1 * prop | |
) | |
## calculate negative & positive responses data | |
prop_dat_prime <- sdat %>% | |
#dplyr::filter(type != 'neutral') %>% | |
group_by(Question, Status, type) %>% | |
summarize( | |
n = sum(n), | |
) %>% | |
ungroup() %>% | |
group_by(Question, Status) %>% | |
mutate( | |
prop = n/sum(n), | |
) %>% | |
ungroup() %>% | |
group_by(type) %>% | |
mutate( | |
label = numform::f_prop2percent(prop, 0) | |
) %>% | |
split(.$type) | |
prop_dat <- lapply(c('positive', 'negative'), function(x){ | |
y <- left_join( | |
prop_dat_prime[[x]], | |
prop_dat_prime[['neutral']] %>% | |
ungroup() %>% | |
mutate(prop2 = prop/2) %>% | |
select(-c(type, label, n, prop)), | |
by = c('Question', 'Status') | |
) %>% | |
mutate(prop = prop + prop2) %>% | |
select(-prop2) | |
}) | |
prop_dat <- bind_rows(prop_dat) %>% | |
group_by(Question, Status, type, label) %>% | |
summarize( | |
n = sum(n), | |
) %>% | |
group_by(Question, Status) %>% | |
mutate( | |
prop = n/sum(n), | |
proploc = case_when(type == 'negative' ~ -1 * prop, TRUE ~ prop) | |
) %>% | |
ungroup() %>% | |
group_by(type) %>% | |
mutate( | |
textloc = 1.13 * max(prop) * sign(proploc) | |
) %>% | |
split(.$type) | |
prop_dat$neutral <- prop_dat_prime$neutral %>% | |
mutate(textloc = 0) | |
##===================================== | |
## Plot the horizontal stacked bar plot | |
##===================================== | |
ggplot() + | |
geom_bar( | |
data = positive, | |
aes(x = Status, y = prop, fill = colors), | |
position = "stack", | |
stat = "identity" | |
) + | |
geom_bar( | |
data = negative, | |
aes(x = Status, y = prop, fill = colors), | |
position = "stack", | |
stat = "identity" | |
) + | |
coord_flip() + | |
geom_hline(yintercept = 0, color = 'white', size = 1) + | |
guides(fill = guide_legend(reverse = TRUE)) + | |
scale_fill_identity( | |
labels = levels_key$Response, | |
breaks = levels_key$colors, | |
guide = "legend", | |
name = '' | |
) + | |
facet_wrap(~ Question, ncol = 1) + | |
geom_text( | |
data = prop_dat$negative, | |
aes(label = label, x = Status, y = textloc), | |
hjust = 0, color = 'grey60' | |
) + | |
geom_text( | |
data = prop_dat$positive, | |
aes(label = label, x = Status, y = textloc), | |
hjust = 1, color = 'grey60' | |
) + | |
geom_text( | |
data = prop_dat$neutral, | |
aes(label = label, x = Status, y = textloc), | |
hjust = .5, color = 'grey60' | |
) + | |
scale_y_continuous(expand = c(0, 0), limits = 1.02 * range(bind_rows(prop_dat)$textloc)) + | |
theme_bw() + | |
theme( | |
panel.grid = element_blank(), | |
axis.text.x = element_blank(), | |
axis.ticks = element_blank(), | |
strip.background = element_blank(), | |
strip.text = element_text(hjust = 0, face = 'bold', size = 11), | |
panel.border = element_rect(color = 'gray90', linetype = "dashed", fill = NA), | |
plot.title = element_text(color = '#734A12'), | |
plot.subtitle = element_text(color = '#734A12'), | |
plot.caption = element_text(color = 'grey40') | |
) + | |
labs( | |
x = NULL, y = NULL, | |
title = 'Question Answer by First Generation Status', | |
subtitle = 'Some other long winded explanation that might make one seem smarter', | |
caption = 'Note: Captions seem more academic' | |
) | |
This is awesome! Thanks for the code!
I was trying to replicate this with my own data which doesn't have as much "negative type" responses. As a consequence, the "zero" or neutral part is pushed heavily to the left and the plot itself doesn't look as nice as here. Colors are overlapping percentages etc. While I like the plot, I would still try to replicate your figure (i.e., having percentages on the sides and in the middle which would facilitate readability). I would appreciate if you could comment on this.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Note that this only works for odd number of categories. For an even number see: https://gist.github.com/trinker/06548977f08d9dd92f47bd8a3c958d17
Probably not the best way to handle neutral. Likely shouls be split out like in the likert package so there's a reference point or the negative/positive portions as seen here:
Code based on: http://rnotr.com/likert/ggplot/barometer/likert-plots/