-
-
Save trinker/0260a9dfdd9531f9b90d9fad2f7b4b12 to your computer and use it in GitHub Desktop.
############################### | |
## Plotting Likert Type Data ## | |
############################### | |
##------------------------------------------------------------------------ | |
## Note: Plotting horizontal stacked bar plots in ggplot2 with Likert type | |
## data is a non-trivial task. Stacking is not well defined for mixed | |
## negative/positive values on a bar. This requires splitting the data | |
## set into two different parts (positive/negative), plotting each side | |
## separately, and filling the colors manually. This script adds complexity | |
## for neutral scales. | |
##------------------------------------------------------------------------ | |
##============= | |
## Dependencies | |
##============= | |
pacman::p_load(wakefield, tidyverse, numform) | |
##========================= | |
## generate random data set | |
##========================= | |
lvls <- c( | |
"Strongly Agree", "Agree", "Somewhat Agree", | |
"Neutral", "Somewhat Disagree", "Disagree", "Strongly Disagree" | |
) | |
set.seed(10) | |
dat <- wakefield::r_data_frame(200, | |
is_first_gen = r_sample_binary(x = 0:1), | |
`Q1. It´s not known, if climate change is real` = likert_7(), | |
`Q2. In my opinion, the risks of climate change are exaggerated by activists` = likert_7(), | |
`Q3. Climate change is not as dangerous as it is claimed` = likert_7(), | |
`Q4. I'm convinced that we can handle climate change` = likert_7(), | |
`Q5. I'm not ok with this` = likert_7() | |
) %>% | |
tidyr::gather(Question, Response, -is_first_gen) %>% | |
dplyr::mutate( | |
Response = factor(sample(Response), levels = rev(lvls)), | |
Level = as.integer(Response) | |
) | |
##=================== | |
## Summarize the Data | |
##=================== | |
## Summarize the data into counts and proportion by group var (question) | |
sdat <- dat %>% | |
dplyr::count(Question, Response, Level, is_first_gen) %>% | |
dplyr::ungroup() %>% | |
dplyr::group_by(Question, is_first_gen) %>% | |
dplyr::mutate( | |
prop = n/sum(n), | |
Status = case_when(is_first_gen == 0 ~'Peers', TRUE ~ 'First Generation') %>% | |
factor(levels = c('Peers', 'First Generation')) | |
) %>% | |
ungroup() | |
##=============================== | |
## Make the levels and colors key | |
##=============================== | |
high <- '#03A89E' #'darkred' | |
low <- '#CD7F32' #'blue' | |
mid <- 'grey90' | |
levels_key <- data_frame( | |
Response = factor(levels(sdat[['Response']]), levels = levels(sdat[['Response']])), | |
Level = as.integer(Response) | |
) %>% | |
arrange(Level) | |
levels_key$type <- c( | |
rep('negative', floor(nrow(levels_key)/2)), | |
rep('neutral', nrow(levels_key) %% 2), | |
rep('positive', floor(nrow(levels_key)/2)) | |
) | |
half_n_levels <- floor(nrow(levels_key)/2) + 1 | |
levels_key$colors <- c( | |
colorRampPalette(c(low, mid))(half_n_levels)[-half_n_levels], | |
rep(mid, nrow(levels_key) %% 2), | |
colorRampPalette(c(mid, high))(half_n_levels)[-1] | |
) | |
levels_key$colors <- factor(levels_key$colors, levels = levels_key$colors) | |
## Add the levels info back onto the Response Key | |
sdat <- sdat %>% | |
dplyr::left_join(levels_key, by = c('Response', 'Level')) | |
## Split the data apart into negative and positive response types | |
positive <- sdat %>% | |
dplyr::filter(type %in% c('positive', 'neutral')) %>% | |
mutate( | |
colors = factor(colors, levels = rev(levels(colors))), | |
prop = case_when(type == 'neutral' ~ prop/2, TRUE ~ prop) | |
) | |
negative <- sdat %>% | |
dplyr::filter(type %in% c('neutral', 'negative')) %>% | |
dplyr::mutate( | |
prop = case_when(type == 'neutral' ~ prop/2, TRUE ~ prop), | |
prop = -1 * prop | |
) | |
## calculate negative & positive responses data | |
prop_dat_prime <- sdat %>% | |
#dplyr::filter(type != 'neutral') %>% | |
group_by(Question, Status, type) %>% | |
summarize( | |
n = sum(n), | |
) %>% | |
ungroup() %>% | |
group_by(Question, Status) %>% | |
mutate( | |
prop = n/sum(n), | |
) %>% | |
ungroup() %>% | |
group_by(type) %>% | |
mutate( | |
label = numform::f_prop2percent(prop, 0) | |
) %>% | |
split(.$type) | |
prop_dat <- lapply(c('positive', 'negative'), function(x){ | |
y <- left_join( | |
prop_dat_prime[[x]], | |
prop_dat_prime[['neutral']] %>% | |
ungroup() %>% | |
mutate(prop2 = prop/2) %>% | |
select(-c(type, label, n, prop)), | |
by = c('Question', 'Status') | |
) %>% | |
mutate(prop = prop + prop2) %>% | |
select(-prop2) | |
}) | |
prop_dat <- bind_rows(prop_dat) %>% | |
group_by(Question, Status, type, label) %>% | |
summarize( | |
n = sum(n), | |
) %>% | |
group_by(Question, Status) %>% | |
mutate( | |
prop = n/sum(n), | |
proploc = case_when(type == 'negative' ~ -1 * prop, TRUE ~ prop) | |
) %>% | |
ungroup() %>% | |
group_by(type) %>% | |
mutate( | |
textloc = 1.13 * max(prop) * sign(proploc) | |
) %>% | |
split(.$type) | |
prop_dat$neutral <- prop_dat_prime$neutral %>% | |
mutate(textloc = 0) | |
##===================================== | |
## Plot the horizontal stacked bar plot | |
##===================================== | |
ggplot() + | |
geom_bar( | |
data = positive, | |
aes(x = Status, y = prop, fill = colors), | |
position = "stack", | |
stat = "identity" | |
) + | |
geom_bar( | |
data = negative, | |
aes(x = Status, y = prop, fill = colors), | |
position = "stack", | |
stat = "identity" | |
) + | |
coord_flip() + | |
geom_hline(yintercept = 0, color = 'white', size = 1) + | |
guides(fill = guide_legend(reverse = TRUE)) + | |
scale_fill_identity( | |
labels = levels_key$Response, | |
breaks = levels_key$colors, | |
guide = "legend", | |
name = '' | |
) + | |
facet_wrap(~ Question, ncol = 1) + | |
geom_text( | |
data = prop_dat$negative, | |
aes(label = label, x = Status, y = textloc), | |
hjust = 0, color = 'grey60' | |
) + | |
geom_text( | |
data = prop_dat$positive, | |
aes(label = label, x = Status, y = textloc), | |
hjust = 1, color = 'grey60' | |
) + | |
geom_text( | |
data = prop_dat$neutral, | |
aes(label = label, x = Status, y = textloc), | |
hjust = .5, color = 'grey60' | |
) + | |
scale_y_continuous(expand = c(0, 0), limits = 1.02 * range(bind_rows(prop_dat)$textloc)) + | |
theme_bw() + | |
theme( | |
panel.grid = element_blank(), | |
axis.text.x = element_blank(), | |
axis.ticks = element_blank(), | |
strip.background = element_blank(), | |
strip.text = element_text(hjust = 0, face = 'bold', size = 11), | |
panel.border = element_rect(color = 'gray90', linetype = "dashed", fill = NA), | |
plot.title = element_text(color = '#734A12'), | |
plot.subtitle = element_text(color = '#734A12'), | |
plot.caption = element_text(color = 'grey40') | |
) + | |
labs( | |
x = NULL, y = NULL, | |
title = 'Question Answer by First Generation Status', | |
subtitle = 'Some other long winded explanation that might make one seem smarter', | |
caption = 'Note: Captions seem more academic' | |
) | |
trinker
commented
Feb 16, 2018
Note that this only works for odd number of categories. For an even number see: https://gist.github.com/trinker/06548977f08d9dd92f47bd8a3c958d17
Probably not the best way to handle neutral. Likely shouls be split out like in the likert package so there's a reference point or the negative/positive portions as seen here:
Code based on: http://rnotr.com/likert/ggplot/barometer/likert-plots/
This is awesome! Thanks for the code!
I was trying to replicate this with my own data which doesn't have as much "negative type" responses. As a consequence, the "zero" or neutral part is pushed heavily to the left and the plot itself doesn't look as nice as here. Colors are overlapping percentages etc. While I like the plot, I would still try to replicate your figure (i.e., having percentages on the sides and in the middle which would facilitate readability). I would appreciate if you could comment on this.