-
-
Save trinker/0260a9dfdd9531f9b90d9fad2f7b4b12 to your computer and use it in GitHub Desktop.
| ############################### | |
| ## Plotting Likert Type Data ## | |
| ############################### | |
| ##------------------------------------------------------------------------ | |
| ## Note: Plotting horizontal stacked bar plots in ggplot2 with Likert type | |
| ## data is a non-trivial task. Stacking is not well defined for mixed | |
| ## negative/positive values on a bar. This requires splitting the data | |
| ## set into two different parts (positive/negative), plotting each side | |
| ## separately, and filling the colors manually. This script adds complexity | |
| ## for neutral scales. | |
| ##------------------------------------------------------------------------ | |
| ##============= | |
| ## Dependencies | |
| ##============= | |
| pacman::p_load(wakefield, tidyverse, numform) | |
| ##========================= | |
| ## generate random data set | |
| ##========================= | |
| lvls <- c( | |
| "Strongly Agree", "Agree", "Somewhat Agree", | |
| "Neutral", "Somewhat Disagree", "Disagree", "Strongly Disagree" | |
| ) | |
| set.seed(10) | |
| dat <- wakefield::r_data_frame(200, | |
| is_first_gen = r_sample_binary(x = 0:1), | |
| `Q1. It´s not known, if climate change is real` = likert_7(), | |
| `Q2. In my opinion, the risks of climate change are exaggerated by activists` = likert_7(), | |
| `Q3. Climate change is not as dangerous as it is claimed` = likert_7(), | |
| `Q4. I'm convinced that we can handle climate change` = likert_7(), | |
| `Q5. I'm not ok with this` = likert_7() | |
| ) %>% | |
| tidyr::gather(Question, Response, -is_first_gen) %>% | |
| dplyr::mutate( | |
| Response = factor(sample(Response), levels = rev(lvls)), | |
| Level = as.integer(Response) | |
| ) | |
| ##=================== | |
| ## Summarize the Data | |
| ##=================== | |
| ## Summarize the data into counts and proportion by group var (question) | |
| sdat <- dat %>% | |
| dplyr::count(Question, Response, Level, is_first_gen) %>% | |
| dplyr::ungroup() %>% | |
| dplyr::group_by(Question, is_first_gen) %>% | |
| dplyr::mutate( | |
| prop = n/sum(n), | |
| Status = case_when(is_first_gen == 0 ~'Peers', TRUE ~ 'First Generation') %>% | |
| factor(levels = c('Peers', 'First Generation')) | |
| ) %>% | |
| ungroup() | |
| ##=============================== | |
| ## Make the levels and colors key | |
| ##=============================== | |
| high <- '#03A89E' #'darkred' | |
| low <- '#CD7F32' #'blue' | |
| mid <- 'grey90' | |
| levels_key <- data_frame( | |
| Response = factor(levels(sdat[['Response']]), levels = levels(sdat[['Response']])), | |
| Level = as.integer(Response) | |
| ) %>% | |
| arrange(Level) | |
| levels_key$type <- c( | |
| rep('negative', floor(nrow(levels_key)/2)), | |
| rep('neutral', nrow(levels_key) %% 2), | |
| rep('positive', floor(nrow(levels_key)/2)) | |
| ) | |
| half_n_levels <- floor(nrow(levels_key)/2) + 1 | |
| levels_key$colors <- c( | |
| colorRampPalette(c(low, mid))(half_n_levels)[-half_n_levels], | |
| rep(mid, nrow(levels_key) %% 2), | |
| colorRampPalette(c(mid, high))(half_n_levels)[-1] | |
| ) | |
| levels_key$colors <- factor(levels_key$colors, levels = levels_key$colors) | |
| ## Add the levels info back onto the Response Key | |
| sdat <- sdat %>% | |
| dplyr::left_join(levels_key, by = c('Response', 'Level')) | |
| ## Split the data apart into negative and positive response types | |
| positive <- sdat %>% | |
| dplyr::filter(type %in% c('positive', 'neutral')) %>% | |
| mutate( | |
| colors = factor(colors, levels = rev(levels(colors))), | |
| prop = case_when(type == 'neutral' ~ prop/2, TRUE ~ prop) | |
| ) | |
| negative <- sdat %>% | |
| dplyr::filter(type %in% c('neutral', 'negative')) %>% | |
| dplyr::mutate( | |
| prop = case_when(type == 'neutral' ~ prop/2, TRUE ~ prop), | |
| prop = -1 * prop | |
| ) | |
| ## calculate negative & positive responses data | |
| prop_dat_prime <- sdat %>% | |
| #dplyr::filter(type != 'neutral') %>% | |
| group_by(Question, Status, type) %>% | |
| summarize( | |
| n = sum(n), | |
| ) %>% | |
| ungroup() %>% | |
| group_by(Question, Status) %>% | |
| mutate( | |
| prop = n/sum(n), | |
| ) %>% | |
| ungroup() %>% | |
| group_by(type) %>% | |
| mutate( | |
| label = numform::f_prop2percent(prop, 0) | |
| ) %>% | |
| split(.$type) | |
| prop_dat <- lapply(c('positive', 'negative'), function(x){ | |
| y <- left_join( | |
| prop_dat_prime[[x]], | |
| prop_dat_prime[['neutral']] %>% | |
| ungroup() %>% | |
| mutate(prop2 = prop/2) %>% | |
| select(-c(type, label, n, prop)), | |
| by = c('Question', 'Status') | |
| ) %>% | |
| mutate(prop = prop + prop2) %>% | |
| select(-prop2) | |
| }) | |
| prop_dat <- bind_rows(prop_dat) %>% | |
| group_by(Question, Status, type, label) %>% | |
| summarize( | |
| n = sum(n), | |
| ) %>% | |
| group_by(Question, Status) %>% | |
| mutate( | |
| prop = n/sum(n), | |
| proploc = case_when(type == 'negative' ~ -1 * prop, TRUE ~ prop) | |
| ) %>% | |
| ungroup() %>% | |
| group_by(type) %>% | |
| mutate( | |
| textloc = 1.13 * max(prop) * sign(proploc) | |
| ) %>% | |
| split(.$type) | |
| prop_dat$neutral <- prop_dat_prime$neutral %>% | |
| mutate(textloc = 0) | |
| ##===================================== | |
| ## Plot the horizontal stacked bar plot | |
| ##===================================== | |
| ggplot() + | |
| geom_bar( | |
| data = positive, | |
| aes(x = Status, y = prop, fill = colors), | |
| position = "stack", | |
| stat = "identity" | |
| ) + | |
| geom_bar( | |
| data = negative, | |
| aes(x = Status, y = prop, fill = colors), | |
| position = "stack", | |
| stat = "identity" | |
| ) + | |
| coord_flip() + | |
| geom_hline(yintercept = 0, color = 'white', size = 1) + | |
| guides(fill = guide_legend(reverse = TRUE)) + | |
| scale_fill_identity( | |
| labels = levels_key$Response, | |
| breaks = levels_key$colors, | |
| guide = "legend", | |
| name = '' | |
| ) + | |
| facet_wrap(~ Question, ncol = 1) + | |
| geom_text( | |
| data = prop_dat$negative, | |
| aes(label = label, x = Status, y = textloc), | |
| hjust = 0, color = 'grey60' | |
| ) + | |
| geom_text( | |
| data = prop_dat$positive, | |
| aes(label = label, x = Status, y = textloc), | |
| hjust = 1, color = 'grey60' | |
| ) + | |
| geom_text( | |
| data = prop_dat$neutral, | |
| aes(label = label, x = Status, y = textloc), | |
| hjust = .5, color = 'grey60' | |
| ) + | |
| scale_y_continuous(expand = c(0, 0), limits = 1.02 * range(bind_rows(prop_dat)$textloc)) + | |
| theme_bw() + | |
| theme( | |
| panel.grid = element_blank(), | |
| axis.text.x = element_blank(), | |
| axis.ticks = element_blank(), | |
| strip.background = element_blank(), | |
| strip.text = element_text(hjust = 0, face = 'bold', size = 11), | |
| panel.border = element_rect(color = 'gray90', linetype = "dashed", fill = NA), | |
| plot.title = element_text(color = '#734A12'), | |
| plot.subtitle = element_text(color = '#734A12'), | |
| plot.caption = element_text(color = 'grey40') | |
| ) + | |
| labs( | |
| x = NULL, y = NULL, | |
| title = 'Question Answer by First Generation Status', | |
| subtitle = 'Some other long winded explanation that might make one seem smarter', | |
| caption = 'Note: Captions seem more academic' | |
| ) | |
trinker
commented
Feb 16, 2018

Note that this only works for odd number of categories. For an even number see: https://gist.github.com/trinker/06548977f08d9dd92f47bd8a3c958d17
Probably not the best way to handle neutral. Likely shouls be split out like in the likert package so there's a reference point or the negative/positive portions as seen here:
Code based on: http://rnotr.com/likert/ggplot/barometer/likert-plots/
This is awesome! Thanks for the code!
I was trying to replicate this with my own data which doesn't have as much "negative type" responses. As a consequence, the "zero" or neutral part is pushed heavily to the left and the plot itself doesn't look as nice as here. Colors are overlapping percentages etc. While I like the plot, I would still try to replicate your figure (i.e., having percentages on the sides and in the middle which would facilitate readability). I would appreciate if you could comment on this.
