library(tidyverse)
library(glue)
library(ggbeeswarm)
flights <- nycflights13::flights %>%
filter(arr_delay > 0) %>%
mutate(arr_delay_log = log(arr_delay),
quarter = as.factor(1 + month %/% 4))
group_summarise_mns <- function(df, y, ...) {
y_expr <- enquo(y)
df %>%
group_by_at(vars(...)) %>%
summarise(
mean = mean(!!y_expr, na.rm = TRUE),
n = n(),
sd = sd(!!y_expr, na.rm = TRUE)
) %>%
ungroup()
}
flights %>%
group_summarise_mns(arr_delay, carrier, month, day)
#> # A tibble: 4,672 x 6
#> carrier month day mean n sd
#> <chr> <int> <int> <dbl> <int> <dbl>
#> 1 9E 1 1 39.2 13 66.3
#> 2 9E 1 2 29.9 33 34.6
#> 3 9E 1 3 35.6 30 54.8
#> 4 9E 1 4 28.1 25 33.9
#> 5 9E 1 5 33.2 20 57.1
#> 6 9E 1 6 23.2 22 32.9
#> 7 9E 1 7 25.4 5 25.4
#> 8 9E 1 8 26.2 12 48.2
#> 9 9E 1 9 18.6 13 18.2
#> 10 9E 1 10 19.4 7 18.8
#> # ... with 4,662 more rows
box_box_box_plot <- function(df, y, x, coarse, gran){
y_expr <- enquo(y)
x_expr <- enquo(x)
coarse_expr <- enquo(coarse)
gran_expr <- enquo(gran)
# create y-label with padding
y_level <- c("Box and whiskers: ",
"Coarse means: ",
"Coarse means:")
segmenting_exprs <- c(quo_name(y_expr),
quo_name(gran_expr),
quo_name(coarse_expr))
y_axis <- paste0(y_level, segmenting_exprs) %>%
str_pad(width = max(str_length(.)), side = "right") %>%
str_c(collapse = "\n")
df <- df %>%
mutate(!!x_expr := fct_reorder(!!x_expr,!!y_expr))
df %>%
group_summarise_mns(!!y_expr, !!x_expr, !!gran_expr, !!coarse_expr) %>%
ggplot(aes(x = !!x_expr, y = mean)) +
geom_boxplot(aes(x = !!x_expr, y = !!y_expr),
outlier.shape = NA,
data = df) +
geom_quasirandom(aes(colour = !!coarse_expr, size = n),
alpha = 0.1,
shape = 15) +
geom_point(
aes(colour = !!coarse_expr),
data = group_summarise_mns(df, !!y_expr, !!x_expr, !!coarse_expr),
size = 3,
shape = 15
) +
scale_x_discrete(labels = abbreviate) +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
text = element_text(family = "mono")) +
labs(# subtitle = paste0("Coarse level: ", quo_name(coarse_expr), "\nGranular level: ", quo_name(gran_expr)),
size = glue("n in {quo_name(gran_expr)}"),
y = y_axis)
}
flights %>%
box_box_box_plot(arr_delay_log, carrier, quarter, day)+
ggtitle("Quarter 2 typically has the worst delays")
Created on 2019-02-24 by the reprex package (v0.2.1)
Stack overflow question on formatting y-axis titles: https://stackoverflow.com/questions/54835979/align-multi-line-axis-title-in-ggplot2