library(tidyverse)
# Geom creation -----------------------------------------------------------
StatParallelSlopes <- ggproto(
"StatParallelSlopes", Stat,
required_aes = c("x", "y"),
compute_panel = function(data, scales, se = TRUE, formula = y ~ x, n = 100) {
if (nrow(data) == 0) {
return(data[integer(0), ])
}
# Compute model data
model_info <- compute_model_info(data, formula)
formula <- model_info$formula
data <- model_info$data
# Fit model
model <- lm(formula = formula, data = data)
# Compute prediction from model based on sequence of x-values defined for
# every group separately
stats <- lapply(
X = split(data, data$group), FUN = predict_per_group,
model = model, se = se, n = n
)
# Combine predictions into one data frame
dplyr::bind_rows(stats)
}
)
geom_parallel_slopes <- function(mapping = NULL, data = NULL,
position = "identity", ...,
se = TRUE, formula = y ~ x, n = 100,
na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE) {
layer(
geom = GeomSmooth, stat = StatParallelSlopes, data = data,
mapping = mapping, position = position,
params = list(na.rm = na.rm, se = se, formula = formula, n = n, ...),
inherit.aes = inherit.aes, show.legend = show.legend
)
}
compute_model_info <- function(data, formula) {
if (has_unique_value(data$group)) {
# Apparently, in {ggplot2} 'group' column is always present at the stage of
# computing panel data. It is filled with `-1` if it wasn't
# supplied as aesthetic and with `1` if it has only one unique value.
warning(
"`geom_parallel_slopes()` didn't recieve a grouping variable with more ",
"than one unique value. Make sure you supply one. Basic model is fitted.",
call. = FALSE
)
} else {
data$group <- as.factor(data$group)
# Actually make model to have parallel slopes
formula <- as.formula(paste0(deparse(formula), " + group"))
}
list(formula = formula, data = data)
}
predict_per_group <- function(group_df, model, se, n) {
# This code is a modified version of `ggplot2:::predictdf.default`
# Create a data on which to perform prediction
x_seq <- seq(min(group_df$x), max(group_df$x), length.out = n)
group_seq <- rep(group_df$group[1], n)
new_data <- data.frame(x = x_seq, group = group_seq)
# Perform prediction
pred <- stats::predict(
model, newdata = new_data, se.fit = se,
interval = if (se) "confidence" else "none"
)
# Convert prediction to "ggplot2 format"
if (isTRUE(se)) {
fit <- as.data.frame(pred$fit)
names(fit) <- c("y", "ymin", "ymax")
res <- data.frame(x = x_seq, fit, se = pred$se.fit)
} else {
res <- data.frame(x = x_seq, y = as.vector(pred))
}
# Restore columns that describe group with unique value (like color, etc.)
# so that they can be used in output plot
restore_unique_cols(res, group_df)
}
# Combination of source code from `ggplot2::StatSmooth$compute_panel` and
# `ggplot2:::uniquecols`
restore_unique_cols <- function(new, old) {
is_unique <- sapply(old, has_unique_value)
unique_df <- old[1, is_unique, drop = FALSE]
rownames(unique_df) <- seq_len(nrow(unique_df))
missing <- !(names(unique_df) %in% names(new))
cbind(new, unique_df[rep(1, nrow(new)), missing, drop = FALSE])
}
has_unique_value <- function(x) {
length(unique(x)) <= 1
}
# Demonstration -----------------------------------------------------------
set.seed(202)
dummy_df <- bind_rows(
tibble(a = runif(10, 0, 0.1), gr = 1),
tibble(a = runif(15, 0.1, 0.3), gr = 2),
tibble(a = runif(20, 0.4, 1), gr = 3)
) %>%
mutate(
pan = factor(sample(1:3, n(), replace = TRUE)),
gr = factor(gr),
# Slope depends on panel, intercept on group
b = as.integer(pan)*a + as.integer(gr) + runif(n(), max = 0.1),
# Quadratic curves
c = as.integer(pan)*a^2 + as.integer(gr) + runif(n(), max = 0.1),
)
# Basic usage
viz <- ggplot(dummy_df, aes(a, b, colour = gr)) + geom_point()
viz + geom_smooth(method = "lm") + labs(title = "geom_smooth()")
# Roughly same width of error boundaries is a nice demonstration of the fact
# that model was fitted to the whole data and not per group (in which error's
# relative amplitude actually differ).
viz + geom_parallel_slopes() + labs(title = "geom_parallel_slopes()")
# Demonstration that models are different
viz +
geom_smooth(aes(group = gr), method = "lm", color = "blue", se = FALSE) +
geom_parallel_slopes(aes(group = gr), color = "green", se = FALSE) +
labs(
title =
"geom_smooth() (blue) and geom_parallel_slopes() (green) are different"
)
# Extra aesthetics
viz +
geom_parallel_slopes(mapping = aes(group = gr), color = "red", size = 3) +
labs(title = "geom_parallel_slopes() with modified aesthetics")
# Different formula
viz_quad <- ggplot(dummy_df, aes(a, c, colour = gr)) + geom_point()
viz_quad +
geom_smooth(method = "lm", formula = y ~ poly(x, 2)) +
labs(title = "Quadratic geom_smooth()")
viz_quad +
geom_parallel_slopes(formula = y ~ poly(x, 2)) +
labs(title = "Quadratic geom_parallel_slopes()")
# Demonstration that linear and quadratic models are actually different
ggplot(dummy_df, aes(a, c, colour = gr, group = gr)) +
geom_point(show.legend = FALSE) +
geom_parallel_slopes(color = "blue", se = FALSE) +
geom_parallel_slopes(color = "green", se = FALSE, formula = y ~ poly(x, 2)) +
labs(
title =
"Linear (blue) and quadratic (green) parallel slopes models are different"
)
# Model is fitted per facet panel
viz_faceted <- ggplot(dummy_df, aes(a, b, colour = gr)) +
geom_point() +
facet_wrap(~ pan)
viz_faceted + geom_smooth(method = "lm") + labs(title = "Faceted geom_smooth()")
#> Warning in qt((1 - level)/2, df): созданы NaN
viz_faceted +
geom_parallel_slopes() +
labs(title = "Faceted geom_parallel_slopes()")
# Edge cases
# Works with not factor grouping variable (in which case it should be
# explicitly set in `group` aesthetics, as in `geom_smooth()`)
dummy_df %>%
mutate(gr = as.integer(gr)) %>%
ggplot(aes(a, b, colour = gr, group = gr)) +
geom_point() +
geom_smooth(method = "lm") +
geom_parallel_slopes() +
labs(title = "Works with non-factor grouping")
# Works without grouping variable or with one level in it (just as
# `geom_smooth()`), but with a relevant warning
ggplot(dummy_df, aes(a, b)) +
geom_point() +
geom_smooth(method = "lm") +
# It is plotted exactly over `geom_smooth()`
geom_parallel_slopes() +
labs(title = "Works with no grouping variable")
#> Warning: `geom_parallel_slopes()` didn't recieve a grouping variable
#> with more than one unique value. Make sure you supply one. Basic model is
#> fitted.
ggplot(dummy_df %>% mutate(gr = 1), aes(a, b, group = gr)) +
geom_point() +
geom_smooth(method = "lm") +
# It is plotted exactly over `geom_smooth()`
geom_parallel_slopes() +
labs(title = "Works with grouping variable with one value")
#> Warning: `geom_parallel_slopes()` didn't recieve a grouping variable
#> with more than one unique value. Make sure you supply one. Basic model is
#> fitted.
Created on 2019-10-29 by the reprex package (v0.3.0)