Skip to content

Instantly share code, notes, and snippets.

@SwampThingPaul
Created September 20, 2024 19:03
Show Gist options
  • Save SwampThingPaul/3793ce0a161ce88bc3c044164cd301a0 to your computer and use it in GitHub Desktop.
Save SwampThingPaul/3793ce0a161ce88bc3c044164cd301a0 to your computer and use it in GitHub Desktop.
stats_playtime Simpson's paradox
library(ggplot2)
library(dplyr)
theme_set(theme_minimal(base_size = 16)+
theme_bw()+
theme(panel.border = element_rect("black",fill=NA,linewidth=1)))
# Create sample data
set.seed(123)
n <- 100
groups <- 5
xcorr = -0.74
xgroup.mean = c(2.5,5,7,8,10)
data <- data.frame(
x = NA, # rep(runif(n, 0, 12), groups),
y = NA,
group = factor(rep(1:groups, each = n))
)
# Generate y values with different correlations for each group
correlations <- c(0.74, 0.82, 0.75, 0.72, 0.69)
for (i in 1:groups) {
data$x[data$group == i] <- (runif(n, xgroup.mean[i]-1, xgroup.mean[i]+1)*xcorr)+10
data$y[data$group == i] <- correlations[i] * data$x[data$group == i] +
rnorm(n, mean = i * 2, sd = 1)
}
# Create the plot
ggplot(data, aes(x = x, y = y, color = group)) +
geom_point(alpha = 0.7) +
geom_smooth(aes(group = group),method = "lm", se = FALSE, color = "red") +
scale_color_discrete(name = "Group") +
labs(
x = "X",
y = "Y"
) +
geom_smooth(method = "lm", se = FALSE, color = "black",linetype=2) +
theme_minimal() +
theme(legend.position = "none")
# Mixed model
## An adapted example from https://fromthebottomoftheheap.net/2021/02/02/random-effects-in-gams/
## LME version
library(lme4)
mod.lme <- lmer(y~ x + (1 | group) + (0 + x | group),data = data)
summary(mod.lme)
fixef(mod.lme); # fixed effects
summary(mod.lme)$varcor
# ranef(mod.lme); # random effects
## MGCV version
library(mgcv)
library(gratia)
mod.mgcv <- gam(y ~ x+
s(group,bs="re")+
s(group,x,bs="re"),
data = data,method="REML")
summary(mod.mgcv)
coef(mod.mgcv)[1:2]
variance_comp(mod.mgcv)
data$gam.ranef.pred <- predict(mod.mgcv,newdata = data)
data$gam.fixef.pred <- predict(mod.mgcv,newdata = data,exclude=c("s(group)", "s(group,x)"))
fixef.pred = data.frame(x = rep(range(data$x),5),
group = factor(rep(1:groups, each = 2)))
fixef.pred$gam.fixef.pred <- predict(mod.mgcv,newdata = fixef.pred,exclude=c("s(group)", "s(group,x)"))
# data$lme.ranef.pred <- predict(mod.lme,newdata = data); # same prediction as GAM
# plot(lme.ranef.pred~gam.ranef.pred,data);abline(0,1,col="red")
ggplot(data, aes(x = x, y = y, color = group)) +
geom_point(alpha = 0.7) +
scale_color_discrete(name = "Group") +
geom_line(data = data, aes(x=x,y=gam.ranef.pred,group=group,linetype = "Random Effect"),color="blue",linewidth=1)+
geom_line(data = data, aes(x=x,y=gam.fixef.pred,linetype = "Fixed Effect"),color="black",linewidth=0.5)+
geom_smooth(se = FALSE, aes(linetype = "Naive linear model (all data)"),
color = "gray50", method = "lm")+
geom_smooth(aes(group = group,linetype = "Group linear model"),method = "lm", se = FALSE, color = "red") +
labs(
x = "X",
y = "Y"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment