Skip to content

Instantly share code, notes, and snippets.

@vankesteren
Created October 31, 2024 12:58
Show Gist options
  • Save vankesteren/4e9cf9e4975ccd535e83d409a960db26 to your computer and use it in GitHub Desktop.
Save vankesteren/4e9cf9e4975ccd535e83d409a960db26 to your computer and use it in GitHub Desktop.
Simulating data, estimating effects, and computing intervention effects with uncertainty propagation in a simple causal graph
# Packages for this script
library(tidyverse)
library(lavaan)
library(dagitty)
library(glue)
library(rstanarm)
## SIMULATION ##
# here is a true data-generating process in lavaan (SEM) syntax
dgp <- "
A ~ 0.4*D
B ~ 0.2*D + 0.8*A + -0.1*C
C ~ -0.3*D + -0.3*A + 0.2*E
D ~ 1
E ~ 1
F ~ 0.9*B + -0.4*C + -0.3*E
"
df <- simulateData(dgp, sample.nobs = 20000)
dag <- lavaanToGraph(dgp)
plot(dag, show.coefficients = TRUE)
## ESTIMATION ##
# how can we find the adjustment set? Use DAG rules!
adjustmentSets(dag, exposure = "A", outcome = "B", effect = "direct")
coef(lm(B ~ A + C + D, data = df))[2]
# a function to compute any of these paths with OLS
ols_estimate_path <- function(df, dag, source = "A", target = "B") {
adjustment_set <- adjustmentSets(dag, exposure = source, outcome = target, effect = "direct")[[1]]
adj_set_string <- paste(adjustment_set, collapse = ', ')
print(glue("Identifying {source} -> {target} using adjustment set {adj_set_string}"))
adj_set_frm <- paste(adjustment_set, collapse = ' + ')
frm <- glue("{target} ~ {source} + {adj_set_frm}")
unname(coef(lm(frm, df))[2])
}
ols_estimate_path(df, dag, "E", "F")
## PREDICTION ##
# I will re-estimate relevant models in a Bayesian way to easily do posterior prediction / forward simulation
mod_c <- stan_lm(formula = C ~ A + D, data = df, prior = NULL, algorithm = "fullrank")
mod_b <- stan_lm(formula = B ~ A + C + D, data = df, prior = NULL, algorithm = "fullrank")
mod_f <- stan_lm(formula = F ~ C + B + E, data = df, prior = NULL, algorithm = "fullrank")
# now, let's simulate an intervention on A and find its effect on F
# Adding 1.3 to A
df_int <- df
df_int$A <- df_int$A + 1.3
# what would C be?
df_int$C <- posterior_predict(mod_c, df_int, draws = 1)[1,]
tibble(x = c(df$C, df_int$C), type = rep(c("original", "intervention"), each = nrow(df))) |>
ggplot(aes(x = x, fill = type)) +
geom_density() +
labs(title = "Distribution of C")
# what would B be?
df_int$B <- posterior_predict(mod_b, df_int, draws = 1)[1,]
tibble(x = c(df$B, df_int$B), type = rep(c("original", "intervention"), each = nrow(df))) |>
ggplot(aes(x = x, fill = type)) +
geom_density() +
labs(title = "Distribution of B")
# finally, what would F be?
df_int$F <- posterior_predict(mod_f, df_int, draws = 1)[1,]
tibble(x = c(df$F, df_int$F), type = rep(c("original", "intervention"), each = nrow(df))) |>
ggplot(aes(x = x, fill = type)) +
geom_density() +
labs(title = "Distribution of F")
# the average total causal effect
mean(df_int$F - df$F)
@vankesteren
Copy link
Author

Untitled

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment