Created
January 27, 2026 18:59
-
-
Save mingjiphd/9945223cad36ac98c91e6e01ea0d15ef to your computer and use it in GitHub Desktop.
Causal Analysis using R G Estimation by using the CausalModels Package
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| This R script is a step by step tutorial on how to implementation the G estimation by using the R package CausalModels. We showed several methods for estimating causal effects including the basic G estimation, the Standardization, the Inverse Probability Weighting Estimation and the Doubly Robust Estimation. | |
| A step by step video demonstration can be found at: https://youtu.be/fiu6SizLlkg?si=IgZpRq9Ds2VOtOX9 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #### Causal Analysis using R G Estimation by the R package CausalModels | |
| # Install and load CausalModels package if not installed | |
| if (!requireNamespace("CausalModels", quietly = TRUE)) { | |
| install.packages("CausalModels") | |
| } | |
| library(CausalModels) | |
| library(causaldata) | |
| data(nhefs) | |
| ###Delete missing data | |
| nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)),] | |
| ##Code quit smoking as factor | |
| nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) | |
| ##Define confounders | |
| confounders <- c("sex", "race", "age", "education", "smokeintensity", | |
| "smokeyrs", "exercise", "active", "wt71") | |
| # Initialize parameters as before | |
| init_params(outcome = "wt82_71", | |
| treatment = "qsmk", | |
| covariates = confounders, | |
| data = nhefs.nmv, | |
| simple = FALSE) | |
| # Run g-estimation: Fits structual nested mean model (SNMM) for estimaing causal effect of qsmk on weight | |
| # after adjusting for confounders | |
| # G-estimation fits a structural nested mean model (SNMM) by solving special estimating equations | |
| # that explicitly model treatment effects conditionally on covariate and treatment history. | |
| # Suitable for time varying covariates | |
| model <- gestimation(nhefs.nmv, type = "one.linear", n.boot = 150) #one.linear means one parameter linear model | |
| # 1. Print summary of causal effect estimates | |
| summary(model) #The Beta is the conditional causal effect estimate | |
| # 2. Compare with standardization estimation. | |
| # Standardization involves fitting an outcome regression model conditional | |
| # on treatment and covariates, then averaging predicted outcomes over the covariate distribution | |
| # for each treatment level. It is the population averaged or marginal causal effect | |
| # Suitable for fixed covariates | |
| std_model <- standardization(nhefs.nmv) | |
| summary(std_model) | |
| # 3. Perform inverse probability weighting estimation | |
| # IPW creates a weighted pseudo-population by weighting each subject by | |
| # the inverse probability of receiving the treatment they actually received | |
| # (the propensity score). Estimate marginal causal effect | |
| # Suitable for time varying covariates and confounders | |
| ipw_model <- ipweighting(nhefs.nmv) | |
| summary(ipw_model) #Has significance test for the exposure | |
| # 4. Doubly robust estimation combining standardization and IPW. It has two models. | |
| # A model for the outcome given treatment and covariates (outcome regression) | |
| # A model for the treatment assignment (propensity score model) | |
| # The estimates are unbiased and consistent even if there is only one model correctly specified | |
| # Estimate marginal causal effect in the presence of time vayring covariates and confounders | |
| dr_model <- doubly_robust(nhefs.nmv) | |
| summary(dr_model) | |
| # 5. Plot estimated ATE with 95% confidence intervals for different methods | |
| library(ggplot2) | |
| estimates <- data.frame( | |
| Method = c("G-estimation", "Standardization", "IP Weighting"), | |
| ATE = c(summary(model)$ATE$Beta, summary(std_model)$ATE$Beta, summary(ipw_model)$ATE$Beta), | |
| SE = c(summary(model)$ATE$SE, summary(std_model)$ATE$SE, summary(ipw_model)$ATE$SE) | |
| ) | |
| estimates$Lower <- estimates$ATE - 1.96 * estimates$SE | |
| estimates$Upper <- estimates$ATE + 1.96 * estimates$SE | |
| ggplot(estimates, aes(x = Method, y = ATE)) + | |
| geom_point() + | |
| geom_errorbar(aes(ymin = Lower, ymax = Upper), width = 0.2) + | |
| ylab("Estimated Average Treatment Effect (ATE)") + | |
| ggtitle("Comparison of Causal Effect Estimates") | |
| # 6. Sensitivity analysis or subgroup analysis by age group (example) | |
| nhefs.nmv$age_group <- cut(nhefs.nmv$age, breaks = c(18,40,60,80), right = FALSE) | |
| for (group in levels(nhefs.nmv$age_group)) { | |
| cat("\nAge group:", group, "\n") | |
| sub_data <- subset(nhefs.nmv, age_group == group) | |
| init_params(outcome = "wt82_71", treatment = "qsmk", covariates = confounders, | |
| data = sub_data, simple = FALSE) | |
| sub_model <- gestimation(sub_data, type = "one.linear", n.boot = 100) | |
| print(summary(sub_model)) | |
| } | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| A step by step video demonstration can be found at: https://youtu.be/fiu6SizLlkg?si=IgZpRq9Ds2VOtOX9 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment