mingjiphd · January 27, 2026 18:59
diff --git a/Readme.txt b/Readme.txt
 This R script is a step by step tutorial on how to implementation the G estimation by using the R package CausalModels.   We showed several methods for estimating causal effects including the basic G estimation, the Standardization, the Inverse Probability Weighting Estimation and the Doubly Robust Estimation. 
 A step by step video demonstration can be found at:  https://youtu.be/fiu6SizLlkg?si=IgZpRq9Ds2VOtOX9 
diff --git a/R Script b/R Script
 ####  Causal Analysis using R G Estimation by the R package CausalModels

 # Install and load CausalModels package if not installed
 if (!requireNamespace("CausalModels", quietly = TRUE)) {
  install.packages("CausalModels")
 }

 library(CausalModels)
 library(causaldata)
 data(nhefs)

 ###Delete missing data
 nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)),]

 ##Code quit smoking as factor
 nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk)

 ##Define confounders
 confounders <- c("sex", "race", "age", "education", "smokeintensity",
                 "smokeyrs", "exercise", "active", "wt71")

 # Initialize parameters as before
 init_params(outcome = "wt82_71",
            treatment = "qsmk",
            covariates = confounders,
            data = nhefs.nmv,
            simple = FALSE)

 # Run g-estimation: Fits structual nested mean model (SNMM) for estimaing causal effect of qsmk on weight
 # after adjusting for confounders
 # G-estimation fits a structural nested mean model (SNMM) by solving special estimating equations 
 # that explicitly model treatment effects conditionally on covariate and treatment history.
 # Suitable for time varying covariates
 model <- gestimation(nhefs.nmv, type = "one.linear", n.boot = 150) #one.linear means one parameter linear model

 # 1. Print summary of causal effect estimates
 summary(model)  #The Beta is the conditional causal effect estimate

 # 2. Compare with standardization estimation.
 # Standardization involves fitting an outcome regression model conditional 
 # on treatment and covariates, then averaging predicted outcomes over the covariate distribution  
 # for each treatment level. It is the population averaged or marginal causal effect 
 # Suitable for fixed covariates
 std_model <- standardization(nhefs.nmv)
 summary(std_model)

 # 3. Perform inverse probability weighting estimation
 # IPW creates a weighted pseudo-population by weighting each subject by 
 # the inverse probability of receiving the treatment they actually received
 # (the propensity score).  Estimate marginal causal effect
 # Suitable for time varying covariates and confounders
 ipw_model <- ipweighting(nhefs.nmv)
 summary(ipw_model)  #Has significance test for the exposure

 # 4. Doubly robust estimation combining standardization and IPW. It has two models.
 # A model for the outcome given treatment and covariates (outcome regression)
 # A model for the treatment assignment (propensity score model)
 # The estimates are unbiased and consistent even if there is only one model correctly specified
 # Estimate marginal causal effect in the presence of time vayring covariates and confounders
 dr_model <- doubly_robust(nhefs.nmv)
 summary(dr_model)
  


 # 5. Plot estimated ATE with 95% confidence intervals for different methods
 library(ggplot2)
 estimates <- data.frame(
  Method = c("G-estimation", "Standardization", "IP Weighting"),
  ATE = c(summary(model)$ATE$Beta, summary(std_model)$ATE$Beta, summary(ipw_model)$ATE$Beta),
  SE = c(summary(model)$ATE$SE, summary(std_model)$ATE$SE, summary(ipw_model)$ATE$SE)
 )
 estimates$Lower <- estimates$ATE - 1.96 * estimates$SE
 estimates$Upper <- estimates$ATE + 1.96 * estimates$SE

 ggplot(estimates, aes(x = Method, y = ATE)) +
  geom_point() +
  geom_errorbar(aes(ymin = Lower, ymax = Upper), width = 0.2) +
  ylab("Estimated Average Treatment Effect (ATE)") +
  ggtitle("Comparison of Causal Effect Estimates")

 # 6. Sensitivity analysis or subgroup analysis by age group (example)
 nhefs.nmv$age_group <- cut(nhefs.nmv$age, breaks = c(18,40,60,80), right = FALSE)
 for (group in levels(nhefs.nmv$age_group)) {
  cat("\nAge group:", group, "\n")
  sub_data <- subset(nhefs.nmv, age_group == group)
  init_params(outcome = "wt82_71", treatment = "qsmk", covariates = confounders,
              data = sub_data, simple = FALSE)
  sub_model <- gestimation(sub_data, type = "one.linear", n.boot = 100)
  print(summary(sub_model))
 }

diff --git a/VideoDemo b/VideoDemo
 A step by step video demonstration can be found at:  https://youtu.be/fiu6SizLlkg?si=IgZpRq9Ds2VOtOX9
	This R script is a step by step tutorial on how to implementation the G estimation by using the R package CausalModels. We showed several methods for estimating causal effects including the basic G estimation, the Standardization, the Inverse Probability Weighting Estimation and the Doubly Robust Estimation.
	A step by step video demonstration can be found at: https://youtu.be/fiu6SizLlkg?si=IgZpRq9Ds2VOtOX9
	#### Causal Analysis using R G Estimation by the R package CausalModels

	# Install and load CausalModels package if not installed
	if (!requireNamespace("CausalModels", quietly = TRUE)) {
	install.packages("CausalModels")
	}

	library(CausalModels)
	library(causaldata)
	data(nhefs)

	###Delete missing data
	nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)),]

	##Code quit smoking as factor
	nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk)

	##Define confounders
	confounders <- c("sex", "race", "age", "education", "smokeintensity",
	"smokeyrs", "exercise", "active", "wt71")

	# Initialize parameters as before
	init_params(outcome = "wt82_71",
	treatment = "qsmk",
	covariates = confounders,
	data = nhefs.nmv,
	simple = FALSE)

	# Run g-estimation: Fits structual nested mean model (SNMM) for estimaing causal effect of qsmk on weight
	# after adjusting for confounders
	# G-estimation fits a structural nested mean model (SNMM) by solving special estimating equations
	# that explicitly model treatment effects conditionally on covariate and treatment history.
	# Suitable for time varying covariates
	model <- gestimation(nhefs.nmv, type = "one.linear", n.boot = 150) #one.linear means one parameter linear model

	# 1. Print summary of causal effect estimates
	summary(model) #The Beta is the conditional causal effect estimate

	# 2. Compare with standardization estimation.
	# Standardization involves fitting an outcome regression model conditional
	# on treatment and covariates, then averaging predicted outcomes over the covariate distribution
	# for each treatment level. It is the population averaged or marginal causal effect
	# Suitable for fixed covariates
	std_model <- standardization(nhefs.nmv)
	summary(std_model)

	# 3. Perform inverse probability weighting estimation
	# IPW creates a weighted pseudo-population by weighting each subject by
	# the inverse probability of receiving the treatment they actually received
	# (the propensity score). Estimate marginal causal effect
	# Suitable for time varying covariates and confounders
	ipw_model <- ipweighting(nhefs.nmv)
	summary(ipw_model) #Has significance test for the exposure

	# 4. Doubly robust estimation combining standardization and IPW. It has two models.
	# A model for the outcome given treatment and covariates (outcome regression)
	# A model for the treatment assignment (propensity score model)
	# The estimates are unbiased and consistent even if there is only one model correctly specified
	# Estimate marginal causal effect in the presence of time vayring covariates and confounders
	dr_model <- doubly_robust(nhefs.nmv)
	summary(dr_model)



	# 5. Plot estimated ATE with 95% confidence intervals for different methods
	library(ggplot2)
	estimates <- data.frame(
	Method = c("G-estimation", "Standardization", "IP Weighting"),
	ATE = c(summary(model)$ATE$Beta, summary(std_model)$ATE$Beta, summary(ipw_model)$ATE$Beta),
	SE = c(summary(model)$ATE$SE, summary(std_model)$ATE$SE, summary(ipw_model)$ATE$SE)
	)
	estimates$Lower <- estimates$ATE - 1.96 * estimates$SE
	estimates$Upper <- estimates$ATE + 1.96 * estimates$SE

	ggplot(estimates, aes(x = Method, y = ATE)) +
	geom_point() +
	geom_errorbar(aes(ymin = Lower, ymax = Upper), width = 0.2) +
	ylab("Estimated Average Treatment Effect (ATE)") +
	ggtitle("Comparison of Causal Effect Estimates")

	# 6. Sensitivity analysis or subgroup analysis by age group (example)
	nhefs.nmv$age_group <- cut(nhefs.nmv$age, breaks = c(18,40,60,80), right = FALSE)
	for (group in levels(nhefs.nmv$age_group)) {
	cat("\nAge group:", group, "\n")
	sub_data <- subset(nhefs.nmv, age_group == group)
	init_params(outcome = "wt82_71", treatment = "qsmk", covariates = confounders,
	data = sub_data, simple = FALSE)
	sub_model <- gestimation(sub_data, type = "one.linear", n.boot = 100)
	print(summary(sub_model))
	}