mingjiphd · January 27, 2026 18:39 · mingjiphd · Jan 26, 2026
diff --git a/Readme.txt b/Readme.txt
 This R script is a step-by-step tutorial on performing a weighted propensity score analysis in R. The R packages used are WeightIt and cobalt. We cover different weight estimation methods for ATT and ATE in WeightIt, and demonstrate how to check balance between treatment and control groups both before and after propensity score weighting.
 A step by step video demonstration can be found at: https://youtu.be/ekLMBLfJ03s 
diff --git a/R Script b/R Script
 ### How to perform a Weighted Propensity Score Analysis
 
 

 library(WeightIt)
 library(cobalt)

 # Use the lalonde dataset from cobalt package
 data("lalonde", package = "cobalt")

 head(lalonde)

 # Basic WeightIt call - logistic regression, ATT estimand
 #ATT estimand --- The Average Treatment Effect on the Treated (ATT) 
 ##                is the average causal effect of a treatment on the individuals 
 ##                 who actually received the treatment. 
 w1 <- weightit(treat ~ age + educ + nodegree + married + race + re74 + re75,
               data = lalonde,
               method = "glm",
               estimand = "ATT",
               stabilize = FALSE,
               s.weights = NULL,
               verbose = TRUE)

 # Boosted regression (GBM) method to estimate weights for ATE
 #ATE -- The Average Treatment Effect (ATE) is the average causal effect of a treatment 
 #       across the entire population, including both treated and untreated individuals.
 w2 <- weightit(treat ~ age + educ + nodegree + married + race + re74 + re75,
               data = lalonde,
               method = "gbm",
               estimand = "ATE",
               stabilize = TRUE,  # Use stabilized weights
               verbose = TRUE,
               stop.method = "es.mean")  # Stop boosting when mean absolute standardized difference 
                                         # is minimal

 # Covariate balancing propensity score method, ATT estimand, includes the over-identified option
 # CBPS --- an advanced method for estimating propensity scores that simultaneously optimizes two goals:
 #          1. Maximizing covariate balance between treated and control groups after weighting.
 #          2. Accurately modeling the treatment assignment mechanism (predicting treatment).
 w3 <- weightit(treat ~ age + educ + nodegree + married + race + re74 + re75,
               data = lalonde,
               method = "cbps",
               estimand = "ATT",
               over = TRUE,  # Use over-identified version of CBPS
               verbose = TRUE)

 # Use entropy balancing method for ATT
 w4 <- weightit(treat ~ age + educ + nodegree + married + race + re74 + re75,
               data = lalonde,
               method = "ebal",
               estimand = "ATT",
               verbose = TRUE)

 # Check balance diagnostics for all weights
 bal.tab(w1, un = TRUE, weights = w1$weights)  ##|Diff.weightit|<<Diff.un Common Threshold <0.1 or 0.2
 bal.tab(w2, un = TRUE, weights = w2$weights)  # If not, weighted PS not working well
 bal.tab(w3, un = TRUE, weights = w3$weights)
 bal.tab(w4, un = TRUE, weights = w4$weights)

 # Visualize the distribution of propensity scores before and after weighting
 bal.plot(w1, var = "prop.score", which = "both")  # 'both' shows before and after weighting plots
 bal.plot(w2, var = "prop.score", which = "both")
 bal.plot(w3, var = "prop.score", which = "both")
 bal.plot(w4, var = "prop.score", which = "both")
 
 # Add weights to dataset for downstream weighted analysis
 lalonde$weights_glm <- w1$weights
 lalonde$weights_gbm <- w2$weights
 lalonde$weights_cbps <- w3$weights
 lalonde$weights_ebal <- w4$weights

 # Example weighted regression adjusting for weight estimation uncertainty
 if (!requireNamespace("sandwich", quietly = TRUE)) {
   
 }
 library(sandwich)

 # Linear regression for outcome 're78' weighted by glm weights
 lm_glm <- lm(re78 ~ treat, data = lalonde, weights = weights_glm)
 summary(lm_glm)

 # Using WeightIt's advanced outcome modeling to account for weight estimation uncertainty
 fit1 <- lm_weightit(re78 ~ treat, data = lalonde, weightit = w1)
 summary(fit1,ci=TRUE)

 # Additional options:
 # - 'subclass' can be specified for subclassification weighting;
 # - 'missing' for handling missing data;
 # - 'focal' for specifying focal groups in multigroup treatments;
 # - 's.weights' provide sampling weights if applicable;
 # - 'verbose' prints detailed fitting information.
diff --git a/VideoDemo b/VideoDemo
 A step by step video demonstration can be found at: https://youtu.be/ekLMBLfJ03s
	This R script is a step-by-step tutorial on performing a weighted propensity score analysis in R. The R packages used are WeightIt and cobalt. We cover different weight estimation methods for ATT and ATE in WeightIt, and demonstrate how to check balance between treatment and control groups both before and after propensity score weighting.
	A step by step video demonstration can be found at: https://youtu.be/ekLMBLfJ03s
	### How to perform a Weighted Propensity Score Analysis



	library(WeightIt)
	library(cobalt)

	# Use the lalonde dataset from cobalt package
	data("lalonde", package = "cobalt")

	head(lalonde)

	# Basic WeightIt call - logistic regression, ATT estimand
	#ATT estimand --- The Average Treatment Effect on the Treated (ATT)
	## is the average causal effect of a treatment on the individuals
	## who actually received the treatment.
	w1 <- weightit(treat ~ age + educ + nodegree + married + race + re74 + re75,
	data = lalonde,
	method = "glm",
	estimand = "ATT",
	stabilize = FALSE,
	s.weights = NULL,
	verbose = TRUE)

	# Boosted regression (GBM) method to estimate weights for ATE
	#ATE -- The Average Treatment Effect (ATE) is the average causal effect of a treatment
	# across the entire population, including both treated and untreated individuals.
	w2 <- weightit(treat ~ age + educ + nodegree + married + race + re74 + re75,
	data = lalonde,
	method = "gbm",
	estimand = "ATE",
	stabilize = TRUE, # Use stabilized weights
	verbose = TRUE,
	stop.method = "es.mean") # Stop boosting when mean absolute standardized difference
	# is minimal

	# Covariate balancing propensity score method, ATT estimand, includes the over-identified option
	# CBPS --- an advanced method for estimating propensity scores that simultaneously optimizes two goals:
	# 1. Maximizing covariate balance between treated and control groups after weighting.
	# 2. Accurately modeling the treatment assignment mechanism (predicting treatment).
	w3 <- weightit(treat ~ age + educ + nodegree + married + race + re74 + re75,
	data = lalonde,
	method = "cbps",
	estimand = "ATT",
	over = TRUE, # Use over-identified version of CBPS
	verbose = TRUE)

	# Use entropy balancing method for ATT
	w4 <- weightit(treat ~ age + educ + nodegree + married + race + re74 + re75,
	data = lalonde,
	method = "ebal",
	estimand = "ATT",
	verbose = TRUE)

	# Check balance diagnostics for all weights
	bal.tab(w1, un = TRUE, weights = w1$weights) ##\|Diff.weightit\|<<Diff.un Common Threshold <0.1 or 0.2
	bal.tab(w2, un = TRUE, weights = w2$weights) # If not, weighted PS not working well
	bal.tab(w3, un = TRUE, weights = w3$weights)
	bal.tab(w4, un = TRUE, weights = w4$weights)

	# Visualize the distribution of propensity scores before and after weighting
	bal.plot(w1, var = "prop.score", which = "both") # 'both' shows before and after weighting plots
	bal.plot(w2, var = "prop.score", which = "both")
	bal.plot(w3, var = "prop.score", which = "both")
	bal.plot(w4, var = "prop.score", which = "both")

	# Add weights to dataset for downstream weighted analysis
	lalonde$weights_glm <- w1$weights
	lalonde$weights_gbm <- w2$weights
	lalonde$weights_cbps <- w3$weights
	lalonde$weights_ebal <- w4$weights

	# Example weighted regression adjusting for weight estimation uncertainty
	if (!requireNamespace("sandwich", quietly = TRUE)) {

	}
	library(sandwich)

	# Linear regression for outcome 're78' weighted by glm weights
	lm_glm <- lm(re78 ~ treat, data = lalonde, weights = weights_glm)
	summary(lm_glm)

	# Using WeightIt's advanced outcome modeling to account for weight estimation uncertainty
	fit1 <- lm_weightit(re78 ~ treat, data = lalonde, weightit = w1)
	summary(fit1,ci=TRUE)

	# Additional options:
	# - 'subclass' can be specified for subclassification weighting;
	# - 'missing' for handling missing data;
	# - 'focal' for specifying focal groups in multigroup treatments;
	# - 's.weights' provide sampling weights if applicable;
	# - 'verbose' prints detailed fitting information.