Last active
January 27, 2026 18:39
-
-
Save mingjiphd/36cc33b8eb98292ced7a171c5d829080 to your computer and use it in GitHub Desktop.
Causal Analysis using R Weighted Propensity Score Analysis
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| This R script is a step-by-step tutorial on performing a weighted propensity score analysis in R. The R packages used are WeightIt and cobalt. We cover different weight estimation methods for ATT and ATE in WeightIt, and demonstrate how to check balance between treatment and control groups both before and after propensity score weighting. | |
| A step by step video demonstration can be found at: https://youtu.be/ekLMBLfJ03s |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ### How to perform a Weighted Propensity Score Analysis | |
| library(WeightIt) | |
| library(cobalt) | |
| # Use the lalonde dataset from cobalt package | |
| data("lalonde", package = "cobalt") | |
| head(lalonde) | |
| # Basic WeightIt call - logistic regression, ATT estimand | |
| #ATT estimand --- The Average Treatment Effect on the Treated (ATT) | |
| ## is the average causal effect of a treatment on the individuals | |
| ## who actually received the treatment. | |
| w1 <- weightit(treat ~ age + educ + nodegree + married + race + re74 + re75, | |
| data = lalonde, | |
| method = "glm", | |
| estimand = "ATT", | |
| stabilize = FALSE, | |
| s.weights = NULL, | |
| verbose = TRUE) | |
| # Boosted regression (GBM) method to estimate weights for ATE | |
| #ATE -- The Average Treatment Effect (ATE) is the average causal effect of a treatment | |
| # across the entire population, including both treated and untreated individuals. | |
| w2 <- weightit(treat ~ age + educ + nodegree + married + race + re74 + re75, | |
| data = lalonde, | |
| method = "gbm", | |
| estimand = "ATE", | |
| stabilize = TRUE, # Use stabilized weights | |
| verbose = TRUE, | |
| stop.method = "es.mean") # Stop boosting when mean absolute standardized difference | |
| # is minimal | |
| # Covariate balancing propensity score method, ATT estimand, includes the over-identified option | |
| # CBPS --- an advanced method for estimating propensity scores that simultaneously optimizes two goals: | |
| # 1. Maximizing covariate balance between treated and control groups after weighting. | |
| # 2. Accurately modeling the treatment assignment mechanism (predicting treatment). | |
| w3 <- weightit(treat ~ age + educ + nodegree + married + race + re74 + re75, | |
| data = lalonde, | |
| method = "cbps", | |
| estimand = "ATT", | |
| over = TRUE, # Use over-identified version of CBPS | |
| verbose = TRUE) | |
| # Use entropy balancing method for ATT | |
| w4 <- weightit(treat ~ age + educ + nodegree + married + race + re74 + re75, | |
| data = lalonde, | |
| method = "ebal", | |
| estimand = "ATT", | |
| verbose = TRUE) | |
| # Check balance diagnostics for all weights | |
| bal.tab(w1, un = TRUE, weights = w1$weights) ##|Diff.weightit|<<Diff.un Common Threshold <0.1 or 0.2 | |
| bal.tab(w2, un = TRUE, weights = w2$weights) # If not, weighted PS not working well | |
| bal.tab(w3, un = TRUE, weights = w3$weights) | |
| bal.tab(w4, un = TRUE, weights = w4$weights) | |
| # Visualize the distribution of propensity scores before and after weighting | |
| bal.plot(w1, var = "prop.score", which = "both") # 'both' shows before and after weighting plots | |
| bal.plot(w2, var = "prop.score", which = "both") | |
| bal.plot(w3, var = "prop.score", which = "both") | |
| bal.plot(w4, var = "prop.score", which = "both") | |
| # Add weights to dataset for downstream weighted analysis | |
| lalonde$weights_glm <- w1$weights | |
| lalonde$weights_gbm <- w2$weights | |
| lalonde$weights_cbps <- w3$weights | |
| lalonde$weights_ebal <- w4$weights | |
| # Example weighted regression adjusting for weight estimation uncertainty | |
| if (!requireNamespace("sandwich", quietly = TRUE)) { | |
| } | |
| library(sandwich) | |
| # Linear regression for outcome 're78' weighted by glm weights | |
| lm_glm <- lm(re78 ~ treat, data = lalonde, weights = weights_glm) | |
| summary(lm_glm) | |
| # Using WeightIt's advanced outcome modeling to account for weight estimation uncertainty | |
| fit1 <- lm_weightit(re78 ~ treat, data = lalonde, weightit = w1) | |
| summary(fit1,ci=TRUE) | |
| # Additional options: | |
| # - 'subclass' can be specified for subclassification weighting; | |
| # - 'missing' for handling missing data; | |
| # - 'focal' for specifying focal groups in multigroup treatments; | |
| # - 's.weights' provide sampling weights if applicable; | |
| # - 'verbose' prints detailed fitting information. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| A step by step video demonstration can be found at: https://youtu.be/ekLMBLfJ03s |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
A step by step video demonstration can be found at: https://youtu.be/ekLMBLfJ03s