Last active
January 27, 2026 18:42
-
-
Save mingjiphd/21d08af7c541a1779defd223fddedf40 to your computer and use it in GitHub Desktop.
Causal Analysis using R with More Sophisticated DAGs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| This R script is a step by step tutorial on how to do causal analysis using R with more sophisticated DAGs. Topics covered include: How to generate simulated data for DAG; How to specify a DAG model; How to plot a DAG model; Minimal Adjustment Sets; D-Separation; Conditional Independencies; and Instrumental Variables. | |
| A step by step video demonstration can be found at: https://youtu.be/oN9qJmVKKmE |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ### Causal Analysis using R: Sophisticated DAGs | |
| ###Generate Data | |
| # Install if needed | |
| #if (!requireNamespace("simDAG", quietly = TRUE)) { | |
| # install.packages("simDAG") | |
| #} | |
| library(simDAG) | |
| # Define DAG with root nodes and child nodes with parents and regression formulas | |
| dag <- empty_dag() + | |
| # Root nodes with distributions | |
| node("age", type = "rnorm", mean = 50, sd = 10) + | |
| node("sex", type = "rbernoulli", p = 0.5) + | |
| # Child continuous node "bmi" depends on age and sex | |
| node( | |
| "bmi", | |
| type = "gaussian", | |
| parents = c("age", "sex"), | |
| formula = ~ -2 + age * 0.3 + sexTRUE * (-0.2), | |
| error = 5 | |
| ) + | |
| # Child binary node "smoker" depends on age and sex | |
| node( | |
| "smoker", | |
| type = "binomial", | |
| parents = c("age", "sex"), | |
| formula = ~ -4 + age * 0.05 + sexTRUE * 0.8 | |
| ) + | |
| # Child continuous node "blood_pressure" depends on bmi and smoker | |
| node( | |
| "blood_pressure", | |
| type = "gaussian", | |
| parents = c("bmi", "smoker"), | |
| formula = ~ 120 + bmi * 1.2 + smoker * 3.0, | |
| error = 8 | |
| ) + | |
| # Outcome binary node "heart_disease" depends on blood_pressure and age | |
| node( | |
| "heart_disease", | |
| type = "binomial", | |
| parents = c("blood_pressure", "age"), | |
| formula = ~ -6 + blood_pressure * 0.08 + age * 0.01 | |
| ) | |
| # Simulate 1000 samples from the DAG | |
| set.seed(123) | |
| sim_data <- sim_from_dag(dag, n_sim = 1000) | |
| # View first few rows | |
| head(sim_data) | |
| ##Visualization | |
| # Install ggdag if needed | |
| #if (!requireNamespace("ggdag", quietly = TRUE)) { | |
| # install.packages("ggdag") | |
| #} | |
| library(ggdag) | |
| library(ggplot2) | |
| # Define the DAG with relationships as in the simulation | |
| dag_object <- dagify( | |
| bmi ~ age + sex, | |
| smoker ~ age + sex, | |
| blood_pressure ~ bmi + smoker, | |
| heart_disease ~ blood_pressure + age, | |
| exposure = "smoker", | |
| outcome = "heart_disease" | |
| ) | |
| # Plot the DAG with ggdag | |
| library(ggplot2) | |
| ggdag(dag_object) + | |
| theme_dag() + | |
| ggtitle("DAG Visualization for Simulated Data") | |
| ### Minimal Adjustment Sets | |
| library(dagitty) | |
| library(ggdag) | |
| # Define DAG in dagitty | |
| dag_code <- ' | |
| dag { | |
| age -> bmi | |
| sex -> bmi | |
| age -> smoker | |
| sex -> smoker | |
| bmi -> blood_pressure | |
| smoker -> blood_pressure | |
| blood_pressure -> heart_disease | |
| age -> heart_disease | |
| } | |
| ' | |
| dag_obj <- dagitty(dag_code) | |
| exposures(dag_obj) <- "smoker" | |
| outcomes(dag_obj) <- "heart_disease" | |
| # Get minimal adjustment sets | |
| adj_sets <- adjustmentSets(dag_obj) | |
| print(adj_sets) # e.g., list of character vectors | |
| # Take first minimal adjustment set as example | |
| first_set <- adj_sets[[1]] | |
| # Visualize adjustment set with ggdag, note use of 'adjusted' parameter | |
| ggdag_adjustment_set(dag_obj, exposure = "smoker", outcome = "heart_disease", shadow = TRUE) + | |
| theme_dag() + | |
| ggplot2::ggtitle("Minimal Adjustment Sets Visualization") | |
| ###Further Analysis suing the Minimal Adjustment Set | |
| # Assume you have sim_data from the previous example and minimal adjustment set identified, e.g.: | |
| minimal_adj_set <- c("age", "bmi") # Example minimal adjustment set from previous step | |
| # Fit logistic regression for binary outcome 'heart_disease' on exposure 'smoker' | |
| # adjusting for the minimal adjustment set | |
| model <- glm(heart_disease ~ smoker + age + bmi, | |
| data = sim_data, | |
| family = binomial(link = "logit")) | |
| # Summary of the model to see coefficients and significance | |
| summary(model) | |
| # Interpret the coefficient for 'smoker' as the estimated causal effect adjusted for confounding | |
| ### More Advanced Options | |
| # Install packages if needed | |
| #if (!requireNamespace("dagitty", quietly = TRUE)) install.packages("dagitty") | |
| #if (!requireNamespace("ggdag", quietly = TRUE)) install.packages("ggdag") | |
| library(dagitty) | |
| library(ggdag) | |
| # Define DAG in dagitty syntax with coordinates for better plotting | |
| # Create the DAG object (your original specification) | |
| dag <- dagitty('dag { | |
| age [pos="0.100,0.500"] | |
| blood_pressure [pos="0.700,0.400"] | |
| bmi [pos="0.400,0.500"] | |
| heart_disease [outcome,pos="0.900,0.300"] | |
| sex [pos="0.100,0.200"] | |
| smoker [exposure,pos="0.400,0.200"] | |
| age -> bmi | |
| age -> heart_disease | |
| age -> smoker | |
| blood_pressure -> heart_disease | |
| bmi -> blood_pressure | |
| sex -> bmi | |
| sex -> smoker | |
| smoker -> blood_pressure | |
| }') | |
| # Explicitly set the coordinates (names and values as in your file) | |
| coordinates(dag) <- list( | |
| x = c(age = 0.1, blood_pressure = 0.7, bmi = 0.4, heart_disease = 0.9, | |
| sex = 0.1, smoker = 0.4, stress = 0.2), | |
| y = c(age = 0.5, blood_pressure = 0.4, bmi = 0.5, heart_disease = 0.3, | |
| sex = 0.2, smoker = 0.2, stress = 0.6) | |
| ) | |
| # Plot with your coordinates | |
| plot(dag) | |
| # Use ggdag to plot with ggplot2 style | |
| ggdag(dag) + | |
| theme_dag() + | |
| ggtitle("Causal DAG with Coordinates and Layout") | |
| # Check d-separation: Is 'bmi' a confounder that blocks backdoor path? | |
| #D-separation is a graphical criterion that tells you whether two variables | |
| #are conditionally independent given a set of other variables, based on the | |
| #structure of a causal DAG. | |
| dseparated(dag, "smoker", "heart_disease", c("bmi")) | |
| # List all testable conditional independencies implied by the DAG | |
| #The output of impliedConditionalIndependencies(dag) lists the conditional | |
| #independence statements that your DAG implies must hold true in any data generated | |
| #from the causal structure. | |
| impliedConditionalIndependencies(dag) | |
| # Identify all minimal sufficient adjustment sets | |
| adj_sets <- adjustmentSets(dag, exposure = "smoker", outcome = "heart_disease") | |
| print(adj_sets) | |
| # Find instrumental variables (IVs) for 'smoker' -> 'heart_disease' effect if any | |
| # An IV must be associated with the exposure ("smoker") but affects the outcome ("heart_disease") | |
| # only through this exposure and not via confounding paths. | |
| ivs <- instrumentalVariables(dag_obj, exposure = "smoker", outcome = "heart_disease") | |
| print(ivs) | |
| # Visualize one minimal adjustment set (the first one) | |
| # Visualize all minimal adjustment sets (no manual passing of sets) | |
| ggdag_adjustment_set( | |
| dag_obj, | |
| exposure = "smoker", | |
| outcome = "heart_disease", | |
| shadow = TRUE | |
| ) + | |
| theme_dag() + | |
| ggtitle("Minimal Adjustment Sets Visualization") | |
| # Set manual coordinates on the dag object | |
| coordinates(dag_obj) <- list( | |
| x = c(age = 0.1, sex = 0.1, bmi = 0.4, smoker = 0.4, blood_pressure = 0.7, heart_disease = 0.9), | |
| y = c(age = 0.5, sex = 0.2, bmi = 0.5, smoker = 0.2, blood_pressure = 0.4, heart_disease = 0.3) | |
| ) | |
| # Plot adjustment sets using the same dag_obj | |
| p <- ggdag_adjustment_set( | |
| dag_obj, | |
| exposure = "smoker", | |
| outcome = "heart_disease", | |
| shadow = TRUE | |
| ) + theme_dag() + ggtitle("Minimal Adjustment Sets Visualization") | |
| print(p) | |
| # Export DAG to text file for use outside R | |
| writeLines(as.character(dag), "dagitty_export.dag") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| A step by step video demonstration can be found at: https://youtu.be/oN9qJmVKKmE |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
A step by step video demonstration can be found at: https://youtu.be/oN9qJmVKKmE