mingjiphd · January 27, 2026 18:54
diff --git a/Readme.txt b/Readme.txt
 This R script is a step by step tutorial on how to implement the DO Calculus developed by Judea Pearl.  The R packages used include: causaleffect, mediation, igraph and dplyr.  We demonstrate how to create a simulated data set, how to define a DAG,  how to estimate the expression identified by the DO Calculus and how to use that expression to calculate average causal effect (ACE).  Furthermore, we showed how to perform causal mediational analysis and how to interpret its outputs. 
 A step by step video demonstration can be found at: https://youtu.be/yNzpoWRgZdE?si=MJIpRcp74uqfqhjc 
diff --git a/RforDOCalculus b/RforDOCalculus
 #### Causal Analysis using R   How to Implement Judea Pearl's DO Calculus


 ##Do-calculus, created by Judea Pearl, uses three rules to transform causal expressions with 
 ##interventions (do-operator) into observational terms.

 ##Its goal is to identify causal effects from observational data given assumptions encoded 
 ##in a causal graph.

 ##Rule 1 allows adding or removing observations when certain conditional independencies hold.

 ##Rule 2 lets you exchange actions (interventions) with observations under specified 
 ##graphical conditions.

 ##Rule 3 permits insertion or deletion of actions when specific identifiability criteria are met.

 ##Applying these rules systematically aims to express causal queries without do-operators, 
 ##enabling causal effect estimation from observed data.
 # Install needed packages if not already installed
 if (!requireNamespace("mediation", quietly = TRUE)) install.packages("mediation")
 if (!requireNamespace("causaleffect", quietly = TRUE)) install.packages("causaleffect")
 if (!requireNamespace("igraph", quietly = TRUE)) install.packages("igraph")
 if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr")

 library(mediation)
 library(causaleffect)
 library(igraph)
 library(dplyr)

 # Define the DAG using igraph as before
 g <- graph_from_literal(X -+ Z, Z -+ Y, X -+ Y)

 # Simulate data again for clarity/reproducibility
 set.seed(11162025)
 n <- 1000
 X <- rbinom(n, 1, 0.5)
 Z <- rbinom(n, 1, plogis(0.5 * X))
 Y <- rbinom(n, 1, plogis(0.7 * X + 0.3 * Z))
 data <- data.frame(X, Z, Y)
 head(data)
 # 1. Do-calculus identification expression (as before)
 causal_effect_expr <- causal.effect(y = "Y", x = "X", z = NULL, G = g)
 print(causal_effect_expr)

 ##The output is P(Y|do(X))  --- means the probability of Y when we intervene and set 
 ## X to a specific value, rather than just observing X. It captures the causal effect of 
 ## X on Y, isolating the impact of actively changing X.

 ##This expression says that Intuitively, this means that to estimate the causal effect of setting 
 ##X, you condition on Z, weigh the conditional probability of Y given X and Z by the probability of 
 ##Z conditioned on X, and sum over all values of Z.

 # 2. Estimate average causal effect with backdoor adjustment (adjusting for Z) - as before
 p_y_given_x_z <- data %>%
  group_by(X, Z) %>%
  summarize(p = mean(Y), .groups = 'drop')
 head(p_y_given_x_z)
 p_z_given_x <- data %>%
  group_by(X, Z) %>%
  summarize(count = n(), .groups = 'drop') %>%
  group_by(X) %>%
  mutate(p = count / sum(count)) %>%
  ungroup()
 head(p_z_given_x)


 est_1 <- sum(p_y_given_x_z$p[p_y_given_x_z$X == 1] * p_z_given_x$p[p_z_given_x$X == 1])
 est_0 <- sum(p_y_given_x_z$p[p_y_given_x_z$X == 0] * p_z_given_x$p[p_z_given_x$X == 0])
 ace <- est_1 - est_0
 cat("Average causal effect of X on Y (backdoor):", ace, "\n")

 # 3. Causal Mediation Analysis using the 'mediation' package:
 # Fit mediator model (Z ~ X)
 med_fit <- glm(Z ~ X, family = binomial(), data = data)
 # Fit outcome model (Y ~ X + Z)
 out_fit <- glm(Y ~ X + Z, family = binomial(), data = data)

 # Conduct mediation analysis
 med_out <- mediate(med_fit, out_fit, treat = "X", mediator = "Z",
                   boot = TRUE, sims = 1000)

 summary(med_out)

 ##ACME (Average Causal Mediation Effect)
 ##ADE (Average Direct Effect)
 ##Prop. Mediated
 #  Fraction of total effect explained by mediation via Z


 # 4. Explore other causal queries, for example, causal effect of Z on Y adjusting for X
 causal_effect_ZY <- causal.effect(y = "Y", x = "Z", z = "X", G = g)
 print(causal_effect_ZY)

 # 5. Visualization of DAG (optional)
 plot(g, vertex.size=30, vertex.color="skyblue", edge.arrow.size=0.5)

diff --git a/VideoDemo b/VideoDemo
 A step by step video demonstration can be found at: https://youtu.be/yNzpoWRgZdE?si=MJIpRcp74uqfqhjc
	#### Causal Analysis using R How to Implement Judea Pearl's DO Calculus


	##Do-calculus, created by Judea Pearl, uses three rules to transform causal expressions with
	##interventions (do-operator) into observational terms.

	##Its goal is to identify causal effects from observational data given assumptions encoded
	##in a causal graph.

	##Rule 1 allows adding or removing observations when certain conditional independencies hold.

	##Rule 2 lets you exchange actions (interventions) with observations under specified
	##graphical conditions.

	##Rule 3 permits insertion or deletion of actions when specific identifiability criteria are met.

	##Applying these rules systematically aims to express causal queries without do-operators,
	##enabling causal effect estimation from observed data.
	# Install needed packages if not already installed
	if (!requireNamespace("mediation", quietly = TRUE)) install.packages("mediation")
	if (!requireNamespace("causaleffect", quietly = TRUE)) install.packages("causaleffect")
	if (!requireNamespace("igraph", quietly = TRUE)) install.packages("igraph")
	if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr")

	library(mediation)
	library(causaleffect)
	library(igraph)
	library(dplyr)

	# Define the DAG using igraph as before
	g <- graph_from_literal(X -+ Z, Z -+ Y, X -+ Y)

	# Simulate data again for clarity/reproducibility
	set.seed(11162025)
	n <- 1000
	X <- rbinom(n, 1, 0.5)
	Z <- rbinom(n, 1, plogis(0.5 * X))
	Y <- rbinom(n, 1, plogis(0.7 * X + 0.3 * Z))
	data <- data.frame(X, Z, Y)
	head(data)
	# 1. Do-calculus identification expression (as before)
	causal_effect_expr <- causal.effect(y = "Y", x = "X", z = NULL, G = g)
	print(causal_effect_expr)

	##The output is P(Y\|do(X)) --- means the probability of Y when we intervene and set
	## X to a specific value, rather than just observing X. It captures the causal effect of
	## X on Y, isolating the impact of actively changing X.

	##This expression says that Intuitively, this means that to estimate the causal effect of setting
	##X, you condition on Z, weigh the conditional probability of Y given X and Z by the probability of
	##Z conditioned on X, and sum over all values of Z.

	# 2. Estimate average causal effect with backdoor adjustment (adjusting for Z) - as before
	p_y_given_x_z <- data %>%
	group_by(X, Z) %>%
	summarize(p = mean(Y), .groups = 'drop')
	head(p_y_given_x_z)
	p_z_given_x <- data %>%
	group_by(X, Z) %>%
	summarize(count = n(), .groups = 'drop') %>%
	group_by(X) %>%
	mutate(p = count / sum(count)) %>%
	ungroup()
	head(p_z_given_x)


	est_1 <- sum(p_y_given_x_z$p[p_y_given_x_z$X == 1] * p_z_given_x$p[p_z_given_x$X == 1])
	est_0 <- sum(p_y_given_x_z$p[p_y_given_x_z$X == 0] * p_z_given_x$p[p_z_given_x$X == 0])
	ace <- est_1 - est_0
	cat("Average causal effect of X on Y (backdoor):", ace, "\n")

	# 3. Causal Mediation Analysis using the 'mediation' package:
	# Fit mediator model (Z ~ X)
	med_fit <- glm(Z ~ X, family = binomial(), data = data)
	# Fit outcome model (Y ~ X + Z)
	out_fit <- glm(Y ~ X + Z, family = binomial(), data = data)

	# Conduct mediation analysis
	med_out <- mediate(med_fit, out_fit, treat = "X", mediator = "Z",
	boot = TRUE, sims = 1000)

	summary(med_out)

	##ACME (Average Causal Mediation Effect)
	##ADE (Average Direct Effect)
	##Prop. Mediated
	# Fraction of total effect explained by mediation via Z


	# 4. Explore other causal queries, for example, causal effect of Z on Y adjusting for X
	causal_effect_ZY <- causal.effect(y = "Y", x = "Z", z = "X", G = g)
	print(causal_effect_ZY)

	# 5. Visualization of DAG (optional)
	plot(g, vertex.size=30, vertex.color="skyblue", edge.arrow.size=0.5)
No results found