Skip to content

Instantly share code, notes, and snippets.

@mingjiphd
Last active January 27, 2026 18:45
Show Gist options
  • Select an option

  • Save mingjiphd/4b12ba14bb1eb7802d737284d00f7ce0 to your computer and use it in GitHub Desktop.

Select an option

Save mingjiphd/4b12ba14bb1eb7802d737284d00f7ce0 to your computer and use it in GitHub Desktop.
Causal Analysis using R with a More Sophisticated Example of Bayesian Networks
This R script is a step by step demonstration of a more sophisticated Bayesian Network Analysis using R. The R packages used are bnlearn and Rgraphviz.
A step by step video demonstartion can be found at: https://youtu.be/n5tGa5ZBvgU
### Causal Analysis using R: A More Sophiscated Example of Bayesian Networks
##In this video, we present an example of using the R package bnlearn
##for Bayesian Network Analysis
## The Following topics are covered:
#Generates a mixed-type synthetic data set
#Applies multiple structure learning algorithms
#Fits parameters
#Performs arc strength estimation and model averaging
#Visualizes networks using base and advanced plotting
#Demonstrates classifiers
#Handles missing data
#Performs model comparison
# =========================
# Data Generation
# =========================
library(MASS)
set.seed(123)
n <- 500
Sigma <- matrix(c(1, 0.5, 0.3,
0.5, 1, 0.4,
0.3, 0.4, 1), nrow=3)
continuous_data <- mvrnorm(n=n, mu=c(5, 10, 15), Sigma=Sigma)
colnames(continuous_data) <- c("Cont1", "Cont2", "Cont3")
Discrete1 <- rbinom(n, 1, plogis(0.3*continuous_data[,1] - 0.2*continuous_data[,2]))
Discrete2 <- rbinom(n, 1, plogis(-0.5*continuous_data[,2] + 0.7*continuous_data[,3] + 0.5*Discrete1))
Discrete3 <- rbinom(n, 1, plogis(0.6*Discrete1 - 0.4*Discrete2))
library(nnet)
levels_multi <- c("A", "B", "C")
linear_pred <- 0.5*continuous_data[,1] - 0.3*Discrete2 + 0.4*Discrete3
probs <- exp(cbind(0, linear_pred, -linear_pred))
probs <- probs / rowSums(probs)
Multinomial <- apply(probs, 1, function(p) sample(levels_multi, 1, prob=p))
bayesian_data <- data.frame(continuous_data, Discrete1=factor(Discrete1), Discrete2=factor(Discrete2),
Discrete3=factor(Discrete3), Multinomial=factor(Multinomial))
head(bayesian_data)
# =========================
# Bayesian Network Analysis
# =========================
library(bnlearn)
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("Rgraphviz")
library(Rgraphviz)
# Structure learning with various algorithms
bn_hc <- hc(bayesian_data, score = "bic-cg") #he Hill-Climbing algorithm with the "bic-cg" score
bn_gs <- gs(bayesian_data) #the Grow-Shrink (GS) algorithm
bn_tabu <- tabu(bayesian_data) #the Tabu Search algorithm
bn_mmhc <- mmhc(bayesian_data) #the Max-Min Hill-Climbing (MMHC) algorithm
# Fit parameters
bn_fit <- bn.fit(bn_hc, data = bayesian_data)
# Plot basic and advanced graphs
par(mfrow=c(2,2))
plot(bn_hc, main='HC Structure')
plot(bn_gs, main='GS Structure')
plot(bn_tabu, main='Tabu Structure')
plot(bn_mmhc, main='MMHC Structure')
par(mfrow=c(1,1))
##Different algoirthms lead to different structures. Each has its strengths and weaknesses
##Model comprison and selection should be performed subsequently.
graphviz.plot(bn_hc, main="Graphviz: HC Structure")
##the graphviz.plot function can generate more sophisticated graphs with more professional looks
##than the plot() function
# Conditional queries and prediction
print(cpquery(bn_fit, event = (Discrete1 == "1"), evidence = (Cont1 > 5))) #Pr(Discrete1=1|Cont1>5)
print(predict(bn_fit, node = "Cont1",
data = data.frame(Discrete1 = factor("1", levels = levels(bayesian_data$Discrete1))))
) #Predicting the conditional mean of Cont1 given that Discrete1=1
#Predicting the closest category of a categorical variable given that Discrete1=1
#If data is new data, then predictions are for new data
# Arc strength via bootstrap and model averaging
strength_boot <- boot.strength(bayesian_data, R=200, algorithm="hc", algorithm.args=list(score="bic-cg"))
avg_net <- averaged.network(strength_boot, threshold=0.85)
plot(avg_net, main="Averaged Network")
arc_strengths <- arc.strength(bn_hc, data = bayesian_data)
print(head(arc_strengths))
##arc strength is a measure of confidence or strength of each arc (directed edge)
##the strength numeric value is the difference of network scores (such as BIC)
##More negative strength means stronger strength
# Classifiers
# install.packages("e1071")
library(e1071)
# Assume "Multinomial" is the target (class) variable
# Predict Multinomial based on all other variables
# Train Naive Bayes model
nb_model <- naiveBayes(Multinomial ~ ., data = bayesian_data)
# Print summary of the trained model
print(nb_model)
# Predict classes for the same data (or new data)
predictions <- predict(nb_model, bayesian_data)
# Print prediction table (confusion matrix)
print(table(predictions, bayesian_data$Multinomial))
# Get predictions as probabilities for each class
predicted_probs <- predict(nb_model, bayesian_data, type = "raw")
head(predicted_probs)
# Handling missing data with EM algorithm
bayesian_data_missing <- bayesian_data
bayesian_data_missing[sample(1:nrow(bayesian_data_missing), 20), "Cont1"] <- NA
# Suppose bayesian_data_missing is your data frame with missing values
# Run structural EM with Hill-Climbing as the maximize step
# Create an empty skeleton
start_dag <- empty.graph(names(bayesian_data_missing))
# Fit it on original data (may need complete cases or ignore missing partially)
start_fit <- bn.fit(start_dag, data = bayesian_data, method = "mle-cg")
# Structural EM run
result_all <- structural.em(bayesian_data_missing,
start = start_fit,
max.iter = 10,
maximize = "hc",
maximize.args = list(score = "bic-cg"),
return.all = TRUE)
graphviz.plot(result_all$dag) #DAG
head(result_all$imputed) ##imputed data set
head(result_all$fitted) #Fitted values
# Model comparison
compare(bn_gs, bn_hc)
##Compare BIC values
# Convert cpdag (partially directed) to a fully directed acyclic graph
dag_gs <- cextend(bn_gs)
# Now compute BIC scores on fully directed graphs
bic_gs <- score(dag_gs, data = bayesian_data, type = "bic-cg")
bic_hc <- score(bn_hc, data = bayesian_data, type = "bic-cg")
cat("BIC score of bn_gs (extended):", bic_gs, "\n")
cat("BIC score of bn_hc:", bic_hc, "\n")
A step by step video demonstartion can be found at: https://youtu.be/n5tGa5ZBvgU
@mingjiphd
Copy link
Author

A step by step video demonstartion can be found at: https://youtu.be/n5tGa5ZBvgU

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment