Last active
January 27, 2026 18:45
-
-
Save mingjiphd/4b12ba14bb1eb7802d737284d00f7ce0 to your computer and use it in GitHub Desktop.
Causal Analysis using R with a More Sophisticated Example of Bayesian Networks
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| This R script is a step by step demonstration of a more sophisticated Bayesian Network Analysis using R. The R packages used are bnlearn and Rgraphviz. | |
| A step by step video demonstartion can be found at: https://youtu.be/n5tGa5ZBvgU |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ### Causal Analysis using R: A More Sophiscated Example of Bayesian Networks | |
| ##In this video, we present an example of using the R package bnlearn | |
| ##for Bayesian Network Analysis | |
| ## The Following topics are covered: | |
| #Generates a mixed-type synthetic data set | |
| #Applies multiple structure learning algorithms | |
| #Fits parameters | |
| #Performs arc strength estimation and model averaging | |
| #Visualizes networks using base and advanced plotting | |
| #Demonstrates classifiers | |
| #Handles missing data | |
| #Performs model comparison | |
| # ========================= | |
| # Data Generation | |
| # ========================= | |
| library(MASS) | |
| set.seed(123) | |
| n <- 500 | |
| Sigma <- matrix(c(1, 0.5, 0.3, | |
| 0.5, 1, 0.4, | |
| 0.3, 0.4, 1), nrow=3) | |
| continuous_data <- mvrnorm(n=n, mu=c(5, 10, 15), Sigma=Sigma) | |
| colnames(continuous_data) <- c("Cont1", "Cont2", "Cont3") | |
| Discrete1 <- rbinom(n, 1, plogis(0.3*continuous_data[,1] - 0.2*continuous_data[,2])) | |
| Discrete2 <- rbinom(n, 1, plogis(-0.5*continuous_data[,2] + 0.7*continuous_data[,3] + 0.5*Discrete1)) | |
| Discrete3 <- rbinom(n, 1, plogis(0.6*Discrete1 - 0.4*Discrete2)) | |
| library(nnet) | |
| levels_multi <- c("A", "B", "C") | |
| linear_pred <- 0.5*continuous_data[,1] - 0.3*Discrete2 + 0.4*Discrete3 | |
| probs <- exp(cbind(0, linear_pred, -linear_pred)) | |
| probs <- probs / rowSums(probs) | |
| Multinomial <- apply(probs, 1, function(p) sample(levels_multi, 1, prob=p)) | |
| bayesian_data <- data.frame(continuous_data, Discrete1=factor(Discrete1), Discrete2=factor(Discrete2), | |
| Discrete3=factor(Discrete3), Multinomial=factor(Multinomial)) | |
| head(bayesian_data) | |
| # ========================= | |
| # Bayesian Network Analysis | |
| # ========================= | |
| library(bnlearn) | |
| if (!requireNamespace("BiocManager", quietly = TRUE)) | |
| install.packages("BiocManager") | |
| BiocManager::install("Rgraphviz") | |
| library(Rgraphviz) | |
| # Structure learning with various algorithms | |
| bn_hc <- hc(bayesian_data, score = "bic-cg") #he Hill-Climbing algorithm with the "bic-cg" score | |
| bn_gs <- gs(bayesian_data) #the Grow-Shrink (GS) algorithm | |
| bn_tabu <- tabu(bayesian_data) #the Tabu Search algorithm | |
| bn_mmhc <- mmhc(bayesian_data) #the Max-Min Hill-Climbing (MMHC) algorithm | |
| # Fit parameters | |
| bn_fit <- bn.fit(bn_hc, data = bayesian_data) | |
| # Plot basic and advanced graphs | |
| par(mfrow=c(2,2)) | |
| plot(bn_hc, main='HC Structure') | |
| plot(bn_gs, main='GS Structure') | |
| plot(bn_tabu, main='Tabu Structure') | |
| plot(bn_mmhc, main='MMHC Structure') | |
| par(mfrow=c(1,1)) | |
| ##Different algoirthms lead to different structures. Each has its strengths and weaknesses | |
| ##Model comprison and selection should be performed subsequently. | |
| graphviz.plot(bn_hc, main="Graphviz: HC Structure") | |
| ##the graphviz.plot function can generate more sophisticated graphs with more professional looks | |
| ##than the plot() function | |
| # Conditional queries and prediction | |
| print(cpquery(bn_fit, event = (Discrete1 == "1"), evidence = (Cont1 > 5))) #Pr(Discrete1=1|Cont1>5) | |
| print(predict(bn_fit, node = "Cont1", | |
| data = data.frame(Discrete1 = factor("1", levels = levels(bayesian_data$Discrete1)))) | |
| ) #Predicting the conditional mean of Cont1 given that Discrete1=1 | |
| #Predicting the closest category of a categorical variable given that Discrete1=1 | |
| #If data is new data, then predictions are for new data | |
| # Arc strength via bootstrap and model averaging | |
| strength_boot <- boot.strength(bayesian_data, R=200, algorithm="hc", algorithm.args=list(score="bic-cg")) | |
| avg_net <- averaged.network(strength_boot, threshold=0.85) | |
| plot(avg_net, main="Averaged Network") | |
| arc_strengths <- arc.strength(bn_hc, data = bayesian_data) | |
| print(head(arc_strengths)) | |
| ##arc strength is a measure of confidence or strength of each arc (directed edge) | |
| ##the strength numeric value is the difference of network scores (such as BIC) | |
| ##More negative strength means stronger strength | |
| # Classifiers | |
| # install.packages("e1071") | |
| library(e1071) | |
| # Assume "Multinomial" is the target (class) variable | |
| # Predict Multinomial based on all other variables | |
| # Train Naive Bayes model | |
| nb_model <- naiveBayes(Multinomial ~ ., data = bayesian_data) | |
| # Print summary of the trained model | |
| print(nb_model) | |
| # Predict classes for the same data (or new data) | |
| predictions <- predict(nb_model, bayesian_data) | |
| # Print prediction table (confusion matrix) | |
| print(table(predictions, bayesian_data$Multinomial)) | |
| # Get predictions as probabilities for each class | |
| predicted_probs <- predict(nb_model, bayesian_data, type = "raw") | |
| head(predicted_probs) | |
| # Handling missing data with EM algorithm | |
| bayesian_data_missing <- bayesian_data | |
| bayesian_data_missing[sample(1:nrow(bayesian_data_missing), 20), "Cont1"] <- NA | |
| # Suppose bayesian_data_missing is your data frame with missing values | |
| # Run structural EM with Hill-Climbing as the maximize step | |
| # Create an empty skeleton | |
| start_dag <- empty.graph(names(bayesian_data_missing)) | |
| # Fit it on original data (may need complete cases or ignore missing partially) | |
| start_fit <- bn.fit(start_dag, data = bayesian_data, method = "mle-cg") | |
| # Structural EM run | |
| result_all <- structural.em(bayesian_data_missing, | |
| start = start_fit, | |
| max.iter = 10, | |
| maximize = "hc", | |
| maximize.args = list(score = "bic-cg"), | |
| return.all = TRUE) | |
| graphviz.plot(result_all$dag) #DAG | |
| head(result_all$imputed) ##imputed data set | |
| head(result_all$fitted) #Fitted values | |
| # Model comparison | |
| compare(bn_gs, bn_hc) | |
| ##Compare BIC values | |
| # Convert cpdag (partially directed) to a fully directed acyclic graph | |
| dag_gs <- cextend(bn_gs) | |
| # Now compute BIC scores on fully directed graphs | |
| bic_gs <- score(dag_gs, data = bayesian_data, type = "bic-cg") | |
| bic_hc <- score(bn_hc, data = bayesian_data, type = "bic-cg") | |
| cat("BIC score of bn_gs (extended):", bic_gs, "\n") | |
| cat("BIC score of bn_hc:", bic_hc, "\n") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| A step by step video demonstartion can be found at: https://youtu.be/n5tGa5ZBvgU |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
A step by step video demonstartion can be found at: https://youtu.be/n5tGa5ZBvgU