JimGrange · November 19, 2017 15:14
diff --git a/Bayesian estimate of partial correlation b/Bayesian estimate of partial correlation
 #------------------------------------------------------------------------------
 ### initial set up
 rm(list = ls())

 # set working directory
 setwd("D:/Work/Other/Blog_YouTube code/Blog/Bayesian Partial Correlation")

 # load libraries
 library(ppcor)
 library(Hmisc)
 library(R2jags)

 # set seed for reproducible code
 set.seed(42)
 #------------------------------------------------------------------------------


 #------------------------------------------------------------------------------
 ### define functions

 # function to simulate partially correlated data
 get_data <- function(n){
  x <- rnorm(n, 0, 1)
  y <- .5 * x + rnorm(n, 0, 1)
  z <- .3 * x + .6 * y + rnorm(n, 0, 1)
  
  data <- data.frame(x = x, y = y, z = z)
  
  return(data)
 }
 #------------------------------------------------------------------------------


 #------------------------------------------------------------------------------
 ### get data & conduct frequentist analysis

 # generate the data
 n <- 75
 x <- get_data(n)

 ## plot the data with linear model fits
 op = par(cex.main = 1.5, mar = c(5,6,4,5) + 0.1, mgp = c(3.5, 1,0), 
         cex.lab = 1.5, font.lab = 2, cex.axis = 1.3, bty = "n", las = 1)


 # do pairs plot
 pdf("pairs.pdf", width = 6, height = 6)
 pairs(x, upper.panel = NULL, pch = 19)
 dev.off()

 # do correlation plot
 pdf("correlation.pdf", width = 6, height = 6)
 plot(x$y, x$x, pch = 17, ylab = "", xlab = "")

 # model not controlling for z
 mod_1 <- lm(x$x ~ x$y)
 abline(a = mod_1$coefficients[1], b = mod_1$coefficients[2], lwd = 3, lty = 1, 
       col = "red")

 # model controlling for z
 mod_2 <- lm(x$x ~ x$y + x$z)
 abline(a = mod_2$coefficients[1], b = mod_2$coefficients[2], lwd = 3, lty = 1, 
       col = "blue")
 legend("bottomright", c("Regular", "Partial"), lty = c(1, 1), bty = "n", 
       lwd = 3, col = c("red","blue"), cex = 1.5)
 dev.off()

 # get the frequentist estimate of correlation & partial correlation
 # note that the correlation between x and y is no longer significant when
 # controlling for z
 freq_r <- rcorr(as.matrix(x))
 freq_partial_r <- pcor(x)
 #------------------------------------------------------------------------------


 #------------------------------------------------------------------------------
 ### Conduct Bayesian parameter estimation

 # declare the JAGS model code
 model_code <- "
 model {

 # data
 for(i in 1:n){
 x[i, 1:3] ~ dmnorm.vcov(mu[], TI[,])
 }

 # priors
 mu[1] ~ dnorm(0, .001)
 mu[2] ~ dnorm(0, .001)
 mu[3] ~ dnorm(0, .001)

 lambda[1] ~ dgamma(.001, .001)
 lambda[2] ~ dgamma(.001, .001)
 lambda[3] ~ dgamma(.001, .001)

 r_xy ~ dunif(-1, 1)
 r_xz ~ dunif(-1, 1)
 r_yz ~ dunif(-1, 1)

 # reparameterisation
 sigma[1] <- 1/sqrt(lambda[1])
 sigma[2] <- 1/sqrt(lambda[2])
 sigma[3] <- 1/sqrt(lambda[3])

 T[1, 1] <- 1 / lambda[1]
 T[1, 2] <- r_xy * sigma[1] * sigma[2]
 T[1, 3] <- r_xz * sigma[1] * sigma[3]

 T[2, 1] <- r_xy * sigma[1] * sigma[2]
 T[2, 2] <- 1 / lambda[2]
 T[2, 3] <- r_yz * sigma[2] * sigma[3]

 T[3, 1] <- r_xz * sigma[1] * sigma[3]
 T[3, 2] <- r_yz * sigma[2] * sigma[3]
 T[3, 3] <- 1 / lambda[3]

 TI[1:3, 1:3] <- T[1:3, 1:3]

 # partial correlation calculation
 num <- r_xy - (r_xz * r_yz)
 denom <- sqrt(1 - pow(r_xz, 2)) * sqrt(1 - pow(r_yz, 2))
 partial <- num/denom
 }
 "

 # model details
 jags_info <- list("x", "n")
 parameters <- c("r_xy", "r_xz", "r_yz", "mu", "sigma", "partial")

 # fit the model
 sample <- jags(jags_info, inits = NULL, parameters, 
               model.file = textConnection(model_code), n.chains = 1, 
               n.iter = 10000, n.burnin = 500, n.thin = 5, DIC = F)

 # look at the overview of the parameter estimates
 sample


 # extract the posterior samples of the partial correlation (4th column) &
 # calculate the 95% HDI
 posterior <- sample$BUGSoutput$sims.matrix[, 4]
 sample_mcmc <- as.mcmc(posterior)
 hdi <- HPDinterval(sample_mcmc)



 ###  plot

 # do some preparation for plotting by finding the mode of the posterior
 dens <- density(posterior)
 posterior_mode <- dens$x[which.max(dens$y)]

 # do the plot
 pdf("bayesian_estimate.pdf", width = 6, height = 6)
 plot(dens, xlim = c(-1, 1), ylim = c(0, max(dens$y) + 0.55), 
     main = "", xlab = "Partial Correlation Estimate", lwd = 2)

 # add the mode of the sample & the HDI etc.
 lines(x=c(posterior_mode, posterior_mode), y=c(2.5, 3.8), lty = 2, lwd = 2, 
      col = "red")
 text(x= posterior_mode, y = max(dens$y) + 0.5, paste("Posterior mode =",
                                         round(posterior_mode, 3), sep = " "), 
     cex = 1.2, col = "red")
 lines(x = c(hdi[1],hdi[1]), y = c(0,0.2), lwd = 2, col = "red")
 lines(x = c(hdi[2],hdi[2]), y = c(0,0.2), lwd = 2, col = "red")
 lines(x = c(hdi[1],hdi[2]), y = c(0.1,0.1), lwd = 2, col = "red")
 text(x = (hdi[1] + hdi[2]) / 2, y = 0.325, "95% HDI", cex = 1.2, col = "red")
 dev.off()
 #------------------------------------------------------------------------------
	#------------------------------------------------------------------------------
	### initial set up
	rm(list = ls())

	# set working directory
	setwd("D:/Work/Other/Blog_YouTube code/Blog/Bayesian Partial Correlation")

	# load libraries
	library(ppcor)
	library(Hmisc)
	library(R2jags)

	# set seed for reproducible code
	set.seed(42)
	#------------------------------------------------------------------------------


	#------------------------------------------------------------------------------
	### define functions

	# function to simulate partially correlated data
	get_data <- function(n){
	x <- rnorm(n, 0, 1)
	y <- .5 * x + rnorm(n, 0, 1)
	z <- .3 * x + .6 * y + rnorm(n, 0, 1)

	data <- data.frame(x = x, y = y, z = z)

	return(data)
	}
	#------------------------------------------------------------------------------


	#------------------------------------------------------------------------------
	### get data & conduct frequentist analysis

	# generate the data
	n <- 75
	x <- get_data(n)

	## plot the data with linear model fits
	op = par(cex.main = 1.5, mar = c(5,6,4,5) + 0.1, mgp = c(3.5, 1,0),
	cex.lab = 1.5, font.lab = 2, cex.axis = 1.3, bty = "n", las = 1)


	# do pairs plot
	pdf("pairs.pdf", width = 6, height = 6)
	pairs(x, upper.panel = NULL, pch = 19)
	dev.off()

	# do correlation plot
	pdf("correlation.pdf", width = 6, height = 6)
	plot(x$y, x$x, pch = 17, ylab = "", xlab = "")

	# model not controlling for z
	mod_1 <- lm(x$x ~ x$y)
	abline(a = mod_1$coefficients[1], b = mod_1$coefficients[2], lwd = 3, lty = 1,
	col = "red")

	# model controlling for z
	mod_2 <- lm(x$x ~ x$y + x$z)
	abline(a = mod_2$coefficients[1], b = mod_2$coefficients[2], lwd = 3, lty = 1,
	col = "blue")
	legend("bottomright", c("Regular", "Partial"), lty = c(1, 1), bty = "n",
	lwd = 3, col = c("red","blue"), cex = 1.5)
	dev.off()

	# get the frequentist estimate of correlation & partial correlation
	# note that the correlation between x and y is no longer significant when
	# controlling for z
	freq_r <- rcorr(as.matrix(x))
	freq_partial_r <- pcor(x)
	#------------------------------------------------------------------------------


	#------------------------------------------------------------------------------
	### Conduct Bayesian parameter estimation

	# declare the JAGS model code
	model_code <- "
	model {

	# data
	for(i in 1:n){
	x[i, 1:3] ~ dmnorm.vcov(mu[], TI[,])
	}

	# priors
	mu[1] ~ dnorm(0, .001)
	mu[2] ~ dnorm(0, .001)
	mu[3] ~ dnorm(0, .001)

	lambda[1] ~ dgamma(.001, .001)
	lambda[2] ~ dgamma(.001, .001)
	lambda[3] ~ dgamma(.001, .001)

	r_xy ~ dunif(-1, 1)
	r_xz ~ dunif(-1, 1)
	r_yz ~ dunif(-1, 1)

	# reparameterisation
	sigma[1] <- 1/sqrt(lambda[1])
	sigma[2] <- 1/sqrt(lambda[2])
	sigma[3] <- 1/sqrt(lambda[3])

	T[1, 1] <- 1 / lambda[1]
	T[1, 2] <- r_xy * sigma[1] * sigma[2]
	T[1, 3] <- r_xz * sigma[1] * sigma[3]

	T[2, 1] <- r_xy * sigma[1] * sigma[2]
	T[2, 2] <- 1 / lambda[2]
	T[2, 3] <- r_yz * sigma[2] * sigma[3]

	T[3, 1] <- r_xz * sigma[1] * sigma[3]
	T[3, 2] <- r_yz * sigma[2] * sigma[3]
	T[3, 3] <- 1 / lambda[3]

	TI[1:3, 1:3] <- T[1:3, 1:3]

	# partial correlation calculation
	num <- r_xy - (r_xz * r_yz)
	denom <- sqrt(1 - pow(r_xz, 2)) * sqrt(1 - pow(r_yz, 2))
	partial <- num/denom
	}
	"

	# model details
	jags_info <- list("x", "n")
	parameters <- c("r_xy", "r_xz", "r_yz", "mu", "sigma", "partial")

	# fit the model
	sample <- jags(jags_info, inits = NULL, parameters,
	model.file = textConnection(model_code), n.chains = 1,
	n.iter = 10000, n.burnin = 500, n.thin = 5, DIC = F)

	# look at the overview of the parameter estimates
	sample


	# extract the posterior samples of the partial correlation (4th column) &
	# calculate the 95% HDI
	posterior <- sample$BUGSoutput$sims.matrix[, 4]
	sample_mcmc <- as.mcmc(posterior)
	hdi <- HPDinterval(sample_mcmc)



	### plot

	# do some preparation for plotting by finding the mode of the posterior
	dens <- density(posterior)
	posterior_mode <- dens$x[which.max(dens$y)]

	# do the plot
	pdf("bayesian_estimate.pdf", width = 6, height = 6)
	plot(dens, xlim = c(-1, 1), ylim = c(0, max(dens$y) + 0.55),
	main = "", xlab = "Partial Correlation Estimate", lwd = 2)

	# add the mode of the sample & the HDI etc.
	lines(x=c(posterior_mode, posterior_mode), y=c(2.5, 3.8), lty = 2, lwd = 2,
	col = "red")
	text(x= posterior_mode, y = max(dens$y) + 0.5, paste("Posterior mode =",
	round(posterior_mode, 3), sep = " "),
	cex = 1.2, col = "red")
	lines(x = c(hdi[1],hdi[1]), y = c(0,0.2), lwd = 2, col = "red")
	lines(x = c(hdi[2],hdi[2]), y = c(0,0.2), lwd = 2, col = "red")
	lines(x = c(hdi[1],hdi[2]), y = c(0.1,0.1), lwd = 2, col = "red")
	text(x = (hdi[1] + hdi[2]) / 2, y = 0.325, "95% HDI", cex = 1.2, col = "red")
	dev.off()
	#------------------------------------------------------------------------------