Skip to content

Instantly share code, notes, and snippets.

@JimGrange
Created November 19, 2017 15:14
Show Gist options
  • Save JimGrange/4e91190285a89eaabb46aa29d9b31d40 to your computer and use it in GitHub Desktop.
Save JimGrange/4e91190285a89eaabb46aa29d9b31d40 to your computer and use it in GitHub Desktop.
#------------------------------------------------------------------------------
### initial set up
rm(list = ls())
# set working directory
setwd("D:/Work/Other/Blog_YouTube code/Blog/Bayesian Partial Correlation")
# load libraries
library(ppcor)
library(Hmisc)
library(R2jags)
# set seed for reproducible code
set.seed(42)
#------------------------------------------------------------------------------
#------------------------------------------------------------------------------
### define functions
# function to simulate partially correlated data
get_data <- function(n){
x <- rnorm(n, 0, 1)
y <- .5 * x + rnorm(n, 0, 1)
z <- .3 * x + .6 * y + rnorm(n, 0, 1)
data <- data.frame(x = x, y = y, z = z)
return(data)
}
#------------------------------------------------------------------------------
#------------------------------------------------------------------------------
### get data & conduct frequentist analysis
# generate the data
n <- 75
x <- get_data(n)
## plot the data with linear model fits
op = par(cex.main = 1.5, mar = c(5,6,4,5) + 0.1, mgp = c(3.5, 1,0),
cex.lab = 1.5, font.lab = 2, cex.axis = 1.3, bty = "n", las = 1)
# do pairs plot
pdf("pairs.pdf", width = 6, height = 6)
pairs(x, upper.panel = NULL, pch = 19)
dev.off()
# do correlation plot
pdf("correlation.pdf", width = 6, height = 6)
plot(x$y, x$x, pch = 17, ylab = "", xlab = "")
# model not controlling for z
mod_1 <- lm(x$x ~ x$y)
abline(a = mod_1$coefficients[1], b = mod_1$coefficients[2], lwd = 3, lty = 1,
col = "red")
# model controlling for z
mod_2 <- lm(x$x ~ x$y + x$z)
abline(a = mod_2$coefficients[1], b = mod_2$coefficients[2], lwd = 3, lty = 1,
col = "blue")
legend("bottomright", c("Regular", "Partial"), lty = c(1, 1), bty = "n",
lwd = 3, col = c("red","blue"), cex = 1.5)
dev.off()
# get the frequentist estimate of correlation & partial correlation
# note that the correlation between x and y is no longer significant when
# controlling for z
freq_r <- rcorr(as.matrix(x))
freq_partial_r <- pcor(x)
#------------------------------------------------------------------------------
#------------------------------------------------------------------------------
### Conduct Bayesian parameter estimation
# declare the JAGS model code
model_code <- "
model {
# data
for(i in 1:n){
x[i, 1:3] ~ dmnorm.vcov(mu[], TI[,])
}
# priors
mu[1] ~ dnorm(0, .001)
mu[2] ~ dnorm(0, .001)
mu[3] ~ dnorm(0, .001)
lambda[1] ~ dgamma(.001, .001)
lambda[2] ~ dgamma(.001, .001)
lambda[3] ~ dgamma(.001, .001)
r_xy ~ dunif(-1, 1)
r_xz ~ dunif(-1, 1)
r_yz ~ dunif(-1, 1)
# reparameterisation
sigma[1] <- 1/sqrt(lambda[1])
sigma[2] <- 1/sqrt(lambda[2])
sigma[3] <- 1/sqrt(lambda[3])
T[1, 1] <- 1 / lambda[1]
T[1, 2] <- r_xy * sigma[1] * sigma[2]
T[1, 3] <- r_xz * sigma[1] * sigma[3]
T[2, 1] <- r_xy * sigma[1] * sigma[2]
T[2, 2] <- 1 / lambda[2]
T[2, 3] <- r_yz * sigma[2] * sigma[3]
T[3, 1] <- r_xz * sigma[1] * sigma[3]
T[3, 2] <- r_yz * sigma[2] * sigma[3]
T[3, 3] <- 1 / lambda[3]
TI[1:3, 1:3] <- T[1:3, 1:3]
# partial correlation calculation
num <- r_xy - (r_xz * r_yz)
denom <- sqrt(1 - pow(r_xz, 2)) * sqrt(1 - pow(r_yz, 2))
partial <- num/denom
}
"
# model details
jags_info <- list("x", "n")
parameters <- c("r_xy", "r_xz", "r_yz", "mu", "sigma", "partial")
# fit the model
sample <- jags(jags_info, inits = NULL, parameters,
model.file = textConnection(model_code), n.chains = 1,
n.iter = 10000, n.burnin = 500, n.thin = 5, DIC = F)
# look at the overview of the parameter estimates
sample
# extract the posterior samples of the partial correlation (4th column) &
# calculate the 95% HDI
posterior <- sample$BUGSoutput$sims.matrix[, 4]
sample_mcmc <- as.mcmc(posterior)
hdi <- HPDinterval(sample_mcmc)
### plot
# do some preparation for plotting by finding the mode of the posterior
dens <- density(posterior)
posterior_mode <- dens$x[which.max(dens$y)]
# do the plot
pdf("bayesian_estimate.pdf", width = 6, height = 6)
plot(dens, xlim = c(-1, 1), ylim = c(0, max(dens$y) + 0.55),
main = "", xlab = "Partial Correlation Estimate", lwd = 2)
# add the mode of the sample & the HDI etc.
lines(x=c(posterior_mode, posterior_mode), y=c(2.5, 3.8), lty = 2, lwd = 2,
col = "red")
text(x= posterior_mode, y = max(dens$y) + 0.5, paste("Posterior mode =",
round(posterior_mode, 3), sep = " "),
cex = 1.2, col = "red")
lines(x = c(hdi[1],hdi[1]), y = c(0,0.2), lwd = 2, col = "red")
lines(x = c(hdi[2],hdi[2]), y = c(0,0.2), lwd = 2, col = "red")
lines(x = c(hdi[1],hdi[2]), y = c(0.1,0.1), lwd = 2, col = "red")
text(x = (hdi[1] + hdi[2]) / 2, y = 0.325, "95% HDI", cex = 1.2, col = "red")
dev.off()
#------------------------------------------------------------------------------
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment