Created
November 19, 2017 15:14
-
-
Save JimGrange/4e91190285a89eaabb46aa29d9b31d40 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#------------------------------------------------------------------------------ | |
### initial set up | |
rm(list = ls()) | |
# set working directory | |
setwd("D:/Work/Other/Blog_YouTube code/Blog/Bayesian Partial Correlation") | |
# load libraries | |
library(ppcor) | |
library(Hmisc) | |
library(R2jags) | |
# set seed for reproducible code | |
set.seed(42) | |
#------------------------------------------------------------------------------ | |
#------------------------------------------------------------------------------ | |
### define functions | |
# function to simulate partially correlated data | |
get_data <- function(n){ | |
x <- rnorm(n, 0, 1) | |
y <- .5 * x + rnorm(n, 0, 1) | |
z <- .3 * x + .6 * y + rnorm(n, 0, 1) | |
data <- data.frame(x = x, y = y, z = z) | |
return(data) | |
} | |
#------------------------------------------------------------------------------ | |
#------------------------------------------------------------------------------ | |
### get data & conduct frequentist analysis | |
# generate the data | |
n <- 75 | |
x <- get_data(n) | |
## plot the data with linear model fits | |
op = par(cex.main = 1.5, mar = c(5,6,4,5) + 0.1, mgp = c(3.5, 1,0), | |
cex.lab = 1.5, font.lab = 2, cex.axis = 1.3, bty = "n", las = 1) | |
# do pairs plot | |
pdf("pairs.pdf", width = 6, height = 6) | |
pairs(x, upper.panel = NULL, pch = 19) | |
dev.off() | |
# do correlation plot | |
pdf("correlation.pdf", width = 6, height = 6) | |
plot(x$y, x$x, pch = 17, ylab = "", xlab = "") | |
# model not controlling for z | |
mod_1 <- lm(x$x ~ x$y) | |
abline(a = mod_1$coefficients[1], b = mod_1$coefficients[2], lwd = 3, lty = 1, | |
col = "red") | |
# model controlling for z | |
mod_2 <- lm(x$x ~ x$y + x$z) | |
abline(a = mod_2$coefficients[1], b = mod_2$coefficients[2], lwd = 3, lty = 1, | |
col = "blue") | |
legend("bottomright", c("Regular", "Partial"), lty = c(1, 1), bty = "n", | |
lwd = 3, col = c("red","blue"), cex = 1.5) | |
dev.off() | |
# get the frequentist estimate of correlation & partial correlation | |
# note that the correlation between x and y is no longer significant when | |
# controlling for z | |
freq_r <- rcorr(as.matrix(x)) | |
freq_partial_r <- pcor(x) | |
#------------------------------------------------------------------------------ | |
#------------------------------------------------------------------------------ | |
### Conduct Bayesian parameter estimation | |
# declare the JAGS model code | |
model_code <- " | |
model { | |
# data | |
for(i in 1:n){ | |
x[i, 1:3] ~ dmnorm.vcov(mu[], TI[,]) | |
} | |
# priors | |
mu[1] ~ dnorm(0, .001) | |
mu[2] ~ dnorm(0, .001) | |
mu[3] ~ dnorm(0, .001) | |
lambda[1] ~ dgamma(.001, .001) | |
lambda[2] ~ dgamma(.001, .001) | |
lambda[3] ~ dgamma(.001, .001) | |
r_xy ~ dunif(-1, 1) | |
r_xz ~ dunif(-1, 1) | |
r_yz ~ dunif(-1, 1) | |
# reparameterisation | |
sigma[1] <- 1/sqrt(lambda[1]) | |
sigma[2] <- 1/sqrt(lambda[2]) | |
sigma[3] <- 1/sqrt(lambda[3]) | |
T[1, 1] <- 1 / lambda[1] | |
T[1, 2] <- r_xy * sigma[1] * sigma[2] | |
T[1, 3] <- r_xz * sigma[1] * sigma[3] | |
T[2, 1] <- r_xy * sigma[1] * sigma[2] | |
T[2, 2] <- 1 / lambda[2] | |
T[2, 3] <- r_yz * sigma[2] * sigma[3] | |
T[3, 1] <- r_xz * sigma[1] * sigma[3] | |
T[3, 2] <- r_yz * sigma[2] * sigma[3] | |
T[3, 3] <- 1 / lambda[3] | |
TI[1:3, 1:3] <- T[1:3, 1:3] | |
# partial correlation calculation | |
num <- r_xy - (r_xz * r_yz) | |
denom <- sqrt(1 - pow(r_xz, 2)) * sqrt(1 - pow(r_yz, 2)) | |
partial <- num/denom | |
} | |
" | |
# model details | |
jags_info <- list("x", "n") | |
parameters <- c("r_xy", "r_xz", "r_yz", "mu", "sigma", "partial") | |
# fit the model | |
sample <- jags(jags_info, inits = NULL, parameters, | |
model.file = textConnection(model_code), n.chains = 1, | |
n.iter = 10000, n.burnin = 500, n.thin = 5, DIC = F) | |
# look at the overview of the parameter estimates | |
sample | |
# extract the posterior samples of the partial correlation (4th column) & | |
# calculate the 95% HDI | |
posterior <- sample$BUGSoutput$sims.matrix[, 4] | |
sample_mcmc <- as.mcmc(posterior) | |
hdi <- HPDinterval(sample_mcmc) | |
### plot | |
# do some preparation for plotting by finding the mode of the posterior | |
dens <- density(posterior) | |
posterior_mode <- dens$x[which.max(dens$y)] | |
# do the plot | |
pdf("bayesian_estimate.pdf", width = 6, height = 6) | |
plot(dens, xlim = c(-1, 1), ylim = c(0, max(dens$y) + 0.55), | |
main = "", xlab = "Partial Correlation Estimate", lwd = 2) | |
# add the mode of the sample & the HDI etc. | |
lines(x=c(posterior_mode, posterior_mode), y=c(2.5, 3.8), lty = 2, lwd = 2, | |
col = "red") | |
text(x= posterior_mode, y = max(dens$y) + 0.5, paste("Posterior mode =", | |
round(posterior_mode, 3), sep = " "), | |
cex = 1.2, col = "red") | |
lines(x = c(hdi[1],hdi[1]), y = c(0,0.2), lwd = 2, col = "red") | |
lines(x = c(hdi[2],hdi[2]), y = c(0,0.2), lwd = 2, col = "red") | |
lines(x = c(hdi[1],hdi[2]), y = c(0.1,0.1), lwd = 2, col = "red") | |
text(x = (hdi[1] + hdi[2]) / 2, y = 0.325, "95% HDI", cex = 1.2, col = "red") | |
dev.off() | |
#------------------------------------------------------------------------------ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment