## Goal of this document: learn how to use various R packages to perform
## Bayesian inference via sampling
## Author: Timothée Flutre (INRA)
## I. Model
## sub-section: details about Ga and InvGa distributions
## II. Simulations
## III. Inference via sampling
## III.1 Fit with OpenBUGS (many explanations)
## sub-section: detailed diagnostics with "coda" and "mcmcse"
## III.2 Fit with JAGS
## III.3 Fit with Stan
## III.4 Fit with MCMCglmm
rm(list=ls())
library(parallel)
library(R2OpenBUGS)
library(rjags)
library(MCMCglmm)
## library(mcmc)
library(rstan)
library(coda)
library(mcmcse)
## ============================================================================
## I. Model
## Normal with unknown mean and variance
## Ref: DeGroot (1970, p169), Hoff (2009, p73)
## http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf
## likelihood: for all n in {1,...,N}, y_n | mu, tau ~ N(mu, 1/tau)
## parameters: mean mu and precision tau
## priors: tau ~ Ga(a_0, b_0) and mu | tau ~ N(m_0, 1/(tau t_0))
## hyper-parameters: shape a_0, rate b_0, mean m_0, precision t_0
## posteriors: tau | y ~ Ga(a_N, b_N) and mu | y, tau ~ N(m_N, 1/(tau t_N))
## Note: Gamma prior on tau not recommended anymore (Gelman et al, 2006)
## ----------------------------------------------------------------------------
## Remarks about the Gamma and Inverse-Gamma distributions
## On Wikipedia: https://en.wikipedia.org/wiki/Gamma_distribution
## X ~ Ga(k, theta) where k is "shape" and theta is "scale"
## E[X] = k theta and V[X] = k theta^2
## X ~ Ga(alpha, beta) where alpha is "shape" and beta is "rate" (rate=1/scale)
## E[X] = alpha / beta and V[X] = alpha / beta^2
## X ~ Ga(shape, scale) <=> 1/X ~ InvGa(shape, 1/scale)
## In R: http://stat.ethz.ch/R-manual/R-patched/library/stats/html/GammaDist.html
## X ~ Ga(a, s) where a is "shape" and s is "scale", but also option "rate"
## In OpenBUGS: http://www.openbugs.net/Manuals/ModelSpecification.html
## X ~ Ga(r, mu) where r is "shape" and mu is "rate"
## if X ~ Ga(shape=0.001, rate=0.001) => E[X]=1 and V[X]=1000 (uninformative)
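## quick Monte Carlo check of the moment formulas above (a sketch; the
## shape=3, rate=2 values are arbitrary): E[X]=3/2 and V[X]=3/4
set.seed(123)
draws <- rgamma(n=10^6, shape=3, rate=2)
mean(draws) # ~ 1.5
var(draws) # ~ 0.75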
dgamma(x=1, shape=10, scale=0.1) # 1.2511
dgamma(x=1, shape=10, rate=1/0.1) # 1.2511
pdfGa <- function(x, shape, scale=NULL, rate=NULL){
    stopifnot(xor(is.null(scale), is.null(rate)))
    if(is.null(rate)){
        (1 / (gamma(shape) * scale^shape)) * x^(shape - 1) * exp(- x / scale)
    } else{
        rate^shape / gamma(shape) * x^(shape - 1) * exp(- rate * x)
    }
}
pdfGa(x=1, shape=10, scale=0.1) # 1.2511
pdfGa(x=1, shape=10, rate=1/0.1) # 1.2511
pdfInvGa <- function(x, shape, scale){
    scale^shape / gamma(shape) * x^(- shape - 1) * exp(- scale / x)
}
pdfInvGa(x=1, shape=10, scale=1/0.1) # 1.2511
library(MCMCpack)
dinvgamma(x=1, shape=10, scale=1/0.1) # 1.2511
sampleInvGa <- function(n, shape, scale){
    1 / rgamma(n=n, shape=shape, scale=1/scale)
}
set.seed(1)
x1 <- sampleInvGa(n=1000, shape=2.1, scale=1.1)
head(x1)
summary(x1)
set.seed(1)
x2 <- rinvgamma(n=1000, shape=2.1, scale=1.1)
head(x2)
summary(x2)
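## both samplers agree; as a further check (a sketch), the empirical mean
## should approach the analytical mean scale/(shape-1) = 1.1/1.1 = 1
mean(x1) # ~ 1
mean(x2) # ~ 1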
## ============================================================================
## II. Simulations
set.seed(7263)
truth <- c()
## prior of tau
a.0 <- 3
truth <- append(x=truth,
                values=setNames(object=a.0, nm="a.0"))
b.0 <- 2
truth <- append(x=truth,
                values=setNames(object=b.0, nm="b.0"))
truth <- append(x=truth,
                values=setNames(object=a.0 / b.0, nm="prior.mean.tau"))
truth <- append(x=truth,
                values=setNames(object=a.0 / b.0^2, nm="prior.var.tau"))
plot(x=seq(0,11,0.1),
     y=dgamma(x=seq(0,11,0.1), shape=a.0, rate=b.0),
     main=paste0("Gamma(shape=", a.0, ", rate=", b.0, ")"),
     xlab="x", ylab="pdf", type="l")
(tau <- rgamma(n=1, shape=a.0, rate=b.0))
truth <- append(x=truth,
                values=setNames(object=tau, nm="tau"))
truth <- append(x=truth,
                values=setNames(object=1 / tau, nm="sigma2"))
truth <- append(x=truth,
                values=setNames(object=sqrt(1 / tau), nm="sigma"))
## prior of mu
m.0 <- 73
truth <- append(x=truth,
                values=setNames(object=m.0, nm="prior.mean.mu"))
t.0 <- 10^(-1)
truth <- append(x=truth,
                values=setNames(object=1 / (tau * t.0), nm="prior.var.mu"))
plot(x=seq(60,86,0.1),
     y=dnorm(x=seq(60,86,0.1), mean=m.0, sd=sqrt(1/(tau*t.0))),
     main=paste0("Normal(mean=", m.0, ", sd=",
                 format(sqrt(1/(tau*t.0)), digits=3), ")"),
     xlab="x", ylab="pdf", type="l")
(mu <- rnorm(n=1, mean=m.0, sd=sqrt(1/(tau * t.0))))
truth <- append(x=truth,
                values=setNames(object=mu, nm="mu"))
## data
N <- 5*10^1
y <- rnorm(n=N, mean=mu, sd=sqrt(1/tau))
summary(y)
hist(y, breaks="FD", freq=FALSE, col="grey", border="white",
     main="Simulated data",
     ylim=c(0, dnorm(x=mu, mean=mu, sd=sqrt(1/tau))))
abline(v=mu, col="red")
curve(dnorm(x=x, mean=mu, sd=sqrt(1/tau)), add=TRUE, col="red")
## maximum likelihood estimates
truth <- append(x=truth,
                values=setNames(object=mean(y), nm="mle.mu"))
truth <- append(x=truth,
                values=setNames(object=1 / var(y), nm="mle.tau"))
truth <- append(x=truth,
                values=setNames(object=var(y), nm="mle.sigma2"))
truth <- append(x=truth,
                values=setNames(object=sd(y), nm="mle.sigma"))
## posterior
a.N <- a.0 + N / 2
truth <- append(x=truth,
                values=setNames(object=a.N, nm="a.N"))
b.N <- b.0 + (1/2) * sum((y - mean(y))^2) +
    (t.0 * N * (mean(y) - m.0)^2) / (2 * (t.0 + N))
truth <- append(x=truth,
                values=setNames(object=b.N, nm="b.N"))
truth <- append(x=truth,
                values=setNames(object=a.N / b.N, nm="post.mean.tau"))
truth <- append(x=truth,
                values=setNames(object=a.N / b.N^2, nm="post.var.tau"))
truth <- append(x=truth,
                values=setNames(object=b.N / (a.N - 1), nm="post.mean.sigma2"))
truth <- append(x=truth,
                values=setNames(object=b.N^2 / ((a.N - 1)^2 * (a.N - 2)),
                                nm="post.var.sigma2"))
m.N <- (t.0 * m.0 + N * mean(y)) / (t.0 + N)
truth <- append(x=truth,
                values=setNames(object=m.N, nm="post.mean.mu"))
t.N <- t.0 + N
truth <- append(x=truth,
                values=setNames(object=1 / (t.N * a.N / b.N),
                                nm="post.var.mu"))
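## sanity check (a sketch): the posterior mean of mu is a precision-weighted
## average of the prior mean and the sample mean, so it must lie between them
round(truth[c("prior.mean.mu", "mle.mu", "post.mean.mu")], 4)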
## graphical overview of the analytical inference
plot(x=0, y=0, type="n", xlim=c(65,82), ylim=c(0,2),
     xlab="x", ylab="density",
     main="Bayesian inference of a Normal with unknown mean and variance")
curve(dnorm(x=x, mean=truth["prior.mean.mu"], sd=sqrt(truth["prior.var.mu"])),
      add=TRUE, col="green", lwd=2)
curve(dnorm(x=x, mean=mu, sd=sqrt(1/tau)), add=TRUE, col="black", lwd=2)
curve(dnorm(x=x, mean=truth["post.mean.mu"], sd=sqrt(truth["post.var.mu"])),
      add=TRUE, col="red", lwd=2)
legend(x="topright", legend=c("prior(mu|tau)", "likelihood(y;mu,tau)",
                              "posterior(mu|y,tau)"),
       col=c("green","black","red"), lwd=2, bty="n")
## ============================================================================
## III. Inference via sampling
nb.chains <- 4
nb.iters <- 13000
nb.cores <- 1 # need to check how to set seeds in parallel before setting it > 1
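## one way to make parallel chains reproducible (a sketch, not used below):
## the L'Ecuyer-CMRG generator provides independent streams across processes
## RNGkind("L'Ecuyer-CMRG")
## set.seed(1859)
## mclapply(1:nb.chains, function(c){...}, mc.cores=nb.cores, mc.set.seed=TRUE)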
##' MCMC diagnostics
##'
##' Plot the trace, autocorrelation, running quantiles and density side by side for a single chain.
##' TODO: show dark background for burn-in in traceplot
##' @param res.mcmc object of class "mcmc" (not "mcmc.list"!)
##' @param param.name parameter name in the columns of res.mcmc
##' @param subplots vector indicating which sub-plot(s) to make (1 for trace, 2 for autocorrelation, 3 for running quantiles, 4 for density)
##' @param pe point estimate (optional, but required if "hi" is given)
##' @param hi half-interval (optional)
##' @return nothing
##' @author Timothee Flutre
plot.mcmc.chain <- function(res.mcmc, param.name, subplots=1:4,
                            pe=NULL, hi=NULL){
    library(coda)
    stopifnot(is.mcmc(res.mcmc),
              is.character(param.name),
              param.name %in% colnames(res.mcmc),
              is.vector(subplots), is.numeric(subplots))
    if(! is.null(hi))
        stopifnot(! is.null(pe))
    changed.par <- FALSE
    if(all(1:4 %in% subplots) &&
       all(par("mfrow") == c(1, 1))){
        par(mfrow=c(2,2))
        changed.par <- TRUE
    }
    if(1 %in% subplots){
        ## ts.plot(res.mcmc[, param.name],
        coda::traceplot(res.mcmc[, param.name],
                        ## xlab="Iterations",
                        ylab="Trace",
                        main=paste0(param.name))
        if(! is.null(pe)){
            abline(h=pe, col="red")
            if(! is.null(hi)){
                abline(h=pe + 2 * hi, col="red", lty=2)
                abline(h=pe - 2 * hi, col="red", lty=2)
            }
        }
    }
    if(2 %in% subplots){
        ## acf(res.mcmc[, param.name],
        autocorr.plot(res.mcmc[, param.name],
                      main=paste0(param.name),
                      ## xlab="Lag",
                      ## ylab="Autocorrelation",
                      auto.layout=FALSE,
                      ask=FALSE)
    }
    if(3 %in% subplots){
        ## ts.plot(cumsum(res.mcmc[, param.name]) / seq(along=res.mcmc[, param.name]),
        cumuplot(res.mcmc[, param.name],
                 probs=c(0.025, 0.5, 0.975),
                 main=paste0(param.name),
                 ## xlab="Iterations",
                 ylab="Running quantiles",
                 auto.layout=FALSE,
                 ask=FALSE)
        if(! is.null(pe)){
            abline(h=pe, col="red")
            if(! is.null(hi)){
                abline(h=pe + 2 * hi, col="red", lty=2)
                abline(h=pe - 2 * hi, col="red", lty=2)
            }
        }
    }
    if(4 %in% subplots){
        ## plot(density(res.mcmc[, param.name]), type="l",
        densplot(res.mcmc[, param.name],
                 ylab="Density",
                 main=paste0(param.name))
        if(! is.null(pe)){
            abline(v=pe, col="red")
            if(! is.null(hi)){
                abline(v=pe + 2 * hi, col="red", lty=2)
                abline(v=pe - 2 * hi, col="red", lty=2)
            }
        }
    }
    if(changed.par)
        par(mfrow=c(1,1))
}
## ----------------------------------------------------------------------------
## III.1 Fit with OpenBUGS
## priors: mu ~ N(0, 10^8) and sigma^2 ~ IG(0.001,0.001)
model.file <- "model_bugs.txt"
model <- function(){
    for(i in 1:N){
        y[i] ~ dnorm(mu, tau)
    }
    mu ~ dnorm(0, 1.0E-8)
    tau ~ dgamma(0.001, 0.001)
    sigma2 <- 1 / tau
}
write.model(model=model, con=model.file)
inits <- function(nb.chains=1){
    return(lapply(1:nb.chains, function(c){list(mu=0, tau=1)}))
}
(st.r2ob <- system.time(
    fit.r2ob <- bugs(data=list(N=N, y=y),
                     inits=inits(nb.chains),
                     parameters.to.save=c("mu", "sigma2"),
                     n.iter=nb.iters,
                     model.file=model.file,
                     n.chains=nb.chains,
                     n.burnin=0,
                     n.thin=1,
                     DIC=TRUE,
                     codaPkg=FALSE, # use "as.mcmc.list"
                     saveExec=TRUE,
                     restart=FALSE, # used when resampling
                     working.directory=getwd())
))
fit.r2ob
## convert output into a format ready for the "coda" package
res.r2ob <- as.mcmc.list(fit.r2ob)
## now, we have to perform diagnostics (see sub-section below)
## ...
## after diagnostics, we may sometimes want to run more iterations:
fit.r2ob <- bugs(data=list(N=N, y=y),
                 inits=inits(nb.chains),
                 parameters.to.save=c("mu", "sigma2"),
                 n.iter=nb.iters,
                 model.file=model.file,
                 n.chains=nb.chains,
                 n.burnin=0, # don't change, so that previous iterations are kept
                 n.thin=1,
                 DIC=TRUE,
                 codaPkg=FALSE,
                 saveExec=TRUE,
                 restart=TRUE, # this changed compared with the first call
                 working.directory=getwd())
fit.r2ob # notice iterations from both the first and second runs
## go back to the diagnostics and check if more iterations are needed
## if not, remove burn-in and thin if necessary,
## then report posterior means and variances
## as well as their Monte Carlo standard errors
## -------------------------------------
## Diagnostics and MCSEs
res <- res.r2ob # so that the code of this sub-section works with any sampler
## plot diagnostics for a single chain
chain.idx <- 1
par(mfcol=c(4,2))
plot.mcmc.chain(res[[chain.idx]], "mu",
                pe=truth["post.mean.mu"], hi=sqrt(truth["post.var.mu"]))
plot.mcmc.chain(res[[chain.idx]], "sigma2",
                pe=truth["post.mean.sigma2"])
par(mfrow=c(1,1))
## conclusion: convergence seems to have been reached; negligible autocorrelation
## plot diagnostics for all chains but only for "mu"
par(mfcol=c(4,nb.chains))
for(chain.idx in 1:nb.chains)
    plot.mcmc.chain(res[[chain.idx]], "mu",
                    pe=truth["post.mean.mu"], hi=sqrt(truth["post.var.mu"]))
par(mfrow=c(1,1))
## same conclusion per chain; same results across chains
## plot diagnostics for all chains but only for "sigma2"
par(mfcol=c(4,nb.chains))
for(chain.idx in 1:nb.chains)
    plot.mcmc.chain(res[[chain.idx]], "sigma2",
                    pe=truth["post.mean.sigma2"])#, hi=sqrt(post.var.sigma2))
par(mfrow=c(1,1))
## idem; note the skewness of the posterior of sigma2
## plot trace for all chains and all parameters
par(mfrow=c(nb.chains,2))
for(chain.idx in 1:nb.chains){
    plot.mcmc.chain(res[[chain.idx]], "mu", subplots=1,
                    pe=truth["post.mean.mu"], hi=sqrt(truth["post.var.mu"]))
    plot.mcmc.chain(res[[chain.idx]], "sigma2", subplots=1,
                    pe=truth["post.mean.sigma2"])#, hi=sqrt(post.var.sigma2))
}
## Gelman-Rubin R-hat
gelman.plot(res)
## non-graphical diagnostics
lapply(res, autocorr.diag)
t(sapply(res, effectiveSize))
(gel <- gelman.diag(res))
geweke.diag(res)
heidel.diag(res)
raftery.diag(res)
## at this step, it may be necessary to resample
## if yes, go back to the section of each sampler to see how it is done
## if this is still not enough, the model may have to be re-parametrized
## or even modified substantially: correlated variables may have to be updated
## together ("block"), auxiliary variables may have to be added ("slice"),
## sophisticated schemes may have to be tried (e.g. parallel tempering),
## or even whole other methods (HMC, particle MCMC)...
## depending on the diagnostics above, apply burn-in and thinning,
## but they are not always needed (Geyer, Handbook of MCMC, 2011, ch1)
burnin <- 1000
thin <- 1
res <- window(x=res, start=1 + burnin, end=nb.iters, thin=thin)
## check again with all chains pooled
autocorr.diag(res)
effectiveSize(res)
## summarize the inference
summary(res)
## mu: post.mean=73.705 ts.SE=0.0009981
## sigma2: post.mean=2.417 ts.SE=0.0023422
plot(res)
## estimate mean and Monte Carlo standard errors (MCSE) for "mu" and "sigma2"
(post.means <- mcse.mat(x=do.call(rbind, res)[,c("mu","sigma2")],
                        method="bm", g=NULL))
## TODO: for MCSEs, check if it's right to pool draws from all chains
## see Flegal & Jones, Handbook of MCMC, 2011, ch7
## but should we permute draws, as in "rstan"?
## TODO: estimate variance and MCSE for "mu" and "sigma2"
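## one possible way to tackle this TODO (a sketch): use the "g" argument of
## mcse() to get E[X^2] and its MCSE, then deduce Var[X] = E[X^2] - E[X]^2
## (the MCSE of the variance itself would require e.g. the delta method)
mcse(x=do.call(rbind, res)[,"mu"], method="bm", g=function(x){x^2})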
## calculate effective sample size with "mcmcse"
ess(do.call(rbind, res)[,c("mu","sigma2")])
multiESS(do.call(rbind, res)[,c("mu","sigma2")])
par(mfrow=c(1,2))
estvssamp(do.call(rbind, res)[,"mu"])
estvssamp(do.call(rbind, res)[,"sigma2"])
## ----------------------------------------------------------------------------
## III.2 Fit with JAGS
## priors: mu ~ N(0, 10^8) and sigma^2 ~ IG(0.001,0.001)
model.file <- "model_jags.txt"
model <- function(){
    for(i in 1:N){
        y[i] ~ dnorm(mu, tau)
    }
    mu ~ dnorm(0, 1.0E-8)
    tau ~ dgamma(0.001, 0.001)
    sigma2 <- 1 / tau
}
write.model(model=model, con=model.file) # useful fct from R2OpenBUGS
inits <- function(nb.chains=1){
    return(lapply(1:nb.chains, function(c){list(mu=0, tau=1)}))
}
jags <- jags.model(file=model.file,
                   data=list(N=N, y=y),
                   inits=inits(nb.chains),
                   n.chains=nb.chains,
                   n.adapt=0,
                   quiet=TRUE)
(st.rjags <- system.time(
    res.rjags <- coda.samples(model=jags, variable.names=c("mu", "sigma2"),
                              n.iter=nb.iters, thin=1)
))
## play with graphical diagnostics as for R2OpenBUGS
## ...
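## for instance (a sketch, reusing the helper defined above; coda.samples()
## returns an mcmc.list with columns "mu" and "sigma2"):
plot.mcmc.chain(res.rjags[[1]], "mu",
                pe=truth["post.mean.mu"], hi=sqrt(truth["post.var.mu"]))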
## re-sample if necessary
res.rjags <- coda.samples(model=jags, variable.names=c("mu", "sigma2"),
                          n.iter=nb.iters, thin=1)
## ----------------------------------------------------------------------------
## III.3 Fit with Stan
## priors: mu ~ C(0, 5) and sigma ~ |C(0, 5)|
model <- "data {
    int<lower=0> N;
    vector[N] y;
}
parameters {
    real mu;
    real<lower=0> sigma;
}
transformed parameters {
    real<lower=0> sigma2;
    sigma2 <- sigma^2;
}
model {
    sigma ~ cauchy(0, 5); // implicit half-Cauchy
    mu ~ cauchy(0, 5);
    y ~ normal(mu, sigma);
}"
model.file <- "model.stan"
write(x=model, file=model.file)
rt <- stanc(file=model.file, model_name="Simple Normal")
sm <- stan_model(stanc_ret=rt, verbose=FALSE)
save(sm, file="rstan_sm.RData")
load("rstan_sm.RData")
warmup <- 100
st.rstan <- system.time(
    fit.rstan <- sampling(object=sm,
                          data=list(N=N, y=y),
                          iter=nb.iters + warmup,
                          warmup=warmup,
                          thin=1,
                          chains=nb.chains)
)
traceplot(object=fit.rstan, ask=FALSE)
traceplot(object=fit.rstan, ask=FALSE, inc_warmup=FALSE)
traceplot(object=fit.rstan, pars="mu", ask=FALSE)
print(fit.rstan)
res.rstan <- as.mcmc.list(
    lapply(1:dim(fit.rstan)[2],
           function(chain.idx){
               as.mcmc(as.array(fit.rstan)[,chain.idx,])
           }))
## play with graphical diagnostics as for R2OpenBUGS
## ...
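## for instance (a sketch; as.array() on a stanfit object names the columns
## "mu", "sigma", "sigma2" and "lp__"):
plot.mcmc.chain(res.rstan[[1]], "mu",
                pe=truth["post.mean.mu"], hi=sqrt(truth["post.var.mu"]))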
burnin <- 2000
thin <- 1
res.rstan <- window(x=res.rstan, start=1 + burnin, end=nb.iters-warmup,
                    thin=thin)
## ...
## TODO: show how to re-sample, starting at the current position of each chain
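## a possible approach (a sketch, untested): extract the last draw of each
## chain and pass it via the "init" argument of a new call to sampling()
## arr <- as.array(fit.rstan) # iterations x chains x parameters
## last.draws <- lapply(1:nb.chains, function(c){
##     list(mu=arr[dim(arr)[1], c, "mu"], sigma=arr[dim(arr)[1], c, "sigma"])
## })
## fit.rstan2 <- sampling(object=sm, data=list(N=N, y=y), chains=nb.chains,
##                        iter=nb.iters, warmup=0, init=last.draws)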
## ----------------------------------------------------------------------------
## III.4 Fit with MCMCglmm
## priors: mu ~ N(0, 10^8) and sigma^2 ~ IG(0.001,0.001)
## TODO: set seeds for parallel chains?
(st.mcmcglmm <- system.time(
    fit.mcmcglmm <-
        mclapply(1:nb.chains, function(c){
            MCMCglmm(y ~ 1, data=data.frame(y=y),
                     prior=list(B=list(mu=0, V=10^8),
                                R=list(V=1, nu=0.002)),
                     nitt=nb.iters, thin=1, burnin=0,
                     verbose=FALSE)
        }, mc.cores=nb.cores)
))
res.mcmcglmm <- as.mcmc.list(
    lapply(1:nb.chains, function(c){
        mcmc(data=cbind(fit.mcmcglmm[[c]]$Sol,
                        fit.mcmcglmm[[c]]$VCV))
    }))
## play with graphical diagnostics as for R2OpenBUGS
## ...
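## for instance (a sketch; MCMCglmm names the intercept "(Intercept)" and the
## residual variance "units"):
plot.mcmc.chain(res.mcmcglmm[[1]], "(Intercept)",
                pe=truth["post.mean.mu"], hi=sqrt(truth["post.var.mu"]))
plot.mcmc.chain(res.mcmcglmm[[1]], "units",
                pe=truth["post.mean.sigma2"])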
## TODO: show how to re-sample, starting at the current position of each chain