Skip to content

Instantly share code, notes, and snippets.

@richarddmorey
Last active August 29, 2015 14:26
Show Gist options
  • Save richarddmorey/d1ba3d4ff63a968944c3 to your computer and use it in GitHub Desktop.
Save richarddmorey/d1ba3d4ff63a968944c3 to your computer and use it in GitHub Desktop.
Utility code for "What Are the Odds? Modern Relevance and Bayes Factor Solutions for MacAlister's Problem from the 1881 Educational Times."
#####################
# Code for computing probit model Bayes factors for
# 2x2 contingency table data
# Code by Richard D. Morey, July, 2015
# For:
# "What Are the Odds? Modern Relevance and Bayes
# Factor Solutions for MacAlister's Problem from the 1881 Educational Times."
# Authors: Tahira Jamil, Maarten Marsman, Alexander Ly,
# Richard D. Morey, Eric-Jan Wagenmakers
#####################
logPost = Vectorize(function(mu, delta, y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), log = FALSE, logConst = 0, opt.pars = c(0,0))
{
delta = delta + opt.pars[2]
mu = mu + opt.pars[1]
if( (delta<min(delta.lim)) | (delta>max(delta.lim)) ){
ans = -Inf
}else{
area.delta = log(diff(pnorm(sort(delta.lim), mu_delta, sd_delta)))
p1 = pnorm(mu - delta/2)
p2 = pnorm(mu + delta/2)
ans = dbinom(y[1], N[1], p1, log = TRUE) +
dbinom(y[2], N[2], p2, log = TRUE) +
dnorm(mu, mu_mu, sd_mu, log = TRUE) +
dnorm(delta, mu_delta, sd_delta, log = TRUE) - area.delta - logConst
}
if(log){
return(ans)
}else{
return(exp(ans))
}
},"mu")
logPost.pv = function(mu.and.delta, ...){
logPost(mu.and.delta[1], mu.and.delta[2], ...)
}
grad.logPost = function(mu.and.delta, y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), log=NA, opt.pars = c(0,0))
{
delta = mu.and.delta[2] + opt.pars[2]
mu = mu.and.delta[1] + opt.pars[1]
if( (delta<min(delta.lim)) | (delta>max(delta.lim)) ){
ans = NA
}else{
g1 = y[1]*dnorm(mu - delta/2)/pnorm(mu - delta/2) - (N[1] - y[1])*dnorm(delta/2-mu)/pnorm(delta/2 - mu) +
y[2]*dnorm(mu + delta/2)/pnorm(mu + delta/2) - (N[2] - y[2])*dnorm(-delta/2-mu)/pnorm(-delta/2 - mu) +
-1/sd_mu^2 * (mu - mu_mu)
g2 = -y[1]*dnorm(mu - delta/2)/pnorm(mu - delta/2) + (N[1] - y[1])*dnorm(delta/2-mu)/pnorm(delta/2 - mu) +
y[2]*dnorm(mu + delta/2)/pnorm(mu + delta/2) - (N[2] - y[2])*dnorm(-delta/2-mu)/pnorm(-delta/2 - mu) +
-1/sd_delta^2 * (delta/2 - mu_delta)
ans = c(g1, g2/2)
}
return(ans)
}
margDelta = Vectorize(function(delta, y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), log = FALSE, logConst = 0, opt.pars = c(0,0))
{
delta = delta + opt.pars[2]
logDensAtMax = logPost.pv(opt.pars, y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, log = TRUE)
if( (delta<min(delta.lim)) | (delta>max(delta.lim)) ){
log_ans = -Inf
}else{
ans = integrate(logPost, -Inf, Inf, delta = delta, y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim, logConst = logDensAtMax, opt.pars = opt.pars * c(1,0))
log_ans = log(ans[[1]]) - logConst + logDensAtMax
}
if(log){
return(log_ans)
}else{
return(exp(log_ans))
}
},"delta")
normConst = function(y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), opt.pars = c(0,0))
{
ans = integrate(margDelta, min(delta.lim)-opt.pars[2], max(delta.lim)-opt.pars[2], y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, logConst = 0, delta.lim = delta.lim, opt.pars = opt.pars)
log_ans = log(ans[[1]])
return(log_ans)
}
postMode.norestrict = function(y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1){
sol = optim(c(0,0), fn = logPost.pv, gr = grad.logPost, method="BFGS", control = list(fnscale=-1), hessian = TRUE, y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, log = TRUE)
list(par=sol$par, hessian=sol$hessian)
}
bfProbit<-function(y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), log = TRUE, opt.pars = c(0,0)){
area.delta = log(diff(pnorm(sort(delta.lim), mu_delta, sd_delta)))
logConst = normConst(y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim, opt.pars = opt.pars)
logDensAtMax = logPost.onep(opt.pars[1], y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, log = TRUE)
nullLike = log(integrate(logPost.onep, -Inf, Inf, y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, logConst = logDensAtMax, opt.pars = opt.pars[1])[[1]]) + logDensAtMax
log.bf = nullLike - logConst
if(log){
return(log.bf)
}else{
return(exp(log.bf))
}
}
bfProbit.mcmc = function(y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), M = 10000){
require(mcmc)
opt = postMode.norestrict(y, N, mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta)
samps = metrop(logPost.pv, initial = opt$par, scale = 2*sqrt(diag(-solve(opt$hessian))),nbatch=M,
y=y,N=N,mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta,log=TRUE)
area.delta = log(diff(pnorm(sort(delta.lim), mu_delta, sd_delta)))
log.bfr1 = log(mean(samps$batch[,2]>min(delta.lim) & samps$batch[,2]<max(delta.lim))) - area.delta
log.bf01 = dnorm(0,mean(samps$batch[,2]), sd(samps$batch[,2]), log = TRUE) - dnorm(0,mu_delta,sd_delta,log=TRUE)
return(list(
bf = exp(log.bf01 - log.bfr1),
logbf = log.bf01 - log.bfr1,
samples = samps
))
}
bfProbit.importance = function(y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), log = TRUE, M = 10000){
ml1 = norm.null.importance(y, N, mu_mu, sd_mu, M=M)
ml2 = norm.alt.importance(y, N, mu_mu, sd_mu, mu_delta, sd_delta, delta.lim = delta.lim, M = M)
return(c(
bf = ml1[1] / ml2[1],
logbf = log(ml1[1] / ml2[1]),
properror = sqrt((ml1[2]/ml1[1])^2 + (ml2[2]/ml2[1])^2),
error = sqrt((ml1[2]/ml1[1])^2 + (ml2[2]/ml2[1])^2) * (ml1[1] / ml2[1])
))
}
dps.p = Vectorize(function(p1, p2, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf)){
mu = (qnorm(p1) + qnorm(p2))/2
delta = (qnorm(p2) - qnorm(p1))
if( (delta < min(delta.lim)) | (delta > max(delta.lim))){
return(0)
}else{
area.delta = log(diff(pnorm(sort(delta.lim), mu_delta, sd_delta)))
ans = dnorm(mu, mu_mu, sd_mu, log = TRUE) +
dnorm(delta, mu_delta, sd_delta, log = TRUE) - area.delta +
-dnorm(qnorm(p1), log=TRUE) - dnorm(qnorm(p2), log=TRUE)
}
return(exp(ans))
},c("p1","p2"))
marg.prior.p2 = Vectorize(function(p2, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf)){
integrate(dps.p, 0, 1, p2 = p2, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim)[[1]]
},"p2")
marg.prior.p1 = Vectorize(function(p1, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf)){
dps2 = function(p2, p1, ...) dps.p(p1, p2, ...)
integrate(dps2, 0, 1, p1 = p1, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim)[[1]]
},"p1")
normP = function(mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf)){
integrate(marg.prior.p2, 0, 1, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim)[[1]]
}
limitsP = function(delta.lim){
c1 = min(delta.lim)*2
c2 = max(delta.lim)*2
p.lim = c(NA,NA)
if(c1>=0){
p.lim[1] = 2*pnorm(c1) - 1
}else{
p.lim[1] = -(2*pnorm(-c1) - 1)
}
if(c2>=0){
p.lim[2] = 2*pnorm(c2) - 1
}else{
p.lim[2] = -(2*pnorm(-c2) - 1)
}
if(max(p.lim)<0) p.lim[2] = 0
if(min(p.lim)>0) p.lim[1] = 0
return(p.lim)
}
dps = Vectorize(function(mn, df, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf)){
if( (abs(df/2)>min(mn,1-mn)) ) return(0*mn)
if(df>=1) return(0*mn)
p1 = mn - .5*df
p2 = mn + .5*df
dps.p(p1, p2, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim)
},c("mn","df"))
marg.prior.df = Vectorize(function(df, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf)){
integrate(dps, .0001, .9999, df = df, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim)[[1]]
},"df")
marg.prior.mn = Vectorize(function(mn, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf)){
dps2 = function(df, mn, ...) dps(mn, df, ...)
integrate(dps2, limitsP(delta.lim)[1]+.0001,limitsP(delta.lim)[2]-.0001, mn = mn, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim)[[1]]
},"mn")
logPost.onep = Vectorize(function(mu, y, N, mu_mu = 0, sd_mu = 1, log = FALSE, logConst = 0, opt.pars = c(0,0))
{
mu = mu + opt.pars[1]
p = pnorm(mu)
ans = dbinom(y[1], N[1], p, log = TRUE) +
dbinom(y[2], N[2], p, log = TRUE) +
dnorm(mu, mu_mu, sd_mu, log = TRUE) - logConst
if(log){
return(ans)
}else{
return(exp(ans))
}
}, "mu")
norm.alt.importance = function(y=y, N=N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), M = 1000){
opt.par = c(mean(qnorm(y/N)), diff(qnorm(y/N)))
post.sd.delta = sqrt(sum((y/N*(1-y/N)/N)/dnorm(qnorm(y/N))^2))
samp.mu = rnorm(M, opt.par[1], post.sd.delta)
samp.delta = rnorm(M, opt.par[2], post.sd.delta)
Z = apply(cbind(samp.mu, samp.delta), 1, logPost.pv, y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim, log = TRUE) +
-dnorm(samp.mu, opt.par[1], post.sd.delta, log = TRUE) +
-dnorm(samp.delta, opt.par[2], post.sd.delta, log = TRUE)
return(c(mean = mean(exp(Z)), sd.err = sd(exp(Z))/sqrt(M)))
}
norm.null.importance = function(y=y, N=N, mu_mu = 0, sd_mu = 1, M = 1000){
phat = sum(y)/sum(N)
post.sd.delta = sqrt(sum((phat*(1-phat)/sum(N))/dnorm(qnorm(phat))^2))
samp = rnorm(M, qnorm(phat), post.sd.delta)
Z = logPost.onep(samp, y, N, mu_mu = mu_mu, sd_mu = sd_mu, log = TRUE) - dnorm(samp, qnorm(phat), post.sd.delta, log = TRUE)
return(c(mean = mean(exp(Z)), sd.err = sd(exp(Z))/sqrt(M)))
}
bfProbit.laplace = function(y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), log = TRUE, method = "optim"){
if(method == "optim"){
opt = postMode.norestrict(y, N, mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta)
post.mean = opt$par[2]
post.sd = sqrt(-solve(opt$hessian)[2,2])
}else if(method == "delta"){
post.mean = diff(qnorm(y/N))
post.sd = sqrt(sum((y/N*(1-y/N)/N)/dnorm(qnorm(y/N))^2))
}else{
stop("Unknown method.")
}
area.delta = log(diff(pnorm(sort(delta.lim), mu_delta, sd_delta)))
log.bfr1 = log(diff(pnorm(sort(delta.lim), post.mean,post.sd))) - area.delta
log.bf01 = dnorm(0,post.mean,post.sd, log = TRUE) - dnorm(0, mu_delta, sd_delta, log = TRUE)
if(log){
return(log.bf01 - log.bfr1)
}else{
return(exp(log.bf01 - log.bfr1))
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment