Last active
August 29, 2015 14:26
-
-
Save richarddmorey/d1ba3d4ff63a968944c3 to your computer and use it in GitHub Desktop.
Utility code for "What Are the Odds? Modern Relevance and Bayes Factor Solutions for MacAlister's Problem from the 1881 Educational Times."
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##################### | |
# Code for computing probit model Bayes factors for | |
# 2x2 contingency table data | |
# Code by Richard D. Morey, July, 2015 | |
# For: | |
# "What Are the Odds? Modern Relevance and Bayes | |
# Factor Solutions for MacAlister's Problem from the 1881 Educational Times." | |
# Authors: Tahira Jamil, Maarten Marsman, Alexander Ly, | |
# Richard D. Morey, Eric-Jan Wagenmakers | |
##################### | |
logPost = Vectorize(function(mu, delta, y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), log = FALSE, logConst = 0, opt.pars = c(0,0)) | |
{ | |
delta = delta + opt.pars[2] | |
mu = mu + opt.pars[1] | |
if( (delta<min(delta.lim)) | (delta>max(delta.lim)) ){ | |
ans = -Inf | |
}else{ | |
area.delta = log(diff(pnorm(sort(delta.lim), mu_delta, sd_delta))) | |
p1 = pnorm(mu - delta/2) | |
p2 = pnorm(mu + delta/2) | |
ans = dbinom(y[1], N[1], p1, log = TRUE) + | |
dbinom(y[2], N[2], p2, log = TRUE) + | |
dnorm(mu, mu_mu, sd_mu, log = TRUE) + | |
dnorm(delta, mu_delta, sd_delta, log = TRUE) - area.delta - logConst | |
} | |
if(log){ | |
return(ans) | |
}else{ | |
return(exp(ans)) | |
} | |
},"mu") | |
logPost.pv = function(mu.and.delta, ...){ | |
logPost(mu.and.delta[1], mu.and.delta[2], ...) | |
} | |
grad.logPost = function(mu.and.delta, y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), log=NA, opt.pars = c(0,0)) | |
{ | |
delta = mu.and.delta[2] + opt.pars[2] | |
mu = mu.and.delta[1] + opt.pars[1] | |
if( (delta<min(delta.lim)) | (delta>max(delta.lim)) ){ | |
ans = NA | |
}else{ | |
g1 = y[1]*dnorm(mu - delta/2)/pnorm(mu - delta/2) - (N[1] - y[1])*dnorm(delta/2-mu)/pnorm(delta/2 - mu) + | |
y[2]*dnorm(mu + delta/2)/pnorm(mu + delta/2) - (N[2] - y[2])*dnorm(-delta/2-mu)/pnorm(-delta/2 - mu) + | |
-1/sd_mu^2 * (mu - mu_mu) | |
g2 = -y[1]*dnorm(mu - delta/2)/pnorm(mu - delta/2) + (N[1] - y[1])*dnorm(delta/2-mu)/pnorm(delta/2 - mu) + | |
y[2]*dnorm(mu + delta/2)/pnorm(mu + delta/2) - (N[2] - y[2])*dnorm(-delta/2-mu)/pnorm(-delta/2 - mu) + | |
-1/sd_delta^2 * (delta/2 - mu_delta) | |
ans = c(g1, g2/2) | |
} | |
return(ans) | |
} | |
margDelta = Vectorize(function(delta, y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), log = FALSE, logConst = 0, opt.pars = c(0,0)) | |
{ | |
delta = delta + opt.pars[2] | |
logDensAtMax = logPost.pv(opt.pars, y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, log = TRUE) | |
if( (delta<min(delta.lim)) | (delta>max(delta.lim)) ){ | |
log_ans = -Inf | |
}else{ | |
ans = integrate(logPost, -Inf, Inf, delta = delta, y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim, logConst = logDensAtMax, opt.pars = opt.pars * c(1,0)) | |
log_ans = log(ans[[1]]) - logConst + logDensAtMax | |
} | |
if(log){ | |
return(log_ans) | |
}else{ | |
return(exp(log_ans)) | |
} | |
},"delta") | |
normConst = function(y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), opt.pars = c(0,0)) | |
{ | |
ans = integrate(margDelta, min(delta.lim)-opt.pars[2], max(delta.lim)-opt.pars[2], y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, logConst = 0, delta.lim = delta.lim, opt.pars = opt.pars) | |
log_ans = log(ans[[1]]) | |
return(log_ans) | |
} | |
postMode.norestrict = function(y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1){ | |
sol = optim(c(0,0), fn = logPost.pv, gr = grad.logPost, method="BFGS", control = list(fnscale=-1), hessian = TRUE, y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, log = TRUE) | |
list(par=sol$par, hessian=sol$hessian) | |
} | |
bfProbit<-function(y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), log = TRUE, opt.pars = c(0,0)){ | |
area.delta = log(diff(pnorm(sort(delta.lim), mu_delta, sd_delta))) | |
logConst = normConst(y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim, opt.pars = opt.pars) | |
logDensAtMax = logPost.onep(opt.pars[1], y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, log = TRUE) | |
nullLike = log(integrate(logPost.onep, -Inf, Inf, y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, logConst = logDensAtMax, opt.pars = opt.pars[1])[[1]]) + logDensAtMax | |
log.bf = nullLike - logConst | |
if(log){ | |
return(log.bf) | |
}else{ | |
return(exp(log.bf)) | |
} | |
} | |
bfProbit.mcmc = function(y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), M = 10000){ | |
require(mcmc) | |
opt = postMode.norestrict(y, N, mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta) | |
samps = metrop(logPost.pv, initial = opt$par, scale = 2*sqrt(diag(-solve(opt$hessian))),nbatch=M, | |
y=y,N=N,mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta,log=TRUE) | |
area.delta = log(diff(pnorm(sort(delta.lim), mu_delta, sd_delta))) | |
log.bfr1 = log(mean(samps$batch[,2]>min(delta.lim) & samps$batch[,2]<max(delta.lim))) - area.delta | |
log.bf01 = dnorm(0,mean(samps$batch[,2]), sd(samps$batch[,2]), log = TRUE) - dnorm(0,mu_delta,sd_delta,log=TRUE) | |
return(list( | |
bf = exp(log.bf01 - log.bfr1), | |
logbf = log.bf01 - log.bfr1, | |
samples = samps | |
)) | |
} | |
bfProbit.importance = function(y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), log = TRUE, M = 10000){ | |
ml1 = norm.null.importance(y, N, mu_mu, sd_mu, M=M) | |
ml2 = norm.alt.importance(y, N, mu_mu, sd_mu, mu_delta, sd_delta, delta.lim = delta.lim, M = M) | |
return(c( | |
bf = ml1[1] / ml2[1], | |
logbf = log(ml1[1] / ml2[1]), | |
properror = sqrt((ml1[2]/ml1[1])^2 + (ml2[2]/ml2[1])^2), | |
error = sqrt((ml1[2]/ml1[1])^2 + (ml2[2]/ml2[1])^2) * (ml1[1] / ml2[1]) | |
)) | |
} | |
dps.p = Vectorize(function(p1, p2, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf)){ | |
mu = (qnorm(p1) + qnorm(p2))/2 | |
delta = (qnorm(p2) - qnorm(p1)) | |
if( (delta < min(delta.lim)) | (delta > max(delta.lim))){ | |
return(0) | |
}else{ | |
area.delta = log(diff(pnorm(sort(delta.lim), mu_delta, sd_delta))) | |
ans = dnorm(mu, mu_mu, sd_mu, log = TRUE) + | |
dnorm(delta, mu_delta, sd_delta, log = TRUE) - area.delta + | |
-dnorm(qnorm(p1), log=TRUE) - dnorm(qnorm(p2), log=TRUE) | |
} | |
return(exp(ans)) | |
},c("p1","p2")) | |
marg.prior.p2 = Vectorize(function(p2, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf)){ | |
integrate(dps.p, 0, 1, p2 = p2, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim)[[1]] | |
},"p2") | |
marg.prior.p1 = Vectorize(function(p1, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf)){ | |
dps2 = function(p2, p1, ...) dps.p(p1, p2, ...) | |
integrate(dps2, 0, 1, p1 = p1, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim)[[1]] | |
},"p1") | |
normP = function(mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf)){ | |
integrate(marg.prior.p2, 0, 1, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim)[[1]] | |
} | |
limitsP = function(delta.lim){ | |
c1 = min(delta.lim)*2 | |
c2 = max(delta.lim)*2 | |
p.lim = c(NA,NA) | |
if(c1>=0){ | |
p.lim[1] = 2*pnorm(c1) - 1 | |
}else{ | |
p.lim[1] = -(2*pnorm(-c1) - 1) | |
} | |
if(c2>=0){ | |
p.lim[2] = 2*pnorm(c2) - 1 | |
}else{ | |
p.lim[2] = -(2*pnorm(-c2) - 1) | |
} | |
if(max(p.lim)<0) p.lim[2] = 0 | |
if(min(p.lim)>0) p.lim[1] = 0 | |
return(p.lim) | |
} | |
dps = Vectorize(function(mn, df, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf)){ | |
if( (abs(df/2)>min(mn,1-mn)) ) return(0*mn) | |
if(df>=1) return(0*mn) | |
p1 = mn - .5*df | |
p2 = mn + .5*df | |
dps.p(p1, p2, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim) | |
},c("mn","df")) | |
marg.prior.df = Vectorize(function(df, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf)){ | |
integrate(dps, .0001, .9999, df = df, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim)[[1]] | |
},"df") | |
marg.prior.mn = Vectorize(function(mn, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf)){ | |
dps2 = function(df, mn, ...) dps(mn, df, ...) | |
integrate(dps2, limitsP(delta.lim)[1]+.0001,limitsP(delta.lim)[2]-.0001, mn = mn, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim)[[1]] | |
},"mn") | |
logPost.onep = Vectorize(function(mu, y, N, mu_mu = 0, sd_mu = 1, log = FALSE, logConst = 0, opt.pars = c(0,0)) | |
{ | |
mu = mu + opt.pars[1] | |
p = pnorm(mu) | |
ans = dbinom(y[1], N[1], p, log = TRUE) + | |
dbinom(y[2], N[2], p, log = TRUE) + | |
dnorm(mu, mu_mu, sd_mu, log = TRUE) - logConst | |
if(log){ | |
return(ans) | |
}else{ | |
return(exp(ans)) | |
} | |
}, "mu") | |
norm.alt.importance = function(y=y, N=N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), M = 1000){ | |
opt.par = c(mean(qnorm(y/N)), diff(qnorm(y/N))) | |
post.sd.delta = sqrt(sum((y/N*(1-y/N)/N)/dnorm(qnorm(y/N))^2)) | |
samp.mu = rnorm(M, opt.par[1], post.sd.delta) | |
samp.delta = rnorm(M, opt.par[2], post.sd.delta) | |
Z = apply(cbind(samp.mu, samp.delta), 1, logPost.pv, y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim, log = TRUE) + | |
-dnorm(samp.mu, opt.par[1], post.sd.delta, log = TRUE) + | |
-dnorm(samp.delta, opt.par[2], post.sd.delta, log = TRUE) | |
return(c(mean = mean(exp(Z)), sd.err = sd(exp(Z))/sqrt(M))) | |
} | |
norm.null.importance = function(y=y, N=N, mu_mu = 0, sd_mu = 1, M = 1000){ | |
phat = sum(y)/sum(N) | |
post.sd.delta = sqrt(sum((phat*(1-phat)/sum(N))/dnorm(qnorm(phat))^2)) | |
samp = rnorm(M, qnorm(phat), post.sd.delta) | |
Z = logPost.onep(samp, y, N, mu_mu = mu_mu, sd_mu = sd_mu, log = TRUE) - dnorm(samp, qnorm(phat), post.sd.delta, log = TRUE) | |
return(c(mean = mean(exp(Z)), sd.err = sd(exp(Z))/sqrt(M))) | |
} | |
bfProbit.laplace = function(y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), log = TRUE, method = "optim"){ | |
if(method == "optim"){ | |
opt = postMode.norestrict(y, N, mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta) | |
post.mean = opt$par[2] | |
post.sd = sqrt(-solve(opt$hessian)[2,2]) | |
}else if(method == "delta"){ | |
post.mean = diff(qnorm(y/N)) | |
post.sd = sqrt(sum((y/N*(1-y/N)/N)/dnorm(qnorm(y/N))^2)) | |
}else{ | |
stop("Unknown method.") | |
} | |
area.delta = log(diff(pnorm(sort(delta.lim), mu_delta, sd_delta))) | |
log.bfr1 = log(diff(pnorm(sort(delta.lim), post.mean,post.sd))) - area.delta | |
log.bf01 = dnorm(0,post.mean,post.sd, log = TRUE) - dnorm(0, mu_delta, sd_delta, log = TRUE) | |
if(log){ | |
return(log.bf01 - log.bfr1) | |
}else{ | |
return(exp(log.bf01 - log.bfr1)) | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment