Last active
August 29, 2015 14:26
-
-
Save richarddmorey/d1ba3d4ff63a968944c3 to your computer and use it in GitHub Desktop.
Utility code for "What Are the Odds? Modern Relevance and Bayes Factor Solutions for MacAlister's Problem from the 1881 Educational Times."
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ##################### | |
| # Code for computing probit model Bayes factors for | |
| # 2x2 contingency table data | |
| # Code by Richard D. Morey, July, 2015 | |
| # For: | |
| # "What Are the Odds? Modern Relevance and Bayes | |
| # Factor Solutions for MacAlister's Problem from the 1881 Educational Times." | |
| # Authors: Tahira Jamil, Maarten Marsman, Alexander Ly, | |
| # Richard D. Morey, Eric-Jan Wagenmakers | |
| ##################### | |
| logPost = Vectorize(function(mu, delta, y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), log = FALSE, logConst = 0, opt.pars = c(0,0)) | |
| { | |
| delta = delta + opt.pars[2] | |
| mu = mu + opt.pars[1] | |
| if( (delta<min(delta.lim)) | (delta>max(delta.lim)) ){ | |
| ans = -Inf | |
| }else{ | |
| area.delta = log(diff(pnorm(sort(delta.lim), mu_delta, sd_delta))) | |
| p1 = pnorm(mu - delta/2) | |
| p2 = pnorm(mu + delta/2) | |
| ans = dbinom(y[1], N[1], p1, log = TRUE) + | |
| dbinom(y[2], N[2], p2, log = TRUE) + | |
| dnorm(mu, mu_mu, sd_mu, log = TRUE) + | |
| dnorm(delta, mu_delta, sd_delta, log = TRUE) - area.delta - logConst | |
| } | |
| if(log){ | |
| return(ans) | |
| }else{ | |
| return(exp(ans)) | |
| } | |
| },"mu") | |
| logPost.pv = function(mu.and.delta, ...){ | |
| logPost(mu.and.delta[1], mu.and.delta[2], ...) | |
| } | |
| grad.logPost = function(mu.and.delta, y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), log=NA, opt.pars = c(0,0)) | |
| { | |
| delta = mu.and.delta[2] + opt.pars[2] | |
| mu = mu.and.delta[1] + opt.pars[1] | |
| if( (delta<min(delta.lim)) | (delta>max(delta.lim)) ){ | |
| ans = NA | |
| }else{ | |
| g1 = y[1]*dnorm(mu - delta/2)/pnorm(mu - delta/2) - (N[1] - y[1])*dnorm(delta/2-mu)/pnorm(delta/2 - mu) + | |
| y[2]*dnorm(mu + delta/2)/pnorm(mu + delta/2) - (N[2] - y[2])*dnorm(-delta/2-mu)/pnorm(-delta/2 - mu) + | |
| -1/sd_mu^2 * (mu - mu_mu) | |
| g2 = -y[1]*dnorm(mu - delta/2)/pnorm(mu - delta/2) + (N[1] - y[1])*dnorm(delta/2-mu)/pnorm(delta/2 - mu) + | |
| y[2]*dnorm(mu + delta/2)/pnorm(mu + delta/2) - (N[2] - y[2])*dnorm(-delta/2-mu)/pnorm(-delta/2 - mu) + | |
| -1/sd_delta^2 * (delta/2 - mu_delta) | |
| ans = c(g1, g2/2) | |
| } | |
| return(ans) | |
| } | |
| margDelta = Vectorize(function(delta, y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), log = FALSE, logConst = 0, opt.pars = c(0,0)) | |
| { | |
| delta = delta + opt.pars[2] | |
| logDensAtMax = logPost.pv(opt.pars, y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, log = TRUE) | |
| if( (delta<min(delta.lim)) | (delta>max(delta.lim)) ){ | |
| log_ans = -Inf | |
| }else{ | |
| ans = integrate(logPost, -Inf, Inf, delta = delta, y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim, logConst = logDensAtMax, opt.pars = opt.pars * c(1,0)) | |
| log_ans = log(ans[[1]]) - logConst + logDensAtMax | |
| } | |
| if(log){ | |
| return(log_ans) | |
| }else{ | |
| return(exp(log_ans)) | |
| } | |
| },"delta") | |
| normConst = function(y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), opt.pars = c(0,0)) | |
| { | |
| ans = integrate(margDelta, min(delta.lim)-opt.pars[2], max(delta.lim)-opt.pars[2], y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, logConst = 0, delta.lim = delta.lim, opt.pars = opt.pars) | |
| log_ans = log(ans[[1]]) | |
| return(log_ans) | |
| } | |
| postMode.norestrict = function(y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1){ | |
| sol = optim(c(0,0), fn = logPost.pv, gr = grad.logPost, method="BFGS", control = list(fnscale=-1), hessian = TRUE, y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, log = TRUE) | |
| list(par=sol$par, hessian=sol$hessian) | |
| } | |
| bfProbit<-function(y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), log = TRUE, opt.pars = c(0,0)){ | |
| area.delta = log(diff(pnorm(sort(delta.lim), mu_delta, sd_delta))) | |
| logConst = normConst(y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim, opt.pars = opt.pars) | |
| logDensAtMax = logPost.onep(opt.pars[1], y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, log = TRUE) | |
| nullLike = log(integrate(logPost.onep, -Inf, Inf, y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, logConst = logDensAtMax, opt.pars = opt.pars[1])[[1]]) + logDensAtMax | |
| log.bf = nullLike - logConst | |
| if(log){ | |
| return(log.bf) | |
| }else{ | |
| return(exp(log.bf)) | |
| } | |
| } | |
| bfProbit.mcmc = function(y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), M = 10000){ | |
| require(mcmc) | |
| opt = postMode.norestrict(y, N, mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta) | |
| samps = metrop(logPost.pv, initial = opt$par, scale = 2*sqrt(diag(-solve(opt$hessian))),nbatch=M, | |
| y=y,N=N,mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta,log=TRUE) | |
| area.delta = log(diff(pnorm(sort(delta.lim), mu_delta, sd_delta))) | |
| log.bfr1 = log(mean(samps$batch[,2]>min(delta.lim) & samps$batch[,2]<max(delta.lim))) - area.delta | |
| log.bf01 = dnorm(0,mean(samps$batch[,2]), sd(samps$batch[,2]), log = TRUE) - dnorm(0,mu_delta,sd_delta,log=TRUE) | |
| return(list( | |
| bf = exp(log.bf01 - log.bfr1), | |
| logbf = log.bf01 - log.bfr1, | |
| samples = samps | |
| )) | |
| } | |
| bfProbit.importance = function(y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), log = TRUE, M = 10000){ | |
| ml1 = norm.null.importance(y, N, mu_mu, sd_mu, M=M) | |
| ml2 = norm.alt.importance(y, N, mu_mu, sd_mu, mu_delta, sd_delta, delta.lim = delta.lim, M = M) | |
| return(c( | |
| bf = ml1[1] / ml2[1], | |
| logbf = log(ml1[1] / ml2[1]), | |
| properror = sqrt((ml1[2]/ml1[1])^2 + (ml2[2]/ml2[1])^2), | |
| error = sqrt((ml1[2]/ml1[1])^2 + (ml2[2]/ml2[1])^2) * (ml1[1] / ml2[1]) | |
| )) | |
| } | |
| dps.p = Vectorize(function(p1, p2, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf)){ | |
| mu = (qnorm(p1) + qnorm(p2))/2 | |
| delta = (qnorm(p2) - qnorm(p1)) | |
| if( (delta < min(delta.lim)) | (delta > max(delta.lim))){ | |
| return(0) | |
| }else{ | |
| area.delta = log(diff(pnorm(sort(delta.lim), mu_delta, sd_delta))) | |
| ans = dnorm(mu, mu_mu, sd_mu, log = TRUE) + | |
| dnorm(delta, mu_delta, sd_delta, log = TRUE) - area.delta + | |
| -dnorm(qnorm(p1), log=TRUE) - dnorm(qnorm(p2), log=TRUE) | |
| } | |
| return(exp(ans)) | |
| },c("p1","p2")) | |
| marg.prior.p2 = Vectorize(function(p2, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf)){ | |
| integrate(dps.p, 0, 1, p2 = p2, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim)[[1]] | |
| },"p2") | |
| marg.prior.p1 = Vectorize(function(p1, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf)){ | |
| dps2 = function(p2, p1, ...) dps.p(p1, p2, ...) | |
| integrate(dps2, 0, 1, p1 = p1, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim)[[1]] | |
| },"p1") | |
| normP = function(mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf)){ | |
| integrate(marg.prior.p2, 0, 1, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim)[[1]] | |
| } | |
| limitsP = function(delta.lim){ | |
| c1 = min(delta.lim)*2 | |
| c2 = max(delta.lim)*2 | |
| p.lim = c(NA,NA) | |
| if(c1>=0){ | |
| p.lim[1] = 2*pnorm(c1) - 1 | |
| }else{ | |
| p.lim[1] = -(2*pnorm(-c1) - 1) | |
| } | |
| if(c2>=0){ | |
| p.lim[2] = 2*pnorm(c2) - 1 | |
| }else{ | |
| p.lim[2] = -(2*pnorm(-c2) - 1) | |
| } | |
| if(max(p.lim)<0) p.lim[2] = 0 | |
| if(min(p.lim)>0) p.lim[1] = 0 | |
| return(p.lim) | |
| } | |
| dps = Vectorize(function(mn, df, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf)){ | |
| if( (abs(df/2)>min(mn,1-mn)) ) return(0*mn) | |
| if(df>=1) return(0*mn) | |
| p1 = mn - .5*df | |
| p2 = mn + .5*df | |
| dps.p(p1, p2, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim) | |
| },c("mn","df")) | |
| marg.prior.df = Vectorize(function(df, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf)){ | |
| integrate(dps, .0001, .9999, df = df, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim)[[1]] | |
| },"df") | |
| marg.prior.mn = Vectorize(function(mn, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf)){ | |
| dps2 = function(df, mn, ...) dps(mn, df, ...) | |
| integrate(dps2, limitsP(delta.lim)[1]+.0001,limitsP(delta.lim)[2]-.0001, mn = mn, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim)[[1]] | |
| },"mn") | |
| logPost.onep = Vectorize(function(mu, y, N, mu_mu = 0, sd_mu = 1, log = FALSE, logConst = 0, opt.pars = c(0,0)) | |
| { | |
| mu = mu + opt.pars[1] | |
| p = pnorm(mu) | |
| ans = dbinom(y[1], N[1], p, log = TRUE) + | |
| dbinom(y[2], N[2], p, log = TRUE) + | |
| dnorm(mu, mu_mu, sd_mu, log = TRUE) - logConst | |
| if(log){ | |
| return(ans) | |
| }else{ | |
| return(exp(ans)) | |
| } | |
| }, "mu") | |
| norm.alt.importance = function(y=y, N=N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), M = 1000){ | |
| opt.par = c(mean(qnorm(y/N)), diff(qnorm(y/N))) | |
| post.sd.delta = sqrt(sum((y/N*(1-y/N)/N)/dnorm(qnorm(y/N))^2)) | |
| samp.mu = rnorm(M, opt.par[1], post.sd.delta) | |
| samp.delta = rnorm(M, opt.par[2], post.sd.delta) | |
| Z = apply(cbind(samp.mu, samp.delta), 1, logPost.pv, y=y, N=N, mu_mu = mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta, delta.lim = delta.lim, log = TRUE) + | |
| -dnorm(samp.mu, opt.par[1], post.sd.delta, log = TRUE) + | |
| -dnorm(samp.delta, opt.par[2], post.sd.delta, log = TRUE) | |
| return(c(mean = mean(exp(Z)), sd.err = sd(exp(Z))/sqrt(M))) | |
| } | |
| norm.null.importance = function(y=y, N=N, mu_mu = 0, sd_mu = 1, M = 1000){ | |
| phat = sum(y)/sum(N) | |
| post.sd.delta = sqrt(sum((phat*(1-phat)/sum(N))/dnorm(qnorm(phat))^2)) | |
| samp = rnorm(M, qnorm(phat), post.sd.delta) | |
| Z = logPost.onep(samp, y, N, mu_mu = mu_mu, sd_mu = sd_mu, log = TRUE) - dnorm(samp, qnorm(phat), post.sd.delta, log = TRUE) | |
| return(c(mean = mean(exp(Z)), sd.err = sd(exp(Z))/sqrt(M))) | |
| } | |
| bfProbit.laplace = function(y, N, mu_mu = 0, sd_mu = 1, mu_delta = 0, sd_delta = 1, delta.lim = c(-Inf, Inf), log = TRUE, method = "optim"){ | |
| if(method == "optim"){ | |
| opt = postMode.norestrict(y, N, mu_mu, sd_mu = sd_mu, mu_delta = mu_delta, sd_delta = sd_delta) | |
| post.mean = opt$par[2] | |
| post.sd = sqrt(-solve(opt$hessian)[2,2]) | |
| }else if(method == "delta"){ | |
| post.mean = diff(qnorm(y/N)) | |
| post.sd = sqrt(sum((y/N*(1-y/N)/N)/dnorm(qnorm(y/N))^2)) | |
| }else{ | |
| stop("Unknown method.") | |
| } | |
| area.delta = log(diff(pnorm(sort(delta.lim), mu_delta, sd_delta))) | |
| log.bfr1 = log(diff(pnorm(sort(delta.lim), post.mean,post.sd))) - area.delta | |
| log.bf01 = dnorm(0,post.mean,post.sd, log = TRUE) - dnorm(0, mu_delta, sd_delta, log = TRUE) | |
| if(log){ | |
| return(log.bf01 - log.bfr1) | |
| }else{ | |
| return(exp(log.bf01 - log.bfr1)) | |
| } | |
| } | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment