library('coda')
library('BayesFactor')

set.seed(1774) # Laplace easter egg :)

contains_zero <- function(interval) {
  return(interval[1] < 0 && interval[2] > 0)
}
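
# Quick sketch of the helper above (hypothetical values, not part of the simulation):
# contains_zero(c(-1, 2))  # TRUE  -- interval straddles zero
# contains_zero(c(1, 2))   # FALSE -- interval excludes zero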

sim <- function(diff = 0, n = 500) {
  m <- 100
  sd <- 15
  x <- rnorm(n, mean = m, sd = sd)
  y <- rnorm(n, mean = m + diff, sd = sd)
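  # Effect size: with the diff = 10 used in the beta-error runs below and sd = 15,
  # Cohen's d = diff / sd = 2/3, the value the power calculations further down refer to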
  # delta will be negative, since y has the larger mean when diff > 0
  freq_test <- t.test(x, y)
  bayes_test <- ttestBF(x, y)
  bf <- as.vector(bayes_test)
  samples <- posterior(bayes_test, iterations = 1000, progress = FALSE)
  cred_interval <- as.vector(HPDinterval(samples)[4, ]) # HPD interval around delta, the standardized difference
  return(list('bf' = bf, 'cred_interval' = cred_interval,
              'p' = freq_test$p.value, 'conf_interval' = freq_test$conf.int))
}
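
# Rough usage sketch for sim() (left commented out so it does not disturb the
# seeded RNG stream that the result summaries below were produced with):
# one_run <- sim(diff = 10)
# one_run$bf             # Bayes factor in favour of H1
# one_run$cred_interval  # 95% HPD interval for delta
# one_run$p              # two-sided p-value from t.test()
# one_run$conf_interval  # 95% confidence interval for the mean difference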

run <- function(diff = 0, n = 500, times = 2000, bf_cutoff = 3, alpha = .05) {
  p_err <- 0
  bf_err <- 0
  cred_err <- 0
  conf_err <- 0
  freq_contr <- 0
  bayes_contr <- 0

  for (i in 1:times) {
    simulation <- sim(diff, n)
    p <- simulation$p
    bf <- simulation$bf
    cred0 <- contains_zero(simulation$cred_interval)
    conf0 <- contains_zero(simulation$conf_interval)

    # if there is no difference, check whether a false positive occurred [bf >= cut-off; cred0 == FALSE]
    # if there is a difference, check whether a false negative occurred [bf < cut-off; cred0 == TRUE]
    p_erred <- if (diff == 0) p <= alpha else p > alpha
    bf_erred <- if (diff == 0) bf >= bf_cutoff else bf < bf_cutoff
    cred_erred <- if (diff == 0) !cred0 else cred0
    conf_erred <- if (diff == 0) !conf0 else conf0

    # count errors
    if (p_erred) p_err <- p_err + 1
    if (bf_erred) bf_err <- bf_err + 1
    if (cred_erred) cred_err <- cred_err + 1
    if (conf_erred) conf_err <- conf_err + 1

    # count contradictory results (test and interval disagree)
    if (p_erred != conf_erred)
      freq_contr <- freq_contr + 1
    if (bf_erred != cred_erred)
      bayes_contr <- bayes_contr + 1
  }

  res <- list('bf' = bf_err, 'cred' = cred_err, 'p' = p_err, 'conf' = conf_err,
              'bayes_contr' = bayes_contr, 'freq_contr' = freq_contr)
  return(lapply(res, function(e) e / times))
}

# RESULTS:
# Bayes: credibility intervals seem to be biased towards H1 (Bayes factors are not); that's because
# they don't control the Type I error rate.
# Freq: p-values and confidence intervals reach equivalent conclusions (in this case, trivially so).
# The simulations compare the results of model comparison via Bayes factors / p-values with
# interval estimation. A Bayes factor >= 3 is taken as support for H1, as suggested by Jeffreys;
# a p-value <= .05 is taken as *support* for H1, as suggested by Fisher.
# When an interval does not include 0, this is taken as support for H1.
# n = 500 per group, d = 2/3 ... in frequentist terms this amounts to a power of essentially 100%
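# Power check for the claim above (a sketch; requires the 'pwr' package, which this
# script does not load, so it is left commented out):
# library('pwr')
# pwr.t.test(n = 500, d = 2/3)$power  # effectively 1
# pwr.t.test(n = 37, d = 2/3)$power   # ~ 0.80, the 80%-power case used further down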
alpha <- run() # check false positives
# beta <- run(diff = 10) # check false negatives; with samples this large, every method always makes the right call

# N = 500
# no true difference (alpha / false positive errors):
# bf ~ .5%, cred ~ 5.1%, p ~ 5.15%, conf ~ 5.15%
# http://www.ejwagenmakers.com/submitted/AnotherStatisticalParadox.pdf argues that interval
# estimation is biased in favour of H1; we see this for credibility intervals,
# but not for confidence intervals (the paper also notes that p-values and
# confidence intervals are formally equivalent, which explains the latter)
# contradictions (BF / p-value differ from the intervals):
# bayes ~ 4.6%, freq ~ 0%
# true difference (beta / false negative errors): none; with samples this large, even p-values always detect the effect

# n = 37 per group, d = 2/3 ... frequentist power is ~80%; see pwr.t.test(n = 37, d = 2/3) from library('pwr')
alpha2 <- run(n = 37)
beta2 <- run(diff = 10, n = 37)

# N = 37
# no true difference (alpha / false positive errors):
# bf ~ 2%, cred ~ 4.55%, p ~ 5.65%, conf ~ 5.65%
# roughly the same pattern emerges as with ~100% power,
# although the gap between the Bayes factor and the credibility interval is smaller here
# contradictions: bayes ~ 2.55%, freq ~ 0%
# true difference (beta / false negative errors):
# bf ~ 33%, cred ~ 22.45%, p ~ 18.1%, conf ~ 18.1%
# in Bayesian statistics, uncertainty *smacks you in the face*, so I'm not sure how meaningful this test is
# contradictions: bayes ~ 10.55%, freq ~ 0%

# NOTE:
# p-values *suck*; see http://www.ejwagenmakers.com/2008/BayesFreqBook.pdf
# Here we use the Bayes factor comparison in a rather mindless way (Gigerenzer, 2004); it should be noted
# that Jeffreys described a Bayes factor as high as 5.33 as "odds that would interest a gambler, but would be hardly worth
# more than a passing mention in a scientific paper." (quoted from http://www.ejwagenmakers.com/inpress/VandekerckhoveEtAlinpress.pdf)