# I try to build a prior distribution for effect sizes that closely resembles meta-analytical summaries of effect sizes. Based on nearly 34,000 effect sizes, Richard, Bond, and Stokes-Zoota (2003) conclude that about 30% of published effect sizes are smaller than d=0.20, about 76% are smaller than 0.63, and 95% are smaller than 1.15.
#
# There is a normal distribution that matches these percentages nearly perfectly: mu=0, sigma=0.55. This leads to theoretical percentages of 28%, 75%, and 96%:
# 1-pnorm(0.20, 0, 0.55, lower.tail=FALSE)*2 # 28% are smaller than 0.20
# 1-pnorm(0.63, 0, 0.55, lower.tail=FALSE)*2 # 75% are smaller than 0.63
# 1-pnorm(1.15, 0, 0.55, lower.tail=FALSE)*2 # 96% are smaller than 1.15
# However, we know that published effect sizes are overestimated due to publication bias (in the RP:P, and also in Franco, Malhotra, & Simonovits, 2015, the reported ES was about twice as large as the replication ES / the unreported ES, respectively).
# Therefore, we apply more shrinkage, using N(0, SD=0.3). With these settings, the shrunken original RP:P ES are exactly unbiased if the replication ES are taken as unbiased estimates of the true ES.
# Franco, A., Malhotra, N., & Simonovits, G. (2015). Underreporting in Psychology Experiments: Evidence From a Study Registry. Social Psychological and Personality Science, 7(1), 8–12. http://doi.org/10.1177/1948550615598377
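# For reference (illustration only, not part of the calibration above): the analogous percentages implied
# by the stronger N(0, SD=0.3) shrinkage prior:
# 1-pnorm(0.20, 0, 0.3, lower.tail=FALSE)*2 # ~50% are smaller than 0.20
# 1-pnorm(0.63, 0, 0.3, lower.tail=FALSE)*2 # ~96% are smaller than 0.63
# 1-pnorm(1.15, 0, 0.3, lower.tail=FALSE)*2 # ~100% are smaller than 1.15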
# ---------------------------------------------------------------------
# Shrinkage of effect size
# 1. Compute the likelihood of the ES
# 2. Weight it with the normal shrinkage prior N(0, prior.sd)
# --> posterior effect size
#' provide either d, t, r, or (as an approximation) the p-value
#' provide either df, or n1 and n2
# TODO: implement flexible prior
shrink <- function(d=NULL, r=NULL, t=NULL, p.value=NULL, df=NULL, n1=NULL, n2=NULL, prior.sd=0.3, plot=FALSE, ...) {
  if (sum(c(!is.null(d), !is.null(t), !is.null(p.value), !is.null(r))) > 1) stop("Please provide only one of d, r, t, or p.value!")
  if (sum(c(!is.null(d), !is.null(t), !is.null(p.value), !is.null(r))) == 0) stop("Please provide either d, r, t, or p.value!")
  if (is.null(df) & (is.null(n1) | is.null(n2))) stop("Please provide either df or n1 & n2!")
  library(dplyr)
  if (is.null(df)) df <- (n1 + n2) - 2
  # the d <-> t conversions assume a two-sample comparison with (approximately) equal group sizes
  if (!is.null(d)) {
    t <- (d*sqrt(df)) / 2
  }
  if (!is.null(t)) {
    d <- (2*t)/sqrt(df)
  }
  if (!is.null(p.value)) {
    # conversion formula for Hedges' g, see: https://books.google.de/books?id=GC42CwAAQBAJ&pg=PA100&lpg=PA100&dq=meta-analysis+convert+chi2+to+d+degrees+of+freedom&source=bl&ots=_c4EHEyRis&sig=yIaUDAbQ3RfPvLTfEE7-thpEXys&hl=de&sa=X&ved=0ahUKEwiyp-XntdnLAhWipnIKHab5CaA4ChDoAQhfMAg#v=onepage&q=meta-analysis%20convert%20chi2%20to%20d%20degrees%20of%20freedom&f=false
    d <- (qnorm(1-(p.value/2))*2)/sqrt(df)
    t <- (d*sqrt(df)) / 2
    warning("If effect sizes are derived from p-values, the results are only approximate.")
  }
  if (!is.null(r)) {
    d <- (2 * r)/sqrt(1 - r^2)
    t <- (d*sqrt(df)) / 2
  }
  # likelihood function of delta for these specific dfs:
  # density of the observed t under a noncentral t-distribution with ncp = delta*sqrt(N)/2, where N = df + 2
  LHf0 <- function(delta, t, df) exp(sum(log(
    suppressWarnings({dt(t, df, ncp=(delta*sqrt(df+2))/2)})
  )))
  LHf1 <- Vectorize(LHf0, vectorize.args = "delta")
  # normalize the likelihood so that it integrates to 1
  K <- 1/integrate(LHf1, -Inf, Inf, t=t, df=df)$value
  LHf <- function(delta, t, df) {K*LHf1(delta, t, df)}
  stepsize <- .01
  LH <- data.frame(delta = seq(-5, 5, by=stepsize)) # which deltas should be probed?
  LH <- LH %>% mutate(
    LH = LHf(delta, t=t, df=df), # the likelihood function for delta
    LH.dens = LH/(sum(LH)*stepsize), # normalize area to 1 to get a proper density
    # the shrinkage weighting function
    shrink.weight = dnorm(delta, sd=prior.sd), # must be a proper prior which integrates to 1
    shrunken.LH = LH*shrink.weight, # apply the shrinkage to the likelihood
    shrunken.LH.dens = shrunken.LH/(sum(shrunken.LH)*stepsize), # normalize area to 1 to get a proper density
    # BF computation
    BF.prior = dcauchy(delta, scale=sqrt(2)/2), # must be a proper prior which integrates to 1
    #BF.prior = dnorm(delta, 0, 1), # must be a proper prior which integrates to 1
    # compute the naive (i.e., face-value) posterior from the standard LH
    posterior.naive = LH*BF.prior,
    posterior.naive.dens = posterior.naive/(sum(posterior.naive)*stepsize), # normalize area to 1 to get a proper density
    # compute the shrunken posterior from the shrunken LH
    posterior = shrunken.LH*BF.prior,
    posterior.dens = posterior/(sum(posterior)*stepsize) # normalize area to 1 to get a proper density
  )
  if (plot==TRUE) {
    plot(LH$delta, LH$LH.dens, type="l", xlab="Effect size (Cohen's d)", ylab="Density", ylim=c(0, max(LH[, c(3, 4, 6)])))
    lines(LH$delta, LH$shrink.weight, lty="dotted", col="red")
    lines(LH$delta, LH$shrunken.LH.dens, lty="dashed", col="red")
    legend("topright", lty=c("solid", "dotted", "dashed"), col=c("black", "red", "red"), legend=c("Likelihood", "shrinkage prior", "posterior after shrinkage"))
  }
  # point estimates of the shrunken posterior: mode (MAP) and mean
  MAP <- LH$delta[which.max(LH$shrunken.LH.dens)]
  MEAN <- weighted.mean(LH$delta, LH$shrunken.LH.dens)
  # Bayes factors via the Savage-Dickey density ratio: prior density vs. posterior density at delta = 0
  return(list(
    d.naive = d,
    d.shrunk = MEAN,
    BF.naive = LH$BF.prior[LH$delta==0] / LH$posterior.naive.dens[LH$delta==0],
    BF.shrunk = LH$BF.prior[LH$delta==0] / LH$posterior.dens[LH$delta==0]
  ))
}
shrink(d=0.772, df=41)
shrink(r=0.36, df=41)
shrink(p.value=0.0177, df=41)
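# Sample sizes can be supplied instead of df (the function then uses df = n1 + n2 - 2);
# the group sizes below are an arbitrary illustration of a total N of 43 (df = 41).
# plot=TRUE additionally draws the likelihood, the shrinkage prior, and the shrunken posterior.
shrink(d=0.772, n1=22, n2=21, plot=TRUE)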
# ---------------------------------------------------------------------
# RV formula
RV9 <- function(CitationCount, lagYear, altmetric.percentile,
                d=NULL, r=NULL, t=NULL, p.value=NULL, df=NULL, n1=NULL, n2=NULL, sd=0.3, plot=FALSE) {
  academic.percentile <- 1-exp(-1.86 - 2*log(CitationCount) + 3.24*log(lagYear))
  IMPACT <- weighted.mean(c(academic.percentile, altmetric.percentile), w=c(2, 1), na.rm=TRUE)
  # note: shrink()'s argument is called prior.sd, so the user-specified sd must be passed to it explicitly
  EVIDENCE <- shrink(d=d, r=r, p.value=p.value, df=df, n1=n1, n2=n2, t=t, prior.sd=sd, plot=plot)
  # scale factor: top 1% papers require an evidence of 10 --> scale to RV of 1
  K <- 10/0.99
  RV <- K*(IMPACT/EVIDENCE$BF.shrunk)
  cat(paste0(
    "Impact = ", round(IMPACT*100, 2), "%\n",
    "Empirical effect size (Cohen's d) = ", round(EVIDENCE$d.naive, 2), "\n",
    "Shrunken effect size (Cohen's d) = ", round(EVIDENCE$d.shrunk, 2), "\n",
    "Evidence = ", round(EVIDENCE$BF.shrunk, 2), " (shrunken BF_10)\n",
    "Replication Value = ", round(RV, 2)
  ))
  invisible(RV)
}
# ---------------------------------------------------------------------
# EXAMPLES for using the RV formula
# ---------------------------------------------------------------------
# You can provide either a t value or a d value.
# F values with 1 numerator df can be converted to t values (all focused tests should have a single numerator df anyway): t.value <- sqrt(F)
# r values can be converted to d values: d <- (2*r) / sqrt(1-r^2)
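# Illustration of these conversions with hypothetical numbers (not from the papers below):
# sqrt(4.04)               # F(1, 39) = 4.04  -->  t ≈ 2.01
# (2*0.30)/sqrt(1-0.30^2)  # r = .30          -->  d ≈ 0.63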
# Alter & Oppenheimer
RV9(CitationCount=190, lagYear=9, altmetric.percentile=0.987, t=2.01, df=39)
# Stroop
RV9(CitationCount=7510, lagYear=80, altmetric.percentile=NA, t=15.33, df=98)
# Bargh, J. A., Chen, M., & Burrows, L. (1996). Automaticity of social behavior: Direct effects of trait construct and stereotype activation on action. Journal of Personality and Social Psychology, 71(2), 230–244. http://doi.org/10.1037//0022-3514.71.2.230
RV9(CitationCount=1339, lagYear=19, altmetric.percentile=0.98, t=2.86, df=28, sd=0.3)
RV9(CitationCount=1339, lagYear=19, altmetric.percentile=0.98, t=2.16, df=28, sd=0.3)
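# RV9 also accepts r, d, or (approximately) a p-value instead of t. For example, the last Bargh et al.
# test (t = 2.16, df = 28) corresponds to roughly p = .039 two-sided; note that going through the
# p-value only approximates the effect size (see the warning in shrink()):
RV9(CitationCount=1339, lagYear=19, altmetric.percentile=0.98, p.value=0.039, df=28, sd=0.3)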