p-curve vs trim-and-fill
library("meta") | |
library("metafor") | |
#EXPLANATION: | |
#This script is adapted from Simonsohn, Nelson, & Simmons, 2014 - the original (2- Fig 2 - File-drawering and effect size estimation.R) can be downloaded here: http://www.p-curve.com/Supplement/Rcode/ | |
#The original created simulations of many different meta-analyses - I stripped everything to look at a single meta-analysis | |
#SIMPLIFIED LOSS FUNCTION, USED THROUGHOUT BELOW | |
#see appendix in paper for explanations and more robust version (robust to user input with t<0 and/or p>.05) | |
loss=function(t_obs,df_obs,d_est) {
  ncp_est=sqrt((df_obs+2)/4)*d_est         #noncentrality parameter implied by the candidate d
  tc=qt(.975,df_obs)                       #critical t-value for p<.05 (two-sided)
  power_est=1-pt(tc,df_obs,ncp_est)        #power implied by the candidate d
  p_larger=pt(t_obs,df=df_obs,ncp=ncp_est) #cdf value of each observed t under the candidate d
  ppr=(p_larger-(1-power_est))/power_est   #pp-value: the cdf value conditional on being significant
  KSD=ks.test(ppr,punif)$statistic         #Kolmogorov-Smirnov distance between the pp-values and uniform
  return(KSD) }
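
#(added sketch, illustrative only: a quick sanity check of loss(). Draw 100
# significant t-values under a true d of .5 with n=50 per cell; the KS distance
# should be smaller when loss() is evaluated at the true d than at d=0.)
n_chk=50; df_chk=2*n_chk-2; ncp_chk=sqrt(n_chk/2)*.5
pow_chk=1-pt(qt(.975,df_chk),df_chk,ncp_chk)
t_chk=qt(runif(100,min=1-pow_chk,max=1),df=df_chk,ncp=ncp_chk)
loss(t_obs=t_chk,df_obs=df_chk,d_est=.5) #should be small
loss(t_obs=t_chk,df_obs=df_chk,d_est=0)  #should be clearly larger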
####################
set.seed(1)

#Define parameters (for the second analysis; for the first analysis I set n1 to 20 and ki to 1000)
n0<-20    # n0 is the smallest sample size considered
n1<-200   # n1 is the largest sample size in the set
d_mean<-0 # d_mean is the average of the true effect size being simulated (Cohen's d)
d_sd<-0.0 # d_sd is the standard deviation of the true effect size being simulated; to simulate studies from the exact same effect size we set d_sd=0
ki<-10    # ki is the number of p<.05 studies of a given sample size, e.g., if ki=100 and n0=20 and n1=25, there will be 100 n=20 studies, 100 n=21, 100 n=22, etc.
#1) Generate the sample sizes to be used
ni=rep(n0:n1,each=ki) #vector ni has one scalar per study: the first ki elements are n0, the next ki are n0+1, ...
df=2*ni-2             #degrees of freedom, is a vector
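#(added check: ni should contain ki copies of each n from n0 to n1)
table(ni)[1:3] #each count should equal ki
length(ni)     #should be ki*(n1-n0+1)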
#2) true d
di=rnorm(n=ki*(n1-n0+1),mean=d_mean,sd=d_sd)
#3) Generate significant t-tests
tc=qt(.975,df)                              #3.1 critical t-value that is significant with each sample size
ncpi=sqrt(ni/2)*di                          #3.2 noncentrality parameter used to draw from the noncentral t; a vector, as each n has a different ncp
poweri=1-pt(tc,df=df,ncp=ncpi)              #3.3 power is computed to use in the next line to generate only p<.05 t-values
rp=runif(n=(n1-n0+1)*ki,min=1-poweri,max=1) #3.4 randomly drawn uniform cdf values, restricted to the range that maps onto significant t-values under the noncentral t
# This latter step is a bit tricky if you are not familiar with noncentral distributions.
# Once we know power, we know that every t-value above the (1-power) quantile of the noncentral t
# will give p<.05 when evaluated with the central t, so we draw only from that range.
# Example: say that for a given n=20 & d=.5 --> power=33%. To draw significant t-values under d=.5, then,
# we compute the inverse cdf of randomly drawn uniform values between .67 and 1 (the biggest third):
# qt(runif(1,min=.67,max=1),df=38,ncp=sqrt(20/2)*.5)
t_publish=qt(p=rp,df=df,ncp=sqrt(ni/2)*di)  #t-values associated with those uniform cdf values
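#(added check: by construction t_publish >= qt(.975,df), so every two-sided
# central-t p-value should come out below .05)
all(2*(1-pt(t_publish,df=df))<.05) #should print TRUE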
#4) Find d-hat for p-curve
dp=optimize(loss,c(-.3,2),df_obs=df,t_obs=t_publish)$minimum #optimize misbehaves very far from the truth, so the search is constrained to (-.3,2)
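#(added, illustrative: visualize the loss over a grid of candidate d values;
# the p-curve estimate dp should sit at the minimum of this curve)
d_grid=seq(-.3,2,by=.05)
loss_grid=sapply(d_grid,function(d) loss(t_obs=t_publish,df_obs=df,d_est=d))
plot(d_grid,loss_grid,type="l",xlab="candidate d",ylab="KS distance")
abline(v=dp,lty=2)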
#5) Convert t-values into d-values using the formula d=2*t/sqrt(N) (shown in the main paper, see footnote)
d_publish=(2*t_publish)/sqrt(df+2) #df+2 equals the total N (=2n)
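#(added sketch: numeric check of d=2*t/sqrt(N) on one simulated equal-n study;
# assumes a pooled-variance two-sample t-test)
x=rnorm(50); y=rnorm(50,mean=.5)
t_one=t.test(x,y,var.equal=TRUE)$statistic
d_one=(mean(y)-mean(x))/sqrt((var(x)+var(y))/2) #pooled SD with equal n
c(formula=as.numeric(abs(2*t_one/sqrt(100))),direct=abs(d_one)) #the two should match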
#6) Meta-analysis estimates
#6.1) Compute the variance of each d-value in the set.
#The variance of a standardized coefficient depends only on sample size and the coefficient.
#I use the formula provided in the meta-analysis book by Hedges and Olkin (1985), "Statistical Methods for Meta-Analysis", page 80, Equation 8:
#   var(g) = 1/n~ + (g**2)/(2*(N-3.94))
# Where:
# g is what I refer to as d here, the standardized difference of means
# n~ is n1*n2/(n1+n2), where n1,n2 are the two sample sizes; if n1=n2=n then n~ = n**2/(2*n) = n/2 [sorry, ~ is not "approximately", but a tilde]
# N is the total n; if n1=n2=n then N=2n, so 1/n~ = 2/n
vard_publish= 2/ni +(d_publish**2)/(2*(2*ni-3.94)) #this is var(d); 2*ni is the total N in the formula above
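#(added cross-check: the large-sample version of the same variance,
# var(d) = 1/n~ + d^2/(2*N), drops the 3.94 small-sample term; with these
# sample sizes the two should agree closely)
vard_largeN=2/ni+(d_publish**2)/(2*(2*ni))
summary(vard_publish-vard_largeN) #differences should be tiny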
#6.2) Run the standard meta-analysis routine
naive=rma(yi=d_publish,vi=vard_publish,method="FE") #once again, you need to have loaded metafor to run rma()
naive
#this will run a fixed-effect meta-analysis (hence method="FE"),
#using the d_publish vector as the outcome and vard_publish as its variance.
#The result, naive, contains many values, not just the point estimate; we need that full set
#for the trim-and-fill correction.
#6.3) Apply the trim-and-fill correction to the estimate
tf=trimfill(naive)
#I've added the funnel plot (DL)
funnel(naive)
tf
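#(added, for convenience: the three point estimates side by side; with true d=0
# and only significant studies published, the naive and trim-and-fill estimates
# remain biased upward while the p-curve estimate dp should be near 0)
round(c(pcurve=dp,naive=as.numeric(coef(naive)),trimfill=as.numeric(coef(tf))),3)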
#6.4) PET analysis (added by DL)
#PET regresses each d on its standard error, weighted by inverse variance;
#the intercept estimates the effect size of a hypothetical study with SE=0
sed_publish=sqrt(vard_publish)
SE=lm(d_publish~sed_publish, weights = 1/vard_publish)
summary(SE)
confint(SE)
#6.5) PEESE analysis
#PEESE regresses each d on its variance instead of its standard error;
#again the intercept is the bias-corrected estimate
V<-lm(d_publish~vard_publish, weights = 1/vard_publish)
summary(V)
confint(V)
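#6.6) (added sketch: the conditional PET-PEESE rule of Stanley & Doucouliagos;
# if the PET intercept differs significantly from zero, report the PEESE
# intercept as the corrected estimate, otherwise the PET intercept. The .05
# two-tailed cutoff used here is one common implementation choice, not the only one.)
pet_p=summary(SE)$coefficients["(Intercept)","Pr(>|t|)"]
pet_peese=if(pet_p<.05) coef(V)["(Intercept)"] else coef(SE)["(Intercept)"]
pet_peese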