p-curve vs trim-and-fill
library("meta") | |
library("metafor") | |
#EXPLANATION: | |
#This script is adapted from Simonsohn, Nelson, & Simmons, 2014 - the original (2- Fig 2 - File-drawering and effect size estimation.R) can be downloaded here: http://www.p-curve.com/Supplement/Rcode/ | |
#The original created simulations of many different meta-analyses - I stripped everything to look at a single meta-analysis | |
#SIMPLIFIED LOSS FUNCTION, USED THROUGHOUT BELOW | |
#see appendix in paper for explanations and more robust version (robust to user input with t<0 and/or p>.05) | |
loss=function(t_obs,df_obs,d_est) {
  ncp_est=sqrt((df_obs+2)/4)*d_est         #noncentrality parameter implied by the candidate d
  tc=qt(.975,df_obs)                       #critical t-value for p<.05 (two-sided)
  power_est=1-pt(tc,df_obs,ncp_est)        #power implied by the candidate d
  p_larger=pt(t_obs,df=df_obs,ncp=ncp_est) #cdf value of each observed t under the candidate d
  ppr=(p_larger-(1-power_est))/power_est   #pp-value: the cdf value conditional on being significant
  KSD=ks.test(ppr,punif)$statistic         #Kolmogorov-Smirnov distance between the pp-values and uniform
  return(KSD) }
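
#(added sketch, illustrative only: a quick sanity check of loss(). Draw 100
# significant t-values under a true d of .5 with n=50 per cell; the KS distance
# should be smaller when loss() is evaluated at the true d than at d=0.)
n_chk=50; df_chk=2*n_chk-2; ncp_chk=sqrt(n_chk/2)*.5
pow_chk=1-pt(qt(.975,df_chk),df_chk,ncp_chk)
t_chk=qt(runif(100,min=1-pow_chk,max=1),df=df_chk,ncp=ncp_chk)
loss(t_obs=t_chk,df_obs=df_chk,d_est=.5) #should be small
loss(t_obs=t_chk,df_obs=df_chk,d_est=0)  #should be clearly larger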
####################
set.seed(1)

#Define parameters (for the second analysis; for the first analysis I set n1 to 20 and ki to 1000)
n0<-20    # n0 is the smallest sample size considered
n1<-200   # n1 is the largest sample size in the set
d_mean<-0 # d_mean is the average of the true effect size being simulated (Cohen's d)
d_sd<-0.0 # d_sd is the standard deviation of the true effect size being simulated; to simulate studies from the exact same effect size we set d_sd=0
ki<-10    # ki is the number of p<.05 studies of a given sample size, e.g., if ki=100 and n0=20 and n1=25, there will be 100 n=20 studies, 100 n=21, 100 n=22, etc.
#1) Generate the sample sizes to be used
ni=rep(n0:n1,each=ki) #vector ni has one scalar per study: the first ki elements are n0, the next ki are n0+1, ...
df=2*ni-2             #degrees of freedom, is a vector
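#(added check: ni should contain ki copies of each n from n0 to n1)
table(ni)[1:3] #each count should equal ki
length(ni)     #should be ki*(n1-n0+1)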
#2) true d
di=rnorm(n=ki*(n1-n0+1),mean=d_mean,sd=d_sd)
#3) Generate significant t-tests
tc=qt(.975,df)                              #3.1 critical t-value that is significant with each sample size
ncpi=sqrt(ni/2)*di                          #3.2 noncentrality parameter used to draw from the noncentral t; a vector, as each n has a different ncp
poweri=1-pt(tc,df=df,ncp=ncpi)              #3.3 power is computed to use in the next line to generate only p<.05 t-values
rp=runif(n=(n1-n0+1)*ki,min=1-poweri,max=1) #3.4 randomly drawn uniform cdf values, restricted to the range that maps onto significant t-values under the noncentral t
# This latter step is a bit tricky if you are not familiar with noncentral distributions.
# Once we know power, we know that every t-value above the (1-power) quantile of the noncentral t
# will give p<.05 when evaluated with the central t, so we draw only from that range.
# Example: say that for a given n=20 & d=.5 --> power=33%. To draw significant t-values under d=.5, then,
# we compute the inverse cdf of randomly drawn uniform values between .67 and 1 (the biggest third):
# qt(runif(1,min=.67,max=1),df=38,ncp=sqrt(20/2)*.5)
t_publish=qt(p=rp,df=df,ncp=sqrt(ni/2)*di)  #t-values associated with those uniform cdf values
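#(added check: by construction t_publish >= qt(.975,df), so every two-sided
# central-t p-value should come out below .05)
all(2*(1-pt(t_publish,df=df))<.05) #should print TRUE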
#4) Find d-hat for p-curve
dp=optimize(loss,c(-.3,2),df_obs=df,t_obs=t_publish)$minimum #optimize misbehaves very far from the truth, so the search is constrained to (-.3,2)
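#(added, illustrative: visualize the loss over a grid of candidate d values;
# the p-curve estimate dp should sit at the minimum of this curve)
d_grid=seq(-.3,2,by=.05)
loss_grid=sapply(d_grid,function(d) loss(t_obs=t_publish,df_obs=df,d_est=d))
plot(d_grid,loss_grid,type="l",xlab="candidate d",ylab="KS distance")
abline(v=dp,lty=2)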
#5) Convert t-values into d-values using the formula d=2*t/sqrt(N) (shown in the main paper, see footnote)
d_publish=(2*t_publish)/sqrt(df+2) #df+2 equals the total N (=2n)
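#(added sketch: numeric check of d=2*t/sqrt(N) on one simulated equal-n study;
# assumes a pooled-variance two-sample t-test)
x=rnorm(50); y=rnorm(50,mean=.5)
t_one=t.test(x,y,var.equal=TRUE)$statistic
d_one=(mean(y)-mean(x))/sqrt((var(x)+var(y))/2) #pooled SD with equal n
c(formula=as.numeric(abs(2*t_one/sqrt(100))),direct=abs(d_one)) #the two should match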
#6) Meta-analysis estimates
#6.1) Compute the variance of each d-value in the set.
#The variance of a standardized coefficient depends only on sample size and the coefficient.
#I use the formula provided in the meta-analysis book by Hedges and Olkin (1985), "Statistical Methods for Meta-Analysis", page 80, Equation 8:
#   var(g) = 1/n~ + (g**2)/(2*(N-3.94))
# Where:
# g is what I refer to as d here, the standardized difference of means
# n~ is n1*n2/(n1+n2), where n1,n2 are the two sample sizes; if n1=n2=n then n~ = n**2/(2*n) = n/2 [sorry, ~ is not "approximately", but a tilde]
# N is the total n; if n1=n2=n then N=2n, so 1/n~ = 2/n
vard_publish= 2/ni +(d_publish**2)/(2*(2*ni-3.94)) #this is var(d); 2*ni is the total N in the formula above
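#(added cross-check: the large-sample version of the same variance,
# var(d) = 1/n~ + d^2/(2*N), drops the 3.94 small-sample term; with these
# sample sizes the two should agree closely)
vard_largeN=2/ni+(d_publish**2)/(2*(2*ni))
summary(vard_publish-vard_largeN) #differences should be tiny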
#6.2) Run the standard meta-analysis routine
naive=rma(yi=d_publish,vi=vard_publish,method="FE") #once again, you need to have loaded metafor to run rma()
naive
#this will run a fixed-effect meta-analysis (hence method="FE"),
#using the d_publish vector as the outcome and vard_publish as its variance.
#The result, naive, contains many values, not just the point estimate; we need that full set
#for the trim-and-fill correction.
#6.3) Apply the trim-and-fill correction to the estimate
tf=trimfill(naive)
#I've added the funnel plot (DL)
funnel(naive)
tf
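#(added, for convenience: the three point estimates side by side; with true d=0
# and only significant studies published, the naive and trim-and-fill estimates
# remain biased upward while the p-curve estimate dp should be near 0)
round(c(pcurve=dp,naive=as.numeric(coef(naive)),trimfill=as.numeric(coef(tf))),3)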
#6.4) PET analysis (added by DL)
#PET regresses each d on its standard error, weighted by inverse variance;
#the intercept estimates the effect size of a hypothetical study with SE=0
sed_publish=sqrt(vard_publish)
SE=lm(d_publish~sed_publish, weights = 1/vard_publish)
summary(SE)
confint(SE)
#6.5) PEESE analysis
#PEESE regresses each d on its variance instead of its standard error;
#again the intercept is the bias-corrected estimate
V<-lm(d_publish~vard_publish, weights = 1/vard_publish)
summary(V)
confint(V)
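#6.6) (added sketch: the conditional PET-PEESE rule of Stanley & Doucouliagos;
# if the PET intercept differs significantly from zero, report the PEESE
# intercept as the corrected estimate, otherwise the PET intercept. The .05
# two-tailed cutoff used here is one common implementation choice, not the only one.)
pet_p=summary(SE)$coefficients["(Intercept)","Pr(>|t|)"]
pet_peese=if(pet_p<.05) coef(V)["(Intercept)"] else coef(SE)["(Intercept)"]
pet_peese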