People find it difficult to think about random variation. Our minds are more strongly geared towards recognizing patterns than towards randomness. In this blog post, you can practice getting a feel for what random variation looks like, how to reduce it by running well-powered studies, and how to meta-analyze multiple small studies.
# # # # # # # # # # #
#Initial settings----
# # # # # # # # # # #
if(!require(ggplot2)){install.packages('ggplot2')}
library(ggplot2)
if(!require(MBESS)){install.packages('MBESS')}
library(MBESS)
if(!require(pwr)){install.packages('pwr')}
library(pwr)
if(!require(meta)){install.packages('meta')}
library(meta)
if(!require(metafor)){install.packages('metafor')}
library(metafor)
options(digits=10,scipen=999)
#Set color palette
cbbPalette<-c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
# # # # # # # # # #
#Assignment 1----
# # # # # # # # # #
#Simulate one group
n=10 #set sample size
x<-rnorm(n = n, mean = 100, sd = 15) #create sample from normal distribution
#plot data
ggplot(as.data.frame(x), aes(x)) +
geom_histogram(colour="black", fill="grey", aes(y=..density..), binwidth=2) +
stat_function(fun=dnorm, args=c(mean=100,sd=15), size=1, color="red", lty=2) +
# geom_density(fill=NA, colour="black", size = 1) +
xlab("IQ") + ylab("number of people") + ggtitle("Data") + theme_bw(base_size=20) +
theme(panel.grid.major.x = element_blank(), axis.text.y = element_blank(), panel.grid.minor.x = element_blank()) +
geom_vline(xintercept=mean(x), colour="gray20", linetype="dashed") +
coord_cartesian(xlim=c(50,150)) + scale_x_continuous(breaks=c(50,60,70,80,90,100,110,120,130,140,150)) +
annotate("text", x = mean(x), y = 0.02, label = paste("Mean = ",round(mean(x)),"\n","SD = ",round(sd(x)),sep=""))
# # # # # # # # # #
#Assignment 2----
# # # # # # # # # #
nSims <- 100000 #number of simulated experiments
n<-10 #sample size
M<-110 #Mean of the simulated data
SD<-15 #SD of the simulated data
p <-numeric(nSims) #set up empty container for all simulated p-values
for(i in 1:nSims){ #for each simulated experiment
x<-rnorm(n = n, mean = M, sd = SD) #draw a sample of n observations from a normal distribution
z<-t.test(x, mu=100) #perform the t-test against mu (set to value you want to test against)
p[i]<-z$p.value #get the p-value and store it
}
(sum(p < 0.05)/nSims)*100 #power (in percentage)
#plot pvalue distribution
ggplot(as.data.frame(p), aes(p)) +
geom_histogram(colour="black", fill="grey", binwidth = 0.05)
#Perform power analysis for a one-sample t-test (d = (110-100)/15 = 0.6667)
pwr.t.test(d=0.6667,n=10,sig.level=0.05,type="one.sample",alternative="two.sided")
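#Optional sketch (not in the original script): the same analytic calculation for a
#few sample sizes shows how power increases with n for the same true effect
round(sapply(c(10, 20, 40, 80), function(n.i) pwr.t.test(d=0.6667, n=n.i, sig.level=0.05, type="one.sample", alternative="two.sided")$power), 2)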
# # # # # # # # # #
#Assignment 3----
# # # # # # # # # #
n1<-10 #set the sample size in group 1
n2<-10 #set the sample size in group 2
mx<-100 #set the mean in group 1
sdx<-15 #set the standard deviation in group 1
my<-106 #set the mean in group 2
sdy<-15 #set the standard deviation in group 2
#randomly draw data
x<-rnorm(n = n1, mean = mx, sd = sdx)
y<-rnorm(n = n2, mean = my, sd = sdy)
DV<-c(x,y) #combine the two samples into a single variable
IV<-as.factor(c(rep("1", n1), rep("2", n2))) #create the independent variable (1 and 2)
dataset<-as.data.frame(cbind(IV,DV)) #create a dataframe (to make the plot)
t.test(x, y, alternative = "two.sided", paired = FALSE, var.equal = TRUE, conf.level = 0.95) #t-test
#plot graph
ggplot(dataset, aes(DV, fill = as.factor(IV))) +
geom_histogram(alpha=0.4, binwidth=2, position="dodge", colour="black", aes(y = ..density..)) +
scale_fill_manual(values=cbbPalette, name = "Condition") +
stat_function(fun=dnorm, args=c(mean=mx,sd=sdx), size=1, color="#E69F00", lty=2) +
stat_function(fun=dnorm, args=c(mean=my,sd=sdy), size=1, color="#56B4E9", lty=2) +
xlab("IQ") + ylab("number of people") + ggtitle("Data") + theme_bw(base_size=20) +
theme(panel.grid.major.x = element_blank(), axis.text.y = element_blank(), panel.grid.minor.x = element_blank()) +
geom_vline(xintercept=mean(x), colour="black", linetype="dashed", size=1) +
geom_vline(xintercept=mean(y), colour="black", linetype="dashed", size=1) +
coord_cartesian(xlim=c(50,150)) + scale_x_continuous(breaks=c(50,60,70,80,90,100,110,120,130,140,150)) +
annotate("text", x = 70, y = 0.06, label = paste("Mean X = ",round(mean(x)),"\n","SD = ",round(sd(x)),sep="")) +
annotate("text", x = 130, y = 0.06, label = paste("Mean Y = ",round(mean(y)),"\n","SD = ",round(sd(y)),sep=""))
#Perform power analysis for two-sample t-test
pwr.t.test(d=0.6667,n=10,sig.level=0.05,type="two.sample",alternative="two.sided")
#Perform power analysis for dependent sample t-test
pwr.t.test(d=0.6667,n=10,sig.level=0.05,type="paired",alternative="two.sided")
# # # # # # # # # #
#Assignment 4----
# # # # # # # # # #
n <- 30 #set sample size
cor.true <- 0.55 #set true correlation
x <- rnorm(n = n, mean = 100, sd = 15)
#This formula creates a second sample with the same population mean and SD, which is correlated with the first sample (correlation = cor.true, on average).
y <- (cor.true * (x-(mean(x))) + sqrt(1 - cor.true^2) * rnorm(n = n, mean = 100, sd = 15)) + 100*(1-sqrt(1 - cor.true^2))
dataset<-as.data.frame(cbind(x,y))
#Plot graph
ggplot(dataset, aes(x=x, y=y)) +
geom_point(size=2) + #plot the individual data points
geom_smooth(method=lm, colour="#E69F00", size = 1, fill = "#56B4E9") + # Add linear regression line
geom_abline(intercept = (100-(cor.true*100)), slope=cor.true, colour="black", linetype="dotted", size=1) +
coord_cartesian(xlim=c(40,160), ylim=c(40,160)) +
scale_x_continuous(breaks=c(40, 50,60,70,80,90,100,110,120,130,140,150,160)) + scale_y_continuous(breaks=c(40, 50,60,70,80,90,100,110,120,130,140,150,160)) +
xlab("IQ twin 1") + ylab("IQ twin 2") + ggtitle(paste("Correlation = ",round(cor(x,y),digits=2),sep="")) + theme_bw(base_size=20) +
theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank())
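#Optional sanity check (not in the original script; n.check, x.check, and y.check are
#just local names for this check): with a very large sample the constructed y should
#have a mean of ~100, an SD of ~15, and a correlation with x of ~cor.true
n.check <- 100000
x.check <- rnorm(n = n.check, mean = 100, sd = 15)
y.check <- (cor.true * (x.check-(mean(x.check))) + sqrt(1 - cor.true^2) * rnorm(n = n.check, mean = 100, sd = 15)) + 100*(1-sqrt(1 - cor.true^2))
round(c(mean = mean(y.check), sd = sd(y.check), r = cor(x.check, y.check)), 2)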
# # # # # # # # # #
#Assignment 5----
# # # # # # # # # #
#power analysis for correlation
n <- 30
cor.true <- 0.55
nSims<-100000
#The formula inside the loop below creates a second sample with the same population mean and SD, which is correlated with the first sample (correlation = cor.true, on average).
p <-numeric(nSims) #set up empty container for all simulated p-values
for(i in 1:nSims){ #for each simulated experiment
x <- rnorm(n = n, mean = 100, sd = 15)
y <- (cor.true * (x-(mean(x))) + sqrt(1 - cor.true^2) * rnorm(n = n, mean = 100, sd = 15)) + 100*(1-sqrt(1 - cor.true^2))
z<-cor.test(x,y) #perform the correlation test
p[i]<-z$p.value #get the p-value and store it
}
(sum(p < 0.05)/nSims)*100 #power (in percentage)
#plot pvalue distribution
ggplot(as.data.frame(p), aes(p)) +
geom_histogram(colour="black", fill="grey", binwidth = 0.05)
#Perform power analysis
pwr.r.test(r=0.55,n=30,sig.level=0.05,alternative="two.sided")
# # # # # # # # # #
#Assignment 6----
# # # # # # # # # #
#set.seed(1000)
mx<-106 #Set mean in sample 1
sdx<-15 #Set standard deviation in sample 1
my<-100 #Set mean in sample 2
sdy<-15 #Set standard deviation in sample 2
nSims <- 1 #number of simulated experiments
es.d <-numeric(nSims) #set up empty container for all simulated ES (cohen's d)
SSn1 <-numeric(nSims) #set up empty container for random sample sizes group 1
SSn2 <-numeric(nSims) #set up empty container for random sample sizes group 2
for(i in 1:nSims){ #for each simulated experiment
SampleSize<-sample(20:50, 1) #randomly draw a sample size between 20 and 50 (per group)
x<-rnorm(n = SampleSize, mean = mx, sd = sdx) #produce simulated participants
y<-rnorm(n = SampleSize, mean = my, sd = sdy) #produce simulated participants
SSn1[i]<-SampleSize #save sample size group 1
SSn2[i]<-SampleSize #save sample size group 2
es.d[i]<-smd(Mean.1= mean(x), Mean.2=mean(y), s.1=sd(x), s.2=sd(y), n.1=SampleSize, n.2=SampleSize, Unbiased=TRUE) #Use MBESS to calc d unbiased (Hedges g)
}
#Insert effect sizes and sample sizes
n1<-c(SSn1)
n2<-c(SSn2)
J<-1-3/(4*(SSn1+SSn2-2)-1) #correction for bias (Hedges' correction)
es.d.v <-(((SSn1+SSn2)/(SSn1*SSn2))+(es.d^2/(2*(SSn1+SSn2))))*J^2 #Some formulas multiply by ((n1+n2)/(n1+n2-2)) - we don't, so that results are consistent with the meta-analysis
#Calculate Standard Errors ES
d.se<-sqrt(es.d.v)
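#Optional cross-check (not in the original script): metafor's escalc() computes
#Hedges' g (yi) and its variance (vi) from the same summary statistics; the effect
#size should match almost exactly, and the variance should be close (the two
#variance approximations differ slightly)
escalc(measure="SMD", m1i=mean(x), sd1i=sd(x), n1i=SSn1, m2i=mean(y), sd2i=sd(y), n2i=SSn2)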
#calculate meta-analysis
meta<-metagen(es.d, d.se)
meta #show meta-analysis
#compare against a t-test
t.test(x, y, alternative = "two.sided", paired = FALSE, var.equal = TRUE, conf.level = 0.95) #t-test to compare against meta-analysis
forest(meta, leftcols=c("studlab"), studlab=TRUE, xlab="Hedges' g", col.square="#E69F00", col.square.lines="black", col.i="black", fontsize=14)
# # # # # # # # # #
#Assignment 7----
# # # # # # # # # #
#Small scale meta-analysis----
#Insert effect sizes and sample sizes
es.d<-c(0.44, 0.7, 0.28, 0.35)
n1<-c(60, 35, 23, 80)
n2<-c(60, 36, 25, 80)
#Calculate Variance ES
J<-1-3/(4*(n1+n2-2)-1) #Correction for bias
es.d.v <-(((n1+n2)/(n1*n2))+(es.d^2/(2*(n1+n2))))*J^2
#Calculate Standard Errors ES
d.se<-c(sqrt(es.d.v))
meta<-metagen(es.d, d.se, comb.fixed=FALSE)
meta
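#Optional cross-check (not in the original script): the same random-effects model can
#be fitted with metafor's rma(); with method="DL" (the DerSimonian-Laird tau^2
#estimator, historically the default in metagen) the pooled estimate should be very
#close to the random effects estimate printed above
rma(yi=es.d, vi=es.d.v, method="DL")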
#Forest Plot
#If you add studies, make sure the number of labels supplied to studlab matches the number of effect sizes.
forest(meta, leftcols=c("studlab"), studlab=TRUE, hetstat=FALSE, xlab="Hedges' g",
col.square="#E69F00", col.square.lines="black", col.diamond="#56B4E9", col.diamond.lines="black", col.i="black", fontsize=14)
# # # # # # # # # #
#Assignment 8----
# # # # # # # # # #
#set.seed(1000)
mx<-106 #Set mean in sample 1
sdx<-15 #Set standard deviation in sample 1
my<-100 #Set mean in sample 2
sdy<-15 #Set standard deviation in sample 2
nSims <- 8 #number of simulated experiments
es.d <-numeric(nSims) #set up empty container for all simulated ES (cohen's d)
SSn1 <-numeric(nSims) #set up empty container for random sample sizes group 1
SSn2 <-numeric(nSims) #set up empty container for random sample sizes group 2
for(i in 1:nSims){ #for each simulated experiment
SampleSize<-sample(100:150, 1) #randomly draw a sample size between 100 and 150 (per group)
x<-rnorm(n = SampleSize, mean = mx, sd = sdx) #produce simulated participants
y<-rnorm(n = SampleSize, mean = my, sd = sdy) #produce simulated participants
SSn1[i]<-SampleSize #save sample size group 1
SSn2[i]<-SampleSize #save sample size group 2
es.d[i]<-smd(Mean.1= mean(x), Mean.2=mean(y), s.1=sd(x), s.2=sd(y), n.1=SampleSize, n.2=SampleSize, Unbiased=TRUE) #Use MBESS to calc d unbiased (Hedges g)
}
#Insert effect sizes and sample sizes
n1<-c(SSn1)
n2<-c(SSn2)
J<-1-3/(4*(SSn1+SSn2-2)-1) #correction for bias (Hedges' correction)
es.d.v <-(((SSn1+SSn2)/(SSn1*SSn2))+(es.d^2/(2*(SSn1+SSn2))))*J^2 #Some formulas multiply by ((n1+n2)/(n1+n2-2)) - we don't, so that results are consistent with the meta-analysis
#Calculate Standard Errors ES
d.se<-sqrt(es.d.v)
#calculate meta-analysis
meta<-metagen(es.d, d.se)
meta #show meta-analysis
#compare against a t-test
t.test(x, y, alternative = "two.sided", paired = FALSE, var.equal = TRUE, conf.level = 0.95) #t-test on the last simulated study only, to compare a single study against the meta-analysis of all 8
forest(meta, leftcols=c("studlab"), studlab=TRUE, xlab="Hedges' g", col.square="#E69F00", col.square.lines="black", col.i="black", fontsize=14)
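#Optional check (not in the original script): total number of simulated participants
#across all 8 studies, which helps explain why the meta-analytic confidence interval
#is much narrower than that of the single t-test above
sum(SSn1) + sum(SSn2)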
# # # # # # # # # #
#Assignment 9----
# # # # # # # # # #
#Small scale meta-analysis----
#Insert effect sizes and sample sizes
es.d<-c(0.44, 0.7, 0.28, 0.35)
n1<-c(60, 35, 23, 80)
n2<-c(60, 36, 25, 80)
#Calculate Variance ES
J<-1-3/(4*(n1+n2-2)-1) #Correction for bias
es.d.v <-(((n1+n2)/(n1*n2))+(es.d^2/(2*(n1+n2))))*J^2
#Calculate Standard Errors ES
d.se<-c(sqrt(es.d.v))
meta<-metagen(es.d, d.se, comb.fixed=FALSE)
meta
#Forest Plot
#If you add studies, make sure the number of labels supplied to studlab matches the number of effect sizes.
forest(meta, leftcols=c("studlab"), studlab=TRUE, hetstat=FALSE, xlab="Hedges' g",
col.square="#E69F00", col.square.lines="black", col.diamond="#56B4E9", col.diamond.lines="black", col.i="black", fontsize=14)
funnel(meta, lty.fixed = 3, lty.random = 5, studlab=TRUE, xlim = c(-1, 1), xlab = "Effect size", ylab = "Standard Error (SE)", cex = .75, col = 1, bg = 1, contour=c(0.00001, 0.95), col.contour=c("grey", "white"), main = "All Studies")
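#Optional sketch (not in the original script): an Egger-type regression test for
#funnel plot asymmetry via meta's metabias(); with only four studies such a test has
#very little power and is shown purely for illustration (k.min is lowered from its
#default of 10 so the test will run)
metabias(meta, method.bias="linreg", k.min=4)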