Last active
April 23, 2021 02:38
-
-
Save Lakens/c0708869b44588a7f6ef to your computer and use it in GitHub Desktop.
People find it difficult to think about random variation. Our mind is more strongly geared towards recognizing patterns than randomness. In this blogpost, you can practice with getting used to what random variation looks like, how to reduce it by running well-powered studies, and how to meta-analyze multiple small studies.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# # # # # # # # # # # | |
#Initial settings---- | |
# # # # # # # # # # # | |
if(!require(ggplot2)){install.packages('ggplot2')} | |
library(ggplot2) | |
if(!require(MBESS)){install.packages('MBESS')} | |
library(MBESS) | |
if(!require(pwr)){install.packages('pwr')} | |
library(pwr) | |
if(!require(meta)){install.packages('meta')} | |
library(meta) | |
if(!require(metafor)){install.packages('metafor')} | |
library(metafor) | |
options(digits=10,scipen=999) | |
#Set color palette | |
cbbPalette<-c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7") | |
# # # # # # # # # # | |
#Assignment 1---- | |
# # # # # # # # # # | |
#Simulate one group | |
n=10 #set sample size | |
x<-rnorm(n = n, mean = 100, sd = 15) #create sample from normal distribution | |
#plot data | |
ggplot(as.data.frame(x), aes(x)) + | |
geom_histogram(colour="black", fill="grey", aes(y=..density..), binwidth=2) + | |
stat_function(fun=dnorm, args=c(mean=100,sd=15), size=1, color="red", lty=2) + | |
# geom_density(fill=NA, colour="black", size = 1) + | |
xlab("IQ") + ylab("number of people") + ggtitle("Data") + theme_bw(base_size=20) + | |
theme(panel.grid.major.x = element_blank(), axis.text.y = element_blank(), panel.grid.minor.x = element_blank()) + | |
geom_vline(xintercept=mean(x), colour="gray20", linetype="dashed") + | |
coord_cartesian(xlim=c(50,150)) + scale_x_continuous(breaks=c(50,60,70,80,90,100,110,120,130,140,150)) + | |
annotate("text", x = mean(x), y = 0.02, label = paste("Mean = ",round(mean(x)),"\n","SD = ",round(sd(x)),sep="")) | |
# # # # # # # # # # | |
#Assignment 2---- | |
# # # # # # # # # # | |
nSims <- 100000 #number of simulated experiments | |
n<-10 #sample size | |
M<-110 #Mean of the simulated data | |
SD<-15 #SD of the simulated data | |
p <-numeric(nSims) #set up empty container for all simulated p-values | |
for(i in 1:nSims){ #for each simulated experiment | |
x<-rnorm(n = n, mean = M, sd = SD) # | |
z<-t.test(x, mu=100) #perform the t-test against mu (set to value you want to test against) | |
p[i]<-z$p.value #get the p-value and store it | |
} | |
(sum(p < 0.05)/nSims)*100 #power (in percentage) | |
#plot pvalue distribution | |
ggplot(as.data.frame(p), aes(p)) + | |
geom_histogram(colour="black", fill="grey", binwidth = 0.05) | |
#Perform power analysis for one sample t-test | |
pwr.t.test(d=0.6667,n=10,sig.level=0.05,type="one.sample",alternative="two.sided") | |
# # # # # # # # # # | |
#Assignment 3---- | |
# # # # # # # # # # | |
n1<-10 #set the sample size in group 1 | |
n2<-10 #set the sample size in group 2 | |
mx<-100 #set the mean in group 1 | |
sdx<-15 #set the standard deviation in group 1 | |
my<-106 #set the mean in group 2 | |
sdy<-15 #set the standard deviation in group 2 | |
#randomly draw data | |
x<-rnorm(n = n1, mean = mx, sd = sdx) | |
y<-rnorm(n = n2, mean = my, sd = sdy) | |
DV<-c(x,y) #combine the two samples into a single variable | |
IV<-as.factor(c(rep("1", n1), rep("2", n2))) #create the independent variable (1 and 2) | |
dataset<-as.data.frame(cbind(IV,DV)) #create a dataframe (to make the plot) | |
t.test(x, y, alternative = "two.sided", paired = FALSE, var.equal = TRUE, conf.level = 0.95) #t-test | |
#plot graph | |
ggplot(dataset, aes(DV, fill = as.factor(IV))) + | |
geom_histogram(alpha=0.4, binwidth=2, position="dodge", colour="black", aes(y = ..density..)) + | |
scale_fill_manual(values=cbbPalette, name = "Condition") + | |
stat_function(fun=dnorm, args=c(mean=mx,sd=sdx), size=1, color="#E69F00", lty=2) + | |
stat_function(fun=dnorm, args=c(mean=my,sd=sdy), size=1, color="#56B4E9", lty=2) + | |
xlab("IQ") + ylab("number of people") + ggtitle("Data") + theme_bw(base_size=20) + | |
theme(panel.grid.major.x = element_blank(), axis.text.y = element_blank(), panel.grid.minor.x = element_blank()) + | |
geom_vline(xintercept=mean(x), colour="black", linetype="dashed", size=1) + | |
geom_vline(xintercept=mean(y), colour="black", linetype="dashed", size=1) + | |
coord_cartesian(xlim=c(50,150)) + scale_x_continuous(breaks=c(50,60,70,80,90,100,110,120,130,140,150)) + | |
annotate("text", x = 70, y = 0.06, label = paste("Mean X = ",round(mean(x)),"\n","SD = ",round(sd(x)),sep="")) + | |
annotate("text", x = 130, y = 0.06, label = paste("Mean Y = ",round(mean(y)),"\n","SD = ",round(sd(y)),sep="")) | |
#Perform power analysis for two-sample t-test | |
pwr.t.test(d=0.6667,n=10,sig.level=0.05,type="two.sample",alternative="two.sided") | |
#Perform power analysis for dependent sample t-test | |
pwr.t.test(d=0.6667,n=10,sig.level=0.05,type="paired",alternative="two.sided") | |
# # # # # # # # # # | |
#Assignment 4---- | |
# # # # # # # # # # | |
n <- 30 #set sample size | |
cor.true <- 0.55 #set true correlation | |
x <- rnorm(n = n, mean = 100, sd = 15) | |
#This formula creates a second sample with identical mean and sd, which is correlated with the first sample. | |
y <- (cor.true * (x-(mean(x))) + sqrt(1 - cor.true^2) * rnorm(n = n, mean = 100, sd = 15)) + 100*(1-sqrt(1 - cor.true^2)) | |
dataset<-as.data.frame(cbind(x,y)) | |
#Plot graph | |
ggplot(dataset, aes(x=x, y=y)) + | |
geom_point(size=2) + # Use hollow circles | |
geom_smooth(method=lm, colour="#E69F00", size = 1, fill = "#56B4E9") + # Add linear regression line | |
geom_abline(intercept = (100-(cor.true*100)), slope=cor.true, colour="black", linetype="dotted", size=1) + | |
coord_cartesian(xlim=c(40,160), ylim=c(40,160)) + | |
scale_x_continuous(breaks=c(40, 50,60,70,80,90,100,110,120,130,140,150,160)) + scale_y_continuous(breaks=c(40, 50,60,70,80,90,100,110,120,130,140,150,160)) + | |
xlab("IQ twin 1") + ylab("IQ twin 2") + ggtitle(paste("Correlation = ",round(cor(x,y),digits=2),sep="")) + theme_bw(base_size=20) + | |
theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank()) | |
# # # # # # # # # # | |
#Assignment 5---- | |
# # # # # # # # # # | |
#power analysis for correlation | |
n <- 30 | |
cor.true <- 0.55 | |
nSims<-100000 | |
#This formula creates a second sample with identical mean and sd, which is correlated with the first sample. | |
p <-numeric(nSims) #set up empty container for all simulated p-values | |
for(i in 1:nSims){ #for each simulated experiment | |
x <- rnorm(n = n, mean = 100, sd = 15) | |
y <- (cor.true * (x-(mean(x))) + sqrt(1 - cor.true^2) * rnorm(n = n, mean = 100, sd = 15)) + 100*(1-sqrt(1 - cor.true^2)) | |
z<-cor.test(x,y) #perform the correlation test | |
p[i]<-z$p.value #get the p-value and store it | |
} | |
(sum(p < 0.05)/nSims)*100 #power (in percentage) | |
#plot pvalue distribution | |
ggplot(as.data.frame(p), aes(p)) + | |
geom_histogram(colour="black", fill="grey", binwidth = 0.05) | |
#Perform power analysis | |
pwr.r.test(r=0.55,n=30,sig.level=0.05,alternative="two.sided") | |
# # # # # # # # # # | |
#Assignment 6---- | |
# # # # # # # # # # | |
#set.seed(1000) | |
mx<-106 #Set mean in sample 1 | |
sdx<-15 #Set standard deviation in sample 1 | |
my<-100 #Set mean in sample 2 | |
sdy<-15 #Set standard deviation in sample 2 | |
nSims <- 1 #number of simulated experiments | |
es.d <-numeric(nSims) #set up empty container for all simulated ES (cohen's d) | |
SSn1 <-numeric(nSims) #set up empty container for random sample sizes group 1 | |
SSn2 <-numeric(nSims) #set up empty container for random sample sizes group 2 | |
for(i in 1:nSims){ #for each simulated experiment | |
SampleSize<-sample(20:50, 1) #randomly draw a sample between 20 and 50 | |
x<-rnorm(n = SampleSize, mean = mx, sd = sdx) #produce simulated participants | |
y<-rnorm(n = SampleSize, mean = my, sd = sdy) #produce simulated participants | |
SSn1[i]<-SampleSize #save sample size group 1 | |
SSn2[i]<-SampleSize #save sample size group 2 | |
es.d[i]<-smd(Mean.1= mean(x), Mean.2=mean(y), s.1=sd(x), s.2=sd(y), n.1=SampleSize, n.2=SampleSize, Unbiased=TRUE) #Use MBESS to calc d unbiased (Hedges g) | |
} | |
#Insert effect sizes and sample sizes | |
n1<-c(SSn1) | |
n2<-c(SSn2) | |
J<-1-3/(4*( SSn1+ SSn2-2)-1) #correction for bias | |
es.d.v <-(((SSn1+SSn2)/(SSn1*SSn2))+(es.d^2/(2*(SSn1+SSn2))))*J^2 #Some formulas add *((n+n)/(n+n-2)) - we don't so that results are consistent with meta-analysis | |
#Calculate Standard Errors ES | |
d.se<-sqrt(es.d.v) | |
#calculate meta-analysis | |
meta<-metagen(es.d, d.se) | |
meta #show meta-analysis | |
#compare against a t-test | |
t.test(x, y, alternative = "two.sided", paired = FALSE, var.equal = TRUE, conf.level = 0.95) #t-test to compare against meta-analysis | |
forest(meta, leftcols=c("studlab"), studlab=TRUE, xlab="Hedges' g", col.square="#E69F00", col.square.lines="black", col.i="black", fontsize=14) | |
# # # # # # # # # # | |
#Assignment 7---- | |
# # # # # # # # # # | |
#Small scale meta-analysis---- | |
#Insert effect sizes and sample sizes | |
es.d<-c(0.44, 0.7, 0.28, 0.35) | |
n1<-c(60, 35, 23, 80) | |
n2<-c(60, 36, 25, 80) | |
#Calculate Variance ES | |
J<-1-3/(4*(n1+n2-2)-1) #Correction for bias | |
es.d.v <-(((n1+n2)/(n1*n2))+(es.d^2/(2*(n1+n2))))*J^2 | |
#Calculate Standard Errors ES | |
d.se<-c(sqrt(es.d.v)) | |
meta<-metagen(es.d, d.se, comb.fixed=FALSE) | |
meta | |
#Forest Plot | |
#If you add studies, make sure to match number of labels in studlab variable. | |
forest(meta, leftcols=c("studlab"), studlab=TRUE, hetstat=FALSE, xlab="Hedges' g", | |
col.square="#E69F00", col.square.lines="black", col.diamond="#56B4E9", col.diamond.lines="black", col.i="black", fontsize=14) | |
# # # # # # # # # # | |
#Assignment 8---- | |
# # # # # # # # # # | |
#set.seed(1000) | |
mx<-106 #Set mean in sample 1 | |
sdx<-15 #Set standard deviation in sample 1 | |
my<-100 #Set mean in sample 2 | |
sdy<-15 #Set standard deviation in sample 2 | |
nSims <- 8 #number of simulated experiments | |
es.d <-numeric(nSims) #set up empty container for all simulated ES (cohen's d) | |
SSn1 <-numeric(nSims) #set up empty container for random sample sizes group 1 | |
SSn2 <-numeric(nSims) #set up empty container for random sample sizes group 2 | |
for(i in 1:nSims){ #for each simulated experiment | |
SampleSize<-sample(100:150, 1) #randomly draw a sample between 20 and 50 | |
x<-rnorm(n = SampleSize, mean = mx, sd = sdx) #produce simulated participants | |
y<-rnorm(n = SampleSize, mean = my, sd = sdy) #produce simulated participants | |
SSn1[i]<-SampleSize #save sample size group 1 | |
SSn2[i]<-SampleSize #save sample size group 2 | |
es.d[i]<-smd(Mean.1= mean(x), Mean.2=mean(y), s.1=sd(x), s.2=sd(y), n.1=SampleSize, n.2=SampleSize, Unbiased=TRUE) #Use MBESS to calc d unbiased (Hedges g) | |
} | |
#Insert effect sizes and sample sizes | |
n1<-c(SSn1) | |
n2<-c(SSn2) | |
J<-1-3/(4*( SSn1+ SSn2-2)-1) #correction for bias | |
es.d.v <-(((SSn1+SSn2)/(SSn1*SSn2))+(es.d^2/(2*(SSn1+SSn2))))*J^2 #Some formulas add *((n+n)/(n+n-2)) - we don't so that results are consistent with meta-analysis | |
#Calculate Standard Errors ES | |
d.se<-sqrt(es.d.v) | |
#calculate meta-analysis | |
meta<-metagen(es.d, d.se) | |
meta #show meta-analysis | |
#compare against a t-test | |
t.test(x, y, alternative = "two.sided", paired = FALSE, var.equal = TRUE, conf.level = 0.95) #t-test to compare against meta-analysis | |
forest(meta, leftcols=c("studlab"), studlab=TRUE, xlab="Hedges' g", col.square="#E69F00", col.square.lines="black", col.i="black", fontsize=14) | |
# # # # # # # # # # | |
#Assignment 9---- | |
# # # # # # # # # # | |
#Small scale meta-analysis---- | |
#Insert effect sizes and sample sizes | |
es.d<-c(0.44, 0.7, 0.28, 0.35) | |
n1<-c(60, 35, 23, 80) | |
n2<-c(60, 36, 25, 80) | |
#Calculate Variance ES | |
J<-1-3/(4*(n1+n2-2)-1) #Correction for bias | |
es.d.v <-(((n1+n2)/(n1*n2))+(es.d^2/(2*(n1+n2))))*J^2 | |
#Calculate Standard Errors ES | |
d.se<-c(sqrt(es.d.v)) | |
meta<-metagen(es.d, d.se, comb.fixed=FALSE) | |
meta | |
#Forest Plot | |
#If you add studies, make sure to match number of labels in studlab variable. | |
forest(meta, leftcols=c("studlab"), studlab=TRUE, hetstat=FALSE, xlab="Hedges' g", | |
col.square="#E69F00", col.square.lines="black", col.diamond="#56B4E9", col.diamond.lines="black", col.i="black", fontsize=14) | |
funnel(meta, lty.fixed = 3, lty.random = 5, studlab=TRUE, xlim = c(-1, 1), xlab = "Effect size", ylab = "Standard Error (SE)", cex = .75, col = 1, bg = 1, contour=c(0.00001, 0.95), col.contour=c("grey", "white"), main = "All Studies") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment