Created
March 3, 2021 23:36
-
-
Save BioSciEconomist/8a34e0b2e17d6af15ddc46793dc6c0f3 to your computer and use it in GitHub Desktop.
Type M and S errors with simulation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# *----------------------------------------------------------------- | |
# | PROGRAM NAME: ex typeMS.R | |
# | DATE: 3/2/21 | |
# | CREATED BY: MATT BOGARD | |
# | PROJECT FILE: | |
# *---------------------------------------------------------------- | |
# | PURPOSE: example estimate type M and S error using simulated data | |
# *---------------------------------------------------------------- | |
# References: | |
# Gelman, A., & Carlin, J. (2014). Beyond Power Calculations: Assessing Type S (Sign) and Type M (Magnitude) Errors. | |
# Perspectives on Psychological Science, 9(6), 641-651. | |
# https://www.rdocumentation.org/packages/retrodesign/versions/0.1.0/vignettes/Intro_To_retrodesign.Rmd | |
# https://data.library.virginia.edu/assessing-type-s-and-type-m-errors/ | |
library(MASS) | |
library(dplyr) | |
# rm(list=ls()) # get rid of any existing data if necessary | |
options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation | |
# part 1: simualate 'real population' | |
# part 2: conduct power analysis for simulated effect sizes - how large does our sample | |
# have to be to detect the hypothesized effect with 5% significance and 95% power | |
# part 3: scenario 1: analysis of a random sample under high power | |
# part 4: scenario 2: analysis of a random sample under low power | |
# part 5: type M and S errors for a low powered sample (using the TypeMS function) | |
# part 6: simulations giving similar results to the TypeMS function | |
#---------------------------------------- | |
# part 1: simulate some data | |
#---------------------------------------- | |
# | |
# treatent group | |
# | |
mu <- 95 # mean | |
sd <- 75 # standard deviation | |
# simulate outcome based on parameter assumptions above | |
set.seed(1234567) | |
y <- rnorm(10000,mu,sd) | |
# check descriptive results | |
summary(y) | |
sd(y) | |
hist(y, breaks = 50) | |
# simulate ID and treatment indicator | |
ID <- seq(1,10000) | |
D <- rep(1,10000) | |
trt <- data.frame(ID,y,D) # combine metrics | |
# | |
# control group | |
# | |
mu <- 100 # mean | |
sd <- 75 # standard deviation | |
# simulate outcome based on parameter assumptions above | |
set.seed(1234567) | |
y <- rnorm(10000,mu,sd) | |
# check descriptive results | |
summary(y) | |
sd(y) | |
hist(y, breaks = 50) | |
# simulate ID and treatment indicator | |
ID <- seq(10001,20000) | |
D <- rep(0,10000) | |
ctrl <- data.frame(ID,y,D) # combine metrics | |
# | |
# create population data frame | |
# | |
df <- rbind(trt,ctrl) | |
# check descriptives | |
summary(df) | |
sd(df$y) | |
df%>% | |
group_by(D) %>% | |
summarize(y = mean(y), | |
n = n()) | |
hist(df$y, breaks = 50) | |
summary(lm(y ~ D, data = df)) # regression on full model (notice impact is -5 units) | |
#--------------------------------- | |
# part 2: power analysis | |
#-------------------------------- | |
library(pwr) | |
# making common error variance equal to population variance | |
# define our effect size parameter | |
# d = |mu1 - mu2| / sigma | |
mu1 <- 95 # hypothesized treatment mean | |
mu0 <- 100 # hypothesized control or baseline mean | |
sigma <- 75 # assuming common variance | |
effect_size <- abs(mu1 - mu0)/sigma | |
pwr.t.test(n =, d = effect_size, sig.level = .05 , power = .95 ,type = c("two.sample")) | |
# Two-sample t test power calculation | |
# | |
# n = 5849 | |
# d = 0.06667 | |
# sig.level = 0.05 | |
# power = 0.95 | |
# alternative = two.sided | |
# | |
# NOTE: n is number in *each* group | |
#------------------------------- | |
# part 3: high powered sample | |
#------------------------------ | |
set.seed(1234567) | |
df2 <- df %>% | |
group_by(D) %>% | |
sample_n(6000) | |
summary(df2) # check | |
summary(lm(y~D,data = df2)) # this is very close to our populatiion value and highly significant | |
#------------------------------- | |
# part 4: low powered sample | |
#------------------------------ | |
pwr.t.test(n =, d = effect_size, sig.level = .05 , power = .10 ,type = c("two.sample")) | |
# Two-sample t test power calculation | |
# | |
# n = 192.5 | |
# d = 0.06667 | |
# sig.level = 0.05 | |
# power = 0.1 | |
# alternative = two.sided | |
# | |
# NOTE: n is number in *each* group | |
# with a sample size of 193 we only have a 10% chance of detecting a difference in means | |
# like our hypothesized population with a 5% level of significance | |
set.seed(1234567) | |
df2 <- df %>% | |
group_by(D) %>% | |
sample_n(193) | |
summary(df2) # check | |
summary(lm(y~D,data = df2)) | |
#------------------------------ | |
# part 5: type M and S error analysis for a low powered sample | |
#----------------------------- | |
# References: | |
# Gelman, A., & Carlin, J. (2014). Beyond Power Calculations: Assessing Type S (Sign) and Type M (Magnitude) Errors. | |
# Perspectives on Psychological Science, 9(6), 641-651. | |
# https://www.rdocumentation.org/packages/retrodesign/versions/0.1.0/vignettes/Intro_To_retrodesign.Rmd | |
# https://data.library.virginia.edu/assessing-type-s-and-type-m-errors/ | |
# function from UVA | |
TypeMS <- function(A, s, alpha=.05, df=Inf, n.sims=10000){ | |
z <- qt(1-alpha/2, df) | |
p.hi <- 1 - pt(z-A/s, df) | |
p.lo <- pt(-z-A/s, df) | |
power <- p.hi + p.lo | |
typeS <- p.lo/power | |
estimate <- A + s*rt(n.sims,df) | |
significant <- abs(estimate) > s*z | |
exaggeration <- mean(abs(estimate)[significant])/A | |
return(list(power=power, typeS=typeS, exaggeration=exaggeration)) | |
} | |
# A = true effect size measured in actual units of outcome (population) | |
# s = standard error of the estimate (population) | |
# alpha = level of significance | |
# df = degrees of freedom, infinite if normal distribution | |
# get standard error | |
se <- sqrt(sigma^2 * 2 / 193) # pooled SE with n = 193 from each group | |
# (similar to the same SE we got from our low powered regression) | |
TypeMS(A =5, s= se) | |
# $power | |
# [1] 0.1004 # this turns out to be the power we said we had with n = 193 per group | |
# | |
# $typeS | |
# [1] 0.04446 # if we get a 'significant' result, it will have the wrong sign about 4% of the time | |
# | |
# $exaggeration | |
# [1] 3.698 # if we get a 'significant' result, it will on average be exaggerated by a factor of 3.7 | |
#----------------------------------- | |
# part 6: simulation to show errors in sign and magnitude with our data | |
#----------------------------------- | |
# search for posible random samples from our population that gives significant results | |
pvals <- c() # initialize vector for p-values | |
betas <- c() # initialize vector for estimated treatment effects | |
for (i in 1:10000) { | |
set.seed(i) | |
sample1 <- trt[sample(nrow(trt), 193, replace = T), ] # sample from treatment group simulated above | |
sample2 <- ctrl[sample(nrow(ctrl), 193, replace = T), ] # sample from control group simulated above | |
df2 <- rbind(sample1,sample2) | |
fit <- lm(y~D,data = df2) | |
summ <- summary(fit) | |
pvals <- c(pvals,coef(summ)[8]) # get p-value from this bs sample | |
betas <- c(betas,coef(summ)[2]) | |
} | |
# combine results | |
seeds <- seq(1,10000) | |
results <- data.frame(seeds,betas,pvals) | |
summary(results) # check | |
hist(results$betas, breaks = 50) | |
# find samples where we found significant differences | |
examples <- results[results$pvals < .05,] # n = 990 ~ 10% which is what we are powered to detect | |
head(examples) | |
# flag sign and magnitude errors | |
examples$sign <- ifelse(examples$betas > 0,1,0) | |
examples$ratio <- abs(examples$betas)/5 | |
summary(examples) | |
hist(examples$betas, breaks = 50) | |
# the average value for 'sign' indicates that every time we got a significant result | |
# the sign was wrong about 5.56% of the time, and the magnitude of the estimate | |
# on average was exaggerated by a factor of 3.6 times - very close to the results | |
# we got from the TypeMS function above | |
# what did the results look like with some of these samples | |
head(examples[order(examples$betas),]) | |
set.seed(7088) | |
sample1 <- trt[sample(nrow(trt), 193, replace = T), ] # sample from treatment group | |
sample2 <- ctrl[sample(nrow(ctrl), 193, replace = T), ] # sample from control group | |
df2 <- rbind(sample1,sample2) | |
summary(df2) # check | |
summary(lm(y~D,data = df2)) | |
# check another | |
head(examples[order(-examples$betas),]) | |
set.seed(5817) | |
sample1 <- trt[sample(nrow(trt), 193, replace = T), ] # sample from treatment group | |
sample2 <- ctrl[sample(nrow(ctrl), 193, replace = T), ] # sample from control group | |
df2 <- rbind(sample1,sample2) | |
summary(df2) # check | |
summary(lm(y~D,data = df2)) | |
# check the largest p-value that is significant | |
head(examples[order(-examples$pvals),]) | |
set.seed(5516) | |
sample1 <- trt[sample(nrow(trt), 193, replace = T), ] # sample from treatment group | |
sample2 <- ctrl[sample(nrow(ctrl), 193, replace = T), ] # sample from control group | |
df2 <- rbind(sample1,sample2) | |
summary(df2) # check | |
summary(lm(y~D,data = df2)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment