BioSciEconomist · March 3, 2021 23:36
diff --git a/ex typeMS.R b/ex typeMS.R
 # *-----------------------------------------------------------------
 # | PROGRAM NAME: ex typeMS.R
 # | DATE: 3/2/21
 # | CREATED BY: MATT BOGARD 
 # | PROJECT FILE:        
 # *----------------------------------------------------------------
 # | PURPOSE: example estimate type M and S error using simulated data
 # *----------------------------------------------------------------

 # References:
 # Gelman, A., & Carlin, J. (2014). Beyond Power Calculations: Assessing Type S (Sign) and Type M (Magnitude) Errors.
 # Perspectives on Psychological Science, 9(6), 641-651. 

 # https://www.rdocumentation.org/packages/retrodesign/versions/0.1.0/vignettes/Intro_To_retrodesign.Rmd
 # https://data.library.virginia.edu/assessing-type-s-and-type-m-errors/ 

 library(MASS)
 library(dplyr)

 # rm(list=ls()) # get rid of any existing data if necessary

 options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation


 # part 1: simualate 'real population'
 # part 2: conduct power analysis for simulated effect sizes - how large does our sample
 #         have to be to detect the hypothesized effect with 5% significance and 95% power
 # part 3: scenario 1: analysis of a random sample under high power
 # part 4: scenario 2: analysis of a random sample under low power
 # part 5: type M and S errors for a low powered sample (using the TypeMS function) 
 # part 6: simulations giving similar results to the TypeMS function

 #----------------------------------------
 # part 1: simulate some data
 #----------------------------------------

 #
 # treatent group
 #

 mu <- 95  # mean
 sd <- 75 # standard deviation

 # simulate outcome based on parameter assumptions above
 set.seed(1234567)
 y <- rnorm(10000,mu,sd)

 # check descriptive results
 summary(y)
 sd(y)
 hist(y, breaks = 50)

 # simulate ID and treatment indicator
 ID <- seq(1,10000)
 D <- rep(1,10000) 

 trt <- data.frame(ID,y,D) # combine metrics

 #
 # control group
 #

 mu <- 100  # mean
 sd <- 75 # standard deviation

 # simulate outcome based on parameter assumptions above
 set.seed(1234567)
 y <- rnorm(10000,mu,sd)

 # check descriptive results
 summary(y)
 sd(y)
 hist(y, breaks = 50)

 # simulate ID and treatment indicator
 ID <- seq(10001,20000)
 D <- rep(0,10000) 

 ctrl <- data.frame(ID,y,D) # combine metrics

 #
 # create population data frame
 #

 df <- rbind(trt,ctrl)

 # check descriptives
 summary(df)
 sd(df$y)
 df%>% 
  group_by(D) %>%
  summarize(y = mean(y),
            n = n()) 
 hist(df$y, breaks = 50)

 summary(lm(y ~ D, data = df)) # regression on full model (notice impact is -5 units)

 #---------------------------------
 # part 2: power analysis
 #--------------------------------


 library(pwr)

 # making common error variance equal to population variance

 # define our effect size parameter
 # d = |mu1 - mu2| / sigma

 mu1 <- 95 # hypothesized treatment mean
 mu0 <- 100 # hypothesized control or baseline mean
 sigma <- 75 # assuming common variance 

 effect_size <- abs(mu1 - mu0)/sigma


 pwr.t.test(n =, d = effect_size, sig.level = .05 , power = .95 ,type = c("two.sample"))  


 # Two-sample t test power calculation 
 # 
 # n = 5849
 # d = 0.06667
 # sig.level = 0.05
 # power = 0.95
 # alternative = two.sided
 # 
 # NOTE: n is number in *each* group


 #-------------------------------
 # part 3: high powered sample
 #------------------------------

 set.seed(1234567)
 df2 <- df %>%
  group_by(D) %>%
  sample_n(6000)

 summary(df2) # check

 summary(lm(y~D,data = df2)) # this is very close to our populatiion value and highly significant



 #-------------------------------
 # part 4: low powered sample
 #------------------------------

 pwr.t.test(n =, d = effect_size, sig.level = .05 , power = .10 ,type = c("two.sample")) 


 # Two-sample t test power calculation 
 # 
 # n = 192.5
 # d = 0.06667
 # sig.level = 0.05
 # power = 0.1
 # alternative = two.sided
 # 
 # NOTE: n is number in *each* group


 # with a sample size of 193 we only have a 10% chance of detecting a difference in means
 # like our hypothesized population with a 5% level of significance

 set.seed(1234567)
 df2 <- df %>%
  group_by(D) %>%
  sample_n(193)

 summary(df2) # check

 summary(lm(y~D,data = df2))


 #------------------------------
 # part 5: type M and S error analysis for a low powered sample
 #-----------------------------

 # References:
 # Gelman, A., & Carlin, J. (2014). Beyond Power Calculations: Assessing Type S (Sign) and Type M (Magnitude) Errors.
 # Perspectives on Psychological Science, 9(6), 641-651. 

 # https://www.rdocumentation.org/packages/retrodesign/versions/0.1.0/vignettes/Intro_To_retrodesign.Rmd
 # https://data.library.virginia.edu/assessing-type-s-and-type-m-errors/ 



 # function from UVA 
 TypeMS <- function(A, s, alpha=.05, df=Inf, n.sims=10000){
  z <- qt(1-alpha/2, df)
  p.hi <- 1 - pt(z-A/s, df)
  p.lo <- pt(-z-A/s, df)
  power <- p.hi + p.lo
  typeS <- p.lo/power
  estimate <- A + s*rt(n.sims,df)
  significant <- abs(estimate) > s*z
  exaggeration <- mean(abs(estimate)[significant])/A
  return(list(power=power, typeS=typeS, exaggeration=exaggeration))
 }

 # A = true effect size measured in actual units of outcome (population)
 # s = standard error of the estimate (population)
 # alpha = level of significance
 # df = degrees of freedom, infinite if normal distribution

 # get standard error
 se <- sqrt(sigma^2 * 2 / 193) # pooled SE with n = 193 from each group 
                              # (similar to the same SE we got from our low powered regression)


 TypeMS(A =5, s= se)

 # $power
 # [1] 0.1004  # this turns out to be the power we said we had with n = 193 per group
 # 
 # $typeS
 # [1] 0.04446 # if we get a 'significant' result, it will have the wrong sign about 4% of the time
 # 
 # $exaggeration
 # [1] 3.698   # if we get a 'significant' result, it will on average be exaggerated by a factor of 3.7


 #-----------------------------------
 # part 6: simulation to show errors in sign and magnitude with our data
 #-----------------------------------

 # search for posible random samples from our population that gives significant results
 pvals <- c() # initialize vector for p-values
 betas <- c() # initialize vector for estimated treatment effects
 for (i in 1:10000) {
  set.seed(i)
  sample1 <- trt[sample(nrow(trt), 193, replace = T), ] # sample from treatment group simulated above
  sample2 <- ctrl[sample(nrow(ctrl), 193, replace = T), ]  # sample from control group simulated above
  df2 <- rbind(sample1,sample2)
  fit <- lm(y~D,data = df2)
  summ <- summary(fit)
  pvals <- c(pvals,coef(summ)[8]) # get p-value from this bs sample
  betas <- c(betas,coef(summ)[2])
 }

 # combine results
 seeds <- seq(1,10000)
 results <- data.frame(seeds,betas,pvals)
 summary(results) # check
 hist(results$betas, breaks = 50)

 # find samples where we found significant differences
 examples <- results[results$pvals < .05,] # n = 990 ~ 10% which is what we are powered to detect
 head(examples)

 # flag sign and magnitude errors
 examples$sign <- ifelse(examples$betas > 0,1,0)
 examples$ratio <- abs(examples$betas)/5
 summary(examples)
 hist(examples$betas, breaks = 50)

 # the average value for 'sign' indicates that every time we got a significant result
 # the sign was wrong about 5.56% of the time, and the magnitude of the estimate 
 # on average was exaggerated by a factor of 3.6 times - very close to the results
 # we got from the TypeMS function above


 # what did the results look like with some of these samples
 head(examples[order(examples$betas),])

 set.seed(7088)
 sample1 <- trt[sample(nrow(trt), 193, replace = T), ] # sample from treatment group
 sample2 <- ctrl[sample(nrow(ctrl), 193, replace = T), ]  # sample from control group
 df2 <- rbind(sample1,sample2)
 summary(df2) # check
 summary(lm(y~D,data = df2))

 # check another 
 head(examples[order(-examples$betas),])
 set.seed(5817)
 sample1 <- trt[sample(nrow(trt), 193, replace = T), ] # sample from treatment group
 sample2 <- ctrl[sample(nrow(ctrl), 193, replace = T), ]  # sample from control group
 df2 <- rbind(sample1,sample2)
 summary(df2) # check
 summary(lm(y~D,data = df2))

 # check the largest p-value that is significant
 head(examples[order(-examples$pvals),])
 set.seed(5516)
 sample1 <- trt[sample(nrow(trt), 193, replace = T), ] # sample from treatment group
 sample2 <- ctrl[sample(nrow(ctrl), 193, replace = T), ]  # sample from control group
 df2 <- rbind(sample1,sample2)
 summary(df2) # check
 summary(lm(y~D,data = df2))
	# *-----------------------------------------------------------------
	# \| PROGRAM NAME: ex typeMS.R
	# \| DATE: 3/2/21
	# \| CREATED BY: MATT BOGARD
	# \| PROJECT FILE:
	# *----------------------------------------------------------------
	# \| PURPOSE: example estimate type M and S error using simulated data
	# *----------------------------------------------------------------

	# References:
	# Gelman, A., & Carlin, J. (2014). Beyond Power Calculations: Assessing Type S (Sign) and Type M (Magnitude) Errors.
	# Perspectives on Psychological Science, 9(6), 641-651.

	# https://www.rdocumentation.org/packages/retrodesign/versions/0.1.0/vignettes/Intro_To_retrodesign.Rmd
	# https://data.library.virginia.edu/assessing-type-s-and-type-m-errors/

	library(MASS)
	library(dplyr)

	# rm(list=ls()) # get rid of any existing data if necessary

	options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation


	# part 1: simualate 'real population'
	# part 2: conduct power analysis for simulated effect sizes - how large does our sample
	# have to be to detect the hypothesized effect with 5% significance and 95% power
	# part 3: scenario 1: analysis of a random sample under high power
	# part 4: scenario 2: analysis of a random sample under low power
	# part 5: type M and S errors for a low powered sample (using the TypeMS function)
	# part 6: simulations giving similar results to the TypeMS function

	#----------------------------------------
	# part 1: simulate some data
	#----------------------------------------

	#
	# treatent group
	#

	mu <- 95 # mean
	sd <- 75 # standard deviation

	# simulate outcome based on parameter assumptions above
	set.seed(1234567)
	y <- rnorm(10000,mu,sd)

	# check descriptive results
	summary(y)
	sd(y)
	hist(y, breaks = 50)

	# simulate ID and treatment indicator
	ID <- seq(1,10000)
	D <- rep(1,10000)

	trt <- data.frame(ID,y,D) # combine metrics

	#
	# control group
	#

	mu <- 100 # mean
	sd <- 75 # standard deviation

	# simulate outcome based on parameter assumptions above
	set.seed(1234567)
	y <- rnorm(10000,mu,sd)

	# check descriptive results
	summary(y)
	sd(y)
	hist(y, breaks = 50)

	# simulate ID and treatment indicator
	ID <- seq(10001,20000)
	D <- rep(0,10000)

	ctrl <- data.frame(ID,y,D) # combine metrics

	#
	# create population data frame
	#

	df <- rbind(trt,ctrl)

	# check descriptives
	summary(df)
	sd(df$y)
	df%>%
	group_by(D) %>%
	summarize(y = mean(y),
	n = n())
	hist(df$y, breaks = 50)

	summary(lm(y ~ D, data = df)) # regression on full model (notice impact is -5 units)

	#---------------------------------
	# part 2: power analysis
	#--------------------------------


	library(pwr)

	# making common error variance equal to population variance

	# define our effect size parameter
	# d = \|mu1 - mu2\| / sigma

	mu1 <- 95 # hypothesized treatment mean
	mu0 <- 100 # hypothesized control or baseline mean
	sigma <- 75 # assuming common variance

	effect_size <- abs(mu1 - mu0)/sigma


	pwr.t.test(n =, d = effect_size, sig.level = .05 , power = .95 ,type = c("two.sample"))


	# Two-sample t test power calculation
	#
	# n = 5849
	# d = 0.06667
	# sig.level = 0.05
	# power = 0.95
	# alternative = two.sided
	#
	# NOTE: n is number in each group


	#-------------------------------
	# part 3: high powered sample
	#------------------------------

	set.seed(1234567)
	df2 <- df %>%
	group_by(D) %>%
	sample_n(6000)

	summary(df2) # check

	summary(lm(y~D,data = df2)) # this is very close to our populatiion value and highly significant



	#-------------------------------
	# part 4: low powered sample
	#------------------------------

	pwr.t.test(n =, d = effect_size, sig.level = .05 , power = .10 ,type = c("two.sample"))


	# Two-sample t test power calculation
	#
	# n = 192.5
	# d = 0.06667
	# sig.level = 0.05
	# power = 0.1
	# alternative = two.sided
	#
	# NOTE: n is number in each group


	# with a sample size of 193 we only have a 10% chance of detecting a difference in means
	# like our hypothesized population with a 5% level of significance

	set.seed(1234567)
	df2 <- df %>%
	group_by(D) %>%
	sample_n(193)

	summary(df2) # check

	summary(lm(y~D,data = df2))


	#------------------------------
	# part 5: type M and S error analysis for a low powered sample
	#-----------------------------

	# References:
	# Gelman, A., & Carlin, J. (2014). Beyond Power Calculations: Assessing Type S (Sign) and Type M (Magnitude) Errors.
	# Perspectives on Psychological Science, 9(6), 641-651.

	# https://www.rdocumentation.org/packages/retrodesign/versions/0.1.0/vignettes/Intro_To_retrodesign.Rmd
	# https://data.library.virginia.edu/assessing-type-s-and-type-m-errors/



	# function from UVA
	TypeMS <- function(A, s, alpha=.05, df=Inf, n.sims=10000){
	z <- qt(1-alpha/2, df)
	p.hi <- 1 - pt(z-A/s, df)
	p.lo <- pt(-z-A/s, df)
	power <- p.hi + p.lo
	typeS <- p.lo/power
	estimate <- A + s*rt(n.sims,df)
	significant <- abs(estimate) > s*z
	exaggeration <- mean(abs(estimate)[significant])/A
	return(list(power=power, typeS=typeS, exaggeration=exaggeration))
	}

	# A = true effect size measured in actual units of outcome (population)
	# s = standard error of the estimate (population)
	# alpha = level of significance
	# df = degrees of freedom, infinite if normal distribution

	# get standard error
	se <- sqrt(sigma^2 * 2 / 193) # pooled SE with n = 193 from each group
	# (similar to the same SE we got from our low powered regression)


	TypeMS(A =5, s= se)

	# $power
	# [1] 0.1004 # this turns out to be the power we said we had with n = 193 per group
	#
	# $typeS
	# [1] 0.04446 # if we get a 'significant' result, it will have the wrong sign about 4% of the time
	#
	# $exaggeration
	# [1] 3.698 # if we get a 'significant' result, it will on average be exaggerated by a factor of 3.7


	#-----------------------------------
	# part 6: simulation to show errors in sign and magnitude with our data
	#-----------------------------------

	# search for posible random samples from our population that gives significant results
	pvals <- c() # initialize vector for p-values
	betas <- c() # initialize vector for estimated treatment effects
	for (i in 1:10000) {
	set.seed(i)
	sample1 <- trt[sample(nrow(trt), 193, replace = T), ] # sample from treatment group simulated above
	sample2 <- ctrl[sample(nrow(ctrl), 193, replace = T), ] # sample from control group simulated above
	df2 <- rbind(sample1,sample2)
	fit <- lm(y~D,data = df2)
	summ <- summary(fit)
	pvals <- c(pvals,coef(summ)[8]) # get p-value from this bs sample
	betas <- c(betas,coef(summ)[2])
	}

	# combine results
	seeds <- seq(1,10000)
	results <- data.frame(seeds,betas,pvals)
	summary(results) # check
	hist(results$betas, breaks = 50)

	# find samples where we found significant differences
	examples <- results[results$pvals < .05,] # n = 990 ~ 10% which is what we are powered to detect
	head(examples)

	# flag sign and magnitude errors
	examples$sign <- ifelse(examples$betas > 0,1,0)
	examples$ratio <- abs(examples$betas)/5
	summary(examples)
	hist(examples$betas, breaks = 50)

	# the average value for 'sign' indicates that every time we got a significant result
	# the sign was wrong about 5.56% of the time, and the magnitude of the estimate
	# on average was exaggerated by a factor of 3.6 times - very close to the results
	# we got from the TypeMS function above


	# what did the results look like with some of these samples
	head(examples[order(examples$betas),])

	set.seed(7088)
	sample1 <- trt[sample(nrow(trt), 193, replace = T), ] # sample from treatment group
	sample2 <- ctrl[sample(nrow(ctrl), 193, replace = T), ] # sample from control group
	df2 <- rbind(sample1,sample2)
	summary(df2) # check
	summary(lm(y~D,data = df2))

	# check another
	head(examples[order(-examples$betas),])
	set.seed(5817)
	sample1 <- trt[sample(nrow(trt), 193, replace = T), ] # sample from treatment group
	sample2 <- ctrl[sample(nrow(ctrl), 193, replace = T), ] # sample from control group
	df2 <- rbind(sample1,sample2)
	summary(df2) # check
	summary(lm(y~D,data = df2))

	# check the largest p-value that is significant
	head(examples[order(-examples$pvals),])
	set.seed(5516)
	sample1 <- trt[sample(nrow(trt), 193, replace = T), ] # sample from treatment group
	sample2 <- ctrl[sample(nrow(ctrl), 193, replace = T), ] # sample from control group
	df2 <- rbind(sample1,sample2)
	summary(df2) # check
	summary(lm(y~D,data = df2))