BioSciEconomist · November 14, 2021 22:23
diff --git a/Power Analysis for ITT.py b/Power Analysis for ITT.py
 # *-----------------------------------------------------------------
 # | PROGRAM NAME: Power Analysis for ITT.py
 # | DATE: 4/14/20
 # | CREATED BY: MATT BOGARD 
 # | PROJECT FILE:      
 # *----------------------------------------------------------------
 # | PURPOSE: examples of power calculations considering ITT analysis 
 # *----------------------------------------------------------------

 # reference: https://blogs.worldbank.org/impactevaluations/power-calculations-101-dealing-with-incomplete-take-up

 # DISCUSSION PAPER
 # No. 6059
 # USING RANDOMIZATION IN
 # DEVELOPMENT ECONOMICS
 # RESEARCH: A TOOLKIT
 # Esther Duflo, Rachel Glennerster
 # and Michael Kremer
 # DEVELOPMENT ECONOMICS
 # CENTER FOR ECONOMIC POLICY RESEARCH

 import pandas as pd
 import numpy as np


 pd.set_option('display.float_format', '{:.2f}'.format) # suppress sci notation

 import statsmodels
 from statsmodels.stats import power
 import numpy as np

 #
 # example 1: full engagement
 #


 sigmaA = 1000 # standard deviation for group A
 sigmaB = 1000 # standard deviation for group B (for now assume common variance between groups)
 Q0 = .5 # proportion in the control group (.5 implies equal sized groups)

 sigma_pooled = np.sqrt((np.square(sigmaA)*(1/Q0 -1 ) + np.square(sigmaB)*(1/(1-Q0) -1 ) )/((1/Q0 -1 ) + (1/(1-Q0) -1 ))) # pooled standard deviation

 eng = 1 # expected engagement

 diff = 100*eng # this is the expected difference in means we are testing considering engagement 
                # 
 d = abs(diff)/sigma_pooled

 print(sigma_pooled)

 print(diff,d)


 statsmodels.stats.power.tt_ind_solve_power(effect_size= d, nobs1=None, alpha=.05, power= .9, ratio=1.0, alternative='two-sided')


 #
 # example 2: 50% engagement
 #

 # If p = 0.5 (e.g. 0% of the control group take the intervention and 50% of the treatment
 # group do), the sample size needed is 1/(.5^2) = 4 times as large as it would be 
 # with 100% compliance

 # Now suppose instead that you expect only half of those you invite to training to show up; 
 # and that the training then still leads to a 10% increase for those you actually train. 
 # Then the mean profits for the treated group will be 1000 + 0.5*100 = 1050, and

 sigmaA = 1000 # standard deviation for group A
 sigmaB = 1000 # standard deviation for group B (for now assume common variance between groups)
 Q0 = .5 # proportion in the control group (.5 implies equal sized groups)

 sigma_pooled = np.sqrt((np.square(sigmaA)*(1/Q0 -1 ) + np.square(sigmaB)*(1/(1-Q0) -1 ) )/((1/Q0 -1 ) + (1/(1-Q0) -1 ))) # pooled standard deviation

 eng = .5 # expected engagement

 diff = 100*eng # this is the expected difference in means we are testing considering engagement 
                # 
 d = abs(diff)/sigma_pooled

 print(sigma_pooled)

 print(diff,d)

 statsmodels.stats.power.tt_ind_solve_power(effect_size= d, nobs1=None, alpha=.05, power= .9, ratio=1.0, alternative='two-sided')

 #
 # engagement scenarios
 #

 diff = 100 # expected difference in means
 sigmaA = 1000 # standard deviation for group A
 sigmaB = 1000 # standard deviation for group B (for now assume common variance between groups)
 sigma_pooled = np.sqrt((np.square(sigmaA)*(1/Q0 -1 ) + np.square(sigmaB)*(1/(1-Q0) -1 ) )/((1/Q0 -1 ) + (1/(1-Q0) -1 ))) # pooled standard deviation
 Q0 = .5 # proportion in the control group (.5 implies equal sized groups)
 eng = [.5,.25,.10]

 # define effect size function
 def effect_sizes(eng,diff):
    return abs(diff*eng)/sigma_pooled

 d = [effect_sizes(i,diff) for i in eng] # calculate range of effect sizes given engagement

 # define sample size function
 def sample_size(d):
    return statsmodels.stats.power.tt_ind_solve_power(effect_size= d, nobs1=None, alpha=.05, power= .9, ratio=1.0, alternative='two-sided')

 nSizes = [sample_size(i) for i in d] 

 print(nSizes)
	# *-----------------------------------------------------------------
	# \| PROGRAM NAME: Power Analysis for ITT.py
	# \| DATE: 4/14/20
	# \| CREATED BY: MATT BOGARD
	# \| PROJECT FILE:
	# *----------------------------------------------------------------
	# \| PURPOSE: examples of power calculations considering ITT analysis
	# *----------------------------------------------------------------

	# reference: https://blogs.worldbank.org/impactevaluations/power-calculations-101-dealing-with-incomplete-take-up

	# DISCUSSION PAPER
	# No. 6059
	# USING RANDOMIZATION IN
	# DEVELOPMENT ECONOMICS
	# RESEARCH: A TOOLKIT
	# Esther Duflo, Rachel Glennerster
	# and Michael Kremer
	# DEVELOPMENT ECONOMICS
	# CENTER FOR ECONOMIC POLICY RESEARCH

	import pandas as pd
	import numpy as np


	pd.set_option('display.float_format', '{:.2f}'.format) # suppress sci notation

	import statsmodels
	from statsmodels.stats import power
	import numpy as np

	#
	# example 1: full engagement
	#


	sigmaA = 1000 # standard deviation for group A
	sigmaB = 1000 # standard deviation for group B (for now assume common variance between groups)
	Q0 = .5 # proportion in the control group (.5 implies equal sized groups)

	sigma_pooled = np.sqrt((np.square(sigmaA)(1/Q0 -1 ) + np.square(sigmaB)(1/(1-Q0) -1 ) )/((1/Q0 -1 ) + (1/(1-Q0) -1 ))) # pooled standard deviation

	eng = 1 # expected engagement

	diff = 100*eng # this is the expected difference in means we are testing considering engagement
	#
	d = abs(diff)/sigma_pooled

	print(sigma_pooled)

	print(diff,d)


	statsmodels.stats.power.tt_ind_solve_power(effect_size= d, nobs1=None, alpha=.05, power= .9, ratio=1.0, alternative='two-sided')


	#
	# example 2: 50% engagement
	#

	# If p = 0.5 (e.g. 0% of the control group take the intervention and 50% of the treatment
	# group do), the sample size needed is 1/(.5^2) = 4 times as large as it would be
	# with 100% compliance

	# Now suppose instead that you expect only half of those you invite to training to show up;
	# and that the training then still leads to a 10% increase for those you actually train.
	# Then the mean profits for the treated group will be 1000 + 0.5*100 = 1050, and

	sigmaA = 1000 # standard deviation for group A
	sigmaB = 1000 # standard deviation for group B (for now assume common variance between groups)
	Q0 = .5 # proportion in the control group (.5 implies equal sized groups)

	sigma_pooled = np.sqrt((np.square(sigmaA)(1/Q0 -1 ) + np.square(sigmaB)(1/(1-Q0) -1 ) )/((1/Q0 -1 ) + (1/(1-Q0) -1 ))) # pooled standard deviation

	eng = .5 # expected engagement

	diff = 100*eng # this is the expected difference in means we are testing considering engagement
	#
	d = abs(diff)/sigma_pooled

	print(sigma_pooled)

	print(diff,d)

	statsmodels.stats.power.tt_ind_solve_power(effect_size= d, nobs1=None, alpha=.05, power= .9, ratio=1.0, alternative='two-sided')

	#
	# engagement scenarios
	#

	diff = 100 # expected difference in means
	sigmaA = 1000 # standard deviation for group A
	sigmaB = 1000 # standard deviation for group B (for now assume common variance between groups)
	sigma_pooled = np.sqrt((np.square(sigmaA)(1/Q0 -1 ) + np.square(sigmaB)(1/(1-Q0) -1 ) )/((1/Q0 -1 ) + (1/(1-Q0) -1 ))) # pooled standard deviation
	Q0 = .5 # proportion in the control group (.5 implies equal sized groups)
	eng = [.5,.25,.10]

	# define effect size function
	def effect_sizes(eng,diff):
	return abs(diff*eng)/sigma_pooled

	d = [effect_sizes(i,diff) for i in eng] # calculate range of effect sizes given engagement

	# define sample size function
	def sample_size(d):
	return statsmodels.stats.power.tt_ind_solve_power(effect_size= d, nobs1=None, alpha=.05, power= .9, ratio=1.0, alternative='two-sided')

	nSizes = [sample_size(i) for i in d]

	print(nSizes)