Created
June 7, 2022 00:19
-
-
Save BioSciEconomist/5b50891f03771fd40eab6fb99d84de37 to your computer and use it in GitHub Desktop.
Power Analysis Template for Binary Outcomes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# *-----------------------------------------------------------------
# | PROGRAM NAME: Power Analysis Binary.py
# | DATE: 09/30/21
# | CREATED BY: MATT BOGARD
# | PROJECT FILE:
# *----------------------------------------------------------------
# | PURPOSE: power calculation and sample size scenarios for binary outcomes
# *----------------------------------------------------------------
import pandas as pd
import numpy as np
import statsmodels
import statsmodels.stats.power as smp
import statsmodels.stats.api as sms
from statsmodels.stats.proportion import proportion_effectsize
#-------------------------------------
# basic power calculation
#-------------------------------------

# This is the full-compliance (treatment-on-the-treated, TOT) scenario: as if
# everyone that gets the email takes action regardless of clickthrough.
p1 = .25  # baseline outcome proportion (control group)
p2 = .30  # observed proportion the treatment group must reach to meet our definition of success

# Cohen's h effect size for two proportions (arcsine transformation)
h = sms.proportion_effectsize(p2, p1)
print(h)

pwr = .80  # desired level of power (1 - P(type II error))
fp = .10   # alpha: acceptable 'false positive' (type I error) rate

# Calculate the required sample size per group.
# While R has a power-calculation package specific to proportions, python
# relies on the z-test equivalent:
# https://www.statsmodels.org/devel/stats.html
# https://www.statsmodels.org/devel/generated/statsmodels.stats.power.NormalIndPower.html#statsmodels.stats.power.NormalIndPower
#
# NormalIndPower.solve_power(effect_size=None, nobs1=None, alpha=None,
#                            power=None, ratio=1.0, alternative='two-sided')
# Solves for any one parameter of the power of a two-sample z-test: exactly
# one argument must be None (that one is solved for), all others numeric.
#   effect_size: standardized effect size (Cohen's h here)
#   nobs1:       observations in sample 1; nobs2 = nobs1 * ratio (ratio=0
#                gives the power of a one-sample test)
#   alpha:       significance level in (0,1), the type I error probability
#   power:       in (0,1), one minus the type II error probability
#   ratio:       nobs2 / nobs1 (default 1; set to None to solve for it)
#   alternative: 'two-sided' (default), 'larger', or 'smaller'
result = smp.NormalIndPower().solve_power(effect_size=h, nobs1=None, power=pwr,
                                          alpha=fp, alternative='two-sided',
                                          ratio=1)
print('Sample Size Per Group: %.3f' % result)
#----------------------------------
# adjustments for non-compliance
#--------------------------------

# Many recipients will never see or open the email, so only a few people are
# actually 'nudged' and drive the result: the observed test-vs-control
# difference is diluted relative to the impact on the engaged (e.g. a 3%
# impact on those nudged shows up as a very small diluted effect in the
# overall randomized comparison). We therefore inflate the sample size for
# the expected engagement rate.
# References:
#   Friedman LM, Furberg CD, DeMets DL. Fundamentals of Clinical Trials.
#     3rd ed. New York, NY: Springer; 1998. Adjusting sample size to
#     compensate for nonadherence; pp. 107-108.
#   "Using Randomization in Development Economics Research: A Toolkit",
#     Duflo, Glennerster, Kremer. NBER Technical Working Paper No. 333, 2006.

N = 985    # required sample size per group with full engagement
Eng = .03  # expected level of engagement (in this case click-through rate)
Ro = (1 - Eng)  # expected proportion of dropouts / subjects that fail to engage
Ri = 0     # expected proportion of 'drop ins': controls that somehow get treatment

# Friedman et al. adjustment: N_adj = N / (1 - Ro - Ri)^2
N_ITT = N / ((1 - Ro - Ri)**2)
print(N_ITT)  # sample size given non-compliance

# With one-sided non-compliance this equals N * 1/k^2 as implied in the Duflo
# et al. guide to field experiments, where k = level of engagement.
# BUG FIX: the original left this expression bare, so its value was silently
# discarded when run as a script; print it so the cross-check is visible.
print(N / Eng**2)
#
# sample size scenarios
#

p1 = .25  # baseline (control) outcome proportion
# hypothetical range of treatment-group proportions
# NOTE(review): np.arange with a float step can be endpoint-unstable; this is
# expected to yield 0.26, 0.27, ..., 0.35 — verify before relying on the count.
p2 = np.arange(.26, .36, 0.01)
pwr = .80  # desired level of power
fp = .10   # desired false positive rate (alpha)
# effect-size helper: maps a (control, treatment) proportion pair to Cohen's h
def effect_sizes(p1, p2):
    """Return Cohen's h for two proportions via the arcsine transformation."""
    phi_treatment = np.arcsin(np.sqrt(p2))
    phi_control = np.arcsin(np.sqrt(p1))
    return 2 * (phi_treatment - phi_control)
# sample-size helper: required n per group for a given standardized effect
# size 'd', using the module-level power (pwr) and alpha (fp) settings
def sample_size(d):
    """Solve the two-sample z-test power equation for nobs1 (equal groups)."""
    solver = smp.NormalIndPower()
    return solver.solve_power(d, nobs1=None, power=pwr, alpha=fp,
                              alternative='two-sided', ratio=1)

# effect sizes for each baseline/treatment scenario, then the per-group
# sample size each scenario requires
d = [effect_sizes(p1, treat) for treat in p2]
nSizes = [sample_size(es) for es in d]
# adjustment for non-compliance / engagement
def adj_sizes(eng, Ri, n):
    """Inflate a required sample size for one-sided non-compliance.

    eng: level (proportion) of engagement in the treatment group
    Ri:  proportion of the control group that gets treatment (or similar)
    n:   unadjusted required sample size per group
    """
    Ro = (1 - eng)  # proportion of dropouts / non-engagers in the treatment group
    return n / ((1 - Ro - Ri)**2)
# Adjust every scenario for the expected engagement level. CONSISTENCY FIX:
# the engagement rate is defined once here so the adjustment (originally a
# hard-coded .03 flagged with a "check manually entered" warning) and the
# reporting column below cannot drift apart.
engagement = .03  # expected engagement (click-through) rate
nSizesAdj = [adj_sizes(engagement, 0, i) for i in nSizes]

# assemble the scenario table: one row per hypothetical treatment proportion
df = pd.DataFrame(columns=['Control', 'Treatment', 'Difference', 'EffectSize',
                           'RequiredNPerGroup', 'Engagement', 'AdjustedN'])
df['Control'] = [p1] * len(d)
df['Treatment'] = p2
df['Difference'] = df['Treatment'] - df['Control']
df['EffectSize'] = np.round(d, 2)
df['RequiredNPerGroup'] = np.round(nSizes)
df['Engagement'] = engagement
df['AdjustedN'] = np.round(nSizesAdj)

df.head(len(df))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment