tvladeck · August 21, 2018 16:37
diff --git a/max-diff-power-analysis.R b/max-diff-power-analysis.R
 library(foreach)
 library(doParallel)
 library(tidyverse)
 library(magrittr)

 registerDoParallel(40)

 runs <- 100

 N_test <- c(250, 300, 350, 400, 500, 600) # sample in each group
 lift_test <- c(.2, .3, .4, .5)
 # .2 ~ raising average prob from 20% to 22.5%
 # .3 ~ to 24.4%
 # .4 ~ to 26%
 # .5 ~ to 27%

 times_immigration_seen <- 3

 power_grid <- foreach(N=N_test, 
                      .combine = cbind) %:%
  foreach(lift = lift_test, 
          .combine = c) %dopar% {
            result <- replicate(
              n = runs,
              expr = {
                
                # number of rows of data in each case
                nn <- N * times_immigration_seen
                
                #### BASE ####
                
                # this is the immigration variable in the non-treatment case
                immigration_base <- rnorm(nn)
                
                # all other coefficients summed together will be 
                # sum of normally distributed variables
                # then we have to take the sumexp of them to fit into the softmax function
                other_covars_base <- 1:nn %>% 
                  map(~ rnorm(4)) %>% 
                  map(~ exp(.x)) %>%
                  map(~ reduce(.x, sum)) %>% 
                  unlist
                
                prob_base <- exp(immigration_base)/(exp(immigration_base) + other_covars_base)
                
                #### TREATMENT ####
                
                immigration_treatment <- rnorm(nn) + lift * rnorm(nn, mean = 1)
                
                other_covars_treatment <- 1:nn %>% 
                  map(~ rnorm(4)) %>% 
                  map(~ exp(.x)) %>%
                  map(~ reduce(.x, sum)) %>% 
                  unlist
                
                prob_treatment <- exp(immigration_treatment)/(exp(immigration_treatment) + other_covars_treatment)
                
                
                #### Diagnostics ####
                mean(prob_treatment)
                mean(prob_base)
                mean(prob_treatment) / mean(prob_base)
                
                #### concatenating the two cases ####
                
                prob <- c(prob_base, prob_treatment)
                
                # immigration_utils <- c(immigration_base, immigration_treatment)
                
                # this is our indicator variable
                xtest <- c(rep(0, nn), rep(1, nn))
                
                # this gets us our dichotomous outcome variables
                runis <- runif(nn*2,0,1)
                ytest <- ifelse(runis < prob,1,0)
                
                # build the model
                model <- glm(ytest ~ xtest, family = "binomial")
                
                # extract relevant coefficient and see its p-value
                summary(model)$coefficients[2,4] < .05
              }
            )
            
            power = sum(result) / runs
          }

 colnames(power_grid) <- N_test
 rownames(power_grid) <- c(
  "20 to 22%",
  "to 24.5%",
  "to 26%",
  "to 27%"
 )
	library(foreach)
	library(doParallel)
	library(tidyverse)
	library(magrittr)

	registerDoParallel(40)

	runs <- 100

	N_test <- c(250, 300, 350, 400, 500, 600) # sample in each group
	lift_test <- c(.2, .3, .4, .5)
	# .2 ~ raising average prob from 20% to 22.5%
	# .3 ~ to 24.4%
	# .4 ~ to 26%
	# .5 ~ to 27%

	times_immigration_seen <- 3

	power_grid <- foreach(N=N_test,
	.combine = cbind) %:%
	foreach(lift = lift_test,
	.combine = c) %dopar% {
	result <- replicate(
	n = runs,
	expr = {

	# number of rows of data in each case
	nn <- N * times_immigration_seen

	#### BASE ####

	# this is the immigration variable in the non-treatment case
	immigration_base <- rnorm(nn)

	# all other coefficients summed together will be
	# sum of normally distributed variables
	# then we have to take the sumexp of them to fit into the softmax function
	other_covars_base <- 1:nn %>%
	map(~ rnorm(4)) %>%
	map(~ exp(.x)) %>%
	map(~ reduce(.x, sum)) %>%
	unlist

	prob_base <- exp(immigration_base)/(exp(immigration_base) + other_covars_base)

	#### TREATMENT ####

	immigration_treatment <- rnorm(nn) + lift * rnorm(nn, mean = 1)

	other_covars_treatment <- 1:nn %>%
	map(~ rnorm(4)) %>%
	map(~ exp(.x)) %>%
	map(~ reduce(.x, sum)) %>%
	unlist

	prob_treatment <- exp(immigration_treatment)/(exp(immigration_treatment) + other_covars_treatment)


	#### Diagnostics ####
	mean(prob_treatment)
	mean(prob_base)
	mean(prob_treatment) / mean(prob_base)

	#### concatenating the two cases ####

	prob <- c(prob_base, prob_treatment)

	# immigration_utils <- c(immigration_base, immigration_treatment)

	# this is our indicator variable
	xtest <- c(rep(0, nn), rep(1, nn))

	# this gets us our dichotomous outcome variables
	runis <- runif(nn*2,0,1)
	ytest <- ifelse(runis < prob,1,0)

	# build the model
	model <- glm(ytest ~ xtest, family = "binomial")

	# extract relevant coefficient and see its p-value
	summary(model)$coefficients[2,4] < .05
	}
	)

	power = sum(result) / runs
	}

	colnames(power_grid) <- N_test
	rownames(power_grid) <- c(
	"20 to 22%",
	"to 24.5%",
	"to 26%",
	"to 27%"
	)