Dpananos · August 22, 2024 14:41
diff --git a/simulation.R b/simulation.R
 library(WeightIt)
 library(tidyverse)
 library(furrr)

 plan(multisession, workers = 10)

 sim <- function(n, l1, l2, l3, l4, ate, prop_br, ps_model_formula, simnum, ...) {
  

  withr::with_seed(simnum, {
    
    x1 <- rnorm(n)
    x2 <- rnorm(n)
    x3 <- rnorm(n)
    ps <- plogis( qlogis(prop_br) + l1*x1 + l2*x2)
    a <- rbinom(n, 1, ps)
    
    mu <- ate*a + l3*x2 + l4*x3
    y <- mu + rnorm(n, 0, 1)
  
  d <- tibble(x1, x2, x3, a, ps, y)
  
  # propensity model
  w <- weightit(formula=as.formula(ps_model_formula), data=d, estimand = 'ATE')
  
  lm_weightit(y~a, data=d, weightit = w) %>% 
    coef %>% 
    pluck('a') -> est_ate

  est_marginal <- lm(y~a, data=d) %>% 
                   coef %>% 
                   pluck('a')
  

  })

  tibble(simnum, n, l1, l2, l3, l4, ate, prop_br, ps_model_formula, est_ate, est_marginal)
 }

 grid <- crossing(n = c(2000),
         l1 = seq(0, 3.0, 0.1),
         l2 = 0.5,
         l3 = 0.5, 
         l4 = 0.25,
         ate = 0.5,
         prop_br = c(0.1),
         ps_model_formula = c('a ~ x2', 'a ~ x2 + x1', 'a ~ x1 + x2 + x3', 'a ~ x2 + x3'),
         simnum = 1:2000)


 future_pmap_dfr(grid, sim, .progress = T) %>% 
      arrow::write_parquet('simulations.parquet')





 arrow::read_parquet('simulations.parquet') %>% 
  group_by(l1, ps_model_formula) %>% 
  summarise(
    bias = mean(est_ate - ate) / ate,
    stderr = sd(est_ate-ate) / sqrt(n())
  ) %>% 
  mutate(
    contains_instrument = grepl('x1', ps_model_formula),
    contains_predictor = grepl('x3', ps_model_formula)
  ) %>% 
  ggplot(aes(l1, bias, color=contains_instrument)) + 
  geom_line() + 
  geom_ribbon(aes(l1, bias, ymax = bias + 2*stderr, ymin = bias - 2*stderr, fill=contains_instrument), alpha = 0.5) + 
  facet_grid(~contains_predictor, labeller = label_both) + 
  
  theme(aspect.ratio = 1/1.61) + 
  labs(x='Strength of Instrument on Logit Scale', 
       y = 'Bias Between Estimated and True ATE')


 arrow::read_parquet('simulations.parquet') %>% 
  group_by(l1, ps_model_formula) %>% 
  summarise(
    bias = mean(est_ate - ate) / ate,
    sttev = sd(est_ate-ate)
  ) %>% 
  mutate(
    contains_instrument = grepl('x1', ps_model_formula),
    contains_predictor = grepl('x3', ps_model_formula)
  )  %>% 
  ggplot(aes(l1, sttev, color=contains_instrument)) + 
  geom_line() + 
  facet_grid(~contains_predictor, labeller = label_both) + 
  theme(aspect.ratio = 1/1.61) + 
  labs(x='Strength of Instrument on Logit Scale', 
       y = 'Standard Deviation of Estimate')
	library(WeightIt)
	library(tidyverse)
	library(furrr)

	plan(multisession, workers = 10)

	sim <- function(n, l1, l2, l3, l4, ate, prop_br, ps_model_formula, simnum, ...) {


	withr::with_seed(simnum, {

	x1 <- rnorm(n)
	x2 <- rnorm(n)
	x3 <- rnorm(n)
	ps <- plogis( qlogis(prop_br) + l1x1 + l2x2)
	a <- rbinom(n, 1, ps)

	mu <- atea + l3x2 + l4*x3
	y <- mu + rnorm(n, 0, 1)

	d <- tibble(x1, x2, x3, a, ps, y)

	# propensity model
	w <- weightit(formula=as.formula(ps_model_formula), data=d, estimand = 'ATE')

	lm_weightit(y~a, data=d, weightit = w) %>%
	coef %>%
	pluck('a') -> est_ate

	est_marginal <- lm(y~a, data=d) %>%
	coef %>%
	pluck('a')


	})

	tibble(simnum, n, l1, l2, l3, l4, ate, prop_br, ps_model_formula, est_ate, est_marginal)
	}

	grid <- crossing(n = c(2000),
	l1 = seq(0, 3.0, 0.1),
	l2 = 0.5,
	l3 = 0.5,
	l4 = 0.25,
	ate = 0.5,
	prop_br = c(0.1),
	ps_model_formula = c('a ~ x2', 'a ~ x2 + x1', 'a ~ x1 + x2 + x3', 'a ~ x2 + x3'),
	simnum = 1:2000)


	future_pmap_dfr(grid, sim, .progress = T) %>%
	arrow::write_parquet('simulations.parquet')





	arrow::read_parquet('simulations.parquet') %>%
	group_by(l1, ps_model_formula) %>%
	summarise(
	bias = mean(est_ate - ate) / ate,
	stderr = sd(est_ate-ate) / sqrt(n())
	) %>%
	mutate(
	contains_instrument = grepl('x1', ps_model_formula),
	contains_predictor = grepl('x3', ps_model_formula)
	) %>%
	ggplot(aes(l1, bias, color=contains_instrument)) +
	geom_line() +
	geom_ribbon(aes(l1, bias, ymax = bias + 2stderr, ymin = bias - 2stderr, fill=contains_instrument), alpha = 0.5) +
	facet_grid(~contains_predictor, labeller = label_both) +

	theme(aspect.ratio = 1/1.61) +
	labs(x='Strength of Instrument on Logit Scale',
	y = 'Bias Between Estimated and True ATE')


	arrow::read_parquet('simulations.parquet') %>%
	group_by(l1, ps_model_formula) %>%
	summarise(
	bias = mean(est_ate - ate) / ate,
	sttev = sd(est_ate-ate)
	) %>%
	mutate(
	contains_instrument = grepl('x1', ps_model_formula),
	contains_predictor = grepl('x3', ps_model_formula)
	) %>%
	ggplot(aes(l1, sttev, color=contains_instrument)) +
	geom_line() +
	facet_grid(~contains_predictor, labeller = label_both) +
	theme(aspect.ratio = 1/1.61) +
	labs(x='Strength of Instrument on Logit Scale',
	y = 'Standard Deviation of Estimate')