Skip to content

Instantly share code, notes, and snippets.

@Dpananos
Created August 22, 2024 14:41
Show Gist options
  • Save Dpananos/85f53e90377347b693897f5a76cdc90a to your computer and use it in GitHub Desktop.
Save Dpananos/85f53e90377347b693897f5a76cdc90a to your computer and use it in GitHub Desktop.
Simulations of including instruments in a propensity score model
library(WeightIt)
library(tidyverse)
library(furrr)
plan(multisession, workers = 10)
sim <- function(n, l1, l2, l3, l4, ate, prop_br, ps_model_formula, simnum, ...) {
withr::with_seed(simnum, {
x1 <- rnorm(n)
x2 <- rnorm(n)
x3 <- rnorm(n)
ps <- plogis( qlogis(prop_br) + l1*x1 + l2*x2)
a <- rbinom(n, 1, ps)
mu <- ate*a + l3*x2 + l4*x3
y <- mu + rnorm(n, 0, 1)
d <- tibble(x1, x2, x3, a, ps, y)
# propensity model
w <- weightit(formula=as.formula(ps_model_formula), data=d, estimand = 'ATE')
lm_weightit(y~a, data=d, weightit = w) %>%
coef %>%
pluck('a') -> est_ate
est_marginal <- lm(y~a, data=d) %>%
coef %>%
pluck('a')
})
tibble(simnum, n, l1, l2, l3, l4, ate, prop_br, ps_model_formula, est_ate, est_marginal)
}
grid <- crossing(n = c(2000),
l1 = seq(0, 3.0, 0.1),
l2 = 0.5,
l3 = 0.5,
l4 = 0.25,
ate = 0.5,
prop_br = c(0.1),
ps_model_formula = c('a ~ x2', 'a ~ x2 + x1', 'a ~ x1 + x2 + x3', 'a ~ x2 + x3'),
simnum = 1:2000)
future_pmap_dfr(grid, sim, .progress = T) %>%
arrow::write_parquet('simulations.parquet')
arrow::read_parquet('simulations.parquet') %>%
group_by(l1, ps_model_formula) %>%
summarise(
bias = mean(est_ate - ate) / ate,
stderr = sd(est_ate-ate) / sqrt(n())
) %>%
mutate(
contains_instrument = grepl('x1', ps_model_formula),
contains_predictor = grepl('x3', ps_model_formula)
) %>%
ggplot(aes(l1, bias, color=contains_instrument)) +
geom_line() +
geom_ribbon(aes(l1, bias, ymax = bias + 2*stderr, ymin = bias - 2*stderr, fill=contains_instrument), alpha = 0.5) +
facet_grid(~contains_predictor, labeller = label_both) +
theme(aspect.ratio = 1/1.61) +
labs(x='Strength of Instrument on Logit Scale',
y = 'Bias Between Estimated and True ATE')
arrow::read_parquet('simulations.parquet') %>%
group_by(l1, ps_model_formula) %>%
summarise(
bias = mean(est_ate - ate) / ate,
sttev = sd(est_ate-ate)
) %>%
mutate(
contains_instrument = grepl('x1', ps_model_formula),
contains_predictor = grepl('x3', ps_model_formula)
) %>%
ggplot(aes(l1, sttev, color=contains_instrument)) +
geom_line() +
facet_grid(~contains_predictor, labeller = label_both) +
theme(aspect.ratio = 1/1.61) +
labs(x='Strength of Instrument on Logit Scale',
y = 'Standard Deviation of Estimate')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment