Created
March 20, 2016 22:26
-
-
Save khakieconomics/9dd785c241a1ee0b6f32 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Some fake data | |
library(dplyr); library(rstan) | |
# Write out the data generation with known parameters | |
# Set the number of individuals | |
n_ind <- 50 | |
# Set up the observations | |
my_tidy_data <- expand.grid(individual = 1:n_ind, outcome_measure_type = 1:10) %>% | |
as_data_frame() %>% | |
arrange(individual, outcome_measure_type) | |
# Which individuals get treated? | |
individual_treatment <- data_frame(individual = 1:n_ind, treatment = sample(c(0, 1), n_ind, replace = T)) | |
# individual effects (0 mean) | |
# scale | |
tau1 <- c(0.5, 0.25) | |
# correlation | |
omega1 <- matrix(c(1, -0.2, -0.2, 1), 2, 2) | |
# covariance matrix | |
S1 <- diag(tau1) %*% omega1 %*% diag(tau1) | |
# Put together the individual coefficients | |
individual_treatment <- individual_treatment %>% | |
bind_cols(MASS::mvrnorm(n_ind, mu = c(0, 0), Sigma = S1) %>% as.data.frame) %>% | |
rename(ind_intercept = V1, | |
ind_TE = V2) | |
# Now let's put together the outcome type effects in the same way | |
set.seed(4321) | |
outcome_measure_type_effects <- data_frame(outcome_measure_type = 1:10) | |
# individual effects (0 mean) | |
# scale | |
tau2 <- c(0.5, 1) | |
# correlation | |
omega2 <- matrix(c(1, -0.5, -0.5, 1), 2, 2) | |
# covariance matrix | |
S2 <- diag(tau2) %*% omega2 %*% diag(tau2) | |
# Put together the outcome | |
# We'll set the true treatment effects | |
outcome_measure_type_effects <- outcome_measure_type_effects %>% bind_cols(MASS::mvrnorm(10, mu = c(1,1), Sigma = S2) %>% as.data.frame) %>% | |
rename(outcome_intercept = V1, | |
outcome_TE = V2) | |
my_tidy_data <- left_join(my_tidy_data, individual_treatment) %>% | |
left_join(outcome_measure_type_effects) %>% | |
mutate(outcome_score = ind_intercept + outcome_intercept + (ind_TE + outcome_TE)*treatment + rnorm(n(), 0, 10)) | |
# We know the actual treatment effect is 1, but we get a very noisy estimate. | |
summary(lm(outcome_score~ treatment, data = my_tidy_data)) | |
# What's more, if we run OLS on each group, we're going to get some crazy estimates. | |
library(broom) | |
est_tes_sep_models <- my_tidy_data %>% group_by(outcome_measure_type) %>% | |
do(tidy(lm(outcome_score~ treatment, data = .)))%>% | |
ungroup %>% | |
filter(term == "treatment") | |
est_tes_sep_models | |
# So why don't we estimate a multilevel model in Stan? ==== | |
model_code <- " | |
data { | |
int N; // number of observations | |
int I; // number of individuals | |
int J; // number of outcome types | |
int Ind[N]; // individual identifiers | |
int outcome_measure[N]; // vector of outcome measures (pain, anxiety, etc.) | |
int treatment[N]; // binary indicating treatment | |
vector[N] outcome_score; | |
} | |
parameters { | |
// Regression coefficients | |
vector[2] mu_bj; // outcome intercept and slope | |
// Covariance matrices | |
matrix[I, 2] z1; | |
matrix[J, 2] z2; | |
cholesky_factor_corr[2] L1; | |
cholesky_factor_corr[2] L2; | |
vector<lower = 0>[2] tau1; // Scale of individual shocks | |
vector<lower = 0>[2] tau2; // Scale of outcome type shocks | |
// regression sd | |
real sigma; | |
} | |
transformed parameters { | |
matrix[I, 2] bi; // individual effects | |
matrix[J, 2] bj; // outcome effects | |
bi <- (diag_pre_multiply(tau1, L1)*z1')'; | |
bj <- rep_matrix(to_row_vector(mu_bj), J) + (diag_pre_multiply(tau2, L2)*z2')'; | |
} | |
model { | |
// priors | |
tau1 ~ cauchy(0, 1); | |
tau2 ~ cauchy(0, 1); | |
sigma ~ cauchy(0, 3); | |
L1 ~ lkj_corr_cholesky(2); | |
L2 ~ lkj_corr_cholesky(2); | |
mu_bj ~ normal(0, 5); | |
for(i in 1:I) { | |
z1[i] ~ normal(0, 1); | |
} | |
for(j in 1:J) { | |
z2[j] ~ normal(0, 1); | |
} | |
for(n in 1:N){ | |
outcome_score[n] ~ normal(bi[Ind[n],1] + bj[outcome_measure[n], 1] + (bi[Ind[n], 2] + bj[outcome_measure[n], 2])* treatment[n], sigma); | |
} | |
} | |
" | |
data_list <- list(N = nrow(my_tidy_data), I = n_ind, J = 10, Ind = my_tidy_data$individual, outcome_measure = my_tidy_data$outcome_measure_type, treatment = my_tidy_data$treatment, outcome_score = my_tidy_data$outcome_score) | |
fitted_model2 <- stan(model_code = model_code, data = data_list, cores = 4, iter = 600) | |
# have a look at estimates --------------------------------------------------------------------- | |
# Did it estimate well? | |
shinystan::launch_shinystan(fitted_model2) | |
# Is our treatment effect estimated? | |
library(reshape2); library(ggplot2) | |
extract(fitted_model2, pars = c("mu_bj", "tau1", "tau2"), permuted = F) %>% plyr::adply(2) %>% select(-chains)%>% melt() %>% | |
ggplot(aes(x = value)) + | |
facet_grid(variable~.) + | |
geom_density(fill = "orange", alpha = 1) | |
# Final plot of estimates across each outcome type, actuals, and poorly-estimated effects using OLS on subsets of the data | |
extract(fitted_model2, pars = c("bj"), permuted = F) %>% plyr::adply(2) %>% select(-chains)%>% melt() %>% | |
filter(grepl("2]", variable)) %>% | |
group_by(variable) %>% | |
summarise(median = median(value), | |
lower = quantile(value, 0.025), | |
upper = quantile(value, 0.975)) %>% | |
ggplot(aes(x = variable)) + | |
geom_linerange(aes(ymin = lower, ymax = upper)) + | |
geom_point(aes(y = median)) + | |
geom_point(aes(y = outcome_measure_type_effects$outcome_TE), colour = "red") + | |
geom_point(aes(y = est_tes_sep_models$estimate), colour = "blue") + | |
ggtitle("Estimated treatment effects across different outcomes\nblue = effect from separate models\nred = true effect") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment