Created
November 7, 2021 16:09
-
-
Save JimGrange/e6a7db041940be5cf383776087cef7c9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(rstan) | |
rstan_options(auto_write = TRUE) | |
#--- declare the data | |
# n_c = total number of people who drink caffeine | |
# n_nc = total number of people who do not drink caffeine | |
# p_c = observed proportion of people who drink caffeine that have a favourite mug | |
# p_nc = observed proportion of people who drink caffeine that DO NOT have a favourite mug | |
stan_data <- list( | |
n_c = 1258, | |
n_nc = 280, | |
p_c = 966, | |
p_nc = 168 | |
) | |
#--- declare the model code in Stan | |
stan_code <- " | |
data{ | |
int n_c; // number of people who drink caffeine | |
int n_nc; // number of people who don't drink caffeine | |
int p_c; // proportion of caffeine drinkers with favourite mug | |
int p_nc; // proportion of caffeine drinkers with no favourite mug | |
} | |
parameters{ | |
real theta_c; // population proportion of caffeine w/fave mug | |
real theta_nc; // population proportion of non-caffeine w/fave mug | |
} | |
model{ // read the model section from the bottom--up! | |
// priors | |
theta_c ~ beta(5, 5); // set the prior for theta_c | |
theta_nc ~ beta(5, 5); // set the prior for theta_nc | |
// likelihood | |
p_c ~ binomial(n_c, theta_c); // our generating model for caffeine | |
p_nc ~ binomial(n_nc, theta_nc); // our generating model for no caffeine | |
} | |
generated quantities { | |
real theta_delta; // difference in theta values | |
theta_delta = theta_c - theta_nc; | |
} | |
" | |
#--- fit the model | |
fit <- stan(model_code = stan_code, | |
data = stan_data, | |
iter = 5000, | |
warmup = 1000, | |
chains = 4, | |
cores = 4, | |
seed = 42) | |
#--- extract & plot the posterior predictions | |
posterior <- as.matrix(fit) | |
posterior <- tibble( | |
theta_c = posterior[, 1], | |
theta_nc = posterior[, 2], | |
theta_delta = posterior[, 3] | |
) %>% | |
select(theta_c, theta_nc, theta_delta) %>% | |
pivot_longer(cols = theta_c:theta_delta, | |
names_to = "parameter") | |
main_plot <- posterior %>% | |
filter(parameter %in% c("theta_c", "theta_nc")) %>% | |
ggplot(aes(x = value, group = parameter)) + | |
geom_density(aes(fill = parameter, | |
colour = parameter), | |
alpha = 0.6) + | |
geom_hline(yintercept = 0) + | |
theme_bw() + | |
labs(x = "theta", y = "Density", | |
title = "Estimates of proportion with favourite mug") + | |
theme(legend.position = "none") + | |
theme(text = element_text(size = 20)) + | |
scale_y_continuous(limits = c(0, 40)) + | |
annotate("text", | |
label = "Drink Caffeine", | |
x = 0.7, | |
y = 35, | |
size = 6) + | |
geom_curve(aes(x = 0.7, | |
y = 33, | |
xend = 0.74, | |
yend = 25), | |
arrow = arrow(length = unit(0.07, "inch")), | |
size = 0.8, | |
color = "gray20", | |
curvature = 0.2) + | |
annotate("text", | |
label = "Don't \nDrink Caffeine", | |
x = 0.55, | |
y = 22, | |
size = 6) + | |
geom_curve(aes(x = 0.55, | |
y = 20, | |
xend = 0.58, | |
yend = 14), | |
arrow = arrow(length = unit(0.07, "inch")), | |
size = 0.8, | |
color = "gray20", | |
curvature = 0.2) | |
difference_plot <- posterior %>% | |
filter(parameter == "theta_delta") %>% | |
ggplot(aes(x = value)) + | |
geom_density(alpha = 0.6, | |
fill = "skyblue", | |
colour = "skyblue") + | |
labs(x = "theta", | |
y = "Density", | |
title = "Estimate of the difference in theta values") + | |
theme_bw() + | |
theme(text = element_text(size = 20)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment