-
-
Save RobertMyles/7ec75cf021a9690aac16f56f256f89d7 to your computer and use it in GitHub Desktop.
Code for a ggplot2/gganimate version of Cory's Bayesian updating animation: https://gist.github.com/cjbayesian/3373348
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# packages | |
library("ggplot2") | |
# devtools::install_github("dgrtwo/gganimate") | |
library("gganimate") | |
library("dplyr") | |
# set distribution parameters for simulation | |
p = 0.5 | |
N = 100 | |
y_lim = 20 | |
a_a = 2 | |
a_b = 10 | |
b_a = 8 | |
b_b = 3 | |
# set the sequence for curve to solve over | |
x = seq(0,1,length.out = 101) # deafult for curve() | |
# simulated outcome | |
outcomes <- sample(1:0,N,prob=c(p,1-p),replace=TRUE) | |
# successes | |
success <- cumsum(outcomes) | |
# failures, not used, but could be part of ggplot2::annotate | |
failures <- cumsum(ifelse(outcomes == 1, 0, 1)) | |
# fit distirbution for starting point | |
obsA <- data.frame(x = x, y = dbeta(x,a_a,a_b), sim_dist = 0, model = "Observer A") | |
obsB <- data.frame(x = x, y = dbeta(x,b_a,b_b), sim_dist = 0, model = "Observer B") | |
# dim variables to hold distributions | |
beta1 <- NULL | |
beta2 <- NULL | |
# loop over N to create alternate parametrization of beta distributions for each of N | |
for(sim_dist in 1:N){ | |
# fit a distirbution based on success at each step in sim_dist | |
beta1_df <- data.frame(x = x, | |
y = dbeta(x,a_a+success[sim_dist]+1,a_b+(sim_dist-success[sim_dist])+1), | |
model = "Beta 1", | |
sim_dist = sim_dist) | |
# bind to previous loop | |
beta1 <- rbind(beta1, beta1_df) | |
# fit another dist based on alternate parameters for same success | |
beta2_df <- data.frame(x = x, | |
y = dbeta(x,b_a+success[sim_dist]+1,b_b+(sim_dist-success[sim_dist])+1), | |
model = "Beta 2", | |
sim_dist = sim_dist) | |
# bind with previosu loop | |
beta2 <- rbind(beta2, beta2_df) | |
} | |
## Start data manipulation for plotting | |
# key here is that the entire sequence of both beta1/beta2 for each of N are | |
# repeated N times. This adds up fast!!! | |
# the point is to allow for transparency (alpha). | |
# at each "frame" (here = epochs) in the animation, the entire set of densities are drawn | |
# but the alpha for lines of future epochs are set to 0. | |
# This works, but it is not elegant and makes for very large data.frames | |
# probably a better approach out there. | |
# bind the simulated densities | |
plot_dat <- rbind(beta1,beta2) | |
# repate the sequence N times | |
plot_dat2 <- do.call("rbind", replicate(N, plot_dat, simplify = FALSE)) | |
# add the epoch label that acts as the animation frames | |
plot_dat2$epoch <- rep(c(1:N), each = (nrow(plot_dat))) | |
# add successes and failures. supposed to be for plotting, but | |
# using anotation with gganimate is really slow. I bailed on it. | |
plot_dat2$Successes <- rep(success, each = (nrow(plot_dat))) | |
plot_dat2$Failures <- rep(failures, each = (nrow(plot_dat))) | |
# format final plot data | |
# for each epoch, do the following... | |
plot_dat2 <- group_by(plot_dat2, epoch) %>% | |
# calculate the distance between the sim_dist index and the epoch | |
# where sim_dist == epoch is the current density | |
mutate(dist = abs(sim_dist - epoch), | |
# distance function from epoch to sim_dist to set vis to the desired alpha | |
# stole this from @drob, http://varianceexplained.org/files/bandwidth.html | |
vis = ((1 - (dist / max(dist))) ^ 3) ^ 3, | |
# set the current line (sim_dist) to vis = 1 | |
vis = ifelse(sim_dist == epoch, 1, vis), | |
# zero out epoch > sim_dist (in the future) | |
vis = ifelse(sim_dist > epoch, 0, vis)) %>% | |
ungroup() | |
# the ggplot + gganimate | |
p3 <- ggplot() + | |
geom_line(data = plot_dat2, | |
aes(x = x, y = y, color = model, | |
group = interaction(model, sim_dist), | |
sim_dist = epoch, alpha = vis)) + | |
scale_alpha(aes(vis), range = c(0,1)) + | |
geom_line(data = obsA, aes(x = x, y = y, color = model), | |
alpha = 0.8, linetype = 2) + | |
geom_line(data = obsB, aes(x = x, y = y, color = model), | |
alpha = 0.8, linetype = 2) + | |
geom_hline(aes(color = "True P", yintercept = 0), size = 0) + | |
geom_vline(color = "gray50", xintercept = 0.5, linetype = 2, size = 0.5) + | |
scale_color_manual(breaks = c("Observer A", "Observer B", "True P"), | |
values = c("orange", "purple", "orange", "purple", "gray50")) + | |
scale_x_continuous(breaks = seq(0,1,0.2), labels = seq(0,1,0.2), limits = c(0,1)) + | |
guides(model = "legend", alpha = FALSE) + | |
# xlim(c(0,1)) + | |
ylim(c(0,y_lim)) + | |
ylab("Posterior Density") + | |
xlab("p")+ | |
theme_bw() + | |
theme(legend.position = c(0, 1), | |
legend.justification = c(0, 1), | |
legend.background = element_rect(colour = "black", fill = "white", size = 0.2), | |
legend.title=element_blank(), | |
panel.grid.major = element_blank(), | |
panel.grid.minor = element_blank(), | |
panel.border = element_rect(colour = "black", size = 0.2)) | |
gg_animate(p3, file = 'beta_dist1.gif', interval = 0.075, ani.width=400, ani.height=400, title_sim_dist = FALSE) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi Robert, this code is using the old API. Would you mind uploading a new one?