twolodzko · March 26, 2018 14:22
diff --git a/bayesian-ab-example.R b/bayesian-ab-example.R

 library(ggplot2)
 set.seed(123)

 # Simulated data: n observation periods, with nx (group X) and ny (group Y)
 # binomial trials in every period.
 n <- 100
 nx <- 500
 ny <- nx
 # Per-period success counts; the two success probabilities differ by 0.01.
 x <- rbinom(n, size = nx, prob = 0.22)
 y <- rbinom(n, size = ny, prob = 0.21)

 # Raw counts across periods: X in the default colour, Y overlaid in gray.
 plot(x, type = "l", ylim = c(80, 140))
 lines(y, col = "gray")

 # Beta posterior shape parameters for pi_X and pi_Y after each period:
 # 1 plus the running total of successes (a) and of failures (b).
 post_x_a <- cumsum(x) + 1
 post_x_b <- cumsum(nx - x) + 1
 post_y_a <- cumsum(y) + 1
 post_y_b <- cumsum(ny - y) + 1

 # Mean plus 2.5% / 97.5% quantiles of Beta(a, b), vectorised over a and b.
 summarise_beta <- function(a, b) {
   list(
     mu = a / (a + b),
     ymin = qbeta(0.025, a, b),
     ymax = qbeta(0.975, a, b)
   )
 }
 x_post <- summarise_beta(post_x_a, post_x_b)
 y_post <- summarise_beta(post_y_a, post_y_b)

 # One row per period; x_* columns describe pi_X, y_* columns describe pi_Y.
 post_df <- data.frame(
   x_mu = x_post$mu,
   x_ymin = x_post$ymin,
   x_ymax = x_post$ymax,
   y_mu = y_post$mu,
   y_ymin = y_post$ymin,
   y_ymax = y_post$ymax
 )

 # Posterior means (lines) with their quantile bands (ribbons):
 # pi_X in blue, pi_Y in red.
 ggplot(post_df, aes(x = seq_len(n))) +
   geom_line(aes(y = x_mu), color = "blue") +
   geom_ribbon(
     aes(ymin = x_ymin, ymax = x_ymax),
     alpha = 0.3, fill = "lightblue"
   ) +
   geom_line(aes(y = y_mu), color = "red") +
   geom_ribbon(
     aes(ymin = y_ymin, ymax = y_ymax),
     alpha = 0.3, fill = "red"
   ) +
   xlab("") +
   ylab(expression(list(pi[X], pi[Y]))) +
   theme_minimal()

 # Monte Carlo approximation of the posterior of d = pi_X - pi_Y,
 # using m random draws per period.
 m <- 50000
 # rbeta() recycles the length-n shape vectors, so consecutive draws cycle
 # through the periods; filling the matrix by row puts period j in column j.
 draw_beta <- function(a, b) {
   matrix(rbeta(n * m, a, b), nrow = m, byrow = TRUE)
 }
 post_x_sim <- draw_beta(post_x_a, post_x_b)
 post_y_sim <- draw_beta(post_y_a, post_y_b)
 post_xy_diff <- post_x_sim - post_y_sim

 # Both quantiles per period in one pass: row 1 = 2.5%, row 2 = 97.5%.
 diff_ci <- apply(post_xy_diff, 2, quantile, probs = c(0.025, 0.975))
 post_sim_df <- data.frame(
   diff_mu = colMeans(post_xy_diff),
   diff_ymin = diff_ci[1, ],
   diff_ymax = diff_ci[2, ]
 )

 # Mean of the simulated difference (line) with its quantile band (ribbon).
 ggplot(post_sim_df, aes(x = seq_len(n))) +
   geom_line(aes(y = diff_mu), color = "darkgray") +
   geom_ribbon(
     aes(ymin = diff_ymin, ymax = diff_ymax),
     alpha = 0.3, fill = "lightgray"
   ) +
   xlab("") +
   ylab(expression(d == pi[X] - pi[Y])) +
   theme_minimal()


 library(extraDistr)

 # Posterior predictive comparison of the success counts X and Y:
 # for every period's posterior, draw m beta-binomial counts per group.
 m <- 50000
 # The size argument of rbbinom() is the number of trials behind each
 # simulated count, so it has to be the per-period sample size (nx, ny) used
 # to generate x and y, not the number of periods n.
 # As with rbeta(), the length-n shape vectors are recycled, so filling by
 # row puts period j in column j.
 post_x_sim <- matrix(
   rbbinom(n * m, nx, post_x_a, post_x_b),
   nrow = m, byrow = TRUE
 )
 post_y_sim <- matrix(
   rbbinom(n * m, ny, post_y_a, post_y_b),
   nrow = m, byrow = TRUE
 )
 post_xy_diff <- post_x_sim - post_y_sim

 # Both quantiles per period in one pass: row 1 = 2.5%, row 2 = 97.5%.
 diff_ci <- apply(post_xy_diff, 2, quantile, probs = c(0.025, 0.975))
 post_sim_df <- data.frame(
   # mean of the simulated difference in counts
   diff_mu = colMeans(post_xy_diff),
   # 2.5% and 97.5% quantiles of the simulated difference
   diff_ymin = diff_ci[1, ],
   diff_ymax = diff_ci[2, ],
   # posterior predictive probability that X > Y: share of draws with a
   # strictly positive difference (ties count as "not greater")
   diff_prob = colMeans(post_xy_diff > 0)
 )

 # Predictive probability that X exceeds Y, period by period.
 ggplot(post_sim_df, aes(x = seq_len(n), y = diff_prob)) +
   geom_line(color = "darkgray") +
   xlab("") +
   ylab(expression(Pr(X > Y))) +
   theme_minimal()

	library(ggplot2)
	set.seed(123)

	# Simulated data: n observation periods, with nx (group X) and ny (group Y)
	# binomial trials in every period.
	n <- 100
	nx <- 500
	ny <- nx
	# Per-period success counts; the two success probabilities differ by 0.01.
	x <- rbinom(n, size = nx, prob = 0.22)
	y <- rbinom(n, size = ny, prob = 0.21)

	# Raw counts across periods: X in the default colour, Y overlaid in gray.
	plot(x, type = "l", ylim = c(80, 140))
	lines(y, col = "gray")

	# Beta posterior shape parameters for pi_X and pi_Y after each period:
	# 1 plus the running total of successes (a) and of failures (b).
	post_x_a <- cumsum(x) + 1
	post_x_b <- cumsum(nx - x) + 1
	post_y_a <- cumsum(y) + 1
	post_y_b <- cumsum(ny - y) + 1

	# Mean plus 2.5% / 97.5% quantiles of Beta(a, b), vectorised over a and b.
	summarise_beta <- function(a, b) {
	  list(
	    mu = a / (a + b),
	    ymin = qbeta(0.025, a, b),
	    ymax = qbeta(0.975, a, b)
	  )
	}
	x_post <- summarise_beta(post_x_a, post_x_b)
	y_post <- summarise_beta(post_y_a, post_y_b)

	# One row per period; x_* columns describe pi_X, y_* columns describe pi_Y.
	post_df <- data.frame(
	  x_mu = x_post$mu,
	  x_ymin = x_post$ymin,
	  x_ymax = x_post$ymax,
	  y_mu = y_post$mu,
	  y_ymin = y_post$ymin,
	  y_ymax = y_post$ymax
	)

	# Posterior means (lines) with their quantile bands (ribbons):
	# pi_X in blue, pi_Y in red.
	ggplot(post_df, aes(x = seq_len(n))) +
	  geom_line(aes(y = x_mu), color = "blue") +
	  geom_ribbon(
	    aes(ymin = x_ymin, ymax = x_ymax),
	    alpha = 0.3, fill = "lightblue"
	  ) +
	  geom_line(aes(y = y_mu), color = "red") +
	  geom_ribbon(
	    aes(ymin = y_ymin, ymax = y_ymax),
	    alpha = 0.3, fill = "red"
	  ) +
	  xlab("") +
	  ylab(expression(list(pi[X], pi[Y]))) +
	  theme_minimal()

	# Monte Carlo approximation of the posterior of d = pi_X - pi_Y,
	# using m random draws per period.
	m <- 50000
	# rbeta() recycles the length-n shape vectors, so consecutive draws cycle
	# through the periods; filling the matrix by row puts period j in column j.
	draw_beta <- function(a, b) {
	  matrix(rbeta(n * m, a, b), nrow = m, byrow = TRUE)
	}
	post_x_sim <- draw_beta(post_x_a, post_x_b)
	post_y_sim <- draw_beta(post_y_a, post_y_b)
	post_xy_diff <- post_x_sim - post_y_sim

	# Both quantiles per period in one pass: row 1 = 2.5%, row 2 = 97.5%.
	diff_ci <- apply(post_xy_diff, 2, quantile, probs = c(0.025, 0.975))
	post_sim_df <- data.frame(
	  diff_mu = colMeans(post_xy_diff),
	  diff_ymin = diff_ci[1, ],
	  diff_ymax = diff_ci[2, ]
	)

	# Mean of the simulated difference (line) with its quantile band (ribbon).
	ggplot(post_sim_df, aes(x = seq_len(n))) +
	  geom_line(aes(y = diff_mu), color = "darkgray") +
	  geom_ribbon(
	    aes(ymin = diff_ymin, ymax = diff_ymax),
	    alpha = 0.3, fill = "lightgray"
	  ) +
	  xlab("") +
	  ylab(expression(d == pi[X] - pi[Y])) +
	  theme_minimal()


	library(extraDistr)

	# Posterior predictive comparison of the success counts X and Y:
	# for every period's posterior, draw m beta-binomial counts per group.
	m <- 50000
	# The size argument of rbbinom() is the number of trials behind each
	# simulated count, so it has to be the per-period sample size (nx, ny) used
	# to generate x and y, not the number of periods n.
	# As with rbeta(), the length-n shape vectors are recycled, so filling by
	# row puts period j in column j.
	post_x_sim <- matrix(
	  rbbinom(n * m, nx, post_x_a, post_x_b),
	  nrow = m, byrow = TRUE
	)
	post_y_sim <- matrix(
	  rbbinom(n * m, ny, post_y_a, post_y_b),
	  nrow = m, byrow = TRUE
	)
	post_xy_diff <- post_x_sim - post_y_sim

	# Both quantiles per period in one pass: row 1 = 2.5%, row 2 = 97.5%.
	diff_ci <- apply(post_xy_diff, 2, quantile, probs = c(0.025, 0.975))
	post_sim_df <- data.frame(
	  # mean of the simulated difference in counts
	  diff_mu = colMeans(post_xy_diff),
	  # 2.5% and 97.5% quantiles of the simulated difference
	  diff_ymin = diff_ci[1, ],
	  diff_ymax = diff_ci[2, ],
	  # posterior predictive probability that X > Y: share of draws with a
	  # strictly positive difference (ties count as "not greater")
	  diff_prob = colMeans(post_xy_diff > 0)
	)

	# Predictive probability that X exceeds Y, period by period.
	ggplot(post_sim_df, aes(x = seq_len(n), y = diff_prob)) +
	  geom_line(color = "darkgray") +
	  xlab("") +
	  ylab(expression(Pr(X > Y))) +
	  theme_minimal()
No results found