rasmusab · August 14, 2017 12:17
diff --git a/the-probability-my-son-will-be-stung-by-a-bumblebee.R b/the-probability-my-son-will-be-stung-by-a-bumblebee.R
 library(tidyverse)
 library(purrr)
 library(rstan)

 ### Defining the data ###
 #########################
 bumblebees <- c(1, 0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 
                0, 0, 1, 0, 0, 0, 0, 0, 0)
 toddler_steps <- c(26, 16, 37, 101, 12, 122, 90, 55, 56, 39, 55, 15, 45, 8)
 full_squares <- 174
 partial_squares <- 251
 squares <- (full_squares + partial_squares) / 2
 foot_cm2 <- squares / 4
 foot_m2 <- foot_cm2 / 100^2


 ### Plotting the priors ###
 ###########################
 cauchy_priors <- 
  data_frame(parameter = c(    "mu_bees", "mu_steps", "precision_steps"), 
             scale     = c( (4 + 4) / 100,     60,          1         ))
 cauchy_priors$x <- map(cauchy_priors$scale, ~ seq(0, .x * 4, length.out = 100))
 cauchy_priors <- unnest(cauchy_priors)
 cauchy_priors$density <- dcauchy(cauchy_priors$x, 0, cauchy_priors$scale)

 ggplot(cauchy_priors, aes(x, density, width = scale / 20, fill = parameter)) +
  geom_col() +
  geom_vline(aes(xintercept = scale), linetype = 2) + 
  facet_wrap(~ parameter, scales = "free") +
  #theme_minimal() +
  theme(legend.position = "none") +
  xlab("The priors (the dashed line shows the median of the half-Cauchy distribution)")

 ### Defining and fitting the model ###
 ######################################
 bee_model_code <- "
 data {
  int n_bumblebee_observations;
  int bumblebees[n_bumblebee_observations];
  int n_toddler_steps_observations;
  int toddler_steps[n_toddler_steps_observations];
  real foot_m2;
 }

 parameters {
  real<lower=0> mu_bees;
  real<lower=0> mu_steps;
  real<lower=0> precision_steps;
 }

 model {
  // Since we have contrained the parameters to be positive we get implicit
  // half-cauchy distributions even if we declare them to be 'full'-cauchy.
  mu_bees ~ cauchy(0, (4.0 + 4.0) / (10 * 10) ); 
  mu_steps ~ cauchy(0, 60);
  precision_steps ~ cauchy(0, 1); 
  bumblebees ~ poisson(mu_bees);
  toddler_steps ~ neg_binomial_2(mu_steps, precision_steps);
 }

 generated quantities {
  int stings_by_minute[60];
  int stings_by_hour;
  int pred_steps;
  real stepped_area;

  for(minute in 1:60) {
    pred_steps =  neg_binomial_2_rng(mu_steps, precision_steps);
    stepped_area = pred_steps * foot_m2;
    stings_by_minute[minute] = poisson_rng(mu_bees * stepped_area);
  }
  stings_by_hour = sum(stings_by_minute);
 }
 "

 data_list <- list(
  n_bumblebee_observations = length(bumblebees),
  bumblebees = bumblebees,
  n_toddler_steps_observations = length(toddler_steps),
  toddler_steps = toddler_steps,
  foot_m2 = foot_m2
 )
 fit <- stan(model_code = bee_model_code, data = data_list)

 ### Looking at the result ###
 #############################

 stan_hist(fit, c("mu_bees", "mu_steps", "precision_steps"))
 print(fit, c("mu_bees", "mu_steps", "precision_steps"))

 # Getting the sample representing the prob. distribution over stings/hour .
 stings_by_hour <- as.data.frame(fit)$stings_by_hour
 # Now actually calculating the prob of 0 stings, 1 stings, 2 stiongs, etc.
 strings_probability <- prop.table( table(stings_by_hour) )
 # Plotin' n' printin'
 barplot(strings_probability, col = "yellow",
        main = "Posterior predictive stings per hour", 
        xlab = "Number of stings", ylab = "Probability")
 round(strings_probability, 2)
	library(tidyverse)
	library(purrr)
	library(rstan)

	### Defining the data ###
	#########################
	bumblebees <- c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
	0, 0, 1, 0, 0, 0, 0, 0, 0)
	toddler_steps <- c(26, 16, 37, 101, 12, 122, 90, 55, 56, 39, 55, 15, 45, 8)
	full_squares <- 174
	partial_squares <- 251
	squares <- (full_squares + partial_squares) / 2
	foot_cm2 <- squares / 4
	foot_m2 <- foot_cm2 / 100^2


	### Plotting the priors ###
	###########################
	cauchy_priors <-
	data_frame(parameter = c( "mu_bees", "mu_steps", "precision_steps"),
	scale = c( (4 + 4) / 100, 60, 1 ))
	cauchy_priors$x <- map(cauchy_priors$scale, ~ seq(0, .x * 4, length.out = 100))
	cauchy_priors <- unnest(cauchy_priors)
	cauchy_priors$density <- dcauchy(cauchy_priors$x, 0, cauchy_priors$scale)

	ggplot(cauchy_priors, aes(x, density, width = scale / 20, fill = parameter)) +
	geom_col() +
	geom_vline(aes(xintercept = scale), linetype = 2) +
	facet_wrap(~ parameter, scales = "free") +
	#theme_minimal() +
	theme(legend.position = "none") +
	xlab("The priors (the dashed line shows the median of the half-Cauchy distribution)")

	### Defining and fitting the model ###
	######################################
	bee_model_code <- "
	data {
	int n_bumblebee_observations;
	int bumblebees[n_bumblebee_observations];
	int n_toddler_steps_observations;
	int toddler_steps[n_toddler_steps_observations];
	real foot_m2;
	}

	parameters {
	real<lower=0> mu_bees;
	real<lower=0> mu_steps;
	real<lower=0> precision_steps;
	}

	model {
	// Since we have contrained the parameters to be positive we get implicit
	// half-cauchy distributions even if we declare them to be 'full'-cauchy.
	mu_bees ~ cauchy(0, (4.0 + 4.0) / (10 * 10) );
	mu_steps ~ cauchy(0, 60);
	precision_steps ~ cauchy(0, 1);
	bumblebees ~ poisson(mu_bees);
	toddler_steps ~ neg_binomial_2(mu_steps, precision_steps);
	}

	generated quantities {
	int stings_by_minute[60];
	int stings_by_hour;
	int pred_steps;
	real stepped_area;

	for(minute in 1:60) {
	pred_steps = neg_binomial_2_rng(mu_steps, precision_steps);
	stepped_area = pred_steps * foot_m2;
	stings_by_minute[minute] = poisson_rng(mu_bees * stepped_area);
	}
	stings_by_hour = sum(stings_by_minute);
	}
	"

	data_list <- list(
	n_bumblebee_observations = length(bumblebees),
	bumblebees = bumblebees,
	n_toddler_steps_observations = length(toddler_steps),
	toddler_steps = toddler_steps,
	foot_m2 = foot_m2
	)
	fit <- stan(model_code = bee_model_code, data = data_list)

	### Looking at the result ###
	#############################

	stan_hist(fit, c("mu_bees", "mu_steps", "precision_steps"))
	print(fit, c("mu_bees", "mu_steps", "precision_steps"))

	# Getting the sample representing the prob. distribution over stings/hour .
	stings_by_hour <- as.data.frame(fit)$stings_by_hour
	# Now actually calculating the prob of 0 stings, 1 stings, 2 stiongs, etc.
	strings_probability <- prop.table( table(stings_by_hour) )
	# Plotin' n' printin'
	barplot(strings_probability, col = "yellow",
	main = "Posterior predictive stings per hour",
	xlab = "Number of stings", ylab = "Probability")
	round(strings_probability, 2)