bayesball · July 18, 2024 14:44
diff --git a/prediction_normal.R b/prediction_normal.R
 prediction_normal <- function(S, games, f_games){
  # data frame S contains batter, BIP, HR for
  # first part of season
  # games - average number of games already played
  # f_games - number of future games played
  
  # need to have the JAGS software installed
  # https://mcmc-jags.sourceforge.io/
  
  library(readr)
  library(dplyr)
  library(ggplot2)
  library(runjags)
  library(coda)
  library(purrr)
  library(tidyr)

  JAGS_data <- list(N = nrow(S),
                  HR = S$HR,
                  BIP = S$BIP)

  modelString <-"
  model {
  ## sampling
  for (i in 1:N){
     HR[i] ~ dbin(p[i], BIP[i])
     logit(p[i]) <- alpha[i]
     alpha[i] ~ dnorm(theta, invtau2)
  }
  theta ~ dnorm(0, 0.001)
  invtau2 <- 1 / (tau * tau)
  tau ~ dt(0, 1, 1) T(0, )
  }
  "
  
  # run JAGS, collecting 5000 draws from posterior
  posterior <- run.jags(modelString,
                      n.chains = 1,
                      data = JAGS_data,
                      monitor = c("theta", "alpha", "tau"),
                      adapt = 1000,
                      burnin = 2000,
                      sample = 5000)

  # obtain data frame of simulated draws
  posterior %>% as.mcmc() %>% 
  as.data.frame() -> post2
  
  # implements one posterior predictive simulation
  one_sim <- function(j, BIP){
    invlogit <- function(y){
      exp(y) / (1 + exp(y))
    }
    alpha <- post2[j, 2:(JAGS_data$N - 1)]
    p_sim <- unlist(invlogit(alpha))
    data.frame(Sim = j,
               batter = S$batter,
               HR = rbinom(JAGS_data$N, size = BIP, 
                   prob = p_sim))
  }

  # figure out future BIP
  S |> 
    mutate(fBIP = round(BIP * f_games / games)) -> S

  #  simulate 500 from the posterior predictive distribution
  HR_future <- map(sample(1:5000, 500), one_sim, 
                 S$fBIP) |> 
    list_rbind()

  #  add known HR counts
  inner_join(HR_future, S, by = "batter")  |> 
    mutate(HR_final = HR.x + HR.y) -> HR_future

  # interested in (1) max HR, (2) # >= 40, (3) # >= 30, 
  # (4) # >= 20
  HR_future |> 
    group_by(Sim) |> 
    summarize(HR_Leading = max(HR_final),
              HR_Count_GE_20 = sum(HR_final >= 20),
              HR_Count_GE_30 = sum(HR_final >= 30),
              HR_Count_GE_40 = sum(HR_final >= 40) ) -> S_future

  # pivot longer
  S_future |> 
    pivot_longer(
      cols = starts_with("HR"),
      names_to = "Type",
      values_to = "Measure"
    ) -> S_graph

  # interval estimates
  S_graph |> 
    group_by(Type) |> 
    summarize(LO = quantile(Measure, 0.05),
            HI = quantile(Measure, 0.95),
            Prob = mean(Measure >= LO & Measure <= HI)) ->
    interval_estimates

  # display of prediction distributions
  myplot <- ggplot(S_graph, aes(Measure)) +
    geom_density(color = "red") +
    facet_wrap(~ Type, scales = "free") +
    labs(title = "Predictions of Final Season Totals") +
    theme(text=element_text(size=18)) +
    theme(plot.title = element_text(colour = "blue", size = 18,
                  hjust = 0.5, vjust = 0.8, angle = 0))
    
    # output the interval estimates and the plot
    list(interval_estimates = interval_estimates,
         plot = myplot)
 }
	prediction_normal <- function(S, games, f_games){
	# data frame S contains batter, BIP, HR for
	# first part of season
	# games - average number of games already played
	# f_games - number of future games played

	# need to have the JAGS software installed
	# https://mcmc-jags.sourceforge.io/

	library(readr)
	library(dplyr)
	library(ggplot2)
	library(runjags)
	library(coda)
	library(purrr)
	library(tidyr)

	JAGS_data <- list(N = nrow(S),
	HR = S$HR,
	BIP = S$BIP)

	modelString <-"
	model {
	## sampling
	for (i in 1:N){
	HR[i] ~ dbin(p[i], BIP[i])
	logit(p[i]) <- alpha[i]
	alpha[i] ~ dnorm(theta, invtau2)
	}
	theta ~ dnorm(0, 0.001)
	invtau2 <- 1 / (tau * tau)
	tau ~ dt(0, 1, 1) T(0, )
	}
	"

	# run JAGS, collecting 5000 draws from posterior
	posterior <- run.jags(modelString,
	n.chains = 1,
	data = JAGS_data,
	monitor = c("theta", "alpha", "tau"),
	adapt = 1000,
	burnin = 2000,
	sample = 5000)

	# obtain data frame of simulated draws
	posterior %>% as.mcmc() %>%
	as.data.frame() -> post2

	# implements one posterior predictive simulation
	one_sim <- function(j, BIP){
	invlogit <- function(y){
	exp(y) / (1 + exp(y))
	}
	alpha <- post2[j, 2:(JAGS_data$N - 1)]
	p_sim <- unlist(invlogit(alpha))
	data.frame(Sim = j,
	batter = S$batter,
	HR = rbinom(JAGS_data$N, size = BIP,
	prob = p_sim))
	}

	# figure out future BIP
	S \|>
	mutate(fBIP = round(BIP * f_games / games)) -> S

	# simulate 500 from the posterior predictive distribution
	HR_future <- map(sample(1:5000, 500), one_sim,
	S$fBIP) \|>
	list_rbind()

	# add known HR counts
	inner_join(HR_future, S, by = "batter") \|>
	mutate(HR_final = HR.x + HR.y) -> HR_future

	# interested in (1) max HR, (2) # >= 40, (3) # >= 30,
	# (4) # >= 20
	HR_future \|>
	group_by(Sim) \|>
	summarize(HR_Leading = max(HR_final),
	HR_Count_GE_20 = sum(HR_final >= 20),
	HR_Count_GE_30 = sum(HR_final >= 30),
	HR_Count_GE_40 = sum(HR_final >= 40) ) -> S_future

	# pivot longer
	S_future \|>
	pivot_longer(
	cols = starts_with("HR"),
	names_to = "Type",
	values_to = "Measure"
	) -> S_graph

	# interval estimates
	S_graph \|>
	group_by(Type) \|>
	summarize(LO = quantile(Measure, 0.05),
	HI = quantile(Measure, 0.95),
	Prob = mean(Measure >= LO & Measure <= HI)) ->
	interval_estimates

	# display of prediction distributions
	myplot <- ggplot(S_graph, aes(Measure)) +
	geom_density(color = "red") +
	facet_wrap(~ Type, scales = "free") +
	labs(title = "Predictions of Final Season Totals") +
	theme(text=element_text(size=18)) +
	theme(plot.title = element_text(colour = "blue", size = 18,
	hjust = 0.5, vjust = 0.8, angle = 0))

	# output the interval estimates and the plot
	list(interval_estimates = interval_estimates,
	plot = myplot)
	}