Skip to content

Instantly share code, notes, and snippets.

@cbrown5
Last active May 7, 2024 11:27
Show Gist options
  • Save cbrown5/484564c24383fd20fbe0ef76bd4fd97c to your computer and use it in GitHub Desktop.
Save cbrown5/484564c24383fd20fbe0ef76bd4fd97c to your computer and use it in GitHub Desktop.
Simplified simulation of credible intervals (empirical bayes) in mgcv
#Functions to simulate predictions and CIs from a GAM
# CJ Brown
# 2023-03-24
#
# based on code in ?gam.predict and Wood, S.N. (2017) Generalized Additive Models: An
# Introduction with R (2nd edition). Chapman and
# Hall/CRC.
library(lazyeval)
rmvn <- function(n,mu,sig) { ## MVN random deviates
L <- mroot(sig);m <- ncol(L);
t(mu + L%*%matrix(rnorm(m*n),m,n))
}
simulate_gam_CIs <- function(model,
newdata,
forms = list(~x),
random_var = NULL,
offset = 0,
probs = c(0.025, 0.5, 0.975),
nsims = 1000,
func_to_apply = rep("quantile", length(forms))){
#Function to simulate credible intervals for an mgcv GAM
#
#Inputs:
# model: gam object
# newdata: data frame you want to predict too
# forms: list of formulas to evaluate on the linear predictor
# linear predictor is named 'x' internally.
# Can optionally be named list.
# Variables in the global environment can be
# used in the formula
# random_var: optional name variables you
# want to condition on, usually random effects
# you want to ignore/set to zero
# Can name multiple variables here as character vector.
# names must match variable names as they will appear
# in the lpmatrix. Beware duplicate matches (using grepl here)
# offset: numeric. offset to add back into predictions
# probs: probability quantiles for CIs
# func_to_apply: defaults to quantiles, other option is to sum
# provide a character with same length of forms to
# specify summary function
#
# Note: This function has been tested on guassian, poisson and negbin families
# It won't work for location-scale models (e.g. gaulss family).
#Output: returns a list of dataframes, were each dataframe
# is prediction intervals for each formula.
#
#Output: returns a list of dataframes, were each dataframe
# is prediction intervals for each formula.
nforms <- length(forms)
#Predicting
Xp <- predict(model, newdata = newdata, type="lpmatrix")
if (!is.null(random_var)){
for (ivar in random_var){
icol_rand <- grepl(ivar, colnames(Xp))
Xp[,icol_rand] <- 0
}
}
#simulate draws
br <- rmvn(nsims, coef(model), model$Vp)
#Setup some vectors/matrices to store random draws of
# predictions
effects <- lapply(1:nforms,
matrix,
ncol = nsims,
nrow = nrow(newdata),
nsims)
for (i in 1:nsims){
x <- Xp %*% br[i,] + offset ## replicate predictions
#adding offset back in. This only matters for mean estimates
#not important for the differences (sience the offset will cancel out)
for (iforms in 1:nforms){
effects[[iforms]][,i] <- f_eval(forms[[iforms]],
data = list(x=x))
}
}
# Compute CIs for each formula and cbind to original dataframe
effects2 <- lapply(1:nforms, function(i){
if (func_to_apply[i] == "quantile"){
ztemp <- data.frame(t(apply(effects[[i]], 1, quantile, probs = probs)))
} else {
ztemp <- data.frame(prob = apply(effects[[i]], 1, sum))/nsims
}
ztemp <- setNames(ztemp, paste0(names(forms)[[i]], names(ztemp)))
cbind(newdata, ztemp)
})
names(effects2) <- names(forms)
return(effects2)
}
### EXAMPLE
#
# m3.1 <- gam(Shoot_density ~ s(Week, by = Block, k = k_val) +
# Treatment + s(Week, by = Treatment, k = k_val) +
# s(Block, bs = "re") + offset(ln_Day0_density),
# family = "poisson", data = dat)
#
# #
# # Create new dataframe for predictions
# #
#
# newd <- with(dat, data.frame(Week = 6,
# Block = 1,
# Treatment = unique(Treatment),
# ln_Day0_density = mean(ln_Day0_density))
# )
#
# newd$pred <- predict(m3.1, newdata = newd, type = "response")
# newd
#
#
# #row IDs for treatments (so we don't mix them up)
# icontrol <- which(newd$Treatment == "Control")
# istatic <- which(newd$Treatment == "StaticStatic")
#
#
# #Predictions with credible intervals
# # A list of formulas to evaluate sample by sample
# # the function then takes quantiles over these outcomes
# # use names if you want the output dataframes and
# # columns for CIs to be named.
#
# forms <- c(effects = ~exp(x),
# control_mult = ~ exp(x - x[icontrol]),
# static_mult ~ exp(x - x[istatic]),
# prob_control = ~ x<x[icontrol],
# prob_static = ~ x<x[istatic])
#
# gout <- simulate_gam_CIs(m3.1,
# newdata = newd,
# forms = forms,
# random_var = "Block",
# offset = mean(dat$ln_Day0_density),
# probs = c(0.025, 0.5, 0.975),
# nsims = 1000)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment