Last active
May 7, 2024 11:27
-
-
Save cbrown5/484564c24383fd20fbe0ef76bd4fd97c to your computer and use it in GitHub Desktop.
Simplified simulation of credible intervals (empirical bayes) in mgcv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Functions to simulate predictions and CIs from a GAM | |
# CJ Brown | |
# 2023-03-24 | |
# | |
# based on code in ?gam.predict and Wood, S.N. (2017) Generalized Additive Models: An | |
# Introduction with R (2nd edition). Chapman and | |
# Hall/CRC. | |
library(lazyeval) | |
rmvn <- function(n,mu,sig) { ## MVN random deviates | |
L <- mroot(sig);m <- ncol(L); | |
t(mu + L%*%matrix(rnorm(m*n),m,n)) | |
} | |
simulate_gam_CIs <- function(model, | |
newdata, | |
forms = list(~x), | |
random_var = NULL, | |
offset = 0, | |
probs = c(0.025, 0.5, 0.975), | |
nsims = 1000, | |
func_to_apply = rep("quantile", length(forms))){ | |
#Function to simulate credible intervals for an mgcv GAM | |
# | |
#Inputs: | |
# model: gam object | |
# newdata: data frame you want to predict too | |
# forms: list of formulas to evaluate on the linear predictor | |
# linear predictor is named 'x' internally. | |
# Can optionally be named list. | |
# Variables in the global environment can be | |
# used in the formula | |
# random_var: optional name variables you | |
# want to condition on, usually random effects | |
# you want to ignore/set to zero | |
# Can name multiple variables here as character vector. | |
# names must match variable names as they will appear | |
# in the lpmatrix. Beware duplicate matches (using grepl here) | |
# offset: numeric. offset to add back into predictions | |
# probs: probability quantiles for CIs | |
# func_to_apply: defaults to quantiles, other option is to sum | |
# provide a character with same length of forms to | |
# specify summary function | |
# | |
# Note: This function has been tested on guassian, poisson and negbin families | |
# It won't work for location-scale models (e.g. gaulss family). | |
#Output: returns a list of dataframes, were each dataframe | |
# is prediction intervals for each formula. | |
# | |
#Output: returns a list of dataframes, were each dataframe | |
# is prediction intervals for each formula. | |
nforms <- length(forms) | |
#Predicting | |
Xp <- predict(model, newdata = newdata, type="lpmatrix") | |
if (!is.null(random_var)){ | |
for (ivar in random_var){ | |
icol_rand <- grepl(ivar, colnames(Xp)) | |
Xp[,icol_rand] <- 0 | |
} | |
} | |
#simulate draws | |
br <- rmvn(nsims, coef(model), model$Vp) | |
#Setup some vectors/matrices to store random draws of | |
# predictions | |
effects <- lapply(1:nforms, | |
matrix, | |
ncol = nsims, | |
nrow = nrow(newdata), | |
nsims) | |
for (i in 1:nsims){ | |
x <- Xp %*% br[i,] + offset ## replicate predictions | |
#adding offset back in. This only matters for mean estimates | |
#not important for the differences (sience the offset will cancel out) | |
for (iforms in 1:nforms){ | |
effects[[iforms]][,i] <- f_eval(forms[[iforms]], | |
data = list(x=x)) | |
} | |
} | |
# Compute CIs for each formula and cbind to original dataframe | |
effects2 <- lapply(1:nforms, function(i){ | |
if (func_to_apply[i] == "quantile"){ | |
ztemp <- data.frame(t(apply(effects[[i]], 1, quantile, probs = probs))) | |
} else { | |
ztemp <- data.frame(prob = apply(effects[[i]], 1, sum))/nsims | |
} | |
ztemp <- setNames(ztemp, paste0(names(forms)[[i]], names(ztemp))) | |
cbind(newdata, ztemp) | |
}) | |
names(effects2) <- names(forms) | |
return(effects2) | |
} | |
### EXAMPLE | |
# | |
# m3.1 <- gam(Shoot_density ~ s(Week, by = Block, k = k_val) + | |
# Treatment + s(Week, by = Treatment, k = k_val) + | |
# s(Block, bs = "re") + offset(ln_Day0_density), | |
# family = "poisson", data = dat) | |
# | |
# # | |
# # Create new dataframe for predictions | |
# # | |
# | |
# newd <- with(dat, data.frame(Week = 6, | |
# Block = 1, | |
# Treatment = unique(Treatment), | |
# ln_Day0_density = mean(ln_Day0_density)) | |
# ) | |
# | |
# newd$pred <- predict(m3.1, newdata = newd, type = "response") | |
# newd | |
# | |
# | |
# #row IDs for treatments (so we don't mix them up) | |
# icontrol <- which(newd$Treatment == "Control") | |
# istatic <- which(newd$Treatment == "StaticStatic") | |
# | |
# | |
# #Predictions with credible intervals | |
# # A list of formulas to evaluate sample by sample | |
# # the function then takes quantiles over these outcomes | |
# # use names if you want the output dataframes and | |
# # columns for CIs to be named. | |
# | |
# forms <- c(effects = ~exp(x), | |
# control_mult = ~ exp(x - x[icontrol]), | |
# static_mult ~ exp(x - x[istatic]), | |
# prob_control = ~ x<x[icontrol], | |
# prob_static = ~ x<x[istatic]) | |
# | |
# gout <- simulate_gam_CIs(m3.1, | |
# newdata = newd, | |
# forms = forms, | |
# random_var = "Block", | |
# offset = mean(dat$ln_Day0_density), | |
# probs = c(0.025, 0.5, 0.975), | |
# nsims = 1000) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment