ashenkin · July 27, 2016 09:51 · ashenkin · Aug 2, 2023
diff --git a/predict_scaled_glmer.r b/predict_scaled_glmer.r
 # We often fit LMM/GLMM's with scaled variables.  However, making predictions using those models isn't straightforward (at least to me!)
 # It turns out that you have to re-scale your prediction data using the same parameters used to scale your original data frame used to fit the model
 # See below, and pay special attention to the section where the new data are rescaled.

 library(lme4)
 library(VGAM)

 reps = 3000

 dbh = rexp(reps); dbh = dbh/max(dbh) * 100
 census_len = runif(reps, min = 0.8, max = 1.2)
 ann_surv_prob = .95 + .01 * dbh/max(dbh)
 ann_mort_prob = 1 - ann_surv_prob
 obs_surv_prob = 1 - ann_mort_prob^census_len
 surv = rbinom(reps, 1, obs_surv_prob)
 plot = factor(sample(LETTERS[1:5], reps, replace = T))
 mod_df = data.frame(plot = plot, surv = surv, dbh = dbh, census_len = census_len)


 # Modeling with scaled parameters - you must rescale the newdata predictors using the center/scale of the predictor data
  surv_mod_scale = glmer(surv ~ (1|plot) + scale(dbh) + offset(log(census_len)), data = mod_df, family = binomial(link = "cloglog"))
  
  # predicted annual survival probability for a 10cm DBH tree
  surv_prob_manual = cloglog(fixef(surv_mod_scale)[1] + 10 * fixef(surv_mod_scale)[2], inverse = T)
  surv_prob_manual  # this does not match.  must rescale as below.
  
  #################################################################################
  # This is the key section.  Make sure you rescale your new data frame properly! #
  #################################################################################
  dbh_center = attributes(surv_mod_scale@frame$"scale(dbh)")[["scaled:center"]]
  dbh_scale = attributes(surv_mod_scale@frame$"scale(dbh)")[["scaled:scale"]]
  surv_prob_manual_scale = cloglog(fixef(surv_mod_scale)[1] + scale(10, center = dbh_center, scale = dbh_scale) * fixef(surv_mod_scale)[2], inverse = T)
  surv_prob_manual_scale  # this matches the predict() output below #######
  
  surv_prob_predict_scale = predict(surv_mod_scale, newdata = data.frame(dbh = 10, census_len = 1), re.form = NA, type = "response")
  surv_prob_predict_scale

  
 # Modeling with unscaled parameters - manual & predict() match
  surv_mod_noscale = glmer(surv ~ (1|plot) + dbh + offset(log(census_len)), data = mod_df, family = binomial(link = "cloglog"))
  
  # predicted annual survival probability for a 10cm DBH tree
  surv_prob_manual_noscale = cloglog(fixef(surv_mod_noscale)[1] + 10 * fixef(surv_mod_noscale)[2], inverse = T)
  surv_prob_manual_noscale
  
  surv_prob_predict_noscale = predict(surv_mod_noscale, newdata = data.frame(dbh = 10, census_len = 1), re.form = NA, type = "response")
  surv_prob_predict_noscale
	# We often fit LMM/GLMM's with scaled variables. However, making predictions using those models isn't straightforward (at least to me!)
	# It turns out that you have to re-scale your prediction data using the same parameters used to scale your original data frame used to fit the model
	# See below, and pay special attention to the section where the new data are rescaled.

	library(lme4)
	library(VGAM)

	reps = 3000

	dbh = rexp(reps); dbh = dbh/max(dbh) * 100
	census_len = runif(reps, min = 0.8, max = 1.2)
	ann_surv_prob = .95 + .01 * dbh/max(dbh)
	ann_mort_prob = 1 - ann_surv_prob
	obs_surv_prob = 1 - ann_mort_prob^census_len
	surv = rbinom(reps, 1, obs_surv_prob)
	plot = factor(sample(LETTERS[1:5], reps, replace = T))
	mod_df = data.frame(plot = plot, surv = surv, dbh = dbh, census_len = census_len)


	# Modeling with scaled parameters - you must rescale the newdata predictors using the center/scale of the predictor data
	surv_mod_scale = glmer(surv ~ (1\|plot) + scale(dbh) + offset(log(census_len)), data = mod_df, family = binomial(link = "cloglog"))

	# predicted annual survival probability for a 10cm DBH tree
	surv_prob_manual = cloglog(fixef(surv_mod_scale)[1] + 10 * fixef(surv_mod_scale)[2], inverse = T)
	surv_prob_manual # this does not match. must rescale as below.

	#################################################################################
	# This is the key section. Make sure you rescale your new data frame properly! #
	#################################################################################
	dbh_center = attributes(surv_mod_scale@frame$"scale(dbh)")[["scaled:center"]]
	dbh_scale = attributes(surv_mod_scale@frame$"scale(dbh)")[["scaled:scale"]]
	surv_prob_manual_scale = cloglog(fixef(surv_mod_scale)[1] + scale(10, center = dbh_center, scale = dbh_scale) * fixef(surv_mod_scale)[2], inverse = T)
	surv_prob_manual_scale # this matches the predict() output below #######

	surv_prob_predict_scale = predict(surv_mod_scale, newdata = data.frame(dbh = 10, census_len = 1), re.form = NA, type = "response")
	surv_prob_predict_scale


	# Modeling with unscaled parameters - manual & predict() match
	surv_mod_noscale = glmer(surv ~ (1\|plot) + dbh + offset(log(census_len)), data = mod_df, family = binomial(link = "cloglog"))

	# predicted annual survival probability for a 10cm DBH tree
	surv_prob_manual_noscale = cloglog(fixef(surv_mod_noscale)[1] + 10 * fixef(surv_mod_noscale)[2], inverse = T)
	surv_prob_manual_noscale

	surv_prob_predict_noscale = predict(surv_mod_noscale, newdata = data.frame(dbh = 10, census_len = 1), re.form = NA, type = "response")
	surv_prob_predict_noscale