Created
July 27, 2016 09:51
-
-
Save ashenkin/8b3a32ced4ee599ed1d76f65a8d3c192 to your computer and use it in GitHub Desktop.
How to predict results from lme4's glmer when fit with scaled data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# We often fit LMM/GLMM's with scaled variables. However, making predictions using those models isn't straightforward (at least to me!) | |
# It turns out that you have to re-scale your prediction data using the same parameters used to scale your original data frame used to fit the model | |
# See below, and pay special attention to the section where the new data are rescaled. | |
library(lme4) | |
library(VGAM) | |
reps = 3000 | |
dbh = rexp(reps); dbh = dbh/max(dbh) * 100 | |
census_len = runif(reps, min = 0.8, max = 1.2) | |
ann_surv_prob = .95 + .01 * dbh/max(dbh) | |
ann_mort_prob = 1 - ann_surv_prob | |
obs_surv_prob = 1 - ann_mort_prob^census_len | |
surv = rbinom(reps, 1, obs_surv_prob) | |
plot = factor(sample(LETTERS[1:5], reps, replace = T)) | |
mod_df = data.frame(plot = plot, surv = surv, dbh = dbh, census_len = census_len) | |
# Modeling with scaled parameters - you must rescale the newdata predictors using the center/scale of the predictor data | |
surv_mod_scale = glmer(surv ~ (1|plot) + scale(dbh) + offset(log(census_len)), data = mod_df, family = binomial(link = "cloglog")) | |
# predicted annual survival probability for a 10cm DBH tree | |
surv_prob_manual = cloglog(fixef(surv_mod_scale)[1] + 10 * fixef(surv_mod_scale)[2], inverse = T) | |
surv_prob_manual # this does not match. must rescale as below. | |
################################################################################# | |
# This is the key section. Make sure you rescale your new data frame properly! # | |
################################################################################# | |
dbh_center = attributes(surv_mod_scale@frame$"scale(dbh)")[["scaled:center"]] | |
dbh_scale = attributes(surv_mod_scale@frame$"scale(dbh)")[["scaled:scale"]] | |
surv_prob_manual_scale = cloglog(fixef(surv_mod_scale)[1] + scale(10, center = dbh_center, scale = dbh_scale) * fixef(surv_mod_scale)[2], inverse = T) | |
surv_prob_manual_scale # this matches the predict() output below ####### | |
surv_prob_predict_scale = predict(surv_mod_scale, newdata = data.frame(dbh = 10, census_len = 1), re.form = NA, type = "response") | |
surv_prob_predict_scale | |
# Modeling with unscaled parameters - manual & predict() match | |
surv_mod_noscale = glmer(surv ~ (1|plot) + dbh + offset(log(census_len)), data = mod_df, family = binomial(link = "cloglog")) | |
# predicted annual survival probability for a 10cm DBH tree | |
surv_prob_manual_noscale = cloglog(fixef(surv_mod_noscale)[1] + 10 * fixef(surv_mod_noscale)[2], inverse = T) | |
surv_prob_manual_noscale | |
surv_prob_predict_noscale = predict(surv_mod_noscale, newdata = data.frame(dbh = 10, census_len = 1), re.form = NA, type = "response") | |
surv_prob_predict_noscale |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Oh right, sorry, misread your issue. This does run the model with scaled predictors (see line 21). You just have to be careful how to rescale the data for predictions. I put this together quite a while ago, so please excuse any haziness on my part.