Bootstrap mixed effects logistic regression predictions
# Function for getting bootstrapped glmer predictions in parallel
glmmBoot <- function(dat, form, R, nc){
  # dat  = data for the glmer (lme4) logistic regression
  # form = formula of the glmer model to fit
  # R    = total number of bootstrap draws; should be a multiple of nc because the draws are split evenly among cores
  # nc   = number of cores to use in parallel
  library(parallel)
  cl <- makeCluster(nc) # request nc cores
  clusterExport(cl, c("dat", "form", "nc", "R"), envir = environment()) # make these objects available to each core
  clusterSetRNGStream(cl = cl, 1259)
  out <- clusterEvalQ(cl, {
    library(lme4)
    library(boot) # used for the inv.logit function
    b.star <- NULL
    pres.star <- NULL
    pred.star <- NULL
    n <- length(dat$pres)
    for(draw in 1:(R/nc)){ # each core takes R/nc bootstrap draws
      df.star <- dat[sample(1:n, size = n, replace = TRUE), ] # bootstrap the data: resample rows with replacement
      mod <- glmer(form, family = binomial(link = "logit"), data = df.star,
                   control = glmerControl(optimizer = "bobyqa")) # refit the model on each bootstrapped data set
      b.star <- rbind(b.star, fixef(mod)) # fixed-effect estimates from each draw (not currently used); fixef() returns a vector, unlike coef(), so the rows bind cleanly
      pres.star <- rbind(pres.star, df.star$pres) # "observed" presence-absences from each bootstrapped data set
      pred.star <- rbind(pred.star, inv.logit(predict(mod, df.star, allow.new.levels = TRUE))) # predicted presence probabilities from each draw
    }
    # reshape into lists formatted for the ROCR package: one element per bootstrap draw
    lab <- vector("list", nrow(pres.star))
    for(i in 1:nrow(pres.star)){
      lab[[i]] <- as.integer(pres.star[i, ])
    }
    pred <- vector("list", nrow(pred.star))
    for(i in 1:nrow(pred.star)){
      pred[[i]] <- as.numeric(pred.star[i, ])
    }
    pred.all <- list(pred, lab)
    names(pred.all) <- c("predictions", "observed")
    pred.all # last value of the block is what clusterEvalQ() returns from each core (return() is only valid inside a function)
  }) # end cluster call
  stopCluster(cl)
  assign("out", out, envir = .GlobalEnv) # keep a copy outside the function to help with debugging
  # combine the lists from each core into the format expected by ROCR
  lab1 <- do.call(c, lapply(out, `[[`, "observed"))
  pred1 <- do.call(c, lapply(out, `[[`, "predictions"))
  pred.mod <- list(pred1, lab1)
  names(pred.mod) <- c("predictions", "observed")
  return(pred.mod)
} # end function
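
# A minimal alternative sketch (an assumption, not part of the original gist): the same
# bootstrap can be distributed with parLapply() over draw indices instead of clusterEvalQ(),
# which avoids having to make R an exact multiple of nc. The helper name glmmBootLapply is
# hypothetical; it returns the same "predictions"/"observed" list structure as glmmBoot().
glmmBootLapply <- function(dat, form, R, nc){
  library(parallel)
  cl <- makeCluster(nc)
  on.exit(stopCluster(cl)) # shut the workers down even if a fit fails
  clusterSetRNGStream(cl = cl, 1259)
  res <- parLapply(cl, seq_len(R), function(draw, dat, form){
    library(lme4)
    library(boot) # inv.logit
    n <- nrow(dat)
    df.star <- dat[sample(n, size = n, replace = TRUE), ] # bootstrap the data
    mod <- glmer(form, family = binomial(link = "logit"), data = df.star,
                 control = glmerControl(optimizer = "bobyqa"))
    list(observed = as.integer(df.star$pres),
         predictions = as.numeric(inv.logit(predict(mod, df.star, allow.new.levels = TRUE))))
  }, dat = dat, form = form)
  list(predictions = lapply(res, `[[`, "predictions"),
       observed = lapply(res, `[[`, "observed"))
}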
# Run the function for model glmm.M35
system.time(pred.M35 <- glmmBoot(dat = df.fit, form = formula(glmm.M35), R = 9, nc = 3)) # takes ~2 min with 9 draws on 3 cores

library(ROCR)
p35 <- prediction(pred.M35$predictions, pred.M35$observed)
perf35 <- performance(p35, "tpr", "fpr")

par(mfrow = c(1, 1))
plot(perf35, lty = 3, col = "blue")
abline(0, 1)
plot(perf35, avg = "vertical", lwd = 2, col = "black", spread.estimate = "stderror", plotCI.lwd = 2, add = TRUE)
# function to summarize AUC across all bootstrap draws
aucSummary <- function(pred.list, Mean = TRUE, CI = c(0.025, 0.975), SE = FALSE){
  library(AUC)
  auc.out <- numeric(length(pred.list$predictions))
  for(i in 1:length(pred.list$predictions)) {
    auc.out[i] <- auc(roc(as.numeric(pred.list$predictions[[i]]), as.factor(pred.list$observed[[i]])))
  }
  auc.CI <- quantile(auc.out, probs = CI)
  auc.mean <- NA
  auc.se <- NA
  if(Mean){
    auc.mean <- mean(auc.out)
  }
  if(SE){
    auc.se <- sd(auc.out)/sqrt(length(auc.out))
  }
  auc.summary <- data.frame(auc.mean, auc.CI[1], auc.CI[2], auc.se)
  names(auc.summary) <- c('Mean', 'LCI', 'UCI', 'SE')
  row.names(auc.summary) <- deparse(substitute(pred.list))
  return(auc.summary)
} # end function
auc35 <- aucSummary(pred.M35, SE = TRUE)

library(boot) # for inv.logit in the main session (previously loaded only on the workers)
pred.fit <- inv.logit(predict(glmm.M35, df.fit, allow.new.levels = TRUE))
auc(roc(pred.fit, as.factor(df.fit$pres))) # 0.875 - why is this so different from auc35?

# alternative ways to get the bootstrap AUC - still different
auc(roc(as.numeric(pred.M35$predictions[[1]]), as.factor(pred.M35$observed[[1]]))) # AUC of the first bootstrap draw only
mean(unlist(performance(p35, "auc")@y.values)) # mean AUC across draws via ROCR
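
# A hedged follow-up sketch (an assumption, not part of the original gist): compute the
# apparent AUC of glmm.M35 on the original data and the per-draw bootstrap AUCs with the
# same ROCR machinery, so any remaining gap reflects the bootstrap resampling itself
# rather than differences between the AUC and ROCR packages.
p.fit <- prediction(inv.logit(predict(glmm.M35, df.fit, allow.new.levels = TRUE)), df.fit$pres)
auc.fit <- unlist(performance(p.fit, "auc")@y.values)  # apparent (training-data) AUC
auc.draws <- unlist(performance(p35, "auc")@y.values)  # one AUC per bootstrap draw
c(fit = auc.fit, boot.mean = mean(auc.draws), quantile(auc.draws, probs = c(0.025, 0.975)))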