Created
May 21, 2014 14:12
-
-
Save djhocking/fee60fabe16e86e46bd7 to your computer and use it in GitHub Desktop.
Bootstrap mixed effects logistic regression predictions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Function for getting bootstrapped glmer predictions in parallel | |
glmmBoot <- function(dat, form, R, nc){ | |
# dat = data for glmer (lme4) logistic regression | |
# form = formula of glmer equation for fitting | |
# R = total number of bootstrap draws - should be multiple of nc b/c divided among cores evenly | |
# nc = number of cores to use in parallel | |
library(parallel) | |
cl <- makeCluster(nc) # Request # cores | |
clusterExport(cl, c("dat", "form", "nc", "R"), envir = environment()) # Make these available to each core | |
clusterSetRNGStream(cl = cl, 1259) | |
out <- clusterEvalQ(cl, { | |
library(lme4) | |
library(boot) # used for the inv.logit function | |
b.star <- NULL | |
pres.star <- NULL | |
pred.star <- NULL | |
n <- length(dat$pres) | |
for(draw in 1:(R/nc)){ # within each core draw R/nc samples | |
df.star <- dat[sample(1:n, size=n, replace=T), ] # bootstrap data | |
mod <- glmer(form, family = binomial(link = "logit"), data = df.star, control = glmerControl(optimizer="bobyqa")) # regression on each bootstrapped data set | |
b.star <- rbind(b.star, coef(mod)) # combine coefficients from each draw (not currently used) | |
pres.star <- rbind(pres.star, df.star$pres) # combine "observed" presence-absences from each bootstrapped data iteration | |
pred.star <- rbind(pred.star, inv.logit(predict(mod, df.star, allow.new.levels = TRUE))) # # combine predicted presence probability from each draw | |
} | |
# make into lists formatted for the ROCR package | |
lab <- list(pres.star) | |
for(i in 1:dim(pres.star)[1]){ | |
lab[[i]] <- as.integer(pres.star[i, ]) | |
} | |
pred <- list(pred.star) | |
for(i in 1:dim(pred.star)[1]){ | |
pred[[i]] <- as.numeric(pred.star[i,]) | |
} | |
pred.all <- list(pred, lab) | |
names(pred.all) <- c("predictions", "observed") | |
return(pred.all) | |
}) # end cluster call | |
stopCluster(cl) | |
assign("out", out, envir = .GlobalEnv) # allow access outside function to help debugging | |
# combine lists from each core to format for ROCR | |
lab1 <- out[[1]]$observed | |
for(i in 2:length(out)){ | |
foo <- out[[i]]$observed | |
lab1 <- c(lab1, foo) | |
} | |
pred1 <- out[[1]]$predictions | |
for(i in 2:length(out)){ | |
foo <- out[[i]]$predictions | |
pred1 <- c(pred1, foo) | |
} | |
pred.mod <- list(pred1, lab1) | |
names(pred.mod) <- c("predictions", "observed") | |
return(pred.mod) | |
} # end function | |
# Run function for model glmm.M35 | |
system.time(pred.M35 <- glmmBoot(dat=df.fit, form=formula(glmm.M35), R=9, nc=3)) # takes ~2 min with 9 draws on 3 cores | |
library(ROCR) | |
p35 <- prediction(pred.M35$predictions, pred.M35$observed) | |
perf35 <- performance(p35, "tpr", "fpr") | |
par(mfrow = c(1,1)) | |
plot(perf35, lty=3, col="blue") | |
abline(0, 1) | |
plot(perf35, avg="vertical", lwd=2, col="black", spread.estimate="stderror", plotCI.lwd=2, add=TRUE) | |
# function to summarize AUC across all bootstrap draws | |
aucSummary <- function(pred.list, Mean=T, CI=c(0.025, 0.975), SE=F){ | |
library(AUC) | |
auc.out <- NULL | |
for(i in 1:length(pred.list$predictions)) { | |
auc.out[i] <- auc(roc(as.numeric(pred.list$predictions[[i]]), as.factor(pred.list$observed[[i]]))) | |
} | |
auc.CI <- quantile(auc.out, probs=CI) | |
auc.mean <- NA | |
auc.se <- NA | |
if(Mean == T){ | |
auc.mean <- mean(auc.out) | |
} | |
if(SE == T) { | |
auc.se <- sd(auc.out)/sqrt(length(auc.out)) | |
} | |
auc.summary <- data.frame(auc.mean, auc.CI[1], auc.CI[2], auc.se) | |
names(auc.summary) <- c('Mean', 'LCI', 'UCI', 'SE') | |
row.names(auc.summary) <- deparse(substitute(pred.list)) | |
return(auc.summary) | |
} # end function | |
auc35 <- aucSummary(pred.M35, SE=T) | |
pred.fit <- inv.logit(predict(glmm.M35, df.fit, allow.new.levels = TRUE)) | |
auc(roc(pred.fit, as.factor(df.fit$pres))) # 0.875 - why so diff than auc35?????????????????????????????? | |
# alternative ways to get it - still different | |
auc(roc(as.numeric(pred.list$predictions[[1]]), as.factor(pred.list$observed[[1]]))) | |
mean(unlist(performance(p35, "auc")@y.values)) |
###########################
####bootMer-> boot AUC#####
###########################
library(lme4)
library(lattice)
data(cbpp)
#fit a model
cbpp$Y<-cbpp$incidence>=1
glmm<-glmer(Y~period + (1|herd), family=binomial, data=cbpp)
glmm
##### change "cbpp$Y" for the interested endpoint in the function
ROCFun <- function(fit) {
library(pROC)
pred<-predict(fit, type="response")
AUC<-as.numeric(auc(cbpp$Y, pred))
}
###run bootMer
system.time(AUC.boot <- bootMer(glmm,nsim=100,FUN=ROCFun,seed=101, use.u=TRUE,
type="parametric", parallel="multicore", ncpus=2))
AUC.boot
Call:
bootMer(x = glmm, FUN = ROCFun, nsim = 100, seed = 101, use.u = TRUE)
Bootstrap Statistics :
original bias std. error
t1* 0.7479947 -0.02104278 0.03489653
#Now it seems more reasonable, bias as "optimism"... but still do not know #if I am just doing a AUC with bootstrap CI
#test
(ROCFun(glmm))
#...
(boot.ci(AUC.boot, index =c(1,1), type="norm"))
roc(cbpp$Y, predict(glmm, type="response"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
system.time(pred.M35 <- glmmBoot(dat=newBD, RIPCV ~ Edat65 + COL + TAB_R + (1|TEMP) +(1|medalla), R=100, nc=2)) # takes ~2 min with 9 draws on 3 cores
Error durante el wrapup: 2 nodes produced errors; first error: Invalid grouping factor specification, TEMP
I don't know why is not working the random effects...