Skip to content

Instantly share code, notes, and snippets.

@geneorama
Created November 2, 2016 13:34
Show Gist options
  • Save geneorama/65b714ffde41ed0c9da57fb4c98ac22a to your computer and use it in GitHub Desktop.
Save geneorama/65b714ffde41ed0c9da57fb4c98ac22a to your computer and use it in GitHub Desktop.
Heap your criticisms here
grid_glmnet <- expand.grid(alpha = c(0, .2, .4, .6, .8, 1),
# lambda = seq(.01, .2, length = 40))
#lambda = rev(exp(seq(log(.00001), log(200), length=10)))
lambda = rev(exp(seq(log(.00001), log(20), length=20))))
results_glmnet <- list()
# g <- unique(dat$grp)[1]
# i <- 1
for(i in 1:nrow(grid_glmnet)){
print(i)
results_glmnet[[i]] <- list() ## Placeholder for ith parameter set
results_glmnet[[i]][["cv"]] <- list() ## Placeholder for CV results
results_glmnet[[i]][['param']] <- grid_glmnet[i,]
params <- results_glmnet[[i]][['param']]
for(g in unique(dat$grp)){
## Run model
xmat_train <- dat[!(grp == g), .SD, .SDcols=-c("grp","date","id", "wnv")]
ymat_train <- dat[!(grp == g), wnv]
m <- glmnet(x = as.matrix(xmat_train),
y = as.matrix(ymat_train),
family = "binomial",
alpha = params$alpha,
lambda = params$lambda)
xmat_test <- dat[(grp == g), .SD, .SDcols=-c("grp","date","id", "wnv")]
ymat_test <- dat[(grp == g), wnv]
yhat_test <- predict(m, as.matrix(xmat_test), type = "response")[,1]
## Store results
results_glmnet[[i]][["cv"]][[g]] <- list()
results_glmnet[[i]][["cv"]][[g]][["model"]] <- m
results_glmnet[[i]][["cv"]][[g]][["xmat_train"]] <- xmat_train
results_glmnet[[i]][["cv"]][[g]][["ymat_train"]] <- ymat_train
results_glmnet[[i]][["cv"]][[g]][["xmat_test"]] <- xmat_test
results_glmnet[[i]][["cv"]][[g]][["ymat_test"]] <- ymat_test
results_glmnet[[i]][["cv"]][[g]][["yhat_test"]] <- yhat_test
}
}
# rm(i, g, m, xmat_test, xmat_train, ymat_test, ymat_train)
lll()
## Example structure
# results_glmnet[[1]]$cv$year2008$model$a0
# str(results_glmnet, 2)
# sapply(results_glmnet, function(l) sapply(l$cv, function(x)x$model$lambda))
names(results_glmnet[[1]])
## Model metrics
a0_yearly <- t(sapply(results_glmnet, function(l)sapply(l$cv, function(x) unname(x$model$a0))))
nulldev_yearly <- t(sapply(results_glmnet, function(l)sapply(l$cv, function(x)x$model$nulldev)))
devratio_yearly <- t(sapply(results_glmnet, function(l)sapply(l$cv, function(x)x$model$dev.ratio)))
## Predictions and RMSE
preds <- lapply(results_glmnet, function(l) lapply(l$cv, function(x) x$yhat_test))
preds <- lapply(preds, unsplit, dat$grp)
preds <- do.call(cbind, preds)
ymat_test <- lapply(results_glmnet, function(l) lapply(l$cv, function(x) x$ymat_test))
ymat_test <- lapply(ymat_test, unsplit, dat$grp)
ymat_test <- do.call(cbind, ymat_test)
err <- (preds - ymat_test)
errsqrd <- (err) ^ 2
rmse_mean <- sqrt(apply(errsqrd, 2, mean))
rmse_yearly <- t(apply(errsqrd, 2, function(col)
sapply(split(col, dat$grp), function(e) sqrt(mean(e)))))
## AUC and ROC based on predictions
metrics <- lapply(results_glmnet, function(l) lapply(l$cv, function(x)
calculate_metrics(x$ymat_test, x$yhat_test)))
auc_yearly <- t(sapply(metrics, function(l) sapply(l, `[[`, "auc")))
roc_yearly <- t(sapply(metrics, function(l) sapply(l, `[[`, "roc")))
kappa_yearly <- t(sapply(metrics, function(l) sapply(l, `[[`, "kappa")))
## Overall results
a0_mean <- apply(a0_yearly, 1, mean)
nulldev_mean <- apply(nulldev_yearly, 1, mean)
devratio_mean <- apply(devratio_yearly, 1, mean)
auc_mean <- apply(auc_yearly, 1, mean)
roc_mean <- apply(roc_yearly, 1, mean)
kappa_mean <- apply(kappa_yearly, 1, mean)
result_summary <- data.table(grid_glmnet, a0_mean, nulldev_mean, devratio_mean,
auc_mean, roc_mean, rmse_mean)
result_summary
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment