Last active
January 27, 2018 12:09
-
-
Save ledell/9f0a7c7403396cd2dee5 to your computer and use it in GitHub Desktop.
h2oEnsemble R package demo (from h2o.ensemble docs)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A small-data example of binary classification on a local machine using
# H2O Ensemble.
library(h2oEnsemble)

# Start an H2O cluster with nthreads = num cores on your machine
# (nthreads = -1 requests every available core).
localH2O <- h2o.init(nthreads = -1)
# Import a sample binary outcome train/test set into R.
# header = FALSE is spelled out because the response is addressed below by the
# auto-generated column name "V1"; without it read.table() guesses from the
# shape of the first row.
train <- read.table(
  "http://www.stat.berkeley.edu/~ledell/data/higgs_10k.csv",
  sep = ",",
  header = FALSE
)
test <- read.table(
  "http://www.stat.berkeley.edu/~ledell/data/higgs_test_5k.csv",
  sep = ",",
  header = FALSE
)

# Convert R data.frames into H2O parsed data objects
training_frame <- as.h2o(train)
validation_frame <- as.h2o(test)

# Response column, predictor columns (everything except the response) and
# the model family passed to h2o.ensemble().
y <- "V1"
x <- setdiff(names(training_frame), y)
family <- "binomial"

# Force binary classification: the response must be a factor in both frames.
training_frame[, y] <- as.factor(training_frame[, y])
validation_frame[, y] <- as.factor(validation_frame[, y])
# Specify the base learner library & the metalearner.
# Each entry is the name of a wrapper function, passed as a string.
learner <- c(
  "h2o.glm.wrapper",
  "h2o.randomForest.wrapper",
  "h2o.gbm.wrapper",
  "h2o.deeplearning.wrapper"
)
metalearner <- "h2o.deeplearning.wrapper"
# Train the ensemble using 5-fold CV to generate level-one data.
# More CV folds will take longer to train, but should increase performance.
fit <- h2o.ensemble(
  x = x,
  y = y,
  training_frame = training_frame,
  family = family,
  learner = learner,
  metalearner = metalearner,
  cvControl = list(V = 5, shuffle = TRUE)
)
# Generate predictions on the test set
pred <- predict.h2o.ensemble(fit, validation_frame)

# Pull the ensemble predictions into R; column p1 is P(Y==1).
predictions <- as.data.frame(pred$pred)[["p1"]]

# True test-set labels: the single response column, pulled into R.
labels <- as.data.frame(validation_frame[, y])[[1]]
# Ensemble test AUC
library(cvAUC)  # Install from CRAN
cvAUC::AUC(predictions = predictions, labels = labels)
# 0.7888723
# Base learner test AUC (for comparison)
L <- length(learner)

# Convert the base-learner prediction frame to a data.frame once, rather than
# repeating the conversion for every learner inside the loop.
basepred_df <- as.data.frame(pred$basepred)

# One AUC per base learner; column l of the base predictions is paired with
# learner[l] in the table below. vapply() guarantees a numeric vector of
# length L.
auc <- vapply(
  seq_len(L),
  function(l) cvAUC::AUC(predictions = basepred_df[[l]], labels = labels),
  numeric(1)
)
data.frame(learner, auc)
#                    learner       auc
# 1          h2o.glm.wrapper 0.6871288
# 2 h2o.randomForest.wrapper 0.7711654
# 3          h2o.gbm.wrapper 0.7817075
# 4 h2o.deeplearning.wrapper 0.7425813
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment