Created
July 24, 2017 16:40
-
-
Save ledell/71e0b8861d4fa35b59dde2af282815a5 to your computer and use it in GitHub Desktop.
Demo of how to use grid search on H2O's XGBoost: http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/xgboost.html
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demo: random-discrete grid search over H2O XGBoost hyperparameters on the
# HIGGS sample data, ranked by cross-validated AUC and then scored on the
# held-out test frame.
library(h2o)
h2o.init()

# Fail early with a clear message if this H2O cluster cannot build XGBoost
# models (XGBoost is not available on every platform H2O runs on).
if (!h2o.xgboost.available()) {
  stop("XGBoost is not available on this H2O cluster", call. = FALSE)
}

# Load the HIGGS dataset
train <- h2o.importFile("https://s3.amazonaws.com/erin-data/higgs/higgs_train_10k.csv")
test <- h2o.importFile("https://s3.amazonaws.com/erin-data/higgs/higgs_test_5k.csv")

y <- "response"
x <- setdiff(names(train), y)
# NOTE(review): `family` is not passed to any call in this script; it is kept
# only so the set of variables the script defines is unchanged.
family <- "binomial"

# For binary classification, response should be a factor
train[, y] <- as.factor(train[, y])
test[, y] <- as.factor(test[, y])

# Some XGBoost/GBM hyperparameters: candidate values the random search draws
# combinations from.
hyper_params <- list(
  ntrees = seq(10, 1000, 1),
  learn_rate = seq(0.0001, 0.2, 0.0001),
  max_depth = seq(1, 20, 1),
  sample_rate = seq(0.5, 1.0, 0.0001),
  col_sample_rate = seq(0.2, 1.0, 0.0001)
)

# Random search capped at 10 models; the seed makes the sampled combinations
# reproducible.
search_criteria <- list(
  strategy = "RandomDiscrete",
  max_models = 10,
  seed = 1
)

# Train the grid (5-fold cross-validation for every candidate model)
xgb_grid <- h2o.grid(
  algorithm = "xgboost",
  x = x, y = y,
  training_frame = train,
  nfolds = 5,
  seed = 1,
  hyper_params = hyper_params,
  search_criteria = search_criteria
)

# Sort the grid by CV AUC (best model first)
grid <- h2o.getGrid(grid_id = xgb_grid@grid_id, sort_by = "AUC", decreasing = TRUE)
# Value of the "model_ids" column in the first (top-ranked) summary row
grid_top_model <- grid@summary_table[1, "model_ids"]

# Retrieve the top-ranked model and score it on the held-out test frame, so
# the reported AUC is not the same CV metric that was used to pick the model.
best_xgb <- h2o.getModel(grid@model_ids[[1]])
best_xgb_perf <- h2o.performance(model = best_xgb, newdata = test)
print(h2o.auc(best_xgb_perf))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks rknimmakayala,
that's a little bit too much for me. I do it natively in R via caret grid search. Works like a charm.