Last active
June 3, 2017 04:56
-
-
Save ledell/c0501c3a7449291d3d11bcec5d0c89d7 to your computer and use it in GitHub Desktop.
Demo of how to use the SigOpt API with H2O in R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --- Configuration -----------------------------------------------------------
# The SigOpt client reads its token from the SIGOPT_API_TOKEN environment
# variable; "HERE" is a placeholder value.
Sys.setenv(SIGOPT_API_TOKEN = "HERE")

# --- Local H2O cluster used for model training -------------------------------
library(h2o)
h2o.init(nthreads = -1)

# --- Data --------------------------------------------------------------------
# iris: the response is the Species column, the predictors are columns 1-4.
data("iris")
y <- "Species"
x <- seq_len(4)
train <- as.h2o(iris)
# --- SigOpt experiment over the Random Forest hyperparameters ----------------
library(SigOptR)

# Search space: one entry per tunable h2o.randomForest() argument.
# The upper bound of mtries is the number of columns in iris minus one.
rf_parameter_space <- list(
  list(
    name = "mtries",
    type = "int",
    bounds = list(min = 1, max = ncol(iris) - 1)
  ),
  list(
    name = "ntrees",
    type = "int",
    bounds = list(min = 1, max = 100)
  ),
  list(
    name = "sample_rate",
    type = "double",
    bounds = list(min = 0.25, max = 1.0)
  ),
  list(
    name = "min_rows",
    type = "int",
    bounds = list(min = 1, max = 10)
  )
)

experiment <- create_experiment(
  list(
    name = "Random Forest (h2o)",
    parameters = rf_parameter_space
  )
)

# Print the URL of the newly created experiment
print(
  paste0(
    "Created experiment: https://sigopt.com/experiment",
    "/",
    experiment$id
  )
)
# Train a 5-fold cross-validated H2O Random Forest for one set of
# hyperparameter assignments and score it.
#
# Args:
#   assignments:    list with elements `ntrees`, `mtries`, `sample_rate` and
#                   `min_rows`, forwarded to the matching h2o.randomForest()
#                   arguments.
#   training_frame: frame to train on, forwarded as `training_frame`.
#   x:              predictor columns, forwarded as `x`.
#   y:              response column, forwarded as `y`.
#
# Returns:
#   1 minus the cross-validated mean per-class error of the fitted model
#   (higher is better).
evaluate_model <- function(assignments, training_frame, x, y) {
  # Train on the frame supplied by the caller; previously the argument was
  # ignored and a global frame was used instead.
  rf_fit <- h2o.randomForest(
    x = x,
    y = y,
    training_frame = training_frame,
    nfolds = 5,
    ntrees = assignments$ntrees,
    mtries = assignments$mtries,
    sample_rate = assignments$sample_rate,
    min_rows = assignments$min_rows
  )

  # xval = TRUE requests the cross-validation metric
  1 - h2o.mean_per_class_error(rf_fit, xval = TRUE)
}
# Optimization loop: 80 rounds of suggest -> evaluate -> report
for (iteration in seq_len(80)) {
  # Ask SigOpt for the next set of assignments to try
  suggestion <- create_suggestion(experiment$id)

  # Score those assignments locally with a cross-validated Random Forest
  cv_score <- evaluate_model(suggestion$assignments, train, x, y)

  # Report the observed value for this suggestion back to SigOpt
  observation <- list(suggestion = suggestion$id, value = cv_score)
  create_observation(experiment$id, observation)
}
# Fetch the experiment again so that its progress reflects the reported
# observations, then pull out the assignments of the best observation.
experiment <- fetch_experiment(experiment$id)
best_observation <- experiment$progress$best_observation
best_assignments <- best_observation$assignments

# Wrap up the experiment: fit a Random Forest on the training frame using the
# best assignments (still with 5-fold cross-validation).
rf <- h2o.randomForest(
  x = x,
  y = y,
  training_frame = train,
  nfolds = 5,
  ntrees = best_assignments$ntrees,
  mtries = best_assignments$mtries,
  sample_rate = best_assignments$sample_rate,
  min_rows = best_assignments$min_rows
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment