ledell · June 3, 2017 04:56
diff --git a/h2o_rf_sigopt_demo_iris.R b/h2o_rf_sigopt_demo_iris.R
 # SigOpt credentials: "HERE" is a placeholder to be replaced with a real
 # API token before the script is run.
 Sys.setenv(SIGOPT_API_TOKEN = "HERE")

 # Start a local H2O cluster for model training. nthreads = -1 requests all
 # available CPU cores (per the h2o.init documentation).
 library(h2o)
 h2o.init(nthreads = -1)

 # Load iris, record predictor/response columns, and push the data to H2O.
 data(iris)
 x <- 1:4          # predictor column indices
 y <- "Species"    # response column name
 train <- as.h2o(iris)


 # Register a SigOpt experiment describing the Random Forest search space:
 # one entry per tunable hyperparameter, each with a type and bounds.
 library(SigOptR)
 experiment <- create_experiment(
   list(
     name = "Random Forest (h2o)",
     parameters = list(
       list(
         name = "mtries",
         type = "int",
         bounds = list(min = 1, max = ncol(iris) - 1)
       ),
       list(
         name = "ntrees",
         type = "int",
         bounds = list(min = 1, max = 100)
       ),
       list(
         name = "sample_rate",
         type = "double",
         bounds = list(min = 0.25, max = 1.0)
       ),
       list(
         name = "min_rows",
         type = "int",
         bounds = list(min = 1, max = 10)
       )
     )
   )
 )
 print(
   paste(
     "Created experiment: https://sigopt.com/experiment",
     experiment$id,
     sep = "/"
   )
 )


 # Train and 5-fold cross-validate an H2O Random Forest for one set of
 # hyperparameter assignments.
 #
 # Arguments:
 #   assignments    - list-like object providing `ntrees`, `mtries`,
 #                    `sample_rate` and `min_rows`.
 #   training_frame - frame handed to h2o.randomForest() as `training_frame`.
 #   x              - predictor columns, passed through to h2o.randomForest().
 #   y              - response column, passed through to h2o.randomForest().
 #
 # Returns:
 #   1 minus the cross-validated mean per-class error of the fitted model.
 evaluate_model <- function(assignments, training_frame, x, y) {
   # Fit on the frame supplied by the caller; previously the global `train`
   # was used here and the `training_frame` argument was silently ignored.
   rf_fit <- h2o.randomForest(
     x = x,
     y = y,
     training_frame = training_frame,
     nfolds = 5,
     ntrees = assignments$ntrees,
     mtries = assignments$mtries,
     sample_rate = assignments$sample_rate,
     min_rows = assignments$min_rows
   )

   # xval = TRUE selects the cross-validation metric rather than the
   # training metric.
   1 - h2o.mean_per_class_error(rf_fit, xval = TRUE)
 }



 # Optimization loop: 80 suggest / evaluate / report rounds.
 for (i in seq_len(80)) {
   # Ask SigOpt for the next set of assignments to try
   suggestion <- create_suggestion(experiment$id)

   # Score the suggested assignments locally
   res <- evaluate_model(suggestion$assignments, train, x, y)

   # Report the result for this suggestion back to SigOpt; only the metric
   # value is sent (no standard deviation is included).
   create_observation(
     experiment$id,
     list(suggestion = suggestion$id, value = res)
   )
 }


 # Fetch the experiment again so that its progress reflects the reported
 # observations, then pull out the assignments of the best observation.
 experiment <- fetch_experiment(experiment$id)
 best_assignments <- experiment$progress$best_observation$assignments


 # To wrap up the experiment, fit a Random Forest on `train` using the best
 # assignments (nfolds = 5 is kept, as in the tuning runs).
 rf <- h2o.randomForest(
   x = x,
   y = y,
   training_frame = train,
   nfolds = 5,
   ntrees = best_assignments$ntrees,
   mtries = best_assignments$mtries,
   sample_rate = best_assignments$sample_rate,
   min_rows = best_assignments$min_rows
 )
	# SigOpt credentials: "HERE" is a placeholder to be replaced with a real
	# API token before the script is run.
	Sys.setenv(SIGOPT_API_TOKEN = "HERE")

	# Start a local H2O cluster for model training. nthreads = -1 requests all
	# available CPU cores (per the h2o.init documentation).
	library(h2o)
	h2o.init(nthreads = -1)

	# Load iris, record predictor/response columns, and push the data to H2O.
	data(iris)
	x <- 1:4          # predictor column indices
	y <- "Species"    # response column name
	train <- as.h2o(iris)


	# Register a SigOpt experiment describing the Random Forest search space:
	# one entry per tunable hyperparameter, each with a type and bounds.
	library(SigOptR)
	experiment <- create_experiment(
	  list(
	    name = "Random Forest (h2o)",
	    parameters = list(
	      list(
	        name = "mtries",
	        type = "int",
	        bounds = list(min = 1, max = ncol(iris) - 1)
	      ),
	      list(
	        name = "ntrees",
	        type = "int",
	        bounds = list(min = 1, max = 100)
	      ),
	      list(
	        name = "sample_rate",
	        type = "double",
	        bounds = list(min = 0.25, max = 1.0)
	      ),
	      list(
	        name = "min_rows",
	        type = "int",
	        bounds = list(min = 1, max = 10)
	      )
	    )
	  )
	)
	print(
	  paste(
	    "Created experiment: https://sigopt.com/experiment",
	    experiment$id,
	    sep = "/"
	  )
	)


	# Train and 5-fold cross-validate an H2O Random Forest for one set of
	# hyperparameter assignments.
	#
	# Arguments:
	#   assignments    - list-like object providing `ntrees`, `mtries`,
	#                    `sample_rate` and `min_rows`.
	#   training_frame - frame handed to h2o.randomForest() as `training_frame`.
	#   x              - predictor columns, passed through to h2o.randomForest().
	#   y              - response column, passed through to h2o.randomForest().
	#
	# Returns:
	#   1 minus the cross-validated mean per-class error of the fitted model.
	evaluate_model <- function(assignments, training_frame, x, y) {
	  # Fit on the frame supplied by the caller; previously the global `train`
	  # was used here and the `training_frame` argument was silently ignored.
	  rf_fit <- h2o.randomForest(
	    x = x,
	    y = y,
	    training_frame = training_frame,
	    nfolds = 5,
	    ntrees = assignments$ntrees,
	    mtries = assignments$mtries,
	    sample_rate = assignments$sample_rate,
	    min_rows = assignments$min_rows
	  )

	  # xval = TRUE selects the cross-validation metric rather than the
	  # training metric.
	  1 - h2o.mean_per_class_error(rf_fit, xval = TRUE)
	}



	# Optimization loop: 80 suggest / evaluate / report rounds.
	for (i in seq_len(80)) {
	  # Ask SigOpt for the next set of assignments to try
	  suggestion <- create_suggestion(experiment$id)

	  # Score the suggested assignments locally
	  res <- evaluate_model(suggestion$assignments, train, x, y)

	  # Report the result for this suggestion back to SigOpt; only the metric
	  # value is sent (no standard deviation is included).
	  create_observation(
	    experiment$id,
	    list(suggestion = suggestion$id, value = res)
	  )
	}


	# Fetch the experiment again so that its progress reflects the reported
	# observations, then pull out the assignments of the best observation.
	experiment <- fetch_experiment(experiment$id)
	best_assignments <- experiment$progress$best_observation$assignments


	# To wrap up the experiment, fit a Random Forest on `train` using the best
	# assignments (nfolds = 5 is kept, as in the tuning runs).
	rf <- h2o.randomForest(
	  x = x,
	  y = y,
	  training_frame = train,
	  nfolds = 5,
	  ntrees = best_assignments$ntrees,
	  mtries = best_assignments$mtries,
	  sample_rate = best_assignments$sample_rate,
	  min_rows = best_assignments$min_rows
	)