Created
December 30, 2018 19:55
-
-
Save saivarunk/077f5832005a0e453b25eac6bc070d70 to your computer and use it in GitHub Desktop.
Exploring H2O.ai AutoML
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(data.table)
library(h2o)

# Load data ----
# `properties` holds the per-parcel attributes and `train` holds the
# `logerror` response per parcel. Column 50 of the properties file is read
# as character.
properties <- fread(
  "../input/properties_2016.csv",
  header = TRUE,
  stringsAsFactors = FALSE,
  colClasses = list(character = 50)
)
train <- fread("../input/train_2016_v2.csv")

# Keep every row of `train` (all.y = TRUE) and attach the matching
# property attributes by `parcelid`.
training <- merge(properties, train, by = "parcelid", all.y = TRUE)
# Initialise H2O ----
# nthreads = -1 lets H2O use all available cores; the heap is capped at 8 GB.
h2o.init(nthreads = -1, max_mem_size = "8g")

# Mark predictor and response variables ----
y <- "logerror"
# Exclude the response and the non-feature columns from the predictors:
#   * parcelid        - row identifier, not a property attribute
#   * transactiondate - comes from `train` only, so it is absent from the
#                       `properties` frame that is scored later
x <- setdiff(names(training), c(y, "parcelid", "transactiondate"))

# Import data into H2O ----
# NOTE: `train` is rebound here from the raw data.table to the H2OFrame
# used for model fitting; `test` is the full properties table to score.
train <- as.h2o(training)
test <- as.h2o(properties)
# Fit H2O AutoML model ----
# Run AutoML for at most 30 minutes (1800 s), using MAE as the
# early-stopping metric.
aml <- h2o.automl(
  x = x,
  y = y,
  training_frame = train,
  max_runtime_secs = 1800,
  stopping_metric = "MAE"
)

# Store and display the H2O AutoML leaderboard
lb <- aml@leaderboard
lb

# Display the best model in the leaderboard
aml@leader
# Generate predictions using the leader model ----
# Predict with the leader explicitly so this does not rely on a predict
# method being defined for the AutoML wrapper object itself.
pred <- h2o.predict(aml@leader, test)
predictions <- round(as.vector(pred), 4)

# Prepare predictions for the submission file ----
# The same prediction is written to each of the six period columns.
# check.names = FALSE keeps the numeric-looking column names as-is
# (otherwise data.frame() would rewrite them to e.g. "X201610").
result <- data.frame(
  parcelid = properties$parcelid,
  `201610` = predictions,
  `201611` = predictions,
  `201612` = predictions,
  `201710` = predictions,
  `201711` = predictions,
  `201712` = predictions,
  check.names = FALSE
)

# Avoid scientific notation in the written values
options(scipen = 999)

# Write results to the submission file
write.csv(result, file = "submission_xgb_ensemble.csv", row.names = FALSE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment