Created
October 1, 2017 21:31
-
-
Save saivarunk/904a27664b9bbf5c90a513afd00c8721 to your computer and use it in GitHub Desktop.
H2O Auto ML Starter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# H2O AutoML starter script.
#
# Reads the property and training CSVs, trains H2O AutoML to predict
# `logerror`, scores every row of the properties table with the leader
# model, and writes one prediction column per submission month.

library(data.table)
library(h2o)

# Load train and properties data ----
# Column 50 of the properties file is forced to character on read.
properties <- fread(
  "../input/properties_2016.csv",
  header = TRUE,
  stringsAsFactors = FALSE,
  colClasses = list(character = 50)
)
train <- fread("../input/train_2016_v2.csv")

# Keep every training row (all.y = TRUE) and attach its property attributes.
training <- merge(properties, train, by = "parcelid", all.y = TRUE)

# Initialise H2O ----
h2o.init(nthreads = -1, max_mem_size = "8g")

# Mark predictor and response variables ----
# Predictors are restricted to columns that also exist in the frame being
# scored (`properties`); the `parcelid` join key is an identifier and is
# excluded. Columns that only exist in the training CSV are therefore never
# used as features.
y <- "logerror"
x <- setdiff(names(properties), "parcelid")

# Import data into H2O ----
train_hex <- as.h2o(training)
test_hex <- as.h2o(properties)

# Fit H2O AutoML model ----
aml <- h2o.automl(
  x = x,
  y = y,
  training_frame = train_hex,
  max_runtime_secs = 1800,
  stopping_metric = "MAE"
)

# Store and print the H2O AutoML leaderboard ----
lb <- aml@leaderboard
lb

# Best model in the leaderboard ----
leader <- aml@leader
leader

# Generate predictions using the leader model ----
# The leader is passed explicitly instead of the AutoML container object.
pred <- h2o.predict(leader, test_hex)
predictions <- round(as.vector(pred), 4)

# Prepare predictions for submission file ----
# The same prediction vector is written for every submission month.
# check.names = FALSE keeps the numeric-looking column names unmangled.
result <- data.frame(
  parcelid = properties$parcelid,
  `201610` = predictions,
  `201611` = predictions,
  `201612` = predictions,
  `201710` = predictions,
  `201711` = predictions,
  `201712` = predictions,
  check.names = FALSE
)

# Discourage scientific notation when numbers are written out as text.
options(scipen = 999)

# Write results to submission file ----
write.csv(result, file = "submission_xgb_ensemble.csv", row.names = FALSE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment