Created
November 1, 2022 19:14
-
-
Save ivopbernardo/a2f444e19aab930c482946441a56f62d to your computer and use it in GitHub Desktop.
h2o R Example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages: h2o for modelling, ggplot2 for the diagnostic plots.
library(h2o)
library(ggplot2)

# Load dataset - London bike sharing.
# The path is relative to the current working directory.
london_bike <- read.csv("./london_merged.csv")

# weather_code and season are converted to factors so they are handled as
# categories rather than as numbers. Selecting the columns by name also fails
# early with "undefined columns selected" if the CSV layout is not as expected.
factor_cols <- c("weather_code", "season")
london_bike[factor_cols] <- lapply(london_bike[factor_cols], as.factor)
# Start (or connect to) the H2O cluster used by every call below.
h2o.init()

# Convert london_bike to an H2O frame.
london_bike.h2o <- as.h2o(london_bike)

# Split into train (ratio 0.8) and test (the remainder); the seed keeps the
# partition the same across runs.
london_bike_split <- h2o.splitFrame(
  data = london_bike.h2o,
  ratios = 0.8,
  seed = 1234
)
training_data <- london_bike_split[[1]]
test_data <- london_bike_split[[2]]
# Training linear regression: feature columns and target column passed as
# `x` and `y` to the models trained in this script.
predictors <- c(
  "t1", "t2", "hum", "wind_speed", "weather_code",
  "is_holiday", "is_weekend", "season"
)
response <- "cnt"
# Model 1: baseline linear regression.
# h2o.glm applies an elastic-net penalty by default (lambda is chosen
# heuristically), so lambda = 0 is passed to get an unpenalized fit that can
# serve as the baseline for the regularized model.
london_bike_model <- h2o.glm(
  x = predictors,
  y = response,
  training_frame = training_data,
  lambda = 0
)

# Score the held-out rows with the fitted model.
test_predict <- h2o.predict(
  object = london_bike_model,
  newdata = test_data
)

# Pull actual counts and predictions into one local data.frame
# (columns `cnt` and `predict`) for plotting.
predictions_x_real <- cbind(
  as.data.frame(test_data$cnt),
  as.data.frame(test_predict)
)

# Actual vs. predicted scatter plot. print() is explicit so the plot is also
# rendered when the script is executed through source().
print(
  ggplot(data = predictions_x_real, aes(x = cnt, y = predict)) +
    geom_point(color = "darkgreen") +
    xlab("Actual Label") +
    ylab("Predictions")
)
# Training linear regression using regularization.
# alpha = 1 selects a pure L1 (lasso) penalty; the penalty strength (lambda)
# is left for H2O to choose.
london_bike_model_regularized <- h2o.glm(
  x = predictors,
  y = response,
  training_frame = training_data,
  alpha = 1
)

# Score the held-out rows with the regularized model.
test_predict_regularized <- h2o.predict(
  object = london_bike_model_regularized,
  newdata = test_data
)

# Actual counts next to predictions (columns `cnt` and `predict`).
predictions_x_real_regularized <- cbind(
  as.data.frame(test_data$cnt),
  as.data.frame(test_predict_regularized)
)

# Actual vs. predicted scatter plot. print() is explicit so the plot is also
# rendered when the script is executed through source().
print(
  ggplot(data = predictions_x_real_regularized, aes(x = cnt, y = predict)) +
    geom_point(color = "darkgreen") +
    xlab("Actual Label") +
    ylab("Predictions")
)
# Evaluating models - using validation_frame.
# Refit the linear regression with the test data attached as validation frame
# so metrics are available for both data sets. lambda = 0 disables the
# elastic-net penalty that h2o.glm would otherwise apply by default, so the
# model evaluated here is a plain linear regression.
london_bike_model <- h2o.glm(
  x = predictors,
  y = response,
  training_frame = training_data,
  validation_frame = test_data,
  lambda = 0
)

# RMSE on the training and validation frames; printed explicitly so the values
# are shown when the script is executed through source().
print(h2o.rmse(london_bike_model, train = TRUE, valid = TRUE))
# Random forest example: 25 trees of depth at most 5, with the test data
# attached as validation frame. The seed makes the forest reproducible, in
# line with the seeded split and the seeded models elsewhere in this script.
london_bike_rf <- h2o.randomForest(
  x = predictors,
  y = response,
  ntrees = 25,
  max_depth = 5,
  training_frame = training_data,
  validation_frame = test_data,
  seed = 1234
)

# Retrieving metrics for the random forest (training and validation frames);
# printed explicitly so they are shown when the script is run via source().
print(h2o.rmse(london_bike_rf, train = TRUE, valid = TRUE))
print(h2o.r2(london_bike_rf, train = TRUE, valid = TRUE))
# Training neural network: four hidden layers (6, 6, 4 and 7 units) with
# Rectifier activation, trained for up to 1000 epochs. `reproducible = TRUE`
# together with a fixed seed is requested so repeated runs give the same model.
nn_model <- h2o.deeplearning(
  x = predictors,
  y = response,
  hidden = c(6, 6, 4, 7),
  epochs = 1000,
  train_samples_per_iteration = -1,
  reproducible = TRUE,
  activation = "Rectifier",
  seed = 23123,
  training_frame = training_data,
  validation_frame = test_data
)

# Neural network evaluation (training and validation frames); printed
# explicitly so the values are shown when the script is run via source().
print(h2o.rmse(nn_model, train = TRUE, valid = TRUE))
print(h2o.r2(nn_model, train = TRUE, valid = TRUE))
# Grid search: every combination of these values is trained (4 x 3 x 3 models).
rf_params <- list(
  ntrees = c(2, 5, 10, 15),
  max_depth = c(3, 5, 9),
  min_rows = c(5, 10, 100)
)

# Train and validate a grid of random forests.
# NOTE(review): the models are ranked on test_data, so the winning
# configuration is tuned to that frame - consider a separate validation split
# if test_data should remain an unbiased hold-out.
rf_grid <- h2o.grid(
  "randomForest",
  x = predictors,
  y = response,
  grid_id = "rf_grid",
  training_frame = training_data,
  validation_frame = test_data,
  seed = 1,
  hyper_params = rf_params
)

# Models ordered from highest to lowest r2. The result is kept in a variable
# and printed explicitly so it is visible when the script is run via source().
rf_grid_sorted <- h2o.getGrid(
  grid_id = "rf_grid",
  sort_by = "r2",
  decreasing = TRUE
)
print(rf_grid_sorted)
# AutoML routine: train up to 15 models with a fixed seed.
# h2o.automl ignores `validation_frame` unless cross-validation is disabled
# (nfolds = 0), so the held-out data is passed as `leaderboard_frame` instead;
# that way the leaderboard ranks the models on test_data.
aml <- h2o.automl(
  x = predictors,
  y = response,
  training_frame = training_data,
  leaderboard_frame = test_data,
  max_models = 15,
  seed = 1
)

# Show the ranked models.
print(aml@leaderboard)
# Explainability: fit the random forest that the plots below explain
# (25 trees, depth at most 5). The seed keeps the forest, and therefore the
# importances and SHAP values, the same across runs.
london_bike_rf <- h2o.randomForest(
  x = predictors,
  y = response,
  ntrees = 25,
  max_depth = 5,
  training_frame = training_data,
  validation_frame = test_data,
  seed = 1234
)

# Variable importance plot.
h2o.varimp_plot(london_bike_rf)

# The SHAP helpers return plot objects; print() is explicit so they are also
# rendered when the script is executed through source().

# SHAP summary over the test data.
print(h2o.shap_summary_plot(london_bike_rf, test_data))

# SHAP explanation for a single row of the test data.
print(h2o.shap_explain_row_plot(london_bike_rf, test_data, row_index = 4))

# SHAP explanation for a summer row.
# NOTE(review): which observation sits at row 830 depends on the train/test
# split - confirm it is still a summer row if the split or its seed changes.
print(h2o.shap_explain_row_plot(london_bike_rf, test_data, row_index = 830))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment