Created
July 23, 2018 04:59
-
-
Save Orbifold/0265ee7735356435125901969e3987ed to your computer and use it in GitHub Desktop.
Time series forecasting with H2O.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| install.packages("timetk") | |
| install.packages("tidyquant") | |
| library(h2o) # Awesome ML Library | |
| library(timetk) # Toolkit for working with time series in R | |
| library(tidyquant) # Loads tidyverse, financial pkgs, used to get data | |
| beer_sales_tbl <- tq_get("S4248SM144NCEN", get = "economic.data", from = "2010-01-01", to = "2017-10-27") | |
| beer_sales_tbl %>% | |
| ggplot(aes(date, price)) + | |
| # Train Region | |
| annotate("text", x = ymd("2012-01-01"), y = 7000, | |
| color = palette_light()[[1]], label = "Train Region") + | |
| # Validation Region | |
| geom_rect(xmin = as.numeric(ymd("2016-01-01")), | |
| xmax = as.numeric(ymd("2016-12-31")), | |
| ymin = 0, ymax = Inf, alpha = 0.02, | |
| fill = palette_light()[[3]]) + | |
| annotate("text", x = ymd("2016-07-01"), y = 7000, | |
| color = palette_light()[[1]], label = "Validation\nRegion") + | |
| # Test Region | |
| geom_rect(xmin = as.numeric(ymd("2017-01-01")), | |
| xmax = as.numeric(ymd("2017-08-31")), | |
| ymin = 0, ymax = Inf, alpha = 0.02, | |
| fill = palette_light()[[4]]) + | |
| annotate("text", x = ymd("2017-05-01"), y = 7000, | |
| color = palette_light()[[1]], label = "Test\nRegion") + | |
| # Data | |
| geom_line(col = palette_light()[1]) + | |
| geom_point(col = palette_light()[1]) + | |
| geom_ma(ma_fun = SMA, n = 12, size = 1) + | |
| # Aesthetics | |
| theme_tq() + | |
| scale_x_date(date_breaks = "1 year", date_labels = "%Y") + | |
| labs(title = "Beer Sales: 2007 through 2017", | |
| subtitle = "Train, Validation, and Test Sets Shown") | |
| beer_sales_tbl %>% glimpse() | |
| beer_sales_tbl_aug <- beer_sales_tbl %>% tk_augment_timeseries_signature() | |
| beer_sales_tbl_aug %>% glimpse() | |
| beer_sales_tbl_clean <- beer_sales_tbl_aug %>% | |
| select_if(~ !is.Date(.)) %>% | |
| select_if(~ !any(is.na(.))) %>% | |
| mutate_if(is.ordered, ~ as.character(.) %>% as.factor) | |
| beer_sales_tbl_clean %>% glimpse() | |
| train_tbl <- beer_sales_tbl_clean %>% filter(year < 2016) | |
| valid_tbl <- beer_sales_tbl_clean %>% filter(year == 2016) | |
| test_tbl <- beer_sales_tbl_clean %>% filter(year == 2017) | |
| h2o.init() | |
| h2o.no_progress() | |
| train_h2o <- as.h2o(train_tbl) | |
| valid_h2o <- as.h2o(valid_tbl) | |
| test_h2o <- as.h2o(test_tbl) | |
| y <- "price" | |
| x <- setdiff(names(train_h2o), y) | |
| automl_models_h2o <- h2o.automl( | |
| x = x, | |
| y = y, | |
| training_frame = train_h2o, | |
| validation_frame = valid_h2o, | |
| leaderboard_frame = test_h2o, | |
| max_runtime_secs = 60, | |
| stopping_metric = "deviance") | |
| automl_leader <- automl_models_h2o@leader | |
| pred_h2o <- h2o.predict(automl_leader, newdata = test_h2o) | |
| h2o.performance(automl_leader, newdata = test_h2o) | |
| error_tbl <- beer_sales_tbl %>% | |
| filter(lubridate::year(date) == 2017) %>% | |
| add_column(pred = pred_h2o %>% as.tibble() %>% pull(predict)) %>% | |
| rename(actual = price) %>% | |
| mutate( | |
| error = actual - pred, | |
| error_pct = error / actual | |
| ) | |
| error_tbl |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment