-
-
Save lejarx/31570bd6cb3354612606d671cfef45d6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(RemixAutoML)
library(data.table)

###########################################
# Prepare data for AutoTS()----
###########################################

# Pull the Walmart training data from Dropbox----
data <- data.table::fread("https://www.dropbox.com/s/2str3ek4f4cheqi/walmart_train.csv?dl=1")

# Keep only Store / Dept pairs whose series has all 143 time points----
data[, Counts := .N, by = c("Store", "Dept")]
data <- data[Counts == 143]
data[, Counts := NULL]

# Retain the modelling columns only (this removes the IsHoliday column)----
keep <- c("Store", "Dept", "Date", "Weekly_Sales")
data <- data[, keep, with = FALSE]

# Collapse Store and Dept into one space-separated series key----
data[, GroupVar := paste(Store, Dept, sep = " ")]
data[, c("Store", "Dept") := NULL]

# Distinct series keys; the AutoTS() loop iterates over these----
StoreDept <- unique(data$GroupVar)
###########################################
# AutoTS() Builds----
###########################################

# For each holdout length z, fit AutoTS() separately on every series key in
# StoreDept. Two lists, both named by series key, are filled per holdout:
#   OutputList - the AutoTS() return value, or the string
#                "Error in AutoTS run" when the call raised an error
#   TimerList  - the system.time() measurement of that call
# Both lists are saved to the working directory once all series are done.
for (z in c(1, 5, 10, 20, 30)) {
  TimerList <- list()
  OutputList <- list()

  for (l in seq_along(StoreDept)) {
    i <- StoreDept[l]

    # Single-series slice; the key column is dropped before modelling
    temp <- data[GroupVar == eval(i)]
    temp[, GroupVar := NULL]

    TimerList[[i]] <- system.time(
      OutputList[[i]] <- tryCatch({
        RemixAutoML::AutoTS(
          temp,
          TargetName = "Weekly_Sales",
          DateName = "Date",
          FCPeriods = 1,
          HoldOutPeriods = z,
          EvaluationMetric = "MAPE",
          TimeUnit = "week",
          Lags = 25,
          SLags = 1,
          NumCores = 4,
          SkipModels = NULL,
          StepWise = TRUE,
          TSClean = TRUE,
          ModelFreq = TRUE,
          PrintUpdates = FALSE)},
        error = function(x) {
          # Report which series failed and why instead of dropping the
          # condition; the stored sentinel value is unchanged so downstream
          # code that checks for it keeps working.
          message("AutoTS failed for series '", i, "' (HoldOutPeriods = ", z,
                  "): ", conditionMessage(x))
          "Error in AutoTS run"
        }))

    # Progress indicator: number of series processed so far
    print(l)
  }

  # Save Results When Done and Pull Them in After AutoCatBoostCARMA() Run----
  # NOTE: the two file names are not symmetric (only the timer file has an
  # underscore before the extension), and save() writes RData content despite
  # the ".R" extension. Anything that load()s these must use the exact names.
  save(TimerList, file = paste0(getwd(), "/TimerList_FC_", z, "_.R"))
  save(OutputList, file = paste0(getwd(), "/OutputList_FC_", z, ".R"))
  rm(OutputList, TimerList)
}
###########################################
# Prepare data for AutoCatBoostCARMA()----
###########################################

# Load Walmart Data from Dropbox----
data <- data.table::fread("https://www.dropbox.com/s/2str3ek4f4cheqi/walmart_train.csv?dl=1")

# Subset for Stores / Departments With Full Series (143 time points each)----
data <- data[, Counts := .N, by = c("Store", "Dept")][Counts == 143][, Counts := NULL]

# Subset Columns (remove IsHoliday column)----
keep <- c("Store", "Dept", "Date", "Weekly_Sales")
data <- data[, ..keep]

# Build AutoCatBoostCARMA Models----
# One model per holdout length z. The last element of SplitRatios and the
# output file name both depend on z, matching the XGBoost / H2O sections, so
# each iteration evaluates a different holdout and writes its own
# CatBoost_Results_<z>.csv. Previously both were hard-coded to 30, so every
# iteration repeated the same run and overwrote the same file.
for (z in c(1, 5, 10, 20, 30)) {
  CatBoostResults <- RemixAutoML::AutoCatBoostCARMA(
    data,
    TargetColumnName = "Weekly_Sales",
    DateColumnName = "Date",
    GroupVariables = c("Store", "Dept"),
    # NOTE(review): the other *CARMA() sections use FC_Periods = 2; confirm
    # whether 10 is intentional here.
    FC_Periods = 10,
    TimeUnit = "week",
    TargetTransformation = TRUE,
    Lags = c(1:25, 51, 52, 53),
    MA_Periods = c(1:25, 51, 52, 53),
    CalendarVariables = TRUE,
    TimeTrendVariable = TRUE,
    HolidayVariable = TRUE,
    DataTruncate = FALSE,
    SplitRatios = c(1 - (30 + z) / 143, 30 / 143, z / 143),
    TaskType = "GPU",
    EvalMetric = "RMSE",
    GridTune = FALSE,
    GridEvalMetric = "r2",
    ModelCount = 2,
    NTrees = 1500,
    PartitionType = "timeseries",
    Timer = TRUE)

  # Output----
  # Expressions inside a for loop are not auto-printed, so the plot must be
  # printed explicitly to be displayed.
  print(CatBoostResults$TimeSeriesPlot)
  CatBoost_Results <- CatBoostResults$ModelInformation$EvaluationMetricsByGroup
  data.table::fwrite(CatBoost_Results, paste0(getwd(), "/CatBoost_Results_", z, ".csv"))
  rm(CatBoost_Results, CatBoostResults)
}
###########################################
# Prepare data for AutoXGBoostCARMA()----
###########################################

# Load Walmart Data from Dropbox----
data <- data.table::fread("https://www.dropbox.com/s/2str3ek4f4cheqi/walmart_train.csv?dl=1")

# Subset for Stores / Departments With Full Series (143 time points each)----
data <- data[, Counts := .N, by = c("Store", "Dept")][Counts == 143][, Counts := NULL]

# Subset Columns (remove IsHoliday column)----
keep <- c("Store", "Dept", "Date", "Weekly_Sales")
data <- data[, ..keep]

# Build AutoXGBoostCARMA Models----
# One model per holdout length z; the last element of SplitRatios is z / 143
# and the per-group evaluation metrics go to XGBoost_Results<z>.csv.
for (z in c(1, 5, 10, 20, 30)) {
  XGBoostResults <- RemixAutoML::AutoXGBoostCARMA(
    data,
    TargetColumnName = "Weekly_Sales",
    DateColumnName = "Date",
    GroupVariables = c("Store", "Dept"),
    FC_Periods = 2,
    TimeUnit = "week",
    TargetTransformation = TRUE,
    Lags = c(1:25, 51, 52, 53),
    MA_Periods = c(1:25, 51, 52, 53),
    CalendarVariables = TRUE,
    HolidayVariable = TRUE,
    TimeTrendVariable = TRUE,
    DataTruncate = FALSE,
    SplitRatios = c(1 - (30 + z) / 143, 30 / 143, z / 143),
    TreeMethod = "hist",
    EvalMetric = "MAE",
    GridTune = FALSE,
    GridEvalMetric = "mae",
    ModelCount = 1,
    NTrees = 5000,
    PartitionType = "timeseries",
    Timer = TRUE)

  # Output----
  # Expressions inside a for loop are not auto-printed, so the plot must be
  # printed explicitly to be displayed.
  print(XGBoostResults$TimeSeriesPlot)
  XGBoost_Results <- XGBoostResults$ModelInformation$EvaluationMetricsByGroup
  data.table::fwrite(XGBoost_Results, paste0(getwd(), "/XGBoost_Results", z, ".csv"))
  rm(XGBoost_Results)
}
###########################################
# Prepare data for AutoH2oDRFCARMA()----
###########################################

# Load Walmart Data from Dropbox----
data <- data.table::fread("https://www.dropbox.com/s/2str3ek4f4cheqi/walmart_train.csv?dl=1")

# Subset for Stores / Departments With Full Series (143 time points each)----
data <- data[, Counts := .N, by = c("Store", "Dept")][Counts == 143][, Counts := NULL]

# Subset Columns (remove IsHoliday column)----
keep <- c("Store", "Dept", "Date", "Weekly_Sales")
data <- data[, ..keep]

# Build AutoH2oDRFCARMA Models----
# One model per holdout length z; the last element of SplitRatios is z / 143
# and the per-group evaluation metrics go to H2oDRF_Results<z>.csv.
for (z in c(1, 5, 10, 20, 30)) {
  H2oDRFResults <- RemixAutoML::AutoH2oDRFCARMA(
    data,
    TargetColumnName = "Weekly_Sales",
    DateColumnName = "Date",
    GroupVariables = c("Store", "Dept"),
    FC_Periods = 2,
    TimeUnit = "week",
    TargetTransformation = TRUE,
    Lags = c(1:5, 51, 52, 53),
    MA_Periods = c(1:5, 51, 52, 53),
    CalendarVariables = TRUE,
    HolidayVariable = TRUE,
    TimeTrendVariable = TRUE,
    DataTruncate = FALSE,
    SplitRatios = c(1 - (30 + z) / 143, 30 / 143, z / 143),
    EvalMetric = "MAE",
    GridTune = FALSE,
    ModelCount = 1,
    NTrees = 2000,
    PartitionType = "timeseries",
    MaxMem = "28G",
    NThreads = 8,
    Timer = TRUE)

  # Plot aggregate sales forecast (Stores and Departments rolled up into Total)----
  # Expressions inside a for loop are not auto-printed, so the plot must be
  # printed explicitly to be displayed.
  print(H2oDRFResults$TimeSeriesPlot)
  H2oDRF_Results <- H2oDRFResults$ModelInformation$EvaluationMetricsByGroup
  data.table::fwrite(H2oDRF_Results, paste0(getwd(), "/H2oDRF_Results", z, ".csv"))
  rm(H2oDRF_Results)
}
###########################################
# Prepare data for AutoH2oGBMCARMA()----
###########################################

# Load Walmart Data from Dropbox----
data <- data.table::fread("https://www.dropbox.com/s/2str3ek4f4cheqi/walmart_train.csv?dl=1")

# Subset for Stores / Departments With Full Series (143 time points each)----
data <- data[, Counts := .N, by = c("Store", "Dept")][Counts == 143][, Counts := NULL]

# Subset Columns (remove IsHoliday column)----
keep <- c("Store", "Dept", "Date", "Weekly_Sales")
data <- data[, ..keep]

# Build AutoH2oGBMCARMA Models----
# One model per holdout length z; the last element of SplitRatios is z / 143
# and the per-group evaluation metrics go to H2oGBM_Results<z>.csv.
for (z in c(1, 5, 10, 20, 30)) {
  H2oGBMResults <- RemixAutoML::AutoH2oGBMCARMA(
    data,
    TargetColumnName = "Weekly_Sales",
    DateColumnName = "Date",
    GroupVariables = c("Store", "Dept"),
    FC_Periods = 2,
    TimeUnit = "week",
    TargetTransformation = TRUE,
    Lags = c(1:5, 51, 52, 53),
    MA_Periods = c(1:5, 51, 52, 53),
    CalendarVariables = TRUE,
    HolidayVariable = TRUE,
    TimeTrendVariable = TRUE,
    DataTruncate = FALSE,
    SplitRatios = c(1 - (30 + z) / 143, 30 / 143, z / 143),
    EvalMetric = "MAE",
    GridTune = FALSE,
    ModelCount = 1,
    NTrees = 2000,
    PartitionType = "timeseries",
    MaxMem = "28G",
    NThreads = 8,
    Timer = TRUE)

  # Plot aggregate sales forecast (Stores and Departments rolled up into Total)----
  # Expressions inside a for loop are not auto-printed, so the plot must be
  # printed explicitly to be displayed.
  print(H2oGBMResults$TimeSeriesPlot)
  H2oGBM_Results <- H2oGBMResults$ModelInformation$EvaluationMetricsByGroup
  data.table::fwrite(H2oGBM_Results, paste0(getwd(), "/H2oGBM_Results", z, ".csv"))
  rm(H2oGBM_Results)
}
##################################################
# AutoTS() and *CARMA() Model Comparison----
##################################################

# Reduce one AutoTS() result to the first EvaluationMetrics row per model
# family, tagged with its series key.
#   fit        - one element of OutputList (AutoTS() output, or the error
#                sentinel string stored when the run failed)
#   series_key - the "<Store> <Dept>" key of that series
# Returns a data.table with columns StoreDept, ModelName, MAE. When `fit` has
# no usable EvaluationMetrics table, a single placeholder row with
# ModelName == "NONE" is returned so the caller can filter it out.
best_ts_models <- function(fit, series_key) {
  metrics <- tryCatch({
    m <- fit$EvaluationMetrics[, .(ModelName, MAE)]
    # Strip everything from the first underscore so variants of the same
    # model share one family name
    m[, ModelName := gsub("_.*", "", ModelName)]
    # Keep the first row of each family, in the table's existing order
    m[, ID := seq_len(.N), by = "ModelName"]
    m <- m[ID == 1]
    m[, ID := NULL]
    m
  },
  error = function(x) {
    data.table::data.table(ModelName = "NONE", MAE = NA_real_)
  })
  cbind(StoreDept = series_key, metrics)
}

# Read one per-group evaluation file written by a *CARMA() section and shape
# it as (group col 1, group col 2, ModelName, MAE).
#   file_stem  - file name up to (not including) the holdout number,
#                e.g. "CatBoost_Results_" or "XGBoost_Results"
#   model_name - label stored in the ModelName column
#   horizon    - holdout length used in the file name
load_ml_results <- function(file_stem, model_name, horizon) {
  res <- data.table::fread(paste0(getwd(), "/", file_stem, horizon, ".csv"))
  res[, ':=' (MAPE_Metric = NULL, MSE_Metric = NULL, R2_Metric = NULL)]
  data.table::setnames(res, "MAE_Metric", "MAE")
  res[, ModelName := model_name]
  # ModelName was appended last; move it in front of MAE
  data.table::setcolorder(res, c(1, 2, 4, 3))
  res
}

# Gather results----
for (i in c(1, 5, 10, 20, 30)) {

  # Load the lists written by the AutoTS() section. The names must match its
  # save() calls exactly: TimerList_FC_<z>_.R and OutputList_FC_<z>.R in the
  # working directory.
  load(paste0(getwd(), "/TimerList_FC_", i, "_.R"))
  load(paste0(getwd(), "/OutputList_FC_", i, ".R"))

  # Assemble TS Data
  # Iterate over the series actually present rather than a fixed count
  TimeList <- names(TimerList)
  results <- lapply(seq_along(TimeList), function(j) {
    best_ts_models(OutputList[[j]], TimeList[j])
  })

  # AutoTS() Results----
  Results <- data.table::rbindlist(results)

  # Remove ModelName == NONE
  Results <- Results[ModelName != "NONE"]

  # Average out values: one per store and dept so straight avg works----
  Results <- Results[, .(MAE = mean(MAE, na.rm = TRUE)), by = c("StoreDept", "ModelName")]

  # Split the series key back into Store and Dept----
  # type.convert = TRUE turns the split pieces back into numbers so they have
  # the same type as the Store / Dept columns read from the CSV files below.
  Results[, c("Store", "Dept") := data.table::tstrsplit(StoreDept, " ", type.convert = TRUE)][, StoreDept := NULL]
  data.table::setcolorder(Results, c(3, 4, 1, 2))

  ##################################
  # Machine Learning Results----
  ##################################

  # Only the CatBoost file name has an underscore before the holdout number
  CatBoost_Results <- load_ml_results("CatBoost_Results_", "CatBoost", i)
  XGBoost_Results <- load_ml_results("XGBoost_Results", "XGBoost", i)
  H2oDRF_Results <- load_ml_results("H2oDRF_Results", "H2oDRF", i)
  H2oGBM_Results <- load_ml_results("H2oGBM_Results", "H2oGBM", i)

  ##################################
  # Combine Data----
  ##################################

  # Stack Files----
  ModelDataEval <- data.table::rbindlist(
    list(Results, CatBoost_Results, XGBoost_Results, H2oGBM_Results, H2oDRF_Results))
  data.table::setorderv(ModelDataEval, cols = c("Store", "Dept", "MAE"))

  # Add rank----
  # Rows are sorted by ascending MAE within Store / Dept, so Rank 1 is the
  # lowest-MAE model for that series
  ModelDataEval[, Rank := seq_len(.N), by = c("Store", "Dept")]

  # Get Frequencies----
  RankResults <- ModelDataEval[, .(Counts = .N), by = c("ModelName", "Rank")]
  data.table::setorderv(RankResults, c("Rank", "Counts"), order = c(1, -1))

  # Final table----
  # One row per model, one column per rank, cells = number of series
  FinalResultsTable <- data.table::dcast(RankResults, formula = ModelName ~ Rank, value.var = "Counts")
  data.table::setorderv(FinalResultsTable, "1", -1, na.last = TRUE)

  # Rename Columns----
  # Prefix every rank column (all columns except ModelName) with "Rank_"
  rank_cols <- names(FinalResultsTable)[-1]
  data.table::setnames(FinalResultsTable, old = rank_cols, new = paste0("Rank_", rank_cols))

  print(i)
  print(knitr::kable(FinalResultsTable))
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment