Last active
March 13, 2022 08:14
-
-
Save jiahao87/0a3883d2ac5cbe12249e6d73505918df to your computer and use it in GitHub Desktop.
Hierarchical Forecasting Reconciliation using OLS Method
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import collections

import hts  # To install: pip install scikit-hts
import numpy as np
import pandas as pd
from scipy.optimize import lsq_linear
# Toy base forecasts for a single period: one column per node of the
# total -> state -> store hierarchy. The figures are deliberately incoherent
# (e.g. the three state forecasts sum to 13.1, not to the 'total' of 14),
# which is what the reconciliation steps further down correct.
hts_df = pd.DataFrame([{
    'total': 14,
    'CA': 5.4, 'TX': 1.8, 'WI': 5.9,
    'CA_1': 0.8, 'CA_2': 0.6, 'CA_3': 0.9, 'CA_4': 0.3,
    'TX_1': 0.03, 'TX_2': 0.5, 'TX_3': 0.5,
    'WI_1': 1.6, 'WI_2': 1.2, 'WI_3': 1.5,
}])
print(hts_df)
# Node names for the middle (state) and bottom (store) levels.
states = ['CA', 'TX', 'WI']
stores = ['CA_1', 'CA_2', 'CA_3', 'CA_4', 'TX_1', 'TX_2', 'TX_3', 'WI_1', 'WI_2', 'WI_3']

# build the hierarchy tree as a dictionary: each parent maps to its children
total = {'total': list(states)}
# Match on "<state>_" rather than the bare state code so that a state whose
# code is a prefix of another state's code cannot claim the other's stores.
state_h = {k: [v for v in stores if v.startswith(k + '_')] for k in states}
hierarchy = {**total, **state_h}

# Build the tree from the parent -> children mapping, then derive the
# summing matrix and the node label for each of its rows.
tree = hts.hierarchy.HierarchyTree.from_nodes(nodes=hierarchy, df=hts_df)
sum_mat, sum_mat_labels = hts.functions.to_sum_mat(tree)
# Base forecasts keyed by node label, inserted in sum_mat_labels order.
# Each value is a single-column frame named 'yhat' holding that node's
# forecasts taken from hts_df.
pred_dict = collections.OrderedDict(
    (label, pd.DataFrame(data=hts_df[label].values, columns=['yhat']))
    for label in sum_mat_labels
)

# perform forecast reconciliation
# NOTE(review): an empty mse dict is passed; presumably it is not consulted
# for method='OLS' -- confirm against the scikit-hts documentation.
revised = hts.functions.optimal_combination(pred_dict, sum_mat, method='OLS', mse={})

# Label the reconciled values: rows follow hts_df, columns follow the nodes.
revised_forecasts = pd.DataFrame(
    data=revised,
    index=hts_df.index,
    columns=sum_mat_labels,
)
print(revised_forecasts)
######################################################
####### For non-negative reconciled forecasts #######
######################################################
hat_mat = hts.functions.y_hat_matrix(pred_dict)

# One base-forecast vector per row of hts_df, each with one entry per row of
# sum_mat. For a single-row frame this is exactly hat_mat.flatten().
# NOTE(review): assumes y_hat_matrix returns (n_periods, n_nodes) -- confirm
# against scikit-hts before relying on this with several forecast periods.
hat_rows = np.asarray(hat_mat).reshape(-1, np.shape(sum_mat)[0])

# For each period, solve the bounded least-squares problem
#     min ||sum_mat @ x - y_hat||  subject to  x >= 0
# for the bottom-level values x, then aggregate them back through sum_mat so
# that every level of the hierarchy is coherent and non-negative.
revised_nnls = np.vstack([
    np.dot(sum_mat, lsq_linear(sum_mat, y_hat, bounds=(0, np.inf))['x'])
    for y_hat in hat_rows
])

revised_forecasts_nnls = pd.DataFrame(
    data=revised_nnls,
    index=hts_df.index,
    columns=sum_mat_labels,
)
print(revised_forecasts_nnls)
# Note: In this toy example, the non-negative reconciled forecasts are the same
# as the ordinary OLS-reconciled forecasts, because the unconstrained
# least-squares solution already satisfies the non-negativity bounds.
#
# To see an example where non-negative least squares prevents negative
# forecasts, try the following figures instead:
# hts_df = pd.DataFrame([{'total': 2,
#                         'CA': 1.4, 'TX': 1.8, 'WI': 1.9,
#                         'CA_1': 0.8, 'CA_2': 0.6, 'CA_3': 0.9, 'CA_4': 0.3,
#                         'TX_1': 0.03, 'TX_2': 0.5, 'TX_3': 0.5,
#                         'WI_1': 1.6, 'WI_2': 1.2, 'WI_3': 1.5
#                         }])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment