Created
July 11, 2022 22:44
-
-
Save rtkilian/284a37bf11bd96c53504e7c4cd1ed8df to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sktime.forecasting.model_selection import temporal_train_test_split, SingleWindowSplitter, ForecastingRandomizedSearchCV | |
from sktime.forecasting.base import ForecastingHorizon | |
from sktime.forecasting.compose import make_reduction | |
from sktime.utils.plotting import plot_series | |
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error | |
from xgboost import XGBRegressor | |
# Exogenous features: the calendar month of every observation, one-hot encoded.
# The month is cast to str so get_dummies treats it as a categorical column;
# drop_first=True leaves one month out as the implicit baseline.
X = pd.get_dummies(
    pd.DataFrame({'month': y.index.month}, index=y.index).astype(str),
    drop_first=True,
)

# Hold out the final 26 observations of the target and of the exogenous frame
# in a single call (predict from 1st July 2019).
y_train, y_test, X_train, X_test = temporal_train_test_split(y, X=X, test_size=26)

# Absolute forecasting horizon that covers exactly the test timestamps.
fh = ForecastingHorizon(y_test.index, is_relative=False)
# Single validation split used during the hyper-parameter search.
# The splitter is applied to y_train (that is what gets passed to fit), so the
# training window has to be sized from y_train, not from the full series:
# window + validation horizon must fit inside the data being split.
# fh lists every step 1..validation_size so that all validation points are
# scored; a bare integer would score only the single 26-steps-ahead point.
validation_size = 26
cv = SingleWindowSplitter(
    window_length=len(y_train) - validation_size,
    fh=list(range(1, validation_size + 1)),
)

# Candidate XGBoost hyper-parameters. The `estimator__` prefix addresses the
# regressor wrapped by the reduction forecaster below.
# np.arange excludes the stop value, so the sampled fractions stay below 1.0.
param_grid = {
    'estimator__max_depth': [3, 5, 6, 10, 15, 20],
    'estimator__learning_rate': [0.01, 0.1, 0.2, 0.3],
    'estimator__subsample': np.arange(0.5, 1.0, 0.1),
    'estimator__colsample_bytree': np.arange(0.4, 1.0, 0.1),
    'estimator__colsample_bylevel': np.arange(0.4, 1.0, 0.1),
    'estimator__n_estimators': [100, 500, 1000],
}

# Tabular regressor turned into a recursive forecaster that looks back
# 52 observations to build its lag features.
regressor = XGBRegressor(objective='reg:squarederror', random_state=42)
forecaster = make_reduction(regressor, window_length=52, strategy="recursive")

# Randomised search: 100 parameter combinations drawn from the grid above,
# with a fixed seed so the draw is reproducible.
gscv = ForecastingRandomizedSearchCV(
    forecaster,
    cv=cv,
    param_distributions=param_grid,
    n_iter=100,
    random_state=42,
)
# Run the hyper-parameter search on the training data, then forecast the
# test horizon with the month dummies of the test period as exogenous input.
gscv.fit(y=y_train, X=X_train)
y_pred = gscv.predict(fh=fh, X=X_test)

# Plot the forecast against the actuals; only the training data from
# 2018-07-01 onwards is drawn.
recent_train = y_train['2018-07-01':]
plot_series(
    recent_train,
    y_test,
    y_pred,
    labels=["y_train", "y_test", "y_pred"],
    x_label='Date',
    y_label='Count pedestrians',
);  # trailing semicolon kept from the original (presumably silences notebook echo)

# Report the (non-symmetric) mean absolute percentage error on the test set.
mape = mean_absolute_percentage_error(y_test, y_pred, symmetric=False)
print(f'MAPE: {mape:.4f}')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment