rtkilian · July 11, 2022 22:44
diff --git a/sktime_forecast_xgboost_2.py b/sktime_forecast_xgboost_2.py
 from sktime.forecasting.model_selection import temporal_train_test_split, SingleWindowSplitter, ForecastingRandomizedSearchCV
 from sktime.forecasting.base import ForecastingHorizon
 from sktime.forecasting.compose import make_reduction
 from sktime.utils.plotting import plot_series
 from sktime.performance_metrics.forecasting import mean_absolute_percentage_error

 from xgboost import XGBRegressor

 # Exogenous features: the calendar month of every observation, one-hot encoded.
 # Casting to str makes get_dummies treat the month number as a category;
 # drop_first=True drops the first level so it acts as the reference month.
 X = pd.DataFrame({'month': y.index.month}, index=y.index)
 X = pd.get_dummies(X.astype(str), drop_first=True)

 # Hold out the final observations as the test set. y and X are split with the
 # same size so their indices stay aligned.
 test_size = 26
 y_train, y_test = temporal_train_test_split(y, test_size=test_size)  # Predict from 1st July 2019
 X_train, X_test = temporal_train_test_split(X, test_size=test_size)

 # Forecasting horizon for the final prediction: the absolute index of the test data
 fh = ForecastingHorizon(y_test.index, is_relative=False)

 # Single validation fold for the hyper-parameter search.
 # The splitter is applied to the data passed to gscv.fit (y_train), so the
 # training window must be sized from y_train, not from the full series y.
 # The horizon is the full range 1..validation_size: a bare integer would score
 # each candidate on the single point `validation_size` steps ahead only.
 validation_size = 26
 cv = SingleWindowSplitter(
     window_length=len(y_train) - validation_size,
     fh=np.arange(1, validation_size + 1),
 )

 # Search space; the 'estimator__' prefix routes each parameter to the wrapped
 # XGBRegressor inside the reduction forecaster.
 param_grid = {
     'estimator__max_depth': [3, 5, 6, 10, 15, 20],
     'estimator__learning_rate': [0.01, 0.1, 0.2, 0.3],
     'estimator__subsample': np.arange(0.5, 1.0, 0.1),
     'estimator__colsample_bytree': np.arange(0.4, 1.0, 0.1),
     'estimator__colsample_bylevel': np.arange(0.4, 1.0, 0.1),
     'estimator__n_estimators': [100, 500, 1000],
 }

 # XGBoost regressor turned into a recursive forecaster that uses the previous
 # 52 observations as lag features.
 regressor = XGBRegressor(objective='reg:squarederror', random_state=42)
 forecaster = make_reduction(regressor, window_length=52, strategy="recursive")

 # Randomised search: 100 sampled parameter combinations, seeded for reproducibility
 gscv = ForecastingRandomizedSearchCV(
     forecaster,
     cv=cv,
     param_distributions=param_grid,
     n_iter=100,
     random_state=42,
 )

 # Fit on the training data, then forecast the test period with its exogenous features
 gscv.fit(y=y_train, X=X_train)
 y_pred = gscv.predict(fh=fh, X=X_test)

 # Plot predictions with the tail of the training data and the test data.
 # The trailing semicolon is kept from the original (notebook idiom that hides
 # the echoed return value of the last expression in a cell).
 plot_series(
     y_train['2018-07-01':],
     y_test,
     y_pred,
     labels=["y_train", "y_test", "y_pred"],
     x_label='Date',
     y_label='Count pedestrians',
 );

 # Evaluate: non-symmetric MAPE on the held-out test set
 mape = mean_absolute_percentage_error(y_test, y_pred, symmetric=False)
 print(f'MAPE: {mape:.4f}')
	from sktime.forecasting.model_selection import temporal_train_test_split, SingleWindowSplitter, ForecastingRandomizedSearchCV
	from sktime.forecasting.base import ForecastingHorizon
	from sktime.forecasting.compose import make_reduction
	from sktime.utils.plotting import plot_series
	from sktime.performance_metrics.forecasting import mean_absolute_percentage_error

	from xgboost import XGBRegressor

	# Exogenous features: the calendar month of every observation, one-hot encoded.
	# Casting to str makes get_dummies treat the month number as a category;
	# drop_first=True drops the first level so it acts as the reference month.
	X = pd.DataFrame({'month': y.index.month}, index=y.index)
	X = pd.get_dummies(X.astype(str), drop_first=True)

	# Hold out the final observations as the test set. y and X are split with the
	# same size so their indices stay aligned.
	test_size = 26
	y_train, y_test = temporal_train_test_split(y, test_size=test_size)  # Predict from 1st July 2019
	X_train, X_test = temporal_train_test_split(X, test_size=test_size)

	# Forecasting horizon for the final prediction: the absolute index of the test data
	fh = ForecastingHorizon(y_test.index, is_relative=False)

	# Single validation fold for the hyper-parameter search.
	# The splitter is applied to the data passed to gscv.fit (y_train), so the
	# training window must be sized from y_train, not from the full series y.
	# The horizon is the full range 1..validation_size: a bare integer would score
	# each candidate on the single point `validation_size` steps ahead only.
	validation_size = 26
	cv = SingleWindowSplitter(
		window_length=len(y_train) - validation_size,
		fh=np.arange(1, validation_size + 1),
	)

	# Search space; the 'estimator__' prefix routes each parameter to the wrapped
	# XGBRegressor inside the reduction forecaster.
	param_grid = {
		'estimator__max_depth': [3, 5, 6, 10, 15, 20],
		'estimator__learning_rate': [0.01, 0.1, 0.2, 0.3],
		'estimator__subsample': np.arange(0.5, 1.0, 0.1),
		'estimator__colsample_bytree': np.arange(0.4, 1.0, 0.1),
		'estimator__colsample_bylevel': np.arange(0.4, 1.0, 0.1),
		'estimator__n_estimators': [100, 500, 1000],
	}

	# XGBoost regressor turned into a recursive forecaster that uses the previous
	# 52 observations as lag features.
	regressor = XGBRegressor(objective='reg:squarederror', random_state=42)
	forecaster = make_reduction(regressor, window_length=52, strategy="recursive")

	# Randomised search: 100 sampled parameter combinations, seeded for reproducibility
	gscv = ForecastingRandomizedSearchCV(
		forecaster,
		cv=cv,
		param_distributions=param_grid,
		n_iter=100,
		random_state=42,
	)

	# Fit on the training data, then forecast the test period with its exogenous features
	gscv.fit(y=y_train, X=X_train)
	y_pred = gscv.predict(fh=fh, X=X_test)

	# Plot predictions with the tail of the training data and the test data.
	# The trailing semicolon is kept from the original (notebook idiom that hides
	# the echoed return value of the last expression in a cell).
	plot_series(
		y_train['2018-07-01':],
		y_test,
		y_pred,
		labels=["y_train", "y_test", "y_pred"],
		x_label='Date',
		y_label='Count pedestrians',
	);

	# Evaluate: non-symmetric MAPE on the held-out test set
	mape = mean_absolute_percentage_error(y_test, y_pred, symmetric=False)
	print(f'MAPE: {mape:.4f}')