Skip to content

Instantly share code, notes, and snippets.

@tomonori-masui
Last active March 10, 2023 06:38
Show Gist options
  • Save tomonori-masui/0f6fc07571a24de27d771bd50521ca74 to your computer and use it in GitHub Desktop.
Save tomonori-masui/0f6fc07571a24de27d771bd50521ca74 to your computer and use it in GitHub Desktop.
from sktime.forecasting.compose import make_reduction, TransformedTargetForecaster
from sktime.forecasting.model_selection import ExpandingWindowSplitter, ForecastingGridSearchCV
from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError
import lightgbm as lgb
def create_forecaster():
    """Build a recursive reduction forecaster backed by a LightGBM regressor.

    Returns:
        The forecaster produced by ``make_reduction`` wrapping a default
        ``lgb.LGBMRegressor`` with ``window_length=5`` and the
        ``"recursive"`` strategy.
    """
    # The window length set here is only a starting value; callers may
    # override it, e.g. through a grid search over "window_length".
    return make_reduction(
        lgb.LGBMRegressor(),
        window_length=5,
        strategy="recursive",
    )
def grid_serch_forecaster(train, test, forecaster, param_grid):
    """Tune a forecaster by grid search, forecast the test span, and score it.

    The search uses an expanding-window cross-validation whose initial
    window is 70% of ``train`` and ranks candidates by symmetric MAPE.
    The tuned forecaster then predicts ``len(test)`` steps ahead and the
    result is handed to ``plot_forecast``.

    Args:
        train: Training series the grid search is fitted on.
        test: Hold-out series; only its length is used to build the
            forecasting horizon before it is passed to ``plot_forecast``.
        forecaster: sktime forecaster to tune (e.g. the result of
            ``create_forecaster``).
        param_grid: Mapping of parameter names to candidate values, passed
            unchanged to ``ForecastingGridSearchCV``.

    Returns:
        The ``(mae, mape)`` pair returned by ``plot_forecast``.

    Note:
        ``plot_forecast`` is not defined in this snippet and must already be
        available in the calling module/notebook.
        NOTE(review): with a datetime-indexed ``train`` that has no ``freq``
        set, ``gscv.fit`` has been reported to raise ``AttributeError`` --
        set the index frequency before calling.
    """
    # Imported locally because this snippet uses numpy without ever importing
    # it at module level; without this the horizon line raises NameError.
    import numpy as np

    # Grid search on window_length
    cv = ExpandingWindowSplitter(initial_window=int(len(train) * 0.7))
    gscv = ForecastingGridSearchCV(
        forecaster,
        strategy="refit",
        cv=cv,
        param_grid=param_grid,
        scoring=MeanAbsolutePercentageError(symmetric=True),
    )
    gscv.fit(train)
    print(f"best params: {gscv.best_params_}")

    # forecasting: relative horizon 1..len(test), i.e. every step of the test span
    fh = np.arange(len(test)) + 1
    y_pred = gscv.predict(fh=fh)
    mae, mape = plot_forecast(train, test, y_pred)
    return mae, mape
# Candidate window lengths to be grid searched: 5, 10, ..., 30.
param_grid = {"window_length": list(range(5, 31, 5))}

# Tune the LightGBM forecaster on the training split and score it on the test split.
forecaster = create_forecaster()
sun_lgb_mae, sun_lgb_mape = grid_serch_forecaster(
    sun_train,
    sun_test,
    forecaster,
    param_grid,
)
@hiteshgupta2507
Copy link

I tried this, however, I am receiving this error:


AttributeError Traceback (most recent call last)
in
3 cv = ExpandingWindowSplitter(initial_window=int(len(sun_train) * 0.8))
4 gscv = ForecastingGridSearchCV(forecaster, strategy="refit", cv=cv, param_grid=param_grid)
----> 5 gscv.fit(sun_train)

~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/base/_base.py in fit(self, y, X, fh)
178 #####################################################
179
--> 180 self._fit(y=y_inner, X=X_inner, fh=fh)
181
182 # this should happen last

~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/model_selection/_tune.py in _fit(self, y, X, fh, **fit_params)
294
295 # Run grid-search cross-validation.
--> 296 results = self._run_search(evaluate_candidates)
297
298 results = pd.DataFrame(results)

~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/model_selection/_tune.py in _run_search(self, evaluate_candidates)
471 """Search all candidates in param_grid."""
472 _check_param_grid(self.param_grid)
--> 473 return evaluate_candidates(ParameterGrid(self.param_grid))
474
475

~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/model_selection/_tune.py in evaluate_candidates(candidate_params)
280 )
281
--> 282 out = parallel(
283 delayed(_fit_and_score)(params) for params in candidate_params
284 )

~/opt/anaconda3/lib/python3.8/site-packages/joblib/parallel.py in call(self, iterable)
1046 # remaining jobs.
1047 self._iterating = False
-> 1048 if self.dispatch_one_batch(iterator):
1049 self._iterating = self._original_iterator is not None
1050

~/opt/anaconda3/lib/python3.8/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
864 return False
865 else:
--> 866 self._dispatch(tasks)
867 return True
868

~/opt/anaconda3/lib/python3.8/site-packages/joblib/parallel.py in _dispatch(self, batch)
782 with self._lock:
783 job_idx = len(self._jobs)
--> 784 job = self._backend.apply_async(batch, callback=cb)
785 # A job can complete so quickly than its callback is
786 # called before we get here, causing self._jobs to

~/opt/anaconda3/lib/python3.8/site-packages/joblib/_parallel_backends.py in apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)

~/opt/anaconda3/lib/python3.8/site-packages/joblib/_parallel_backends.py in init(self, batch)
570 # Don't delay the application, to avoid keeping the input
571 # arguments in memory
--> 572 self.results = batch()
573
574 def get(self):

~/opt/anaconda3/lib/python3.8/site-packages/joblib/parallel.py in call(self)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264

~/opt/anaconda3/lib/python3.8/site-packages/joblib/parallel.py in (.0)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264

~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/model_selection/_tune.py in _fit_and_score(params)
245
246 # Evaluate.
--> 247 out = evaluate(
248 forecaster,
249 cv,

~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/model_evaluation/_functions.py in evaluate(forecaster, cv, y, X, strategy, scoring, fit_params, return_data)
103 # predict
104 start_pred = time.time()
--> 105 y_pred = forecaster.predict(fh, X=X_test)
106 pred_time = time.time() - start_pred
107

~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/base/_base.py in predict(self, fh, X, return_pred_int, alpha)
236 # alpha = check_alpha(alpha)
237
--> 238 y_pred = self._predict(
239 self.fh,
240 X=X_inner,

~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/base/_sktime.py in _predict(self, fh, X, return_pred_int, alpha)
75
76 # all values are out-of-sample
---> 77 if fh.is_all_out_of_sample(self.cutoff):
78 return self._predict_fixed_cutoff(
79 fh.to_out_of_sample(self.cutoff), **kwargs

~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/base/_fh.py in is_all_out_of_sample(self, cutoff)
426 cutoff.
427 """
--> 428 return sum(self._is_out_of_sample(cutoff)) == len(self)
429
430 def to_indexer(self, cutoff=None, from_cutoff=True):

~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/base/_fh.py in _is_out_of_sample(self, cutoff)
409 """Get index location of out-of-sample values."""
410 # return ~self._in_sample_idx(cutoff)
--> 411 return self.to_relative(cutoff).to_pandas() > 0
412
413 def is_all_out_of_sample(self, cutoff=None):

~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/base/_fh.py in to_relative(self, cutoff)
259 else:
260 absolute = self.to_pandas()
--> 261 _check_cutoff(cutoff, absolute)
262
263 if isinstance(absolute, pd.DatetimeIndex):

~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/base/_fh.py in _check_cutoff(cutoff, index)
485
486 if not hasattr(cutoff, "freqstr") or cutoff.freqstr is None:
--> 487 raise AttributeError(
488 "The freq attribute of the time index is required, "
489 "but found: None. Please specify the freq argument "

AttributeError: The freq attribute of the time index is required, but found: None. Please specify the freq argument when setting the time index.

@tomonori-masui
Copy link
Author

@hiteshgupta2507 Your input series needs to be indexed with the right frequency. Possible frequencies can be seen in this link.

@hiteshgupta2507
Copy link

hiteshgupta2507 commented Sep 21, 2021 via email

@tomonori-masui
Copy link
Author

@hiteshgupta2507

I have been following your online material but couldn't find anywhere that freq is explicitly fed into the input series.

Only the WPI data have that conversion in that blog post.

I was looking at the Nile dataset code.

Nile dataset is not indexed with datetime values. It just has numeric values of years, hence it does not require frequency on its index.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment