Last active
March 10, 2023 06:38
-
-
Save tomonori-masui/0f6fc07571a24de27d771bd50521ca74 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import lightgbm as lgb
import numpy as np
from sktime.forecasting.compose import TransformedTargetForecaster, make_reduction
from sktime.forecasting.model_selection import (
    ExpandingWindowSplitter,
    ForecastingGridSearchCV,
)
from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError
def create_forecaster():
    """Build the LightGBM-based forecaster used for the grid search.

    A default-configured ``lgb.LGBMRegressor`` is wrapped with
    ``make_reduction`` using ``window_length=5`` and the ``"recursive"``
    strategy.

    Returns:
        The forecaster object produced by ``make_reduction``.
    """
    return make_reduction(
        lgb.LGBMRegressor(),
        window_length=5,
        strategy="recursive",
    )
def grid_serch_forecaster(train, test, forecaster, param_grid):
    """Grid-search ``forecaster`` on ``train`` and forecast over ``test``.

    Every candidate in ``param_grid`` is evaluated with
    ``ForecastingGridSearchCV`` (``strategy="refit"``) on an
    ``ExpandingWindowSplitter`` whose initial window is 70% of ``train``,
    scored by symmetric mean absolute percentage error. The fitted search
    object then predicts ``len(test)`` steps ahead and the forecast is
    handed to ``plot_forecast``.

    Args:
        train: Training series the grid search is fitted on.
        test: Hold-out series; its length sets the forecasting horizon and
            it is forwarded to ``plot_forecast``.
        forecaster: Forecaster whose parameters are tuned.
        param_grid: Mapping of parameter names to candidate values.

    Returns:
        The ``(mae, mape)`` pair returned by ``plot_forecast``.

    Note:
        Some sktime versions raise ``AttributeError`` during ``fit`` when
        ``train`` has a datetime index without a ``freq`` set; set the
        frequency on the index before calling this function.
    """
    # The first 70% of the training data forms the initial window.
    initial_window = int(len(train) * 0.7)
    cv = ExpandingWindowSplitter(initial_window=initial_window)

    gscv = ForecastingGridSearchCV(
        forecaster,
        strategy="refit",
        cv=cv,
        param_grid=param_grid,
        scoring=MeanAbsolutePercentageError(symmetric=True),
    )
    gscv.fit(train)
    print(f"best params: {gscv.best_params_}")

    # Relative horizon 1..len(test): one step per hold-out observation.
    fh = np.arange(1, len(test) + 1)
    y_pred = gscv.predict(fh=fh)

    mae, mape = plot_forecast(train, test, y_pred)
    return mae, mape
# Candidate window lengths to be grid searched: 5, 10, ..., 30.
param_grid = {"window_length": list(range(5, 31, 5))}

forecaster = create_forecaster()

# Tune on the training series and score the forecast on the test series.
sun_lgb_mae, sun_lgb_mape = grid_serch_forecaster(
    train=sun_train,
    test=sun_test,
    forecaster=forecaster,
    param_grid=param_grid,
)
@hiteshgupta2507 Your input series needs to be indexed with the right frequency. Possible frequencies can be seen in this link.
Thanks for your quick response. I have been following your online material but couldn't find anywhere that freq is explicitly fed into the input series.
https://towardsdatascience.com/multi-step-time-series-forecasting-with-arima-lightgbm-and-prophet-cc9e3f95dfb0
I was looking at the Nile dataset code.
The way I load my input data, freq should be auto-detected, right? df = pd.read_csv('uniques_pv_imps.csv', index_col = [0], parse_dates = True)
On Monday, 20 September, 2021, 08:54:24 pm GMT-4, Tomonori Masui ***@***.***> wrote:
@tomonori-masui commented on this gist.
@hiteshgupta2507 Your input series needs to be indexed with the right frequency. Possible frequencies can be seen in this link.
—
You are receiving this because you were mentioned.
Reply to this email directly, view it on GitHub, or unsubscribe.
Triage notifications on the go with GitHub Mobile for iOS or Android.
I have been following your online material but couldn't find anywhere that freq is explicitly fed into the input series.
Only the WPI data have that conversion in that blog post.
I was looking at the Nile dataset code.
The Nile dataset is not indexed with datetime values. It just has numeric year values, hence it does not require a frequency on its index.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I tried this, however, I am receiving this error:
AttributeError Traceback (most recent call last)
in
3 cv = ExpandingWindowSplitter(initial_window=int(len(sun_train) * 0.8))
4 gscv = ForecastingGridSearchCV(forecaster, strategy="refit", cv=cv, param_grid=param_grid)
----> 5 gscv.fit(sun_train)
~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/base/_base.py in fit(self, y, X, fh)
178 #####################################################
179
--> 180 self._fit(y=y_inner, X=X_inner, fh=fh)
181
182 # this should happen last
~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/model_selection/_tune.py in _fit(self, y, X, fh, **fit_params)
294
295 # Run grid-search cross-validation.
--> 296 results = self._run_search(evaluate_candidates)
297
298 results = pd.DataFrame(results)
~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/model_selection/_tune.py in _run_search(self, evaluate_candidates)
471 """Search all candidates in param_grid."""
472 _check_param_grid(self.param_grid)
--> 473 return evaluate_candidates(ParameterGrid(self.param_grid))
474
475
~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/model_selection/_tune.py in evaluate_candidates(candidate_params)
280 )
281
--> 282 out = parallel(
283 delayed(_fit_and_score)(params) for params in candidate_params
284 )
~/opt/anaconda3/lib/python3.8/site-packages/joblib/parallel.py in __call__(self, iterable)
1046 # remaining jobs.
1047 self._iterating = False
-> 1048 if self.dispatch_one_batch(iterator):
1049 self._iterating = self._original_iterator is not None
1050
~/opt/anaconda3/lib/python3.8/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
864 return False
865 else:
--> 866 self._dispatch(tasks)
867 return True
868
~/opt/anaconda3/lib/python3.8/site-packages/joblib/parallel.py in _dispatch(self, batch)
782 with self._lock:
783 job_idx = len(self._jobs)
--> 784 job = self._backend.apply_async(batch, callback=cb)
785 # A job can complete so quickly than its callback is
786 # called before we get here, causing self._jobs to
~/opt/anaconda3/lib/python3.8/site-packages/joblib/_parallel_backends.py in apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)
~/opt/anaconda3/lib/python3.8/site-packages/joblib/_parallel_backends.py in __init__(self, batch)
570 # Don't delay the application, to avoid keeping the input
571 # arguments in memory
--> 572 self.results = batch()
573
574 def get(self):
~/opt/anaconda3/lib/python3.8/site-packages/joblib/parallel.py in __call__(self)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
~/opt/anaconda3/lib/python3.8/site-packages/joblib/parallel.py in <listcomp>(.0)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/model_selection/_tune.py in _fit_and_score(params)
245
246 # Evaluate.
--> 247 out = evaluate(
248 forecaster,
249 cv,
~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/model_evaluation/_functions.py in evaluate(forecaster, cv, y, X, strategy, scoring, fit_params, return_data)
103 # predict
104 start_pred = time.time()
--> 105 y_pred = forecaster.predict(fh, X=X_test)
106 pred_time = time.time() - start_pred
107
~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/base/_base.py in predict(self, fh, X, return_pred_int, alpha)
236 # alpha = check_alpha(alpha)
237
--> 238 y_pred = self._predict(
239 self.fh,
240 X=X_inner,
~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/base/_sktime.py in _predict(self, fh, X, return_pred_int, alpha)
75
76 # all values are out-of-sample
---> 77 if fh.is_all_out_of_sample(self.cutoff):
78 return self._predict_fixed_cutoff(
79 fh.to_out_of_sample(self.cutoff), **kwargs
~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/base/_fh.py in is_all_out_of_sample(self, cutoff)
426 cutoff.
427 """
--> 428 return sum(self._is_out_of_sample(cutoff)) == len(self)
429
430 def to_indexer(self, cutoff=None, from_cutoff=True):
~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/base/_fh.py in _is_out_of_sample(self, cutoff)
409 """Get index location of out-of-sample values."""
410 # return ~self._in_sample_idx(cutoff)
--> 411 return self.to_relative(cutoff).to_pandas() > 0
412
413 def is_all_out_of_sample(self, cutoff=None):
~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/base/_fh.py in to_relative(self, cutoff)
259 else:
260 absolute = self.to_pandas()
--> 261 _check_cutoff(cutoff, absolute)
262
263 if isinstance(absolute, pd.DatetimeIndex):
~/opt/anaconda3/lib/python3.8/site-packages/sktime/forecasting/base/_fh.py in _check_cutoff(cutoff, index)
485
486 if not hasattr(cutoff, "freqstr") or cutoff.freqstr is None:
--> 487 raise AttributeError(
488 "The `freq` attribute of the time index is required, "
489 "but found: None. Please specify the `freq` argument "
AttributeError: The `freq` attribute of the time index is required, but found: None. Please specify the `freq` argument when setting the time index.