This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from scalecast.Forecaster import Forecaster | |
from pmdarima import auto_arima | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
# Default figure size applied to the seaborn/matplotlib plots drawn after this point.
sns.set(rc={'figure.figsize': (14, 7)})

# Load the series from disk; the CSV must provide the 'Month' and
# '#Passengers' columns that are read on the next line.
df = pd.read_csv('AirPassengers.csv')

# Forecaster object built from the observed values and their dates.
f = Forecaster(y=df['#Passengers'], current_dates=df['Month'])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def histogram_boxplot(data, feature, figsize=(12, 7), kde=False, bins='auto'): | |
""" | |
Boxplot and histogram combined | |
data: dataframe | |
feature: dataframe column | |
figsize: size of figure (default (12,7)) | |
kde: whether to show the density curve (default False) | |
bins: number of bins for histogram (default 'auto') | |
"""k |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# automatically tune and forecast with a series of models
models = ('knn', 'svr', 'lightgbm', 'mlp')
level_models = ('arima', 'hwes', 'prophet', 'silverkite')  # not referenced in this snippet

# Tune and forecast each estimator in turn on the same Forecaster object `f`.
for m in models:
    f.set_estimator(m)
    f.tune()
    f.auto_forecast()

# Build the combination only after every model in `models` has been run.
f.set_estimator('combo')
# Simple average of ALL models listed in `models` (an explicit tuple is
# passed, so nothing is filtered down to a "top 3").
# NOTE(review): if a top-3 average ranked by validation performance was
# intended, scalecast expects models='top_3' here - confirm intent.
f.manual_forecast(
    how='simple',
    models=models,
    determine_best_by='ValidationMetricValue',
    call_me='avg_diff',
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# writes model summaries to a csv file
# Collect one summary frame per Forecaster and concatenate once at the end;
# this avoids re-copying the accumulated frame on every iteration and avoids
# concatenating onto an empty DataFrame.
summary_frames = []
for k, f in preds.items():
    df = f.export(dfs='model_summaries', determine_best_by='LevelTestSetMAPE')
    df['Name'] = k  # tag every row with the key of the series it belongs to
    summary_frames.append(df)

# With no forecasters, fall back to an empty frame so an (empty) CSV is still written.
model_summaries = (
    pd.concat(summary_frames, ignore_index=True) if summary_frames else pd.DataFrame()
)
model_summaries.to_csv('model_summaries.csv', index=False)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# loads sliced dataframe into forecaster objects stored in a dict
# Each row label in `zips` is selected from `df`; the first entries of the
# selected row are dropped before the remainder is used as the series.
# NOTE(review): presumably the first 8 entries are non-time-series metadata
# columns - confirm against the layout of `df`.
N_LEADING_ENTRIES_SKIPPED = 8

preds = {}
for z in zips:
    data_load = df.loc[z].transpose()
    f = Forecaster(
        y=data_load.values[N_LEADING_ENTRIES_SKIPPED:],
        current_dates=data_load.index[N_LEADING_ENTRIES_SKIPPED:],
        name=z,
    )
    preds[str(z)] = f  # keys are always strings, whatever type `z` has

# full forecast process
for k, f in log_progress(preds.items()):
    print(k)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from tqdm.notebook import tqdm as log_progress | |
from ipywidgets import widgets | |
from IPython.display import display, clear_output | |
import seaborn as sns | |
import matplotlib | |
import matplotlib.pyplot as plt | |
from scalecast import GridGenerator | |
from scalecast.Forecaster import Forecaster |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def results_vis(f_dict,plot_type='forecast',order_by='LevelTestSetMAPE',level=True): | |
""" visualize the forecast results | |
leverages Jupyter widgets | |
""" | |
def display_user_selections(ts_selection,mo_selection): | |
matplotlib.use('nbAgg') | |
%matplotlib inline | |
sns.set(rc={'figure.figsize':(16,8)}) | |
selected_data = f_dict[ts_selection] | |
if plot_type == 'forecast': |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Gather the model summaries of every Forecaster in `avc` into one table and
# write it to disk. Frames are collected in a list and concatenated once,
# rather than growing a DataFrame inside the loop.
summary_frames = []
for k, f in avc.items():
    df = f.export(dfs='model_summaries', determine_best_by='LevelTestSetMAPE')
    # Tag each row with the series key and the attributes stored on the Forecaster.
    df['Name'] = k
    df['Region'] = f.region
    df['Type'] = f.type
    summary_frames.append(df)

# With no forecasters, fall back to an empty frame so an (empty) CSV is still written.
forecast_info = (
    pd.concat(summary_frames, ignore_index=True) if summary_frames else pd.DataFrame()
)
forecast_info.to_csv('avocado_model_summaries.csv', index=False)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Apply the same preprocessing to every Forecaster stored in `avc`.
for k, f in avc.items():
    # Forecast horizon and hold-out sizes, expressed in observations.
    f.generate_future_dates(52)
    f.set_test_length(26)
    f.set_validation_length(13)

    # Autoregressive inputs: the two calls below use different scalecast
    # methods (`add_ar_terms` vs `add_AR_terms`) with different arguments.
    f.add_ar_terms(3)
    f.add_AR_terms((1, 26))

    if not f.adf_test():  # returns True if it thinks it's stationary, False otherwise
        f.diff()

    # Seasonal inputs: week/month/quarter with raw=False, sincos=True; year with defaults.
    f.add_seasonal_regressors('week', 'month', 'quarter', raw=False, sincos=True)
    f.add_seasonal_regressors('year')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build one Forecaster per (region, type) combination found in `data`,
# keyed as "<region>-<type>".
avc = {}
for reg in data.region.unique():
    for typ in data.type.unique():
        data_slice = data.loc[(data['region'] == reg) & (data['type'] == typ)]
        if data_slice.empty:
            # This region/type pair never occurs together; without rows there
            # is no min/max date to build a range from, so skip it.
            continue

        # Complete weekly calendar between the first and last observation.
        load_dates = pd.date_range(
            start=data_slice['Date'].min(),
            end=data_slice['Date'].max(),
            freq='W',
        )  # for missing weeks, assume 0
        data_load = pd.DataFrame({'Date': load_dates})
        # Left-join the observations onto the calendar; weeks without a match
        # come back as NaN and are zero-filled below.
        # NOTE(review): assumes at most one row per Date within a slice -
        # duplicates would make the merged column longer than `data_load`.
        data_load['Vol'] = data_load.merge(data_slice, how='left', on='Date')['Total Volume'].values
        data_load.fillna(0, inplace=True)

        # `type` and `region` are passed as extra keyword arguments to the Forecaster.
        f = Forecaster(y=data_load['Vol'], current_dates=data_load['Date'], type=typ, region=reg)
        avc[f"{reg}-{typ}"] = f