Skip to content

Instantly share code, notes, and snippets.

View mikekeith52's full-sized avatar

Michael Keith mikekeith52

  • Salt Lake City, UT
View GitHub Profile
import pandas as pd
import numpy as np
from scalecast.Forecaster import Forecaster
from pmdarima import auto_arima
import matplotlib.pyplot as plt
import seaborn as sns
# Default figure size (14, 7) for the plots drawn after this point.
sns.set(rc={'figure.figsize': (14, 7)})

# Read the CSV and hand its '#Passengers' and 'Month' columns to a Forecaster
# as the observed values and their dates.
df = pd.read_csv('AirPassengers.csv')
f = Forecaster(
    y=df['#Passengers'],
    current_dates=df['Month'],
)
def histogram_boxplot(data, feature, figsize=(12, 7), kde=False, bins='auto'):
    """
    Boxplot and histogram combined.

    data: dataframe
    feature: dataframe column
    figsize: size of figure (default (12,7))
    kde: whether to show the density curve (default False)
    bins: number of bins for histogram (default 'auto')
    """
    # NOTE(review): only the signature and docstring are visible in this
    # excerpt; the plotting body is not shown, so none is invented here.
    # A stray `k` directly after the closing triple quotes was a syntax
    # error and has been removed.
# Tune and forecast every estimator in `models`, then add a combination model.
models = ('knn', 'svr', 'lightgbm', 'mlp')
# NOTE(review): `level_models` is not referenced in the visible snippet.
level_models = ('arima', 'hwes', 'prophet', 'silverkite')

for m in models:
    f.set_estimator(m)
    f.tune()
    f.auto_forecast()

# Simple-average combination, stored under the name 'avg_diff'.
# NOTE(review): the call passes the whole `models` tuple rather than 'top_3';
# `determine_best_by` presumably only matters for 'top_N' selections — confirm.
f.set_estimator('combo')
f.manual_forecast(
    how='simple',
    models=models,
    determine_best_by='ValidationMetricValue',
    call_me='avg_diff',
)
# Collect the model summaries of every forecaster in `preds` and write them
# to a single CSV file.
summary_frames = []
for k, f in preds.items():
    df = f.export(dfs='model_summaries', determine_best_by='LevelTestSetMAPE')
    df['Name'] = k  # tag each row with the key of the forecaster it came from
    summary_frames.append(df)

# Concatenate once rather than growing a DataFrame inside the loop, which
# re-copies all accumulated rows on every iteration. pd.concat raises on an
# empty list, so an empty `preds` falls back to an empty DataFrame.
model_summaries = (
    pd.concat(summary_frames, ignore_index=True)
    if summary_frames
    else pd.DataFrame()
)
model_summaries.to_csv('model_summaries.csv', index=False)
# Build one Forecaster per entry of `zips` and store it in `preds`, keyed by
# the entry's string form.
preds = {}
for z in zips:
    data_load = df.loc[z].transpose()
    # Positions 0-7 are dropped from both the values and the index.
    # NOTE(review): presumably leading non-series fields — confirm.
    f = Forecaster(
        y=data_load.values[8:],
        current_dates=data_load.index[8:],
        name=z,
    )
    preds[str(z)] = f
# Full forecast process: walk every (key, forecaster) pair in `preds` through
# the `log_progress` wrapper and print the key being handled.
# NOTE(review): the snippet appears to be cut off after the print call.
for k, f in log_progress(preds.items()):
    print(k)
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm as log_progress
from ipywidgets import widgets
from IPython.display import display, clear_output
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
from scalecast import GridGenerator
from scalecast.Forecaster import Forecaster
# NOTE(review): this snippet has lost its indentation and is cut off after the
# final `if` line, so only the visible statements are described below.
def results_vis(f_dict,plot_type='forecast',order_by='LevelTestSetMAPE',level=True):
""" visualize the forecast results
leverages Jupyter widgets

f_dict: container indexed by the selected series key (see f_dict[ts_selection])
plot_type: compared against 'forecast' in the visible code (default 'forecast')
order_by: not used in the visible part of the function (default 'LevelTestSetMAPE')
level: not used in the visible part of the function (default True)
"""
# Inner callback receiving a series selection and a model selection.
# NOTE(review): presumably wired to ipywidgets controls — confirm.
def display_user_selections(ts_selection,mo_selection):
# Select matplotlib's 'nbAgg' backend.
matplotlib.use('nbAgg')
# NOTE(review): IPython line magic; valid only in a notebook/IPython session,
# not in a plain .py module, and it follows an explicit backend selection.
%matplotlib inline
# Default figure size (16, 8) for the plots.
sns.set(rc={'figure.figsize':(16,8)})
# Look up the entry of f_dict chosen by ts_selection.
selected_data = f_dict[ts_selection]
if plot_type == 'forecast':
# Collect the model summaries of every forecaster in `avc`, tag each with its
# key, region and type, and write them to a single CSV file.
summary_frames = []
for k, f in avc.items():
    df = f.export(dfs='model_summaries', determine_best_by='LevelTestSetMAPE')
    df['Name'] = k
    df['Region'] = f.region
    df['Type'] = f.type
    summary_frames.append(df)

# Concatenate once rather than growing a DataFrame inside the loop, which
# re-copies all accumulated rows on every iteration. pd.concat raises on an
# empty list, so an empty `avc` falls back to an empty DataFrame.
forecast_info = (
    pd.concat(summary_frames, ignore_index=True)
    if summary_frames
    else pd.DataFrame()
)
forecast_info.to_csv('avocado_model_summaries.csv', index=False)
# Configure every forecaster in `avc` with the same horizon, test/validation
# lengths and regressors. The calls mutate `f` in place, so their order is kept.
for k, f in avc.items():
    f.generate_future_dates(52)
    f.set_test_length(26)
    f.set_validation_length(13)

    # Autoregressive terms: three plain lags plus the (1, 26) seasonal spec.
    f.add_ar_terms(3)
    f.add_AR_terms((1, 26))

    # adf_test() returns True if it thinks the series is stationary and False
    # otherwise; difference the series only in the latter case.
    if not f.adf_test():
        f.diff()

    f.add_seasonal_regressors('week', 'month', 'quarter', raw=False, sincos=True)
    f.add_seasonal_regressors('year')
# Build one Forecaster per (region, type) combination found in `data` and
# store it in `avc` under the key "<region>-<type>".
avc = {}
for reg in data.region.unique():
    for typ in data.type.unique():
        in_slice = (data['region'] == reg) & (data['type'] == typ)
        data_slice = data.loc[in_slice]

        # Weekly date range from the slice's first to its last date.
        load_dates = pd.date_range(
            start=data_slice['Date'].min(),
            end=data_slice['Date'].max(),
            freq='W',
        )
        data_load = pd.DataFrame({'Date': load_dates})

        # Left-join the slice onto the full calendar so weeks without a row
        # come through as NaN.
        merged = data_load.merge(data_slice, how='left', on='Date')
        data_load['Vol'] = merged['Total Volume'].values
        # for missing weeks, assume 0
        data_load.fillna(0, inplace=True)

        f = Forecaster(
            y=data_load['Vol'],
            current_dates=data_load['Date'],
            type=typ,
            region=reg,
        )
        avc[f"{reg}-{typ}"] = f