Skip to content

Instantly share code, notes, and snippets.

View mikekeith52's full-sized avatar

Michael Keith mikekeith52

  • Salt Lake City, UT
View GitHub Profile
# Portrait-shaped canvas for the heatmap.
sns.set(rc={'figure.figsize': (10, 16)})

# Keep only the per-hundred column, ordered from largest to smallest value.
rate_col = 'total_vaccinations_per_hundred'
ranked_rates = data_state_piv[[rate_col]].sort_values(rate_col, ascending=False)

sns.heatmap(
    ranked_rates,
    cmap="YlGnBu",
    annot=True,
    fmt='.1%',  # cell annotations use a one-decimal percent format
    robust=True,
)
plt.title('Vaccines Administered by State as Pct of Eligible Population', size=14)
plt.show()
# Restrict the manufacturer data to rows whose location is 'United States'.
data_manufac_us = data_manufac.loc[data_manufac['location'] == 'United States']

# One row per date, one column per vaccine, cells holding summed
# total_vaccinations (0 where a date/vaccine combination has no rows).
# The string 'sum' is used instead of np.sum: recent pandas versions warn
# when a NumPy callable is passed as aggfunc.
data_mpiv = pd.pivot_table(
    data_manufac_us,
    index=['date'],
    columns=['vaccine'],
    values='total_vaccinations',
    aggfunc='sum',
    fill_value=0,
)

# Row-wise total across every vaccine column.
data_mpiv['Total'] = data_mpiv.sum(axis=1)

# Column names captured after 'Total' was added, so the list includes 'Total'.
vax = data_mpiv.columns.to_list()
# Wide canvas; every layer below is drawn on the same axes.
sns.set(rc={'figure.figsize': (14, 8)})
_, ax1 = plt.subplots()

# Stacked bars: one '<vaccine> Daily Admin' column per entry of `vax`,
# leaving out the 'Total' entry.
daily_cols = [name + ' Daily Admin' for name in vax if name != 'Total']
data_mpiv.plot(
    kind='bar',
    y=daily_cols,
    stacked=True,
    label=['J&J', 'Moderna', 'Pfizer'],
    color=['orange', 'green', 'purple'],
    ax=ax1,
)

# Line and shaded area use the positional (reset) index for x.
flat = data_mpiv.reset_index()
flat.plot(
    kind='line',
    y='Total 7-day Avg',
    color='pink',
    label='Total 7 Day Average',
    ax=ax1,
)

# Shade between the 7-day average and zero.
# NOTE(review): facecolor and color are both supplied; confirm which one
# matplotlib ends up applying to the fill versus the outline.
plt.fill_between(
    flat.index,
    data_mpiv['Total 7-day Avg'],
    0,
    facecolor="orange",
    color='blue',
    alpha=0.2,
)
# Candidate values for the `arima` grid: one list of options per keyword.
arima = dict(
    order=[(2, 1, 0), (0, 1, 2), (1, 1, 1)],
    seasonal_order=[(0, 0, 0, 0), (0, 1, 1, 12)],
    trend=['n', 'c', 't', 'ct'],
)
# Candidate values for the `elasticnet` grid: one list of options per keyword.
elasticnet = {
    'alpha': [i / 10 for i in range(1, 101)],  # 0.1, 0.2, ..., 10.0
    'l1_ratio': [0, 0.25, 0.5, 0.75, 1],
    'normalizer': ['scale', 'minmax', None],
}  # closing brace was missing, leaving the literal unterminated
f.set_validation_length(6)

# automatically tune and forecast with a series of models
models = ('mlr', 'knn', 'svr', 'xgboost', 'gbt', 'elasticnet', 'mlp', 'prophet')
for m in models:
    # Loop body re-indented: select the estimator, tune it, then forecast.
    f.set_estimator(m)
    f.tune()  # by default, will pull grids from Grids.py
    f.auto_forecast()
models = ('mlr', 'knn', 'svr', 'xgboost', 'gbt', 'elasticnet', 'mlp', 'prophet', 'silverkite')

# Visit every forecaster stored in `avc`, wrapped in log_progress.
for k, f in log_progress(avc.items()):
    for m in models:
        f.set_estimator(m)
        f.tune()  # by default, will pull the grid with the same name as the estimator (mlr will pull the mlr grid, etc.)
        f.auto_forecast()

    # combine models and run manually specified models of other varieties
    # NOTE(review): indentation was lost in the source; this step is assumed
    # to run once per forecaster, after all of its models are fit - confirm.
    f.set_estimator('combo')
    f.manual_forecast(
        how='weighted',
        models=models,
        determine_best_by='ValidationMetricValue',
        call_me='weighted',
    )
# Switch to the 'combo' estimator and register two combinations, both
# selecting on 'ValidationMetricValue': a simple one over 'top_3' (stored as
# 'avg') and a weighted one over everything in `models` (stored as 'weighted').
f.set_estimator('combo')
f.manual_forecast(
    how='simple',
    models='top_3',
    determine_best_by='ValidationMetricValue',
    call_me='avg',
)
f.manual_forecast(
    how='weighted',
    models=models,
    determine_best_by='ValidationMetricValue',
    call_me='weighted',
)
# Build one Forecaster per (region, type) pair, keyed as "<region>-<type>".
avc = {}
for reg in data.region.unique():
    for typ in data.type.unique():
        data_slice = data.loc[(data['region'] == reg) & (data['type'] == typ)]

        # Weekly calendar spanning the slice's first to last date.
        load_dates = pd.date_range(start=data_slice['Date'].min(), end=data_slice['Date'].max(), freq='W')  # for missing weeks, assume 0
        data_load = pd.DataFrame({'Date': load_dates})

        # Left merge keeps every calendar week; weeks with no match in the
        # slice come back as NaN and are zero-filled on the next line.
        data_load['Vol'] = data_load.merge(data_slice, how='left', on='Date')['Total Volume'].values
        data_load.fillna(0, inplace=True)

        # `type` and `region` are passed as extra keyword arguments.
        f = Forecaster(y=data_load['Vol'], current_dates=data_load['Date'], type=typ, region=reg)
        avc[f"{reg}-{typ}"] = f
# Apply the same horizon, hold-out lengths and regressors to every forecaster.
for k, f in avc.items():
    f.generate_future_dates(52)
    f.set_test_length(26)
    f.set_validation_length(13)
    f.add_ar_terms(3)
    f.add_AR_terms((1, 26))
    if not f.adf_test():  # returns True if it thinks it's stationary, False otherwise
        # Only difference the series when the test does not report stationarity.
        f.diff()
    f.add_seasonal_regressors('week', 'month', 'quarter', raw=False, sincos=True)
    f.add_seasonal_regressors('year')
# Collect one model-summary frame per forecaster, tagging each with its
# dictionary key and the region/type attributes read from the forecaster.
summaries = []
for k, f in avc.items():
    df = f.export(dfs='model_summaries', determine_best_by='LevelTestSetMAPE')
    df['Name'] = k
    df['Region'] = f.region
    df['Type'] = f.type
    summaries.append(df)

# Concatenate once rather than re-copying the accumulated frame on every
# iteration; an empty `avc` still produces an empty frame, as before.
forecast_info = pd.concat(summaries, ignore_index=True) if summaries else pd.DataFrame()
forecast_info.to_csv('avocado_model_summaries.csv', index=False)