mikekeith52’s gists

mikekeith52 / covid_heatmap.py

Created April 13, 2021 00:17

	# Draw a single-column heatmap of states ranked by vaccination rate.
	vax_rate_col = 'total_vaccinations_per_hundred'

	sns.set(rc={'figure.figsize': (10, 16)})

	# Sort descending so the highest-rate state is at the top of the heatmap.
	ranked_states = data_state_piv[[vax_rate_col]].sort_values(vax_rate_col, ascending=False)

	# NOTE(review): fmt='.1%' multiplies each value by 100 when rendering the
	# annotation -- confirm the column holds fractions rather than per-hundred counts.
	sns.heatmap(ranked_states, cmap="YlGnBu", annot=True, fmt='.1%', robust=True)
	plt.title('Vaccines Administered by State as Pct of Eligible Population', size=14)
	plt.show()

mikekeith52 / prepare_covid_manufacturer_data.py

Last active April 13, 2021 00:28

	# Keep only the rows reported for the United States.
	data_manufac_us = data_manufac.loc[data_manufac['location'] == 'United States']

	# Pivot to one row per date and one column per vaccine; date/vaccine
	# combinations with no data are filled with 0.
	data_mpiv = pd.pivot_table(
	    data_manufac_us,
	    index=['date'],
	    columns=['vaccine'],
	    values='total_vaccinations',
	    # Use the string alias: passing the np.sum callable triggers a
	    # FutureWarning in recent pandas releases and resolves to the same sum.
	    aggfunc='sum',
	    fill_value=0,
	)

	# Row-wise total across every vaccine column (computed before 'Total' exists,
	# so the new column is not included in its own sum).
	data_mpiv['Total'] = data_mpiv.sum(axis=1)

	# Column names of the pivot, including the newly added 'Total'.
	vax = data_mpiv.columns.to_list()

mikekeith52 / create_bar_line_chart_covid.py

Created April 13, 2021 00:19

	# Stacked daily-administration bars with a 7-day-average line and shaded area,
	# all drawn on one shared axis.
	sns.set(rc={'figure.figsize': (14, 8)})
	_, ax1 = plt.subplots()

	# One '<vaccine> Daily Admin' series per entry in vax, skipping the 'Total' entry.
	daily_cols = [name + ' Daily Admin' for name in vax if name != 'Total']
	data_mpiv.plot(
	    kind='bar',
	    y=daily_cols,
	    stacked=True,
	    label=['J&J', 'Moderna', 'Pfizer'],
	    color=['orange', 'green', 'purple'],
	    ax=ax1,
	)

	# Positional (0..n-1) index; presumably used so the line and the fill line up
	# with the bar chart's x positions -- verify against the rendered plot.
	positional = data_mpiv.reset_index()
	positional.plot(
	    kind='line',
	    y='Total 7-day Avg',
	    color='pink',
	    label='Total 7 Day Average',
	    ax=ax1,
	)

	plt.fill_between(
	    positional.index,
	    data_mpiv['Total 7-day Avg'],
	    0,
	    facecolor="orange",  # The fill color
	    color='blue',  # The outline color
	    alpha=0.2,  # Transparency of the fill
	)

mikekeith52 / Grids.py

Last active July 2, 2021 18:51

	# Grid of candidate settings for the arima estimator.
	arima = dict(
	    order=[(2, 1, 0), (0, 1, 2), (1, 1, 1)],
	    seasonal_order=[(0, 0, 0, 0), (0, 1, 1, 12)],
	    trend=['n', 'c', 't', 'ct'],
	)

	# Grid of candidate settings for the elasticnet estimator.
	elasticnet = {
	'alpha':[i/10 for i in range(1,101)],  # 0.1, 0.2, ..., 10.0 (100 values)
	'l1_ratio':[0,0.25,0.5,0.75,1],  # five candidate values from 0 to 1
	'normalizer':['scale','minmax',None]  # None is itself one of the candidates

mikekeith52 / housing_forecast_loop.py

Created July 2, 2021 15:51

	f.set_validation_length(6)

	# Automatically tune and forecast with a series of models.
	models = ('mlr', 'knn', 'svr', 'xgboost', 'gbt', 'elasticnet', 'mlp', 'prophet')
	for m in models:
	    # Select the estimator, tune it, then forecast with it, once per model name.
	    f.set_estimator(m)
	    f.tune()  # by default, will pull grids from Grids.py
	    f.auto_forecast()

mikekeith52 / avocado_forecast_loop.py

Last active July 10, 2021 02:30

	models = ('mlr', 'knn', 'svr', 'xgboost', 'gbt', 'elasticnet', 'mlp', 'prophet', 'silverkite')

	# Run every model on every Forecaster in avc; log_progress wraps the iteration.
	for k, f in log_progress(avc.items()):
	    for m in models:
	        f.set_estimator(m)
	        # By default, will pull the grid with the same name as the estimator
	        # (mlr will pull the mlr grid, etc.).
	        f.tune()
	        f.auto_forecast()

	    # Combine models and run manually specified models of other varieties.
	    # This runs once per series, after all of that series' models have run.
	    f.set_estimator('combo')
	    f.manual_forecast(
	        how='weighted',
	        models=models,
	        determine_best_by='ValidationMetricValue',
	        call_me='weighted',
	    )

mikekeith52 / combo_forecasting.py

Created July 2, 2021 22:41

	# Switch to the combination estimator and request two combination forecasts,
	# both ranked by the same validation metric.
	rank_metric = 'ValidationMetricValue'
	f.set_estimator('combo')

	# how='simple' over models='top_3'; call_me='avg' is presumably the label
	# the result is stored under -- confirm against the Forecaster API.
	f.manual_forecast(
	    how='simple',
	    models='top_3',
	    determine_best_by=rank_metric,
	    call_me='avg',
	)

	# how='weighted' over the explicit `models` collection.
	f.manual_forecast(
	    how='weighted',
	    models=models,
	    determine_best_by=rank_metric,
	    call_me='weighted',
	)

mikekeith52 / load_avocados.py

Created July 8, 2021 02:42

	# Build one Forecaster per (region, type) pair, keyed as "<region>-<type>".
	avc = {}
	for reg in data['region'].unique():
	    for typ in data['type'].unique():
	        data_slice = data.loc[(data['region'] == reg) & (data['type'] == typ)]
	        if data_slice.empty:
	            # No rows for this pair: min()/max() of 'Date' would be NaT and
	            # pd.date_range would raise, so skip it.
	            continue

	        # Full weekly date range between the first and last observed dates.
	        load_dates = pd.date_range(
	            start=data_slice['Date'].min(),
	            end=data_slice['Date'].max(),
	            freq='W',
	        )
	        data_load = pd.DataFrame({'Date': load_dates})

	        # Left-join observed volumes onto the full range; for missing weeks, assume 0.
	        # NOTE(review): the positional assignment assumes at most one row per
	        # 'Date' in data_slice -- duplicates would lengthen the merge result.
	        merged = data_load.merge(data_slice, how='left', on='Date')
	        data_load['Vol'] = merged['Total Volume'].fillna(0).values

	        f = Forecaster(
	            y=data_load['Vol'],
	            current_dates=data_load['Date'],
	            type=typ,
	            region=reg,
	        )
	        avc[f"{reg}-{typ}"] = f

mikekeith52 / avocado_preprocess.py

Last active July 8, 2021 16:12

	# Apply the same preprocessing steps, in the same order, to every Forecaster in avc.
	for f in avc.values():
	    f.generate_future_dates(52)
	    f.set_test_length(26)
	    f.set_validation_length(13)
	    f.add_ar_terms(3)
	    f.add_AR_terms((1, 26))
	    # adf_test() returns True if it thinks the series is stationary, False
	    # otherwise; difference only when it does not.
	    if not f.adf_test():
	        f.diff()
	    # Seasonal regressors are added for every series, differenced or not.
	    f.add_seasonal_regressors('week', 'month', 'quarter', raw=False, sincos=True)
	    f.add_seasonal_regressors('year')

mikekeith52 / avocado_results_write.py

Created July 8, 2021 15:36

	# Gather one model-summary frame per series, tagged with its key, region and type.
	summaries = []
	for k, f in avc.items():
	    df = f.export(dfs='model_summaries', determine_best_by='LevelTestSetMAPE')
	    df['Name'] = k
	    df['Region'] = f.region
	    df['Type'] = f.type
	    summaries.append(df)

	# Concatenate once: growing a DataFrame inside the loop re-copies all earlier
	# rows on every pass and concatenates onto an empty frame. pd.concat rejects
	# an empty list, so fall back to an empty frame when avc has no entries.
	forecast_info = pd.concat(summaries, ignore_index=True) if summaries else pd.DataFrame()

	forecast_info.to_csv('avocado_model_summaries.csv', index=False)

Michael Keith mikekeith52