# Gists by Michael Keith (mikekeith52)
df2 = df.copy()
# DATE is the index; .values avoids index misalignment when assigning back to df2
df2['year'] = df2.reset_index().DATE.dt.year.values
df2['month'] = df2.reset_index().DATE.dt.month.values
df2['pct_chg'] = df2['HOUSTNSA'].pct_change()
# average and standard deviation of the month-over-month change, by calendar month
avg_chg_month = df2.groupby('month')['pct_chg'].mean().reset_index()
avg_chg_month['pct_chg_std'] = df2.groupby('month')['pct_chg'].std().values
# individual years laid next to the monthly averages for comparison
avg_chg_month['pct_chg_2016'] = df2.loc[df2['year'] == 2016,'pct_chg'].values
avg_chg_month['pct_chg_2020'] = df2.loc[df2['year'] == 2020,'pct_chg'].values
avg_chg_month['pct_chg_2021'] = df2.loc[df2['year'] == 2021,'pct_chg'].values
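# A minimal sketch (assumes matplotlib is installed; not part of the original
# gist) for visualizing the table built above: each year's monthly change
# plotted against the historical average.
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 5))
plt.plot(avg_chg_month['month'], avg_chg_month['pct_chg'], label='historical avg', linewidth=3)
for year in (2016, 2020, 2021):
    plt.plot(avg_chg_month['month'], avg_chg_month[f'pct_chg_{year}'], label=str(year))
plt.xlabel('month')
plt.ylabel('pct change')
plt.legend()
plt.show()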
def prepare_fcst(f, test_length=0.1, fcst_length=120):
    """ Adds all variables and sets the test length/forecast length in the object.

    Args:
        f (Forecaster): the Forecaster object.
        test_length (int or float): the test length as a size or proportion.
        fcst_length (int): the forecast horizon.

    Returns:
        (Forecaster) the processed object.
    """
    ...  # the function body is truncated in the source
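# A minimal sketch, assuming the scalecast API, of what the elided body could
# look like: set_test_length/generate_future_dates/add_time_trend/
# add_seasonal_regressors are real Forecaster methods, but the specific
# regressor choices below are illustrative assumptions, not the author's.
def prepare_fcst_sketch(f, test_length=0.1, fcst_length=120):
    f.set_test_length(test_length)        # holdout used for test-set metrics
    f.generate_future_dates(fcst_length)  # extend the date index by the horizon
    f.add_time_trend()                    # assumption: linear trend regressor
    f.add_seasonal_regressors('month', raw=False, dummy=True)  # assumption
    return f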
pd.options.display.max_rows = None
all_xvars = f.get_regressor_names()
final_dropped = pd.DataFrame({"Var": all_xvars})
for i, v in f.export("model_summaries").iterrows():
    model = v["ModelNickname"]
    Xvars = v["Xvars"]
    # regressors this model's reduction step removed
    dropped_vars = [x for x in all_xvars if x not in Xvars]
    if not dropped_vars:
        continue
    tmp_dropped = pd.DataFrame(
        {"Var": dropped_vars}  # assumption: the original constructor is truncated here
    )
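# Presumably (an assumption -- the gist is cut off above) each model's dropped
# variables are then merged back into the running table, e.g.:
#     final_dropped = final_dropped.merge(tmp_dropped, on="Var", how="left")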
selected_model = "gbt"
hp = results[selected_model][3]  # the tuned hyperparameters for the chosen model
f.set_estimator(selected_model)
# forecast with all regressors, then with each reduced variable set
f.manual_forecast(**hp, Xvars="all", call_me=selected_model + "_all_vars")
f.manual_forecast(
    **hp, Xvars=lasso_reduced_vars, call_me=selected_model + "_l1_reduced_vars"
)
f.manual_forecast(
    **hp, Xvars=mlr_reduced_vars, call_me=selected_model + "_pfi-mlr_reduced_vars"
)
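# A usage sketch for comparing the three runs (plot_test_set and export are
# real scalecast methods; the name filter matches the call_me values above):
f.plot_test_set(
    models=[m for m in f.history if m.startswith(selected_model)],
    order_by='TestSetRMSE',
)
f.export('model_summaries', determine_best_by='TestSetRMSE')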
critical_pval = 0.05
print('-'*100)
print('Augmented Dickey-Fuller results:')
stat, pval, _, _, _, _ = f.adf_test(full_res=True)
print('the test-stat value is: {:.2f}'.format(stat))
print('the p-value is {:.4f}'.format(pval))
print('the series is {}'.format('stationary' if pval < critical_pval else 'not stationary'))
print('-'*100)
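# If the test fails to reject, a common next step (an assumption here, not part
# of the original gist) is to difference the series and re-test; recent
# scalecast versions do this through SeriesTransformer:
if pval >= critical_pval:
    from scalecast.SeriesTransformer import SeriesTransformer
    transformer = SeriesTransformer(f)
    f = transformer.DiffTransform(1)  # first difference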
from sklearn.ensemble import StackingRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

f.add_sklearn_estimator(StackingRegressor,'stacking')
f.set_estimator('stacking')
results = f.export('model_summaries')
# rebuild each base model with the hyperparameters found during tuning
estimators = [
    ('knn',
     KNeighborsRegressor(**results.loc[results['ModelNickname'] == 'knn','HyperParams'].values[0])),
    ('xgboost',
     XGBRegressor(**results.loc[results['ModelNickname'] == 'xgboost','HyperParams'].values[0])),
    ('lightgbm',
     LGBMRegressor(**results.loc[results['ModelNickname'] == 'lightgbm','HyperParams'].values[0])),
]
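# A usage sketch for fitting the stack (the final_estimator choice is an
# assumption, not from the gist): scalecast passes these keyword arguments
# through to the StackingRegressor constructor.
from sklearn.linear_model import LinearRegression
f.manual_forecast(
    estimators=estimators,
    final_estimator=LinearRegression(),  # assumption: a simple meta-learner
    call_me='stacking',
)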
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import ElasticNet
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor
results = mvf.export_model_summaries()
estimators = [
    ("mlr", LinearRegression()),
    # the source gist is truncated here; the remaining tuples presumably mirror
    # the imports above (ElasticNet, MLPRegressor, KNeighborsRegressor,
    # XGBRegressor), each rebuilt from its tuned hyperparameters
]
f.set_validation_length(12)  # tune on the 12 observations before the test set
# candidate (p,d,q) and seasonal (P,D,Q,m) orders; assumes f.set_estimator('arima')
# has already been called
grid = {
    'order':[(1,1,1),(1,1,0),(0,1,1)],
    'seasonal_order':[(2,1,1,12),(1,1,1,12),(2,1,0,12),(0,1,0,12)]
}
f.ingest_grid(grid)
f.tune()
f.auto_forecast(call_me='arima4')
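# After tuning, the winning combination is stored on the object (best_params is
# a real Forecaster attribute; printing it here is just a usage sketch):
print(f.best_params)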
from scalecast.auxmodels import auto_arima

auto_arima(
    f,
    start_P=1,
    start_q=1,
    max_p=6,
    max_q=6,
    m=12,
    seasonal=True,
)
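# auto_arima saves its results like any other scalecast model ('auto_arima' is
# the function's default nickname); inspecting the summary frame afterwards is
# a usage sketch, not part of the original gist:
print(f.export('model_summaries')[['ModelNickname','HyperParams','TestSetRMSE']])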
# EDA
import matplotlib.pyplot as plt

f.plot_acf()
plt.show()
f.plot_pacf()
plt.show()
f.seasonal_decompose().plot()
plt.show()
stat, pval, _, _, _, _ = f.adf_test(full_res=True)
print(stat)  # test statistic
print(pval)  # p-value