John Clements jkclem

Statistics | Econometrics | Machine Learning

jkclem / simulation_ttests.py

Created August 8, 2020 21:47

	# for the paired t-test for difference of means
	from scipy.stats import ttest_rel

	# save and print the results of the test on the LASSO estimates
	lasso_results = ttest_rel(unbiased_sigma_estimates, lasso_sigma_estimates)
	print(f'Test Statistic for the Paired t-test between the True Model and LASSO: {round(lasso_results[0], 4)}')
	print(f'p-value for the Paired t-test between the True Model and LASSO: {round(lasso_results[1], 4)}')
	print()
	# save and print the results of the test on the Ridge estimates
	ridge_results = ttest_rel(unbiased_sigma_estimates, ridge_sigma_estimates)

jkclem / simulation_study2.py

Created August 8, 2020 21:44

	# suppresses warnings from sklearn
	def warn(*args, **kwargs):
	    """No-op replacement for ``warnings.warn``.

	    Accepts any positional and keyword arguments and ignores them, so that
	    callers passing e.g. ``category=`` or ``stacklevel=`` do not raise a
	    ``TypeError`` once this function is installed as ``warnings.warn``.

	    Returns:
	        None.
	    """
	    pass
	import warnings
	warnings.warn = warn

	# import LassoCV
	from sklearn.linear_model import LassoCV
	# import RidgeCV
	from sklearn.linear_model import RidgeCV

jkclem / simulation_study1.py

Created August 8, 2020 21:39

	# for linear algebra and random numbers
	import numpy as np
	# for linear regression
	import statsmodels.api as sm
	# for visualization
	import matplotlib.pyplot as plt
	# for generating combinations of explanatory variables for model selection based on AIC
	from itertools import combinations

	# set a random seed for reproducibility

jkclem / best_information_criterion_selection.py

Created August 8, 2020 21:38

	def best_information_criterion_selection(y, X, criterion='AIC'):
	'''
	This function takes in a column numpy array (y) and design matrix (X) (with the first column as all 1s for
	the intercept) which is also a numpy array, and returns the OLS model with the lowest Information
	Criterion. The default criterion is AIC; and the other option is BIC.
	'''

	# check inputs are valid
	assert y.shape[0] == X.shape[0], 'The number of rows in y and X do not match!'
	assert (criterion == 'AIC') or (criterion == 'BIC'), 'Valid criterions are AIC and BIC!'

jkclem / mcmc_demo3.py

Last active November 1, 2024 15:18

	from statsmodels.discrete.discrete_model import Logit

	# add an intercept since statsmodels does not
	my_data['Intercept'] = 1

	# fit the logistic regression model using MLE
	mle_mod = Logit(my_data[target], my_data[['Intercept'] + vars_of_interest])
	mle_mod_fit = mle_mod.fit(disp=False)

	# print the summary

jkclem / mcmc_demo2.py

Last active November 1, 2024 15:18

	plt.figure(figsize=(12, 5), dpi= 80, facecolor='w', edgecolor='k')

	plt.subplot(1, 2, 1)
	plt.plot(mcmc_log_mod.raw_beta_distr[0], mcmc_log_mod.raw_beta_distr[1])
	plt.title('Simulated Raw Joint Distribution of the Coefficients', fontsize=12)
	plt.xlabel('Intercept', fontsize=10)
	plt.ylabel('Coefficient of Price Percentile', fontsize=10)

	plt.subplot(1, 2, 2)
	plt.plot(mcmc_log_mod.beta_distr[0], mcmc_log_mod.beta_distr[1])

jkclem / mcmc_demo1.py

Last active November 1, 2024 15:19

	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt

	# load in the candy data
	all_data = pd.read_csv('candy-data.csv')

	# list of independent variables in the model
	vars_of_interest = ['pricepercent']
	# name of dependent variable

jkclem / mcmc_demo0.py

Last active May 18, 2020 21:27

jkclem / Daily Monte Carlo Simulation for Stock Price Prediction Intervals.ipynb

Last active November 1, 2024 15:16

Sorry, something went wrong. Reload?

Sorry, we cannot display this file.

Sorry, this file is invalid so it cannot be displayed.

jkclem / Timing Sorting and Search Algorithms.ipynb

Last active August 19, 2019 06:57

Sorry, something went wrong. Reload?

Sorry, we cannot display this file.

Sorry, this file is invalid so it cannot be displayed.