This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# for the paired t-test for difference of means
from scipy.stats import ttest_rel | |
# save and print the results of the test on the LASSO estimates
lasso_results = ttest_rel(unbiased_sigma_estimates, lasso_sigma_estimates) | |
print(f'Test Statistic for the Paired t-test between the True Model and LASSO: {round(lasso_results[0], 4)}') | |
print(f'p-value for the Paired t-test between the True Model and LASSO: {round(lasso_results[1], 4)}') | |
print() | |
# save and print the results of the test on the Ridge estimates
ridge_results = ttest_rel(unbiased_sigma_estimates, ridge_sigma_estimates) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# suppresses warnings from sklearn | |
def warn(*args, **kwargs):
    """Accept any positional and keyword arguments and do nothing.

    This is a silent no-op; the surrounding script assigns it over
    ``warnings.warn`` so that calls made through that attribute emit nothing.

    Returns:
        None, always.
    """
    return None
import warnings | |
warnings.warn = warn | |
# import LassoCV | |
from sklearn.linear_model import LassoCV | |
# import RidgeCV | |
from sklearn.linear_model import RidgeCV |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# for linear algebra and random numbers | |
import numpy as np | |
# for linear regression | |
import statsmodels.api as sm | |
# for visualization | |
import matplotlib.pyplot as plt | |
# for generating combinations of explanatory variables for model selection based on AIC | |
from itertools import combinations | |
# set a random seed for reproducibility |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def best_information_criterion_selection(y, X, criterion='AIC'): | |
''' | |
This function takes in a column numpy array (y) and design matrix (X) (with the first column as all 1s for | |
the intercept) which is also a numpy array, and returns the OLS model with the lowest Information | |
Criterion. The default criterion is AIC; and the other option is BIC. | |
''' | |
# check inputs are valid | |
assert y.shape[0] == X.shape[0], 'The number of rows in y and X do not match!' | |
assert (criterion == 'AIC') or (criterion == 'BIC'), 'Valid criterions are AIC and BIC!' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from statsmodels.discrete.discrete_model import Logit | |
# add an intercept since statsmodels does not | |
my_data['Intercept'] = 1 | |
# fit the logistic regression model using MLE | |
mle_mod = Logit(my_data[target], my_data[['Intercept'] + vars_of_interest]) | |
mle_mod_fit = mle_mod.fit(disp=False) | |
# print the summary |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
plt.figure(figsize=(12, 5), dpi= 80, facecolor='w', edgecolor='k') | |
plt.subplot(1, 2, 1) | |
plt.plot(mcmc_log_mod.raw_beta_distr[0], mcmc_log_mod.raw_beta_distr[1]) | |
plt.title('Simulated Raw Joint Distribution of the Coefficients', fontsize=12) | |
plt.xlabel('Intercept', fontsize=10) | |
plt.ylabel('Coefficient of Price Percentile', fontsize=10) | |
plt.subplot(1, 2, 2) | |
plt.plot(mcmc_log_mod.beta_distr[0], mcmc_log_mod.beta_distr[1]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
# load in the candy data
all_data = pd.read_csv('candy-data.csv') | |
# list of independent variables in the model | |
vars_of_interest = ['pricepercent'] | |
# name of dependent variable |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class mcmc_logistic_reg:
    """Holder for the result arrays of a logistic regression fit.

    Only the constructor is visible here. Judging by the class and attribute
    names, this presumably stores MCMC draws of the coefficients and summaries
    derived from them -- TODO confirm against the fitting code.
    """

    # Kept from the original: exposes numpy as the class attribute ``np``.
    # Names bound in a class body are NOT visible inside its methods, so
    # methods must import numpy themselves or rely on a module-level import.
    import numpy as np

    def __init__(self):
        """Create the four result attributes as one-element placeholder arrays.

        The original spelled this method ``__init__self``, which Python
        name-mangles and never calls on construction, so new instances had
        none of these attributes.
        """
        # Local import so the constructor works even when the module itself
        # has no top-level ``import numpy as np``.
        import numpy as np

        # ``np.empty`` leaves the contents uninitialised; these are only
        # placeholders until real values are assigned.
        self.raw_beta_distr = np.empty(1)
        self.beta_distr = np.empty(1)
        self.beta_hat = np.empty(1)
        self.cred_ints = np.empty(1)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.