Created
August 8, 2020 21:44
-
-
Save jkclem/06b3972fd5f91b142941872c54f4ee08 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# suppresses warnings from sklearn | |
def warn(*args, **kwargs): | |
pass | |
import warnings | |
warnings.warn = warn | |
# import LassoCV | |
from sklearn.linear_model import LassoCV | |
# import RidgeCV | |
from sklearn.linear_model import RidgeCV | |
# set the number of simulations to run | |
num_sims = 500 | |
# set n and k and make a n by k+1 matrix of standard normal random numbers | |
n = 200 | |
k = 100 | |
# set the number of predictors, including the intercept that are significant | |
k_significant = 51 | |
# create a column vector of zeros, with k + 1 betas, where the first value is the true intercept | |
beta = np.zeros(shape=(k + 1, 1)) | |
# set the first k_significant betas to be non-zero values, using random N(10, 2) random variables | |
beta[:k_significant] = np.random.normal(loc=5.0, scale=1.0, size=(k_significant, 1)) | |
# create an array of zeros to hold our UNBIASED estimates of the variance of our error term (from OLS) | |
unbiased_sigma_estimates = np.zeros(shape=num_sims) | |
# create an array of zeros to hold our LASSO estimates of the variance of our error term (from OLS) | |
lasso_sigma_estimates = np.zeros(shape=num_sims) | |
# create an array of zeros to hold our LASSO estimates of the variance of our error term (from OLS) | |
ridge_sigma_estimates = np.zeros(shape=num_sims) | |
# perform num_sims simulations | |
for i in range(num_sims): | |
### | |
# This block generates the random sample | |
### | |
# create the temporary design n by k + 1 design matrix | |
temp_X = np.random.normal(loc=0.0, scale=1.0, size=(n, k + 1)) | |
# set the first column of the design matrix to 1.0 to include an intercept | |
temp_X[:,0] = 1.0 | |
# create temporary y values using the design matrix and beta vector, and add standard normal random noise | |
temp_y = np.matmul(temp_X, beta) + np.random.normal(loc=0.0, scale=1.0, size=(n, 1)) | |
### | |
# This block estimates the variance using the true model | |
### | |
# fit an OLS linear model to the temporary data | |
temp_true_mod = sm.OLS(temp_y, temp_X[:,0:k_significant]).fit() | |
# estimate the variance of the error term (note: ddof=1 is to use the sample variance formula) | |
temp_unbiased_sigma_estimate = np.sqrt(np.sum(temp_true_mod.resid **2) / | |
(n - temp_true_mod.params.shape[0])) | |
# add the unbiased estimate to the array at the ith slot | |
unbiased_sigma_estimates[i] = temp_unbiased_sigma_estimate | |
### | |
# This block estimates the variance using the LASSO model with the best alpha chosen by cross-validation | |
### | |
# create an instance of a LASSO model | |
lasso_mod = LassoCV(alphas=(0.01, 0.1, 1.0, 10.0), fit_intercept=False, cv=3) | |
# reshape temp_y to be compatible with sklearn standards | |
reshaped_temp_y = temp_y.reshape(temp_y.shape[0]) | |
# fit the LASSO model with the best alpha | |
fit_lasso_mod = lasso_mod.fit(temp_X, reshaped_temp_y) | |
# calculate the residuals from the temporary LASSO model | |
temp_lasso_resids = reshaped_temp_y - fit_lasso_mod.predict(temp_X) | |
# count the number of non-zero parameters in the LASSO model | |
non_zero_param_count = (fit_lasso_mod.coef_ != np.repeat(0, fit_lasso_mod.coef_.shape[0])).sum() | |
# estimate the standard deviation of the error term | |
temp_lasso_sigma_estimate = np.sqrt(np.sum(temp_lasso_resids ** 2) / (n - non_zero_param_count)) | |
# add the unbiased estimate to the array at the ith slot | |
lasso_sigma_estimates[i] = temp_lasso_sigma_estimate | |
### | |
# This block estimates the variance using the Ridge model with the best alpha chosen by cross-validation | |
### | |
# create an instance of a ridge model | |
ridge_mod = RidgeCV(alphas=(0.01, 0.1, 1.0, 10.0), fit_intercept=False, cv=3) | |
# reshape temp_y to be compatible with sklearn standards | |
reshaped_temp_y = temp_y.reshape(temp_y.shape[0]) | |
# fit the ridge model with the best alpha | |
fit_ridge_mod = ridge_mod.fit(temp_X, reshaped_temp_y) | |
# calculate the residuals from the temporary ridge model | |
temp_ridge_resids = reshaped_temp_y - fit_ridge_mod.predict(temp_X) | |
# estimate the variance of the error term (note: ddof=1 is to use the sample variance formula) | |
temp_ridge_sigma_estimate = np.sqrt(np.sum(temp_ridge_resids ** 2) / (n - k - 1)) | |
# add the unbiased estimate to the array at the ith slot | |
ridge_sigma_estimates[i] = temp_ridge_sigma_estimate |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment