Skip to content

Instantly share code, notes, and snippets.

@yoavram
Last active July 19, 2021 19:03
Show Gist options
  • Save yoavram/378777660e13b77d6daf to your computer and use it in GitHub Desktop.
Save yoavram/378777660e13b77d6daf to your computer and use it in GitHub Desktop.
Run Lasso Regression with CV to find alpha on the California Housing dataset using Scikit-Learn
import matplotlib.pyplot as plt
import numpy as np
import sklearn.datasets
import sklearn.cross_validation as cv
from sklearn import linear_model
dataset = sklearn.datasets.fetch_california_housing()
X = dataset['data']
y = dataset['target']
X_train, X_test, y_train, y_test = cv.train_test_split(X, y, test_size=0.25, random_state=0)
alphas = np.logspace(-4, -1, 10)
scores = np.empty_like(alphas)
for i,a in enumerate(alphas):
lasso = linear_model.Lasso()
lasso.set_params(alpha=a)
lasso.fit(X_train, y_train)
scores[i] = lasso.score(X_test, y_test)
print(a, lasso.coef_)
lassocv = linear_model.LassoCV()
lassocv.fit(X, y)
lassocv_score = lassocv.score(X, y)
lassocv_alpha = lassocv.alpha_
print('CV', lassocv.coef_)
plt.plot(alphas, scores, '-ko')
plt.axhline(lassocv_score, color='b', ls='--')
plt.axvline(lassocv_alpha, color='b', ls='--')
plt.xlabel(r'$\alpha$')
plt.ylabel('Score')
plt.xscale('log')
sns.despine(offset=15)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment