Skip to content

Instantly share code, notes, and snippets.

@aweinstein
Created October 3, 2012 18:20
Show Gist options
  • Save aweinstein/3828759 to your computer and use it in GitHub Desktop.
Save aweinstein/3828759 to your computer and use it in GitHub Desktop.
scikitlearn lasso path fat vs thin X matrix
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Lasso, lars_path
np.random.seed(42)
def gen_data(n, m, k):
X = np.random.randn(n, m)
w = np.zeros((m, 1))
i = np.arange(0, m)
np.random.shuffle(i)
supp = i[:k]
w[supp] = np.sign(np.random.randn(k, 1)) * (np.random.rand(k, 1) + 1)
y = np.dot(X, w)
sigma = 0.2
y += sigma * np.random.rand(*y.shape)
return X, w, y
def homotopy(A, y):
m, n = A.shape
x = np.zeros((n, 1))
c0 = np.dot(A.T, y)
I = [np.argmax(np.abs(c0))]
lambda_ = np.abs(c0).max()
k = 0
xs = []
lambdas = []
ATA = np.dot(A.T, A)
i_set = set(range(n))
while len(I) < n and lambda_> 0:
c = c0 - np.dot(ATA, x)
k += 1
d = np.zeros_like(x)
d[I] = np.linalg.solve(np.dot(A[:,I].T, A[:,I]), np.sign(c[I]))
Ic = list(i_set - set(I))
v = np.dot(A[:,Ic].T, np.dot(A[:,I], d[I]))
# Use masked arrays to compute the min only over positive entries
v1 = np.ma.masked_array((lambda_ - c[Ic]) / (1 - v))
v2 = np.ma.masked_array((lambda_ + c[Ic]) / (1 + v))
v1[v1 <= 0] = np.ma.masked
v2[v2 <= 0] = np.ma.masked
i1, i2 = v1.argmin(), v2.argmin()
if v1[i1] < v2[i2]:
gamma_plus = v1[i1,0]
i_plus = Ic[i1]
else:
gamma_plus = v2[i2,0]
i_plus = Ic[i2]
v = np.ma.masked_array(-x[I] / d[I])
v[v <= 0] = np.ma.masked
i = v.argmin()
gamma_minus = v[i,0] if v[i,0] > 0 else np.inf
i_minus = I[i]
if gamma_plus < gamma_minus:
gamma = gamma_plus
I.append(i_plus)
else:
gamma = gamma_minus
I.remove(i_minus)
xs.append(x.copy())
x += gamma * d
lambdas.append(lambda_)
lambda_ -= gamma
return np.array(xs).squeeze(), lambdas
if __name__ == '__main__':
for n, m in [(100, 80), (80,100)]:
X, w, y = gen_data(n, m, k=5)
lasso_alphas, _, lasso_coef = lars_path(X, y.squeeze(), method='lasso')
h_alphas, lambdas = homotopy(X, y)
ws = np.zeros((w.shape[0], len(lasso_alphas)))
for i, alpha in enumerate(lasso_alphas[:-1]):
rgr_lasso = Lasso(alpha=alpha)
rgr_lasso.fit(X, y)
ws[:,i] = rgr_lasso.coef_
plt.figure(figsize=(8,10))
plt.subplot(311)
plt.plot(lasso_coef.T)
plt.title('Using lars_path')
plt.subplot(312)
plt.plot(ws.T)
plt.title('Using Lasso')
plt.subplot(313)
plt.plot(h_alphas)
plt.title('Using naive lars')
if n < m:
plt.suptitle('Fat X')
plt.savefig('lasso_fat.png')
else:
plt.suptitle('Thin X')
plt.savefig('lasso_thin.png')
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment