strong rules lasso
# -*- coding: utf-8 -*-
"""
Strong rules for coordinate descent.

Author: Fabian Pedregosa <[email protected]>
"""
import numpy as np

MAX_ITER = 100
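
# Throughout, alpha is rescaled by n_samples = X.shape[0], so the problem
# solved is
#     min_beta  0.5 * ||y - X beta||_2^2 + alpha * n_samples * ||beta||_1
# which is, up to a constant factor 1 / n_samples, the objective used by
# scikit-learn's Lasso.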


def l1_coordinate_descent(X, y, alpha, warm_start=None, max_iter=MAX_ITER):
    """Cyclic coordinate descent for the lasso over all features."""
    if warm_start is not None:
        beta = warm_start
    else:
        beta = np.zeros(X.shape[1], dtype=np.float64)
    alpha = alpha * X.shape[0]
    for _ in range(max_iter):
        for i in range(X.shape[1]):
            # correlation of feature i with the residual, leaving beta[i] out
            bb = beta.copy()
            bb[i] = 0.
            residual = np.dot(X[:, i], y - np.dot(X, bb))
            # soft-thresholding update
            beta[i] = np.sign(residual) * np.fmax(np.abs(residual) - alpha, 0) \
                / np.dot(X[:, i], X[:, i])
    return beta
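
# The inner update above is the standard closed-form coordinate step for the
# lasso: with all other coefficients fixed,
#     beta_i <- S(x_i^T (y - X beta_{-i}), alpha) / (x_i^T x_i)
# where S(z, t) = sign(z) * max(|z| - t, 0) is the soft-thresholding operator
# and beta_{-i} is beta with its i-th entry set to zero.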


def shrinkage(X, y, alpha, beta, active_set, max_iter):
    """Coordinate descent restricted to the features in active_set.

    alpha is assumed to be already scaled by n_samples.
    """
    bb = beta.copy()
    for _ in range(max_iter):
        for i in active_set:
            bb[i] = 0
            residual = np.dot(X[:, i], y - np.dot(X, bb))
            bb[i] = np.sign(residual) * np.fmax(np.abs(residual) - alpha, 0) \
                / np.dot(X[:, i], X[:, i])
    return bb


def l1_path(X, y, alphas, max_iter=MAX_ITER, verbose=False):
    """Compute the lasso path using the sequential strong rule.

    The strategy is described in "Strong rules for discarding predictors
    in lasso-type problems" (Tibshirani et al., 2012).

    alphas must be a decreasing sequence of regularization parameters.

    WARNING: does not compute the intercept.
    """
    beta = np.zeros((len(alphas), X.shape[1]), dtype=np.float64)
    alphas_scaled = np.array(alphas) * X.shape[0]
    active_set = np.arange(X.shape[1]).tolist()
    for k, a in enumerate(alphas_scaled):
        if k > 0:
            # .. strong rule screening using the previous solution ..
            tmp = np.dot(X.T, y - np.dot(X, beta[k - 1]))
            tmp = np.abs(tmp)
            strong_active_set = tmp >= 2 * alphas_scaled[k] - alphas_scaled[k - 1]
            strong_active_set = np.where(strong_active_set)[0]
        else:
            strong_active_set = np.arange(X.shape[1])
        if verbose:
            print('Strong active set', strong_active_set)
            print('Active set', active_set)
        # .. solve for the current active set ..
        beta[k] = shrinkage(X, y, a, beta[k], active_set, max_iter)
        # .. check the KKT conditions on the strong active set ..
        kkt_violations = False
        for i in strong_active_set:
            tmp = np.dot(X[:, i], y - np.dot(X, beta[k]))
            if beta[k, i] != 0 and not np.allclose(tmp, np.sign(beta[k, i]) * alphas_scaled[k]):
                if i not in active_set:
                    active_set.append(i)
                kkt_violations = True
            if beta[k, i] == 0 and abs(tmp) >= np.abs(alphas_scaled[k]):
                if i not in active_set:
                    active_set.append(i)
                kkt_violations = True
        if not kkt_violations:
            # KKT holds for every variable in the strong active set, we're done
            active_set = np.where(beta[k] != 0)[0].tolist()
            if verbose:
                print('no KKT violation on the strong active set')
        else:
            # .. recompute with the enlarged active set ..
            if verbose:
                print('KKT violated on the strong active set')
            beta[k] = shrinkage(X, y, a, beta[k], active_set, max_iter)
            # .. check the KKT conditions on all predictors ..
            kkt_violations = False
            for i in range(X.shape[1]):
                tmp = np.dot(X[:, i], y - np.dot(X, beta[k]))
                if beta[k, i] != 0 and not np.allclose(tmp, np.sign(beta[k, i]) * alphas_scaled[k]):
                    if i not in active_set:
                        active_set.append(i)
                    kkt_violations = True
                if beta[k, i] == 0 and abs(tmp) >= np.abs(alphas_scaled[k]):
                    if i not in active_set:
                        active_set.append(i)
                    kkt_violations = True
            if not kkt_violations:
                # KKT holds for every predictor, we're done
                active_set = np.where(beta[k] != 0)[0].tolist()
                if verbose:
                    print('no KKT violation on the full set of predictors')
            else:
                if verbose:
                    print('KKT violated on the full set of predictors')
                beta[k] = shrinkage(X, y, a, beta[k], active_set, max_iter)
    return beta


def check_kkt_lasso(xr, coef, penalty, tol=1e-3):
    """Check the KKT conditions of a lasso solution.

    xr : X' (y - X coef)
    """
    nonzero = (coef != 0)
    return np.all(np.abs(xr[nonzero] - np.sign(coef[nonzero]) * penalty) < tol) \
        and np.all(np.abs(xr[~nonzero] / penalty) <= 1)


if __name__ == '__main__':
    np.random.seed(0)
    from sklearn import datasets

    diabetes = datasets.load_diabetes()
    X = diabetes.data
    y = diabetes.target
    # X = np.random.randn(20, 100)
    # y = np.random.randn(20)
    # X = (X / np.sqrt((X ** 2).sum(0)))
    # print(l1_coordinate_descent(X, y, .001))
    alphas = np.linspace(.1, 2., 10)[::-1]
    coef = l1_path(X, y, alphas, verbose=True)
    for i in range(len(alphas)):
        Xr = np.dot(X.T, y - np.dot(X, coef[i]))
        assert check_kkt_lasso(Xr, coef[i], X.shape[0] * alphas[i])
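
    # Rough cross-check against scikit-learn (a sketch, assuming scikit-learn
    # is installed): since alpha is multiplied by n_samples inside l1_path,
    # the objective matches Lasso's 1 / (2 * n_samples) scaling, so with
    # fit_intercept=False the coefficients should roughly agree, up to solver
    # tolerance.
    from sklearn.linear_model import Lasso
    for i, a in enumerate(alphas):
        ref = Lasso(alpha=a, fit_intercept=False).fit(X, y).coef_
        print('alpha=%5.2f   max |coef - sklearn| = %.2e'
              % (a, np.abs(coef[i] - ref).max()))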