Sparse Proximal SAGA
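A Numba-accelerated implementation of the Sparse Proximal SAGA algorithm for L1- and L2-regularized (elastic net) logistic regression on sparse data. A usage sketch follows the listing.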
import numpy as np
from scipy import sparse
from datetime import datetime
from numba import njit
@njit
def deriv_logistic(p, b):
    """Derivative of the logistic loss log(1 + exp(-b * p)) with respect to p"""
    # .. numerically stable evaluation of the sigmoid ..
    p *= b
    if p > 0:
        phi = 1. / (1 + np.exp(-p))
    else:
        exp_t = np.exp(p)
        phi = exp_t / (1. + exp_t)
    return (phi - 1) * b
@njit
def prox_L1(x, gamma):
    """Proximal operator (soft-thresholding) for the L1 norm"""
    return max(x - gamma, 0.) - max(-x - gamma, 0.)
@njit
def _SPSAGA(x, memory_gradient, gradient_average, A_data, A_indices, A_indptr,
            b, alpha, beta, d, max_iter, n_samples, step_size, trace_x):
    sample_indices = np.arange(n_samples)
    # .. main iteration ..
    for it in range(max_iter):
        # .. keep track of coefficients for later plotting ..
        trace_x[it] = x
        np.random.shuffle(sample_indices)
        for i in sample_indices:
            p = 0.
            # .. sparse dot product a_i^T x ..
            for j in range(A_indptr[i], A_indptr[i+1]):
                j_idx = A_indices[j]
                p += x[j_idx] * A_data[j]
            grad_i = deriv_logistic(p, b[i])
            old_grad = memory_gradient[i]
            memory_gradient[i] = grad_i
            # .. update coefficients, restricted to the support of sample i ..
            for j in range(A_indptr[i], A_indptr[i+1]):
                j_idx = A_indices[j]
                delta = (grad_i - old_grad) * A_data[j]
                incr = delta + d[j_idx] * (
                    gradient_average[j_idx] + alpha * x[j_idx])
                x[j_idx] = prox_L1(x[j_idx] - step_size * incr,
                                   beta * step_size * d[j_idx])
                # .. delta already contains the A_data[j] factor ..
                gradient_average[j_idx] += delta / n_samples
    return x
def SPSAGA(A, b, alpha, beta, step_size, max_iter=100):
    """
    Solves an L1- and L2-regularized (elastic net) logistic regression
    problem of the form

        argmin_x logistic_loss(A x, b) + (alpha/2) * ||x||^2_2 + beta * ||x||_1

    where the labels b take values in {-1, 1}.
    """
    # .. make sure input has the correct type and ..
    # .. create temporary variables ..
    A = sparse.csr_matrix(A, dtype=np.float64)
    n_samples, n_features = A.shape
    x = np.zeros(n_features)
    memory_gradient = np.zeros(n_samples)
    gradient_average = np.zeros(n_features)
    # .. for tracking execution time later ..
    start = datetime.now()
    trace_x = np.zeros((max_iter, n_features))
    # .. construct the diagonal D matrix: d[j] is the inverse of the ..
    # .. fraction of samples in which feature j appears, which rescales ..
    # .. the sparse updates so they remain unbiased ..
    B = A.copy()
    B.data[:] = 1
    d = np.array(B.mean(0)).ravel()
    idx = (d != 0)
    d[idx] = 1 / d[idx]
    # .. run SPSAGA iteration ..
    _SPSAGA(
        x, memory_gradient, gradient_average, A.data, A.indices, A.indptr,
        b, alpha, beta, d, max_iter, n_samples, step_size, trace_x)
    # .. there is no way to track execution time inside Numba code, ..
    # .. but each epoch performs the same amount of work, so we assume ..
    # .. that every epoch took the same amount of time ..
    delta = (datetime.now() - start).total_seconds()
    trace_time = np.linspace(0, delta, max_iter)
    return x, trace_x, trace_time
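A minimal usage sketch, not part of the original gist: the synthetic problem, the regularization values and the 1/(3L) step-size heuristic below are all assumptions chosen to illustrate the call signature, not recommendations from the gist.

if __name__ == '__main__':
    # .. small synthetic sparse problem (illustrative values only) ..
    np.random.seed(0)
    n_samples, n_features = 500, 50
    A = sparse.random(n_samples, n_features, density=0.2, format='csr')
    w_true = np.random.randn(n_features)
    b = np.sign(A.dot(w_true) + 0.1 * np.random.randn(n_samples))  # labels in {-1, 1}
    # .. heuristic step size 1 / (3 L), with L = 0.25 * max_i ||a_i||^2 ..
    # .. a bound on the per-sample smoothness of the logistic loss ..
    L = 0.25 * np.max(A.multiply(A).sum(axis=1))
    x, trace_x, trace_time = SPSAGA(A, b, alpha=1. / n_samples, beta=1e-3,
                                    step_size=1. / (3 * L))
    print('fraction of zero coefficients:', np.mean(x == 0))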