Ridge benchmark
import numpy as np


def _solve(A, b, solver, tol):
    # Helper for ridge_regression; A is assumed symmetric positive definite.
    if solver == 'auto':
        if hasattr(A, 'todense'):
            solver = 'sparse_cg'
        else:
            solver = 'dense_cholesky'

    if solver == 'sparse_cg':
        if b.ndim < 2:
            from scipy.sparse import linalg as sp_linalg
            sol, error = sp_linalg.cg(A, b, tol=tol)
            if error:
                raise ValueError("Failed with error code %d" % error)
            return sol
        else:
            # sparse_cg cannot handle a 2-d b: solve one column at a time.
            sol = []
            for j in range(b.shape[1]):
                sol.append(_solve(A, b[:, j], solver="sparse_cg", tol=tol))
            return np.array(sol).T
    elif solver == 'dense_cholesky':
        from scipy import linalg
        if hasattr(A, 'todense'):
            A = A.todense()
        return linalg.solve(A, b, sym_pos=True, overwrite_a=True)
    else:
        raise NotImplementedError('Solver %s not implemented' % solver)
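

# A minimal usage sketch of _solve (added for illustration; _demo_solve is a
# hypothetical helper, not part of the gist or of scikit-learn): the same call
# works for a dense SPD array (Cholesky path) and its sparse counterpart
# (conjugate-gradient path), and the two answers should agree.
def _demo_solve(n=50, seed=0):
    from scipy import sparse as sp
    rng = np.random.RandomState(seed)
    M = rng.randn(n, n)
    A = np.dot(M.T, M) + n * np.eye(n)  # symmetric positive definite
    b = rng.randn(n)
    # Pass a copy to the dense path: it uses overwrite_a=True and destroys A.
    x_dense = _solve(A.copy(), b, solver='auto', tol=1e-6)
    x_sparse = _solve(sp.csr_matrix(A), b, solver='auto', tol=1e-6)
    assert np.allclose(x_dense, x_sparse, atol=1e-4)

_demo_solve()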
def old_ridge_regression(X, y, alpha, sample_weight=1.0, solver='auto', tol=1e-3):
    """
    Solve the ridge equation by the method of normal equations.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training data

    y : array-like, shape = [n_samples] or [n_samples, n_responses]
        Target values

    alpha : float
        Regularization strength.

    sample_weight : float or numpy array of shape [n_samples]
        Individual weights for each sample

    solver : {'auto', 'dense_cholesky', 'sparse_cg'}, optional
        Solver to use in the computational routines. 'dense_cholesky'
        will use the standard scipy.linalg.solve function, 'sparse_cg'
        will use a conjugate gradient solver as found in
        scipy.sparse.linalg.cg, while 'auto' will choose the most
        appropriate depending on the matrix X.

    tol : float
        Precision of the solution.

    Returns
    -------
    coef : array, shape = [n_features] or [n_responses, n_features]
        Weight vector(s).

    Notes
    -----
    This function won't compute the intercept.
    """
    n_samples, n_features = X.shape
    is_sparse = False

    if hasattr(X, 'todense'):  # lazy import of scipy.sparse
        from scipy import sparse
        is_sparse = sparse.issparse(X)

    if is_sparse:
        if n_features > n_samples or \
           isinstance(sample_weight, np.ndarray) or \
           sample_weight != 1.0:
            I = sparse.lil_matrix((n_samples, n_samples))
            I.setdiag(np.ones(n_samples) * alpha * sample_weight)
            c = _solve(X * X.T + I, y, solver, tol)
            coef = X.T * c
        else:
            I = sparse.lil_matrix((n_features, n_features))
            I.setdiag(np.ones(n_features) * alpha)
            coef = _solve(X.T * X + I, X.T * y, solver, tol)
    else:
        if n_features > n_samples or \
           isinstance(sample_weight, np.ndarray) or \
           sample_weight != 1.0:
            # kernel ridge
            # w = X.T * inv(X X^t + alpha*Id) y
            A = np.dot(X, X.T)
            A.flat[::n_samples + 1] += alpha * sample_weight
            coef = np.dot(X.T, _solve(A, y, solver, tol))
        else:
            # ridge
            # w = inv(X^t X + alpha*Id) * X.T y
            A = np.dot(X.T, X)
            A.flat[::n_features + 1] += alpha
            coef = _solve(A, np.dot(X.T, y), solver, tol)

    return coef.T
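

# A minimal sanity sketch (added for illustration; _check_old_ridge is a
# hypothetical helper, not part of the gist): on a small dense problem the
# normal-equations path above should match the closed-form ridge solution
# w = inv(X^t X + alpha*Id) * X.T y computed directly.
def _check_old_ridge(n_samples=20, n_features=5, alpha=0.1, seed=0):
    from scipy import linalg
    rng = np.random.RandomState(seed)
    X = rng.randn(n_samples, n_features)
    y = rng.randn(n_samples)
    w = old_ridge_regression(X, y, alpha)
    w_direct = linalg.solve(np.dot(X.T, X) + alpha * np.eye(n_features),
                            np.dot(X.T, y))
    assert np.allclose(w, w_direct)

_check_old_ridge()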
def new_ridge_regression(X, y, alpha, sample_weight=1.0, solver='auto', tol=1e-3):
    """
    Solve the ridge equation by the method of normal equations.

    Parameters
    ----------
    X : {array-like, sparse matrix, LinearOperator}, shape = [n_samples, n_features]
        Training data

    y : array-like, shape = [n_samples] or [n_samples, n_responses]
        Target values

    alpha : float
        Regularization strength.

    sample_weight : float or numpy array of shape [n_samples]
        Individual weights for each sample

    solver : {'auto', 'dense_cholesky', 'sparse_cg'}, optional
        Solver to use in the computational routines. 'dense_cholesky'
        will use the standard scipy.linalg.solve function, 'sparse_cg'
        will use a conjugate gradient solver as found in
        scipy.sparse.linalg.cg, while 'auto' will choose the most
        appropriate depending on the matrix X.

    tol : float
        Precision of the solution.

    Returns
    -------
    coef : array, shape = [n_features] or [n_responses, n_features]
        Weight vector(s).

    Notes
    -----
    This function won't compute the intercept.
    """
    n_samples, n_features = X.shape

    if solver == 'auto':
        # Cholesky if X is a dense array, conjugate gradient
        # in any other case.
        if hasattr(X, '__array__'):
            solver = 'dense_cholesky'
        else:
            solver = 'sparse_cg'

    if solver == 'sparse_cg':
        # conjugate gradient on the normal equations, one target at a time
        from scipy.sparse import linalg as sp_linalg
        X1 = sp_linalg.aslinearoperator(X)
        if y.ndim == 1:
            y1 = np.reshape(y, (-1, 1))
        else:
            y1 = y
        coefs = np.empty((y1.shape[1], n_features))
        for i in range(y1.shape[1]):
            y_column = y1[:, i]
            if n_features > n_samples:
                # kernel ridge
                # w = X.T * inv(X X^t + alpha*Id) y
                def mv(x):
                    return X1.matvec(X1.rmatvec(x)) + alpha * x
                C = sp_linalg.LinearOperator(
                    (n_samples, n_samples), matvec=mv, dtype=X.dtype)
                coef, info = sp_linalg.cg(C, y_column, tol=tol)
                # map the dual coefficients back to feature space
                coefs[i] = X1.rmatvec(coef)
            else:
                # ridge
                # w = inv(X^t X + alpha*Id) * X.T y
                def mv(x):
                    return X1.rmatvec(X1.matvec(x)) + alpha * x
                y_column = X1.rmatvec(y_column)
                C = sp_linalg.LinearOperator(
                    (n_features, n_features), matvec=mv, dtype=X.dtype)
                coefs[i], info = sp_linalg.cg(C, y_column, tol=tol)
            if info != 0:
                raise ValueError("Failed with error code %d" % info)
        if y.ndim == 1:
            return np.ravel(coefs)
        return coefs
    else:
        # normal equations (Cholesky) method
        from scipy import linalg
        if hasattr(X, 'todense'):
            X = X.todense()
        if n_features > n_samples or \
           isinstance(sample_weight, np.ndarray) or \
           sample_weight != 1.0:
            # kernel ridge
            # w = X.T * inv(X X^t + alpha*Id) y
            A = np.dot(X, X.T)
            A.flat[::n_samples + 1] += alpha * sample_weight
            coef = np.dot(X.T, linalg.solve(A, y, sym_pos=True, overwrite_a=True))
        else:
            # ridge
            # w = inv(X^t X + alpha*Id) * X.T y
            A = np.dot(X.T, X)
            A.flat[::n_features + 1] += alpha
            coef = linalg.solve(A, np.dot(X.T, y), sym_pos=True, overwrite_a=True)
        return coef.T
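

# The two formulas in the comments above are algebraically equivalent by the
# push-through identity, inv(X^t X + alpha*Id) X^t == X^t inv(X X^t + alpha*Id),
# which is why the n_features > n_samples branch can run CG on the smaller
# n_samples x n_samples system. A minimal numeric check of that identity
# (_check_kernel_identity is a hypothetical helper added for illustration):
def _check_kernel_identity(n_samples=10, n_features=30, alpha=0.1, seed=0):
    from scipy import linalg
    rng = np.random.RandomState(seed)
    X = rng.randn(n_samples, n_features)
    y = rng.randn(n_samples)
    # primal form: solve the n_features x n_features system
    w_primal = linalg.solve(np.dot(X.T, X) + alpha * np.eye(n_features),
                            np.dot(X.T, y))
    # dual form: solve the n_samples x n_samples system, then map back
    w_dual = np.dot(X.T, linalg.solve(np.dot(X, X.T) + alpha * np.eye(n_samples), y))
    assert np.allclose(w_primal, w_dual)

_check_kernel_identity()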
from scipy import sparse
from time import time

n, m = (1000, 1000)

time_old = []
time_new = []
density = np.linspace(1e-3, .5, 10)

for i, d in enumerate(density):
    print('iteration %d out of %d' % (i + 1, len(density)))
    A = sparse.rand(n, m, density=d)
    b = np.random.randn(n)

    start = time()
    res_old = old_ridge_regression(A, b, .1, solver='sparse_cg')
    time_old.append(time() - start)

    start = time()
    res_new = new_ridge_regression(A, b, .1, solver='sparse_cg')
    time_new.append(time() - start)

    # sanity check
    assert np.allclose(res_old, res_new, atol=.1)

import pylab as pl
pl.plot(density, time_old, label='old code')
pl.plot(density, time_new, label='new code')
pl.ylabel('time')
pl.xlabel('density of sparse matrix')
pl.legend()
pl.title('matrix size: %s x %s' % A.shape)
pl.show()
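
# A possible extension (an assumption, not part of the original benchmark):
# time the dense Cholesky path on the same data for comparison, e.g.
#   res_chol = new_ridge_regression(A.toarray(), b, .1, solver='dense_cholesky')
# which would show at what density densifying the matrix becomes competitive.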