Skip to content

Instantly share code, notes, and snippets.

@fabianp
Last active December 17, 2015 02:08
Show Gist options
  • Save fabianp/5533111 to your computer and use it in GitHub Desktop.
Save fabianp/5533111 to your computer and use it in GitHub Desktop.
Benchmark different solvers in scikit-learn's Ridge
from __future__ import print_function
import numpy as np
from sklearn import linear_model
from datetime import datetime
import pylab as pl
import pylab
def errorfill(x, y, yerr, color=None, alpha_fill=0.3, ax=None, label=None):
    """Plot ``y`` against ``x`` as a line with a shaded error band.

    Helper adapted from http://tonysyu.github.com/plotting-error-bars.html

    Parameters
    ----------
    x : sequence
        Abscissa values.
    y : sequence
        Ordinate values; converted to a NumPy array.
    yerr : scalar, sequence of ``len(y)``, or pair ``(ymin, ymax)``
        A scalar or a sequence as long as ``y`` is a symmetric error, so the
        band spans ``y - yerr`` to ``y + yerr``. Otherwise a length-2
        sequence is taken as the explicit lower and upper band edges.
    color : optional
        Colour for both line and band. When ``None`` the axes picks the
        line colour and the band reuses it.
    alpha_fill : float
        Opacity of the shaded band.
    ax : optional
        Axes-like object to draw on; defaults to ``pl.gca()``.
    label : optional
        Legend label for the line.

    Raises
    ------
    ValueError
        If ``yerr`` is neither a scalar, nor of length ``len(y)``, nor of
        length 2.
    """
    ax = ax if ax is not None else pl.gca()
    y = np.asarray(y)

    # Work out the band edges before drawing anything, so that an invalid
    # ``yerr`` does not leave a half-finished plot behind.
    if np.isscalar(yerr) or len(yerr) == len(y):
        yerr = np.asarray(yerr)
        ymin = y - yerr
        ymax = y + yerr
    elif len(yerr) == 2:
        ymin, ymax = yerr
    else:
        raise ValueError(
            'yerr must be a scalar, have the same length as y, '
            'or be a (ymin, ymax) pair'
        )

    # Only forward ``color`` when the caller chose one; otherwise the axes
    # picks the next colour of its cycle and we read it back from the line.
    # This avoids the private, Python 2-only ``color_cycle.next()`` call.
    plot_kwargs = {'label': label}
    if color is not None:
        plot_kwargs['color'] = color
    line = ax.plot(x, y, **plot_kwargs)[0]
    if color is None:
        color = line.get_color()

    ax.fill_between(x, ymax, ymin, color=color, alpha=alpha_fill)
def bench_features():
    """Benchmark ``Ridge.fit`` per solver as the number of features grows.

    For every solver and every feature count in ``100, 200, ..., 1000`` a
    random regression problem with 1000 samples is generated and fitted
    five times. Each elapsed time is printed, then the mean time per
    feature count is plotted with a +/- one standard deviation band.
    """
    n_samples = 1000
    n_runs = 5  # repetitions per (solver, n_features) pair
    # 'cholesky' is the current scikit-learn name of the former
    # 'dense_cholesky' solver.
    solvers = ('svd', 'cholesky', 'lsqr', 'sparse_cg')
    # The grid is the same for every solver, so build it once. The builtin
    # ``int`` replaces the removed ``np.int`` alias.
    features = np.linspace(100, 1000, 10).astype(int)

    timings = {}
    for solver in solvers:
        timings[solver] = []
        print('Solver: %s' % solver)
        for n_features in features:
            for _ in range(n_runs):
                w = np.random.randn(n_features)
                X = np.random.randn(n_samples, n_features)
                y = X.dot(w) + .1 * np.random.randn(n_samples)
                clf = linear_model.Ridge(solver=solver)
                # Only the fit itself is timed, not the data generation.
                start = datetime.now()
                clf.fit(X, y)
                elapsed = datetime.now() - start
                timings[solver].append(elapsed.total_seconds())
                print(elapsed)

    # Track the largest mean over all solvers so the y-axis fits every
    # curve instead of being clipped to whichever solver was plotted last.
    y_top = 0.0
    for s in solvers:
        tmp = np.array(timings[s]).reshape((-1, n_runs))
        mean_times = tmp.mean(1)
        errorfill(features, mean_times, tmp.std(1), label=s)
        y_top = max(y_top, mean_times.max())
    pl.ylim((0, y_top))
    pl.legend(loc='upper left')
    pl.xlabel('Number of features')
    pl.ylabel('Seconds')
    pl.show()
def bench_samples():
    """Benchmark ``Ridge.fit`` per solver as the number of samples grows.

    For every solver and every sample count in ``100, 200, ..., 1000`` a
    random regression problem with a fixed number of features is generated
    and fitted five times. Each elapsed time is printed, then the mean time
    per sample count is plotted with a +/- one standard deviation band.
    """
    # The previous body was a copy of ``bench_features``: it varied the
    # number of features while labelling the axis "Number of samples".
    # Here the feature count is held fixed and the sample count is swept.
    # NOTE(review): 1000 mirrors the fixed dimension used in
    # ``bench_features``; adjust if another setting is wanted.
    n_features = 1000
    n_runs = 5  # repetitions per (solver, n_samples) pair
    # 'cholesky' is the current scikit-learn name of the former
    # 'dense_cholesky' solver.
    solvers = ('svd', 'cholesky', 'lsqr', 'sparse_cg')
    # The builtin ``int`` replaces the removed ``np.int`` alias.
    samples = np.linspace(100, 1000, 10).astype(int)

    timings = {}
    for solver in solvers:
        timings[solver] = []
        print('Solver: %s' % solver)
        for n_samples in samples:
            for _ in range(n_runs):
                w = np.random.randn(n_features)
                X = np.random.randn(n_samples, n_features)
                y = X.dot(w) + .1 * np.random.randn(n_samples)
                clf = linear_model.Ridge(solver=solver)
                # Only the fit itself is timed, not the data generation.
                start = datetime.now()
                clf.fit(X, y)
                elapsed = datetime.now() - start
                timings[solver].append(elapsed.total_seconds())
                print(elapsed)

    # Track the largest mean over all solvers so the y-axis fits every
    # curve instead of being clipped to whichever solver was plotted last.
    y_top = 0.0
    for s in solvers:
        tmp = np.array(timings[s]).reshape((-1, n_runs))
        mean_times = tmp.mean(1)
        errorfill(samples, mean_times, tmp.std(1), label=s)
        y_top = max(y_top, mean_times.max())
    pl.ylim((0, y_top))
    pl.legend(loc='upper left')
    pl.xlabel('Number of samples')
    pl.ylabel('Seconds')
    pl.show()
if __name__ == '__main__':
    # Run the benchmarks only when executed as a script, so that importing
    # this module does not start long-running fits or open plot windows.
    bench_features()
    bench_samples()
# Benchmark using a Hilbert matrix as the design matrix
from __future__ import print_function
import numpy as np
from sklearn import linear_model
from datetime import datetime
import pylab as pl
from scipy import linalg
def errorfill(x, y, yerr, color=None, alpha_fill=0.3, ax=None, label=None):
    """Draw ``y`` versus ``x`` as a line surrounded by a shaded error band.

    Helper adapted from http://tonysyu.github.com/plotting-error-bars.html

    Parameters
    ----------
    x : sequence
        Abscissa values.
    y : sequence
        Ordinate values; converted to a NumPy array.
    yerr : scalar, sequence of ``len(y)``, or pair ``(ymin, ymax)``
        A scalar or a sequence as long as ``y`` is a symmetric error, so the
        band spans ``y - yerr`` to ``y + yerr``. Otherwise a length-2
        sequence is taken as the explicit lower and upper band edges.
    color : optional
        Colour for both line and band. When ``None`` the axes picks the
        line colour and the band reuses it.
    alpha_fill : float
        Opacity of the shaded band.
    ax : optional
        Axes-like object to draw on; defaults to ``pl.gca()``.
    label : optional
        Legend label for the line.

    Raises
    ------
    ValueError
        If ``yerr`` is neither a scalar, nor of length ``len(y)``, nor of
        length 2.
    """
    ax = ax if ax is not None else pl.gca()
    y = np.asarray(y)

    # Resolve the band edges first: an unsupported ``yerr`` must fail with
    # a clear error before anything has been drawn.
    if np.isscalar(yerr) or len(yerr) == len(y):
        yerr = np.asarray(yerr)
        ymin = y - yerr
        ymax = y + yerr
    elif len(yerr) == 2:
        ymin, ymax = yerr
    else:
        raise ValueError(
            'yerr must be a scalar, have the same length as y, '
            'or be a (ymin, ymax) pair'
        )

    # Pass ``color`` only when one was requested. Otherwise the axes uses
    # the next colour of its cycle, which is read back from the drawn line
    # rather than from the private, Python 2-only ``color_cycle.next()``.
    plot_kwargs = {'label': label}
    if color is not None:
        plot_kwargs['color'] = color
    line = ax.plot(x, y, **plot_kwargs)[0]
    if color is None:
        color = line.get_color()

    ax.fill_between(x, ymax, ymin, color=color, alpha=alpha_fill)
def bench_features():
    """Benchmark the iterative Ridge solvers on a Hilbert design matrix.

    For the ``'lsqr'`` and ``'sparse_cg'`` solvers and every feature count
    in ``100, 200, ..., 1000``, the first 1000 rows and ``n_features``
    columns of a Hilbert matrix are used as ``X`` and ``Ridge.fit`` (with
    ``alpha=1e-6``) is run 20 times on freshly drawn targets. Each elapsed
    time is printed, then the mean time per feature count is plotted with a
    +/- one standard deviation band.
    """
    n_samples = 1000
    n_runs = 20  # repetitions per (solver, n_features) pair
    solvers = ('lsqr', 'sparse_cg')
    # The grid is the same for every solver, so build it once. The builtin
    # ``int`` replaces the removed ``np.int`` alias.
    features = np.linspace(100, 1000, 10).astype(int)

    timings = {}
    for solver in solvers:
        timings[solver] = []
        print('Solver: %s' % solver)
        for n_features in features:
            # The Hilbert matrix is deterministic, so it is built once per
            # feature count rather than once per run.
            X = linalg.hilbert(max(n_samples, n_features))[:n_samples, :n_features]
            for _ in range(n_runs):
                w = np.random.randn(n_features)
                y = X.dot(w) + .5 * np.random.randn(n_samples)
                clf = linear_model.Ridge(solver=solver, alpha=1e-6)
                # Only the fit itself is timed, not the data generation.
                start = datetime.now()
                clf.fit(X, y)
                elapsed = datetime.now() - start
                timings[solver].append(elapsed.total_seconds())
                print(elapsed)

    # Track the largest mean over all solvers so the y-axis fits every
    # curve instead of being clipped to whichever solver was plotted last.
    y_top = 0.0
    for s in solvers:
        tmp = np.array(timings[s]).reshape((-1, n_runs))
        mean_times = tmp.mean(1)
        errorfill(features, mean_times, tmp.std(1), label=s)
        y_top = max(y_top, mean_times.max())
    # This script imports pylab only as ``pl``; the bare ``pylab`` name
    # used before was undefined here.
    pl.ylim((0, y_top))
    pl.legend(loc='upper left')
    pl.xlabel('Number of features')
    pl.ylabel('Seconds')
    pl.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment