fabianp · December 17, 2015 02:08
diff --git a/bench.py b/bench.py
 from __future__ import print_function
 import numpy as np
 from sklearn import linear_model
 from datetime import datetime
 import pylab as pl
 import pylab


 def errorfill(x, y, yerr, color=None, alpha_fill=0.3, ax=None, label=None):
    """Plot ``y`` against ``x`` as a line with a shaded error band.

    Adapted from http://tonysyu.github.com/plotting-error-bars.html

    Parameters
    ----------
    x, y : array-like
        Coordinates of the central line.
    yerr : scalar or sequence
        A scalar, or a sequence as long as ``y``, is a symmetric error
        (the band spans ``y - yerr`` to ``y + yerr``).  Otherwise a
        sequence of length two is unpacked as explicit ``(ymin, ymax)``.
    color : optional
        Colour of the line and the band.  When None, the axes picks the
        line colour and the band reuses it.
    alpha_fill : float
        Opacity passed to ``fill_between``.
    ax : optional
        Axes to draw on; defaults to ``pl.gca()``.
    label : optional
        Legend label for the line.

    Raises
    ------
    ValueError
        If ``yerr`` has none of the accepted shapes.
    """
    if ax is None:
        ax = pl.gca()

    # Resolve the band limits before drawing anything, so an invalid
    # ``yerr`` does not leave a half-drawn plot behind.
    if np.isscalar(yerr) or len(yerr) == len(y):
        ymin = y - yerr
        ymax = y + yerr
    elif len(yerr) == 2:
        ymin, ymax = yerr
    else:
        raise ValueError(
            'yerr must be a scalar, match the length of y, or be a '
            '(ymin, ymax) pair')

    # Let the axes choose the colour instead of reaching into the private
    # ``ax._get_lines`` colour cycle; the plotted line reports what it got.
    plot_kwargs = {'label': label}
    if color is not None:
        plot_kwargs['color'] = color
    line = ax.plot(x, y, **plot_kwargs)[0]
    if color is None:
        color = line.get_color()
    ax.fill_between(x, ymax, ymin, color=color, alpha=alpha_fill)


 def bench_features():
    """Benchmark ``Ridge.fit`` time against the number of features.

    For each solver and each of ten feature counts between 100 and 1000,
    a Ridge model is fitted five times on random Gaussian data with 1000
    samples.  Every elapsed time is printed; afterwards the mean timing per
    solver is plotted with a one-standard-deviation band via ``errorfill``
    and the figure is shown.
    """
    n_samples = 1000
    n_runs = 5
    # Builtin int: the ``np.int`` alias was removed in NumPy 1.24.
    features = np.linspace(100, 1000, 10).astype(int)
    timings = {}
    # 'cholesky' is the current name of the solver formerly spelled
    # 'dense_cholesky', which scikit-learn no longer accepts.
    for solver in ('svd', 'cholesky', 'lsqr', 'sparse_cg'):
        timings[solver] = []
        print('Solver: %s' % solver)
        for n_features in features:
            for _ in range(n_runs):
                w = np.random.randn(n_features)
                X = np.random.randn(n_samples, n_features)
                y = X.dot(w) + .1 * np.random.randn(n_samples)

                clf = linear_model.Ridge(solver=solver)
                # Only the fit is timed, not the data generation.
                start = datetime.now()
                clf.fit(X, y)
                elapsed = datetime.now() - start
                timings[solver].append(elapsed.total_seconds())
                print(elapsed)

    # Track the largest mean over *all* solvers so the y-axis is not scaled
    # by whichever solver happens to be plotted last.
    y_top = 0.0
    for s, runs in timings.items():
        tmp = np.array(runs).reshape((-1, n_runs))
        means = tmp.mean(1)
        errorfill(features, means, tmp.std(1), label=s)
        y_top = max(y_top, means.max())

    pl.ylim((0, y_top))
    pl.legend(loc='upper left')
    pl.xlabel('Number of features')
    pl.ylabel('Seconds')
    pl.show()



 def bench_samples():
    """Benchmark ``Ridge.fit`` time against the number of samples.

    For each solver and each of ten sample counts between 100 and 1000,
    a Ridge model is fitted five times on random Gaussian data with a fixed
    1000 features.  Every elapsed time is printed; afterwards the mean
    timing per solver is plotted with a one-standard-deviation band via
    ``errorfill`` and the figure is shown.
    """
    # The feature count is held fixed and the sample count is swept, so the
    # x-axis really is 'Number of samples' (previously the features were
    # swept here as well, duplicating bench_features).
    n_features = 1000
    n_runs = 5
    # Builtin int: the ``np.int`` alias was removed in NumPy 1.24.
    samples = np.linspace(100, 1000, 10).astype(int)
    timings = {}
    # 'cholesky' is the current name of the solver formerly spelled
    # 'dense_cholesky', which scikit-learn no longer accepts.
    for solver in ('svd', 'cholesky', 'lsqr', 'sparse_cg'):
        timings[solver] = []
        print('Solver: %s' % solver)
        for n_samples in samples:
            for _ in range(n_runs):
                w = np.random.randn(n_features)
                X = np.random.randn(n_samples, n_features)
                y = X.dot(w) + .1 * np.random.randn(n_samples)

                clf = linear_model.Ridge(solver=solver)
                # Only the fit is timed, not the data generation.
                start = datetime.now()
                clf.fit(X, y)
                elapsed = datetime.now() - start
                timings[solver].append(elapsed.total_seconds())
                print(elapsed)

    # Track the largest mean over *all* solvers so the y-axis is not scaled
    # by whichever solver happens to be plotted last.
    y_top = 0.0
    for s, runs in timings.items():
        tmp = np.array(runs).reshape((-1, n_runs))
        means = tmp.mean(1)
        errorfill(samples, means, tmp.std(1), label=s)
        y_top = max(y_top, means.max())

    pl.ylim((0, y_top))
    pl.legend(loc='upper left')
    pl.xlabel('Number of samples')
    pl.ylabel('Seconds')
    pl.show()

 # Script entry point: run the feature benchmark, then the sample benchmark.
 bench_features()
 bench_samples()
diff --git a/bench_hilbert.py b/bench_hilbert.py
 # benchmark using the hilbert matrix

 from __future__ import print_function
 import numpy as np
 from sklearn import linear_model
 from datetime import datetime
 import pylab as pl
 from scipy import linalg


 def errorfill(x, y, yerr, color=None, alpha_fill=0.3, ax=None, label=None):
    """Draw a line for ``y`` over ``x`` together with a shaded error band.

    Adapted from http://tonysyu.github.com/plotting-error-bars.html

    Parameters
    ----------
    x, y : array-like
        Coordinates of the central line.
    yerr : scalar or sequence
        A scalar, or a sequence as long as ``y``, is a symmetric error
        (the band spans ``y - yerr`` to ``y + yerr``).  Otherwise a
        sequence of length two is unpacked as explicit ``(ymin, ymax)``.
    color : optional
        Colour of the line and the band.  When None, the axes picks the
        line colour and the band reuses it.
    alpha_fill : float
        Opacity passed to ``fill_between``.
    ax : optional
        Axes to draw on; defaults to ``pl.gca()``.
    label : optional
        Legend label for the line.

    Raises
    ------
    ValueError
        If ``yerr`` has none of the accepted shapes.
    """
    if ax is None:
        ax = pl.gca()

    # Work out the band limits first, so a bad ``yerr`` fails before
    # anything has been drawn.
    if np.isscalar(yerr) or len(yerr) == len(y):
        ymin = y - yerr
        ymax = y + yerr
    elif len(yerr) == 2:
        ymin, ymax = yerr
    else:
        raise ValueError(
            'yerr must be a scalar, match the length of y, or be a '
            '(ymin, ymax) pair')

    # The colour is read back from the plotted line rather than pulled
    # from the private ``ax._get_lines`` colour cycle.
    plot_kwargs = {'label': label}
    if color is not None:
        plot_kwargs['color'] = color
    line = ax.plot(x, y, **plot_kwargs)[0]
    if color is None:
        color = line.get_color()
    ax.fill_between(x, ymax, ymin, color=color, alpha=alpha_fill)


 def bench_features():
    """Benchmark iterative Ridge solvers on slices of a Hilbert matrix.

    For the 'lsqr' and 'sparse_cg' solvers and each of ten feature counts
    between 100 and 1000, a Ridge model with ``alpha=1e-6`` is fitted 20
    times on the leading ``1000 x n_features`` block of a Hilbert matrix
    with a noisy random target.  Every elapsed time is printed; afterwards
    the mean timing per solver is plotted with a one-standard-deviation
    band via ``errorfill`` and the figure is shown.
    """
    n_samples = 1000
    n_runs = 20
    # Builtin int: the ``np.int`` alias was removed in NumPy 1.24.
    features = np.linspace(100, 1000, 10).astype(int)
    timings = {}
    for solver in ('lsqr', 'sparse_cg'):
        timings[solver] = []
        print('Solver: %s' % solver)
        for n_features in features:
            # The design matrix depends only on n_features, so build it
            # once per size instead of once per run.
            X = linalg.hilbert(max(n_samples, n_features))[:n_samples, :n_features]
            for _ in range(n_runs):
                w = np.random.randn(n_features)
                y = X.dot(w) + .5 * np.random.randn(n_samples)

                clf = linear_model.Ridge(solver=solver, alpha=1e-6)
                # Only the fit is timed, not the data generation.
                start = datetime.now()
                clf.fit(X, y)
                elapsed = datetime.now() - start
                timings[solver].append(elapsed.total_seconds())
                print(elapsed)

    # Track the largest mean over *all* solvers so the y-axis is not scaled
    # by whichever solver happens to be plotted last.
    y_top = 0.0
    for s, runs in timings.items():
        tmp = np.array(runs).reshape((-1, n_runs))
        means = tmp.mean(1)
        errorfill(features, means, tmp.std(1), label=s)
        y_top = max(y_top, means.max())

    # ``pl`` is the only pylab alias this file imports.
    pl.ylim((0, y_top))
    pl.legend(loc='upper left')
    pl.xlabel('Number of features')
    pl.ylabel('Seconds')
    pl.show()
	from __future__ import print_function
	import numpy as np
	from sklearn import linear_model
	from datetime import datetime
	import pylab as pl
	import pylab


	def errorfill(x, y, yerr, color=None, alpha_fill=0.3, ax=None, label=None):
	    """Plot ``y`` against ``x`` as a line with a shaded error band.

	    Adapted from http://tonysyu.github.com/plotting-error-bars.html

	    Parameters
	    ----------
	    x, y : array-like
	        Coordinates of the central line.
	    yerr : scalar or sequence
	        A scalar, or a sequence as long as ``y``, is a symmetric error
	        (the band spans ``y - yerr`` to ``y + yerr``).  Otherwise a
	        sequence of length two is unpacked as explicit ``(ymin, ymax)``.
	    color : optional
	        Colour of the line and the band.  When None, the axes picks the
	        line colour and the band reuses it.
	    alpha_fill : float
	        Opacity passed to ``fill_between``.
	    ax : optional
	        Axes to draw on; defaults to ``pl.gca()``.
	    label : optional
	        Legend label for the line.

	    Raises
	    ------
	    ValueError
	        If ``yerr`` has none of the accepted shapes.
	    """
	    if ax is None:
	        ax = pl.gca()

	    # Resolve the band limits before drawing anything, so an invalid
	    # ``yerr`` does not leave a half-drawn plot behind.
	    if np.isscalar(yerr) or len(yerr) == len(y):
	        ymin = y - yerr
	        ymax = y + yerr
	    elif len(yerr) == 2:
	        ymin, ymax = yerr
	    else:
	        raise ValueError(
	            'yerr must be a scalar, match the length of y, or be a '
	            '(ymin, ymax) pair')

	    # Let the axes choose the colour instead of reaching into the private
	    # ``ax._get_lines`` colour cycle; the plotted line reports what it got.
	    plot_kwargs = {'label': label}
	    if color is not None:
	        plot_kwargs['color'] = color
	    line = ax.plot(x, y, **plot_kwargs)[0]
	    if color is None:
	        color = line.get_color()
	    ax.fill_between(x, ymax, ymin, color=color, alpha=alpha_fill)


	def bench_features():
	    """Benchmark ``Ridge.fit`` time against the number of features.

	    For each solver and each of ten feature counts between 100 and 1000,
	    a Ridge model is fitted five times on random Gaussian data with 1000
	    samples.  Every elapsed time is printed; afterwards the mean timing
	    per solver is plotted with a one-standard-deviation band via
	    ``errorfill`` and the figure is shown.
	    """
	    n_samples = 1000
	    n_runs = 5
	    # Builtin int: the ``np.int`` alias was removed in NumPy 1.24.
	    features = np.linspace(100, 1000, 10).astype(int)
	    timings = {}
	    # 'cholesky' is the current name of the solver formerly spelled
	    # 'dense_cholesky', which scikit-learn no longer accepts.
	    for solver in ('svd', 'cholesky', 'lsqr', 'sparse_cg'):
	        timings[solver] = []
	        print('Solver: %s' % solver)
	        for n_features in features:
	            for _ in range(n_runs):
	                w = np.random.randn(n_features)
	                X = np.random.randn(n_samples, n_features)
	                y = X.dot(w) + .1 * np.random.randn(n_samples)

	                clf = linear_model.Ridge(solver=solver)
	                # Only the fit is timed, not the data generation.
	                start = datetime.now()
	                clf.fit(X, y)
	                elapsed = datetime.now() - start
	                timings[solver].append(elapsed.total_seconds())
	                print(elapsed)

	    # Track the largest mean over *all* solvers so the y-axis is not
	    # scaled by whichever solver happens to be plotted last.
	    y_top = 0.0
	    for s, runs in timings.items():
	        tmp = np.array(runs).reshape((-1, n_runs))
	        means = tmp.mean(1)
	        errorfill(features, means, tmp.std(1), label=s)
	        y_top = max(y_top, means.max())

	    pl.ylim((0, y_top))
	    pl.legend(loc='upper left')
	    pl.xlabel('Number of features')
	    pl.ylabel('Seconds')
	    pl.show()



	def bench_samples():
	    """Benchmark ``Ridge.fit`` time against the number of samples.

	    For each solver and each of ten sample counts between 100 and 1000,
	    a Ridge model is fitted five times on random Gaussian data with a
	    fixed 1000 features.  Every elapsed time is printed; afterwards the
	    mean timing per solver is plotted with a one-standard-deviation band
	    via ``errorfill`` and the figure is shown.
	    """
	    # The feature count is held fixed and the sample count is swept, so
	    # the x-axis really is 'Number of samples' (previously the features
	    # were swept here as well, duplicating bench_features).
	    n_features = 1000
	    n_runs = 5
	    # Builtin int: the ``np.int`` alias was removed in NumPy 1.24.
	    samples = np.linspace(100, 1000, 10).astype(int)
	    timings = {}
	    # 'cholesky' is the current name of the solver formerly spelled
	    # 'dense_cholesky', which scikit-learn no longer accepts.
	    for solver in ('svd', 'cholesky', 'lsqr', 'sparse_cg'):
	        timings[solver] = []
	        print('Solver: %s' % solver)
	        for n_samples in samples:
	            for _ in range(n_runs):
	                w = np.random.randn(n_features)
	                X = np.random.randn(n_samples, n_features)
	                y = X.dot(w) + .1 * np.random.randn(n_samples)

	                clf = linear_model.Ridge(solver=solver)
	                # Only the fit is timed, not the data generation.
	                start = datetime.now()
	                clf.fit(X, y)
	                elapsed = datetime.now() - start
	                timings[solver].append(elapsed.total_seconds())
	                print(elapsed)

	    # Track the largest mean over *all* solvers so the y-axis is not
	    # scaled by whichever solver happens to be plotted last.
	    y_top = 0.0
	    for s, runs in timings.items():
	        tmp = np.array(runs).reshape((-1, n_runs))
	        means = tmp.mean(1)
	        errorfill(samples, means, tmp.std(1), label=s)
	        y_top = max(y_top, means.max())

	    pl.ylim((0, y_top))
	    pl.legend(loc='upper left')
	    pl.xlabel('Number of samples')
	    pl.ylabel('Seconds')
	    pl.show()

	# Script entry point: run the feature benchmark, then the sample benchmark.
	bench_features()
	bench_samples()
	# benchmark using the hilbert matrix

	from __future__ import print_function
	import numpy as np
	from sklearn import linear_model
	from datetime import datetime
	import pylab as pl
	from scipy import linalg


	def errorfill(x, y, yerr, color=None, alpha_fill=0.3, ax=None, label=None):
	    """Draw a line for ``y`` over ``x`` together with a shaded error band.

	    Adapted from http://tonysyu.github.com/plotting-error-bars.html

	    Parameters
	    ----------
	    x, y : array-like
	        Coordinates of the central line.
	    yerr : scalar or sequence
	        A scalar, or a sequence as long as ``y``, is a symmetric error
	        (the band spans ``y - yerr`` to ``y + yerr``).  Otherwise a
	        sequence of length two is unpacked as explicit ``(ymin, ymax)``.
	    color : optional
	        Colour of the line and the band.  When None, the axes picks the
	        line colour and the band reuses it.
	    alpha_fill : float
	        Opacity passed to ``fill_between``.
	    ax : optional
	        Axes to draw on; defaults to ``pl.gca()``.
	    label : optional
	        Legend label for the line.

	    Raises
	    ------
	    ValueError
	        If ``yerr`` has none of the accepted shapes.
	    """
	    if ax is None:
	        ax = pl.gca()

	    # Work out the band limits first, so a bad ``yerr`` fails before
	    # anything has been drawn.
	    if np.isscalar(yerr) or len(yerr) == len(y):
	        ymin = y - yerr
	        ymax = y + yerr
	    elif len(yerr) == 2:
	        ymin, ymax = yerr
	    else:
	        raise ValueError(
	            'yerr must be a scalar, match the length of y, or be a '
	            '(ymin, ymax) pair')

	    # The colour is read back from the plotted line rather than pulled
	    # from the private ``ax._get_lines`` colour cycle.
	    plot_kwargs = {'label': label}
	    if color is not None:
	        plot_kwargs['color'] = color
	    line = ax.plot(x, y, **plot_kwargs)[0]
	    if color is None:
	        color = line.get_color()
	    ax.fill_between(x, ymax, ymin, color=color, alpha=alpha_fill)


	def bench_features():
	    """Benchmark iterative Ridge solvers on slices of a Hilbert matrix.

	    For the 'lsqr' and 'sparse_cg' solvers and each of ten feature counts
	    between 100 and 1000, a Ridge model with ``alpha=1e-6`` is fitted 20
	    times on the leading ``1000 x n_features`` block of a Hilbert matrix
	    with a noisy random target.  Every elapsed time is printed;
	    afterwards the mean timing per solver is plotted with a
	    one-standard-deviation band via ``errorfill`` and the figure is shown.
	    """
	    n_samples = 1000
	    n_runs = 20
	    # Builtin int: the ``np.int`` alias was removed in NumPy 1.24.
	    features = np.linspace(100, 1000, 10).astype(int)
	    timings = {}
	    for solver in ('lsqr', 'sparse_cg'):
	        timings[solver] = []
	        print('Solver: %s' % solver)
	        for n_features in features:
	            # The design matrix depends only on n_features, so build it
	            # once per size instead of once per run.
	            X = linalg.hilbert(max(n_samples, n_features))[:n_samples, :n_features]
	            for _ in range(n_runs):
	                w = np.random.randn(n_features)
	                y = X.dot(w) + .5 * np.random.randn(n_samples)

	                clf = linear_model.Ridge(solver=solver, alpha=1e-6)
	                # Only the fit is timed, not the data generation.
	                start = datetime.now()
	                clf.fit(X, y)
	                elapsed = datetime.now() - start
	                timings[solver].append(elapsed.total_seconds())
	                print(elapsed)

	    # Track the largest mean over *all* solvers so the y-axis is not
	    # scaled by whichever solver happens to be plotted last.
	    y_top = 0.0
	    for s, runs in timings.items():
	        tmp = np.array(runs).reshape((-1, n_runs))
	        means = tmp.mean(1)
	        errorfill(features, means, tmp.std(1), label=s)
	        y_top = max(y_top, means.max())

	    # ``pl`` is the only pylab alias this file imports.
	    pl.ylim((0, y_top))
	    pl.legend(loc='upper left')
	    pl.xlabel('Number of features')
	    pl.ylabel('Seconds')
	    pl.show()