Created September 1, 2012 09:51
isotonic regression benchmarks
# Author : Fabian Pedregosa <[email protected]>
#
# License : BSD

import numpy as np
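
# --- note (not part of the original gist) -----------------------------------
# This file benchmarks three pure-NumPy implementations of isotonic
# regression, all in the pool-adjacent-violators (PAVA) family:
#   * isotonic_regression_new : keeps running sums (w_i * y_i, w_i) per block,
#   * isotonic_regression     : level-set version, labelled 'simon collins'
#                               in the benchmark plot,
#   * isotonic_regression_2   : recomputes block means on every comparison.
# Run as a script, it times all three on noisy increasing data of 10,000 to
# 100,000 samples and saves the timing curves to 'isotonic_compare.png'.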


def isotonic_regression_new(w, y, x_min=None, x_max=None):
    """
    Solve the isotonic regression with complete ordering model:

        min_x Sum_i{ w_i (y_i - x_i) ** 2 }

        subject to x_min = x_1 <= x_2 <= ... <= x_n = x_max

    where each w_i is strictly positive and each y_i is an arbitrary
    real number.

    Parameters
    ----------
    w : array-like of floating-point values
        Strictly positive weights.
    y : array-like of floating-point values
        Input data.
    x_min : float, optional
        Value imposed on the first element of the solution.
    x_max : float, optional
        Value imposed on the last element of the solution.

    Returns
    -------
    x : array of floating-point values
    """
    if x_min is not None or x_max is not None:
        y = np.copy(y)
        w = np.copy(w)
        C = np.dot(w, y * y) * 10  # upper bound on the cost function
        if x_min is not None:
            # pin the first value by giving it an overwhelming weight
            y[0] = x_min
            w[0] = C
        if x_max is not None:
            y[-1] = x_max
            w[-1] = C

    # each block stores (sum of w_i * y_i, sum of w_i, member indices), so its
    # mean is the ratio of the first two entries and never has to be recomputed
    J = [(w[i] * y[i], w[i], [i, ]) for i in range(len(y))]
    cur = 0

    while cur < len(J) - 1:
        v0, v1, v2 = 0, 0, np.inf
        w0, w1, w2 = 1, 1, 1
        # scan forward while adjacent block means are in non-decreasing order
        while v0 * w1 <= v1 * w0 and cur < len(J) - 1:
            v0, w0, idx0 = J[cur]
            v1, w1, idx1 = J[cur + 1]
            if v0 * w1 <= v1 * w0:
                cur += 1
        if cur == len(J) - 1:
            break

        # merge the two violating groups
        v0, w0, idx0 = J.pop(cur)
        v1, w1, idx1 = J.pop(cur)
        J.insert(cur, (v0 + v1, w0 + w1, idx0 + idx1))

        # merge backwards while the previous block's mean exceeds the new one
        while v2 * w0 > v0 * w2 and cur > 0:
            v0, w0, idx0 = J[cur]
            v2, w2, idx2 = J[cur - 1]
            if w0 * v2 >= w2 * v0:
                J.pop(cur)
                J[cur - 1] = (v0 + v2, w0 + w2, idx0 + idx2)
                cur -= 1

    sol = np.empty(len(y))
    for v, w, idx in J:
        sol[idx] = v / w
    return sol
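

# --- illustrative check (not part of the original gist) ---------------------
# A minimal sketch of how isotonic_regression_new is called, assuming equal
# weights. On this toy sequence the exact PAVA solution pools the first four
# values to their mean, so the fit should be close to [2.5, 2.5, 2.5, 2.5, 5.]
# and, in any case, non-decreasing.
def _demo_isotonic_regression_new():
    y_demo = np.array([4., 3., 1., 2., 5.])
    w_demo = np.ones_like(y_demo)          # equal weights
    fitted = isotonic_regression_new(w_demo, y_demo)
    assert np.all(np.diff(fitted) >= 0)    # the fit must be monotone
    return fitted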


def isotonic_regression(y, weights=[]):
    """
    Solve the isotonic regression model:

        min Sum{ weights_i (x_i - y_i) ** 2 }

        subject to x_1 <= x_2 <= ... <= x_n

    Parameters
    ----------
    y : array-like, shape=(n_samples,)
        Input data.
    weights : array-like, shape=(n_samples,), optional
        Weights in the cost function (must be strictly
        positive numbers).

    Returns
    -------
    x : array
    """
    y, weights = map(np.asarray, (y, weights))
    assert y.ndim == 1
    if weights.size:
        assert weights.ndim == 1
        assert weights.size == y.size

    n_samples = len(y)
    v = y.copy()
    lvls = np.arange(n_samples)
    # lvlsets[i] = [start, last] indices of the level set sample i belongs to
    lvlsets = np.c_[lvls, lvls]
    viol = np.where(np.diff(v) < 0)[0]
    while viol.size:
        # pool the two level sets around the first violation and replace
        # their values by the (weighted) mean of the pooled block
        start = lvlsets[viol[0], 0]
        last = lvlsets[viol[0] + 1, 1]
        n = last - start + 1
        idx = slice(start, last + 1)
        if weights.size:
            v[idx] = np.dot(weights[idx], y[idx]) / np.sum(weights[idx])
        else:
            v[idx] = np.sum(v[idx]) / n
        lvlsets[idx, 0] = start
        lvlsets[idx, 1] = last
        viol = np.where(np.diff(v) < 0)[0]
    return v
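

# --- illustrative check (not part of the original gist) ---------------------
# Sketch of the weighted case for isotonic_regression: with y = [3, 1] and
# weights = [1, 3], the single violating pair is pooled to its weighted mean,
# (1 * 3 + 3 * 1) / (1 + 3) = 1.5, so both fitted values should equal 1.5.
def _demo_isotonic_regression_weighted():
    fitted = isotonic_regression([3., 1.], weights=[1., 3.])
    assert np.allclose(fitted, [1.5, 1.5])
    return fitted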


def isotonic_regression_2(w, y, x_min=None, x_max=None):
    """
    Solve the isotonic regression model:

        min Sum w_i (y_i - x_i) ** 2

        subject to x_min = x_1 <= x_2 <= ... <= x_n = x_max

    where each w_i is strictly positive and each y_i is an arbitrary
    real number.

    Parameters
    ----------
    w : array-like of floating-point values
    y : array-like of floating-point values

    Returns
    -------
    x : array of floating-point values
    """
    if x_min is not None or x_max is not None:
        y = np.copy(y)
        w = np.copy(w)
        if x_min is not None:
            # pin the first value by giving it an overwhelming weight
            y[0] = x_min
            w[0] = 1e32
        if x_max is not None:
            y[-1] = x_max
            w[-1] = 1e32

    # each block only stores its member indices; block means are recomputed
    # from w and y every time two blocks are compared
    J = [[i, ] for i in range(len(y))]
    cur = 0
    while cur < len(J) - 1:
        av0, av1, av2 = 0, 0, np.inf
        # scan forward while adjacent block means are in non-decreasing order
        while av0 <= av1 and cur < len(J) - 1:
            idx0 = J[cur]
            idx1 = J[cur + 1]
            av0 = np.dot(w[idx0], y[idx0]) / np.sum(w[idx0])
            av1 = np.dot(w[idx1], y[idx1]) / np.sum(w[idx1])
            cur += 1 if av0 <= av1 else 0
        if cur == len(J) - 1:
            break

        # merge the two violating groups
        a = J.pop(cur)
        b = J.pop(cur)
        J.insert(cur, a + b)

        # merge backwards while the previous block's mean exceeds the new one
        while av2 > av0 and cur > 0:
            idx0 = J[cur]
            idx2 = J[cur - 1]
            av0 = np.dot(w[idx0], y[idx0]) / np.sum(w[idx0])
            av2 = np.dot(w[idx2], y[idx2]) / np.sum(w[idx2])
            if av2 >= av0:
                a = J.pop(cur - 1)
                b = J.pop(cur - 1)
                J.insert(cur - 1, a + b)
                cur -= 1

    sol = []
    for idx in J:
        # every sample in a block receives the block's weighted mean
        sol += [np.dot(w[idx], y[idx]) / np.sum(w[idx])] * len(idx)
    return np.asarray(sol)
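

# --- note on the two PAVA variants (not part of the original gist) ----------
# isotonic_regression_2 recomputes each block's weighted mean with np.dot and
# np.sum on every comparison, whereas isotonic_regression_new carries the
# running sums (sum w_i * y_i, sum w_i) inside each block and only compares
# ratios. A quick way to check that they agree on random input:
def _demo_variants_agree(n=100, seed=0):
    rng = np.random.RandomState(seed)
    y_demo = np.cumsum(rng.randn(n))     # wiggly, non-monotone data
    w_demo = .5 + np.abs(rng.randn(n))   # strictly positive weights
    a = isotonic_regression_2(w_demo, y_demo)
    b = isotonic_regression_new(w_demo, y_demo)
    return np.allclose(a, b)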


if __name__ == '__main__':
    from time import time

    np.random.seed(0)
    t1, t2, t3 = [], [], []
    for i in range(1, 11):
        n = i * 10000
        dat = np.arange(n, dtype=float)
        dat += 2 * np.random.randn(n)  # add noise
        weights = .5 + np.abs(np.random.randn(n))

        start = time()
        dat_hat = isotonic_regression(dat, weights)
        t1.append(time() - start)

        start = time()
        dat_hat2 = isotonic_regression_2(weights, dat)
        t2.append(time() - start)
        print('Result matches: ', np.allclose(dat_hat, dat_hat2))

        start = time()
        dat_hat2 = isotonic_regression_new(weights, dat)
        t3.append(time() - start)
        print('Result matches: ', np.allclose(dat_hat, dat_hat2))

    import matplotlib.pyplot as pl
    n_samples = [i * 10000 for i in range(1, 11)]
    pl.plot(n_samples, t1, color='blue', label='simon collins')
    pl.plot(n_samples, t2, color='red', label='me')
    pl.plot(n_samples, t3, color='green', label='me new')
    pl.xlabel('Number of samples')
    pl.ylabel('Time')
    pl.legend()
    pl.savefig('isotonic_compare.png')
    pl.show()