isotonic regression benchmarks
# Author : Fabian Pedregosa <[email protected]>
#
# License : BSD

import numpy as np

def isotonic_regression_new(w, y, x_min=None, x_max=None):
    """
    Solve the isotonic regression problem with complete ordering:

        min_x Sum_i{ w_i (y_i - x_i) ** 2 }

        subject to x_min = x_1 <= x_2 <= ... <= x_n = x_max

    where each w_i is strictly positive and each y_i is an arbitrary
    real number.

    Parameters
    ----------
    w : iterable of floating-point values
        Weights in the cost function (must be strictly positive).
    y : iterable of floating-point values
        Input data.

    Returns
    -------
    x : array of floating-point values
    """
    if x_min is not None or x_max is not None:
        y = np.copy(y)
        w = np.copy(w)
        C = np.dot(w, y * y) * 10  # upper bound on the cost function
        if x_min is not None:
            # pin the first value to x_min by giving it an overwhelming weight
            y[0] = x_min
            w[0] = C
        if x_max is not None:
            y[-1] = x_max
            w[-1] = C

    # Each block is stored as (sum of w_i * y_i, sum of w_i, list of indices),
    # so its mean is the first entry divided by the second. Means are compared
    # cross-multiplied to avoid divisions.
    J = [(w[i] * y[i], w[i], [i]) for i in range(len(y))]
    cur = 0

    while cur < len(J) - 1:
        v0, v1, v2 = 0, 0, np.inf
        w0, w1, w2 = 1, 1, 1
        # scan to the right until two adjacent blocks violate the ordering
        while v0 * w1 <= v1 * w0 and cur < len(J) - 1:
            v0, w0, idx0 = J[cur]
            v1, w1, idx1 = J[cur + 1]
            if v0 * w1 <= v1 * w0:
                cur += 1
        if cur == len(J) - 1:
            break

        # merge the two violating blocks
        v0, w0, idx0 = J.pop(cur)
        v1, w1, idx1 = J.pop(cur)
        J.insert(cur, (v0 + v1, w0 + w1, idx0 + idx1))
        # propagate the merge to the left while the previous block's mean
        # is still larger than the merged block's mean
        while v2 * w0 > v0 * w2 and cur > 0:
            v0, w0, idx0 = J[cur]
            v2, w2, idx2 = J[cur - 1]
            if w0 * v2 >= w2 * v0:
                J.pop(cur)
                J[cur - 1] = (v0 + v2, w0 + w2, idx0 + idx2)
                cur -= 1

    sol = np.empty(len(y))
    for v, w, idx in J:
        sol[idx] = v / w
    return sol
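
# Small usage sketch of the routine above: with unit weights on [3, 1, 2, 4]
# the only ordering violation is 3 > 1, so those two values are pooled to
# their mean and the expected fit is [2, 2, 2, 4].
assert np.allclose(isotonic_regression_new(np.ones(4), np.array([3., 1., 2., 4.])),
                   [2., 2., 2., 4.])
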
def isotonic_regression(y, weights=[]):
    """
    Solve the isotonic regression model:

        min Sum{ weights_i (x_i - y_i) ** 2 }

        subject to x_0 <= x_1 <= ... <= x_n

    Parameters
    ----------
    y : array-like, shape=(n_samples,)
        Input data.
    weights : array-like, shape=(n_samples,), optional
        Weights in the cost function (must be strictly
        positive numbers).

    Returns
    -------
    x : array
    """
    y, weights = map(np.asarray, (y, weights))
    assert y.ndim == 1
    if weights.size:
        assert weights.ndim == 1
        assert weights.size == y.size
    n_samples = len(y)
    v = y.copy()
    # lvlsets[i] holds (start, last), the index range of the level set
    # (pooled block) that currently contains sample i
    lvls = np.arange(n_samples)
    lvlsets = np.c_[lvls, lvls]
    viol = np.where(np.diff(v) < 0)[0]
    while viol.size:
        # pool the first violating pair of adjacent level sets into one block
        start = lvlsets[viol[0], 0]
        last = lvlsets[viol[0] + 1, 1]
        n = last - start + 1
        idx = slice(start, last + 1)
        if weights.size:
            v[idx] = np.dot(weights[idx], y[idx]) / np.sum(weights[idx])
        else:
            v[idx] = np.sum(v[idx]) / n
        lvlsets[idx, 0] = start
        lvlsets[idx, 1] = last
        viol = np.where(np.diff(v) < 0)[0]
    return v
def isotonic_regression_2(w, y, x_min=None, x_max=None):
    """
    Solve the isotonic regression model:

        min Sum w_i (y_i - x_i) ** 2

        subject to x_min = x_1 <= x_2 <= ... <= x_n = x_max

    where each w_i is strictly positive and each y_i is an arbitrary
    real number.

    Parameters
    ----------
    w : iterable of floating-point values
    y : iterable of floating-point values

    Returns
    -------
    x : array of floating-point values
    """
    if x_min is not None or x_max is not None:
        y = np.copy(y)
        w = np.copy(w)
        if x_min is not None:
            # pin the first value to x_min with a huge weight
            y[0] = x_min
            w[0] = 1e32
        if x_max is not None:
            y[-1] = x_max
            w[-1] = 1e32

    # J holds the index lists of the current blocks; block means are
    # recomputed from w and y at every comparison.
    J = [[i] for i in range(len(y))]
    cur = 0

    while cur < len(J) - 1:
        av0, av1, av2 = 0, 0, np.inf
        # scan to the right until two adjacent blocks violate the ordering
        while av0 <= av1 and cur < len(J) - 1:
            idx0 = J[cur]
            idx1 = J[cur + 1]
            av0 = np.dot(w[idx0], y[idx0]) / np.sum(w[idx0])
            av1 = np.dot(w[idx1], y[idx1]) / np.sum(w[idx1])
            if av0 <= av1:
                cur += 1
        if cur == len(J) - 1:
            break

        # merge the two violating blocks
        a = J.pop(cur)
        b = J.pop(cur)
        J.insert(cur, a + b)
        # propagate the merge to the left while the previous block's mean
        # is still larger than the merged block's mean
        while av2 > av0 and cur > 0:
            idx0 = J[cur]
            idx2 = J[cur - 1]
            av0 = np.dot(w[idx0], y[idx0]) / np.sum(w[idx0])
            av2 = np.dot(w[idx2], y[idx2]) / np.sum(w[idx2])
            if av2 >= av0:
                a = J.pop(cur - 1)
                b = J.pop(cur - 1)
                J.insert(cur - 1, a + b)
                cur -= 1

    sol = []
    for idx in J:
        # every sample in a block gets the block's weighted mean
        sol += [np.dot(w[idx], y[idx]) / np.sum(w[idx])] * len(idx)
    return np.asarray(sol)
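
# Quick illustrative cross-check (separate from the timing benchmark below):
# the three implementations should produce the same fit on a small weighted
# example. Note the argument order: isotonic_regression takes (y, weights),
# while the other two take (w, y). The _y_demo / _w_demo arrays are just
# throwaway test inputs.
_y_demo = np.array([3., 1., 2., 4.])
_w_demo = np.array([1., 3., 1., 1.])
assert np.allclose(isotonic_regression(_y_demo, _w_demo),
                   isotonic_regression_2(_w_demo, _y_demo))
assert np.allclose(isotonic_regression(_y_demo, _w_demo),
                   isotonic_regression_new(_w_demo, _y_demo))
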
if __name__ == '__main__':
    from time import time

    np.random.seed(0)
    t1, t2, t3 = [], [], []
    for i in range(1, 11):
        dat = np.arange(i * 10000).astype(float)
        dat += 2 * np.random.randn(i * 10000)  # add noise
        weights = .5 + np.abs(np.random.randn(i * 10000))

        start = time()
        dat_hat = isotonic_regression(dat, weights)
        t1.append(time() - start)

        start = time()
        dat_hat2 = isotonic_regression_2(weights, dat)
        t2.append(time() - start)
        print('Result matches (isotonic_regression_2):',
              np.allclose(dat_hat, dat_hat2))

        start = time()
        dat_hat2 = isotonic_regression_new(weights, dat)
        t3.append(time() - start)
        print('Result matches (isotonic_regression_new):',
              np.allclose(dat_hat, dat_hat2))

    import pylab as pl
    n_samples = [i * 10000 for i in range(1, 11)]
    pl.plot(n_samples, t1, color='blue', label='simon collins')
    pl.plot(n_samples, t2, color='red', label='me')
    pl.plot(n_samples, t3, color='green', label='me new')
    pl.xlabel('Number of samples')
    pl.ylabel('Time (seconds)')
    pl.legend()
    pl.savefig('isotonic_compare.png')
    pl.show()