Created September 1, 2012 09:51
isotonic regression benchmarks
# Author : Fabian Pedregosa <[email protected]>
#
# License : BSD

import numpy as np
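
# --- note (not part of the original gist) -----------------------------------
# This file benchmarks three pure-NumPy implementations of isotonic
# regression, all in the pool-adjacent-violators (PAVA) family:
#   * isotonic_regression_new : keeps running sums (w_i * y_i, w_i) per block,
#   * isotonic_regression     : level-set version, labelled 'simon collins'
#                               in the benchmark plot,
#   * isotonic_regression_2   : recomputes block means on every comparison.
# Run as a script, it times all three on noisy increasing data of 10,000 to
# 100,000 samples and saves the timing curves to 'isotonic_compare.png'.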


def isotonic_regression_new(w, y, x_min=None, x_max=None):
    """
    Solve the isotonic regression with complete ordering model:

        min_x Sum_i{ w_i (y_i - x_i) ** 2 }

        subject to x_min = x_1 <= x_2 <= ... <= x_n = x_max

    where each w_i is strictly positive and each y_i is an arbitrary
    real number.

    Parameters
    ----------
    w : array-like of floating-point values
        Strictly positive weights.
    y : array-like of floating-point values
        Input data.
    x_min : float, optional
        Value imposed on the first element of the solution.
    x_max : float, optional
        Value imposed on the last element of the solution.

    Returns
    -------
    x : array of floating-point values
    """
    if x_min is not None or x_max is not None:
        y = np.copy(y)
        w = np.copy(w)
        C = np.dot(w, y * y) * 10  # upper bound on the cost function
        if x_min is not None:
            # pin the first value by giving it an overwhelming weight
            y[0] = x_min
            w[0] = C
        if x_max is not None:
            y[-1] = x_max
            w[-1] = C

    # each block stores (sum of w_i * y_i, sum of w_i, member indices), so its
    # mean is the ratio of the first two entries and never has to be recomputed
    J = [(w[i] * y[i], w[i], [i, ]) for i in range(len(y))]
    cur = 0

    while cur < len(J) - 1:
        v0, v1, v2 = 0, 0, np.inf
        w0, w1, w2 = 1, 1, 1
        # scan forward while adjacent block means are in non-decreasing order
        while v0 * w1 <= v1 * w0 and cur < len(J) - 1:
            v0, w0, idx0 = J[cur]
            v1, w1, idx1 = J[cur + 1]
            if v0 * w1 <= v1 * w0:
                cur += 1
        if cur == len(J) - 1:
            break

        # merge the two violating groups
        v0, w0, idx0 = J.pop(cur)
        v1, w1, idx1 = J.pop(cur)
        J.insert(cur, (v0 + v1, w0 + w1, idx0 + idx1))

        # merge backwards while the previous block's mean exceeds the new one
        while v2 * w0 > v0 * w2 and cur > 0:
            v0, w0, idx0 = J[cur]
            v2, w2, idx2 = J[cur - 1]
            if w0 * v2 >= w2 * v0:
                J.pop(cur)
                J[cur - 1] = (v0 + v2, w0 + w2, idx0 + idx2)
                cur -= 1

    sol = np.empty(len(y))
    for v, w, idx in J:
        sol[idx] = v / w
    return sol
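

# --- illustrative check (not part of the original gist) ---------------------
# A minimal sketch of how isotonic_regression_new is called, assuming equal
# weights. On this toy sequence the exact PAVA solution pools the first four
# values to their mean, so the fit should be close to [2.5, 2.5, 2.5, 2.5, 5.]
# and, in any case, non-decreasing.
def _demo_isotonic_regression_new():
    y_demo = np.array([4., 3., 1., 2., 5.])
    w_demo = np.ones_like(y_demo)          # equal weights
    fitted = isotonic_regression_new(w_demo, y_demo)
    assert np.all(np.diff(fitted) >= 0)    # the fit must be monotone
    return fitted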


def isotonic_regression(y, weights=[]):
    """
    Solve the isotonic regression model:

        min Sum{ weights_i (x_i - y_i) ** 2 }

        subject to x_1 <= x_2 <= ... <= x_n

    Parameters
    ----------
    y : array-like, shape=(n_samples,)
        Input data.
    weights : array-like, shape=(n_samples,), optional
        Weights in the cost function (must be strictly
        positive numbers).

    Returns
    -------
    x : array
    """
    y, weights = map(np.asarray, (y, weights))
    assert y.ndim == 1
    if weights.size:
        assert weights.ndim == 1
        assert weights.size == y.size

    n_samples = len(y)
    v = y.copy()
    lvls = np.arange(n_samples)
    # lvlsets[i] = [start, last] indices of the level set sample i belongs to
    lvlsets = np.c_[lvls, lvls]
    viol = np.where(np.diff(v) < 0)[0]
    while viol.size:
        # pool the two level sets around the first violation and replace
        # their values by the (weighted) mean of the pooled block
        start = lvlsets[viol[0], 0]
        last = lvlsets[viol[0] + 1, 1]
        n = last - start + 1
        idx = slice(start, last + 1)
        if weights.size:
            v[idx] = np.dot(weights[idx], y[idx]) / np.sum(weights[idx])
        else:
            v[idx] = np.sum(v[idx]) / n
        lvlsets[idx, 0] = start
        lvlsets[idx, 1] = last
        viol = np.where(np.diff(v) < 0)[0]
    return v
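

# --- illustrative check (not part of the original gist) ---------------------
# Sketch of the weighted case for isotonic_regression: with y = [3, 1] and
# weights = [1, 3], the single violating pair is pooled to its weighted mean,
# (1 * 3 + 3 * 1) / (1 + 3) = 1.5, so both fitted values should equal 1.5.
def _demo_isotonic_regression_weighted():
    fitted = isotonic_regression([3., 1.], weights=[1., 3.])
    assert np.allclose(fitted, [1.5, 1.5])
    return fitted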


def isotonic_regression_2(w, y, x_min=None, x_max=None):
    """
    Solve the isotonic regression model:

        min Sum w_i (y_i - x_i) ** 2

        subject to x_min = x_1 <= x_2 <= ... <= x_n = x_max

    where each w_i is strictly positive and each y_i is an arbitrary
    real number.

    Parameters
    ----------
    w : array-like of floating-point values
    y : array-like of floating-point values

    Returns
    -------
    x : array of floating-point values
    """
    if x_min is not None or x_max is not None:
        y = np.copy(y)
        w = np.copy(w)
        if x_min is not None:
            # pin the first value by giving it an overwhelming weight
            y[0] = x_min
            w[0] = 1e32
        if x_max is not None:
            y[-1] = x_max
            w[-1] = 1e32

    # each block only stores its member indices; block means are recomputed
    # from w and y every time two blocks are compared
    J = [[i, ] for i in range(len(y))]
    cur = 0
    while cur < len(J) - 1:
        av0, av1, av2 = 0, 0, np.inf
        # scan forward while adjacent block means are in non-decreasing order
        while av0 <= av1 and cur < len(J) - 1:
            idx0 = J[cur]
            idx1 = J[cur + 1]
            av0 = np.dot(w[idx0], y[idx0]) / np.sum(w[idx0])
            av1 = np.dot(w[idx1], y[idx1]) / np.sum(w[idx1])
            cur += 1 if av0 <= av1 else 0
        if cur == len(J) - 1:
            break

        # merge the two violating groups
        a = J.pop(cur)
        b = J.pop(cur)
        J.insert(cur, a + b)

        # merge backwards while the previous block's mean exceeds the new one
        while av2 > av0 and cur > 0:
            idx0 = J[cur]
            idx2 = J[cur - 1]
            av0 = np.dot(w[idx0], y[idx0]) / np.sum(w[idx0])
            av2 = np.dot(w[idx2], y[idx2]) / np.sum(w[idx2])
            if av2 >= av0:
                a = J.pop(cur - 1)
                b = J.pop(cur - 1)
                J.insert(cur - 1, a + b)
                cur -= 1

    sol = []
    for idx in J:
        # every sample in a block receives the block's weighted mean
        sol += [np.dot(w[idx], y[idx]) / np.sum(w[idx])] * len(idx)
    return np.asarray(sol)
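

# --- note on the two PAVA variants (not part of the original gist) ----------
# isotonic_regression_2 recomputes each block's weighted mean with np.dot and
# np.sum on every comparison, whereas isotonic_regression_new carries the
# running sums (sum w_i * y_i, sum w_i) inside each block and only compares
# ratios. A quick way to check that they agree on random input:
def _demo_variants_agree(n=100, seed=0):
    rng = np.random.RandomState(seed)
    y_demo = np.cumsum(rng.randn(n))     # wiggly, non-monotone data
    w_demo = .5 + np.abs(rng.randn(n))   # strictly positive weights
    a = isotonic_regression_2(w_demo, y_demo)
    b = isotonic_regression_new(w_demo, y_demo)
    return np.allclose(a, b)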


if __name__ == '__main__':
    from time import time

    np.random.seed(0)
    t1, t2, t3 = [], [], []
    for i in range(1, 11):
        n = i * 10000
        dat = np.arange(n, dtype=float)
        dat += 2 * np.random.randn(n)  # add noise
        weights = .5 + np.abs(np.random.randn(n))

        start = time()
        dat_hat = isotonic_regression(dat, weights)
        t1.append(time() - start)

        start = time()
        dat_hat2 = isotonic_regression_2(weights, dat)
        t2.append(time() - start)
        print('Result matches: ', np.allclose(dat_hat, dat_hat2))

        start = time()
        dat_hat2 = isotonic_regression_new(weights, dat)
        t3.append(time() - start)
        print('Result matches: ', np.allclose(dat_hat, dat_hat2))

    import matplotlib.pyplot as pl
    n_samples = [i * 10000 for i in range(1, 11)]
    pl.plot(n_samples, t1, color='blue', label='simon collins')
    pl.plot(n_samples, t2, color='red', label='me')
    pl.plot(n_samples, t3, color='green', label='me new')
    pl.xlabel('Number of samples')
    pl.ylabel('Time')
    pl.legend()
    pl.savefig('isotonic_compare.png')
    pl.show()