Skip to content

Instantly share code, notes, and snippets.

@fabianp
Last active February 1, 2024 10:02

Revisions

  1. fabianp revised this gist Mar 19, 2015. 1 changed file with 2 additions and 1 deletion.
    3 changes: 2 additions & 1 deletion ranking.py
    Original file line number Diff line number Diff line change
    @@ -104,7 +104,8 @@ def predict(self, X):
    Predict an ordering on X. For a list of n samples, this method
    returns a list from 0 to n-1 with the relative order of the rows of X.
    The item is given such that items ranked on top have are
    predicted a higher ordering.
    predicted a higher ordering (i.e. 0 means is the last item
    and n_samples would be the item ranked on top).
    Parameters
    ----------
  2. fabianp revised this gist Mar 19, 2015. 1 changed file with 2 additions and 0 deletions.
    2 changes: 2 additions & 0 deletions ranking.py
    Original file line number Diff line number Diff line change
    @@ -103,6 +103,8 @@ def predict(self, X):
    """
    Predict an ordering on X. For a list of n samples, this method
    returns a list from 0 to n-1 with the relative order of the rows of X.
    The item is given such that items ranked on top have are
    predicted a higher ordering.
    Parameters
    ----------
  3. fabianp revised this gist Mar 11, 2015. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions ranking.py
    Original file line number Diff line number Diff line change
    @@ -96,6 +96,9 @@ def fit(self, X, y):
    super(RankSVM, self).fit(X_trans, y_trans)
    return self

    def decision_function(self, X):
    return np.dot(X, self.coef_.ravel())

    def predict(self, X):
    """
    Predict an ordering on X. For a list of n samples, this method
  4. fabianp revised this gist Mar 9, 2015. 1 changed file with 8 additions and 2 deletions.
    10 changes: 8 additions & 2 deletions ranking.py
    Original file line number Diff line number Diff line change
    @@ -1,8 +1,14 @@
    """
    Implementation of pairwise ranking using scikit-learn LinearSVC
    Reference: "Large Margin Rank Boundaries for Ordinal Regression", R. Herbrich,
    T. Graepel, K. Obermayer.
    Reference:
    "Large Margin Rank Boundaries for Ordinal Regression", R. Herbrich,
    T. Graepel, K. Obermayer 1999
    "Learning to rank from medical imaging data." Pedregosa, Fabian, et al.,
    Machine Learning in Medical Imaging 2012.
    Authors: Fabian Pedregosa <fabian@fseoane.net>
    Alexandre Gramfort <alexandre.gramfort@inria.fr>
  5. fabianp revised this gist Mar 8, 2015. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion ranking.py
    Original file line number Diff line number Diff line change
    @@ -106,7 +106,7 @@ def predict(self, X):
    the rows in X.
    """
    if hasattr(self, 'coef_'):
    return np.argsort(np.dot(X, self.coef_.T))
    return np.argsort(np.dot(X, self.coef_.ravel()))
    else:
    raise ValueError("Must call fit() prior to predict()")

  6. fabianp revised this gist Mar 8, 2015. 1 changed file with 40 additions and 163 deletions.
    203 changes: 40 additions & 163 deletions ranking.py
    Original file line number Diff line number Diff line change
    @@ -13,7 +13,6 @@

    import itertools
    import numpy as np
    from scipy import linalg

    from sklearn import svm, linear_model, cross_validation

    @@ -44,51 +43,35 @@ def transform_pairwise(X, y):
    Data as pairs
    y_trans : array, shape (k,)
    Output class labels, where classes have values {-1, +1}
    diff: array
    target difference for each considered samples
    """
    X_new = []
    y_new = []
    diff = []
    y = np.asarray(y)
    if y.ndim == 1:
    y = np.c_[y, np.ones(y.shape[0])]
    comb = itertools.combinations(range(X.shape[0]), 2)
    k = 0
    for (i, j) in comb:
    #if np.abs(y[i, 0] - y[j, 0]) <= 1. or y[i, 1] != y[j, 1]:
    for k, (i, j) in enumerate(comb):
    if y[i, 0] == y[j, 0] or y[i, 1] != y[j, 1]:
    # skip if same target or different group
    continue
    X_new.append(X[i] - X[j])
    diff.append(y[i, 0] - y[j, 0])
    y_new.append(np.sign(diff[-1]))
    y_new.append(np.sign(y[i, 0] - y[j, 0]))
    # output balanced classes
    if y_new[-1] != (-1) ** k:
    y_new[-1] = - y_new[-1]
    X_new[-1] = - X_new[-1]
    diff[-1] = - diff[-1]
    k += 1
    return np.asarray(X_new), np.asarray(y_new).ravel(), np.array(diff).ravel()
    return np.asarray(X_new), np.asarray(y_new).ravel()


    class RankSVM(svm.SVC):
    class RankSVM(svm.LinearSVC):
    """Performs pairwise ranking with an underlying LinearSVC model
    Input should be a n-class ranking problem, this object will convert it
    into a two-class classification problem, a setting known as
    `pairwise ranking`.
    See object :ref:`svm.SVC` for a full description of parameters.
    See object :ref:`svm.LinearSVC` for a full description of parameters.
    """
    def __init__(self, weight_cb=None, C=1.0, degree=3,
    shrinking=True, probability=False,
    tol=1e-3, cache_size=200):

    self.weight_cb = weight_cb
    super(RankSVM, self).__init__(kernel='linear', degree=degree,
    tol=tol, C=C, shrinking=shrinking, probability=probability,
    cache_size=cache_size)

    def fit(self, X, y):
    """
    @@ -98,21 +81,13 @@ def fit(self, X, y):
    ----------
    X : array, shape (n_samples, n_features)
    y : array, shape (n_samples,) or (n_samples, 2)
    sample_weights : boolean
    wether to consider sample weights in the ranking problem
    (= weighted loss function)
    Returns
    -------
    self
    """
    X_trans, y_trans, diff = transform_pairwise(X, y)
    if self.weight_cb is not None:
    assert hasattr(self.weight_cb, '__call__')
    diff = self.weight_cb(diff)
    super(RankSVM, self).fit(X_trans, y_trans, sample_weight=diff)
    else:
    super(RankSVM, self).fit(X_trans, y_trans)
    X_trans, y_trans = transform_pairwise(X, y)
    super(RankSVM, self).fit(X_trans, y_trans)
    return self

    def predict(self, X):
    @@ -131,147 +106,49 @@ def predict(self, X):
    the rows in X.
    """
    if hasattr(self, 'coef_'):
    np.argsort(np.dot(X, self.coef_.T))
    return np.argsort(np.dot(X, self.coef_.T))
    else:
    raise ValueError("Must call fit() prior to predict()")

    def score(self, X, y):
    """
    Because we transformed into a balanced pairwise problem, chance level is at 0.5
    Because we transformed into a pairwise problem, chance level is at 0.5
    """
    X_trans, y_trans, diff = transform_pairwise(X, y)
    X_trans, y_trans = transform_pairwise(X, y)
    return np.mean(super(RankSVM, self).predict(X_trans) == y_trans)

    class RankLogistic(linear_model.LogisticRegression):

    def fit(self, X, y, sample_weight=False):
    """
    Fit a pairwise ranking model.
    Parameters
    ----------
    X : array, shape (n_samples, n_features)
    y : array, shape (n_samples,) or (n_samples, 2)
    sample_weights : boolean
    wether to consider sample weights in the ranking problem
    (= weighted loss function)
    Returns
    -------
    self
    """
    X_trans, y_trans, diff = transform_pairwise(X, y)
    if sample_weight:
    super(RankLogistic, self).fit(X_trans, y_trans, sample_weight=diff)
    else:
    super(RankLogistic, self).fit(X_trans, y_trans)
    return self

    def predict(self, X):
    """
    Predict an ordering on X. For a list of n samples, this method
    returns a list from 0 to n-1 with the relative order of the rows of X.
    Parameters
    ----------
    X : array, shape (n_samples, n_features)
    Returns
    -------
    ord : array, shape (n_samples,)
    Returns a list of integers representing the relative order of
    the rows in X.
    """
    if hasattr(self, 'coef_'):
    np.argsort(np.dot(X, self.coef_.T))
    else:
    raise ValueError("Must call fit() prior to predict()")

    def score(self, X, y):
    """
    Because we transformed into a balanced pairwise problem, chance level is at 0.5
    """
    X_trans, y_trans, diff = transform_pairwise(X, y)
    return np.mean(super(RankLogistic, self).predict(X_trans) == y_trans)


    def linear_rank(X, y, alphas):
    """
    Fit athe "value-regularized" linear method described in the paper
    "On the consistency of Ranking Algorithms", Duchi et al.
    Parameters
    ----------
    alpha: l2-term regularization
    """

    alphas = np.array(alphas)
    U, s, Vt = linalg.svd(X, full_matrices=False)
    Xp, yp, d = transform_pairwise(X, y)
    #d = np.sign(d) * ((100 * d) ** 2)
    #omega = 1. / ( 1e3 * X.shape[0] ** 2)
    #omega = 1e-4
    v = np.dot(Vt, np.dot(d, Xp))
    return np.dot(v / ((s ** 2) + alphas[:, np.newaxis]), Vt)


    def test_linear_rank():
    X = np.random.randn(5, 5)
    y = np.random.randn(5)
    alphas = [.1, 1., 2.]
    W = linear_rank(X, y, alphas)
    Xp, yp, d = transform_pairwise(X, y)

    # check KKT conditions
    for i in range(3):
    right = np.dot(np.dot(X.T, X) + alphas[i] * np.eye(5), W[i])
    left = np.dot(Xp.T, d)
    assert np.allclose(right, left)
    print 'TESTS OK'



    class LinearRankCV(linear_model.base.LinearModel):
    def __init__(self, alphas):
    self.alphas = alphas
    self.fit_intercept = True

    def fit(self, X, y):
    X, y, X_mean, y_mean, X_std =\
    self._center_data(X, y, self.fit_intercept,
    False, True)
    cv = cross_validation.KFold(X.shape[0], 5)
    scores = np.zeros(len(self.alphas))
    for train, test in cv:
    Xp, yp, dp = transform_pairwise(X[train], y[train])
    W = linear_rank(X[train], y[train], self.alphas)
    Xt, yt, dt = transform_pairwise(X[test], y[test])
    if not yt.size:
    continue

    train_score = dp * np.dot(Xp, W.T).T
    assert train_score.shape == (len(self.alphas), Xp.shape[0])
    #print(np.mean(dp * np.dot(Xp, W.T).T > 0, 1))

    scores += np.mean(dt * np.dot(Xt, W.T).T > 0, 1)

    self.best_alpha = self.alphas[np.argmax(scores)]
    self.coef_ = linear_rank(X, y, [self.best_alpha])[0] # learn on whole dataset
    self._set_intercept(X_mean, y_mean, X_std)
    return self

    def score(self, X, y):
    Xt, yt, dt = transform_pairwise(X, y)
    return np.mean(np.sign(np.dot(Xt, self.coef_.T)) == yt)

    def transform(self, X):
    return np.dot(X, self.coef_)[:, np.newaxis]



    if __name__ == '__main__':
    # as showcase, we will create some non-linear data
    # and print the performance of ranking vs linear regression

    pass
    np.random.seed(1)
    n_samples, n_features = 300, 5
    true_coef = np.random.randn(n_features)
    X = np.random.randn(n_samples, n_features)
    noise = np.random.randn(n_samples) / np.linalg.norm(true_coef)
    y = np.dot(X, true_coef)
    y = np.arctan(y) # add non-linearities
    y += .1 * noise # add noise
    Y = np.c_[y, np.mod(np.arange(n_samples), 5)] # add query fake id
    cv = cross_validation.KFold(n_samples, 5)
    train, test = iter(cv).next()

    # make a simple plot out of it
    import pylab as pl
    pl.scatter(np.dot(X, true_coef), y)
    pl.title('Data to be learned')
    pl.xlabel('<X, coef>')
    pl.ylabel('y')
    pl.show()

    # print the performance of ranking
    rank_svm = RankSVM().fit(X[train], Y[train])
    print 'Performance of ranking ', rank_svm.score(X[test], Y[test])

    # and that of linear regression
    ridge = linear_model.RidgeCV(fit_intercept=True)
    ridge.fit(X[train], y[train])
    X_test_trans, y_test_trans = transform_pairwise(X[test], y[test])
    score = np.mean(np.sign(np.dot(X_test_trans, ridge.coef_)) == y_test_trans)
    print 'Performance of linear regression ', score
  7. fabianp revised this gist Mar 7, 2015. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions ranking.py
    Original file line number Diff line number Diff line change
    @@ -6,6 +6,9 @@
    Authors: Fabian Pedregosa <fabian@fseoane.net>
    Alexandre Gramfort <alexandre.gramfort@inria.fr>
    See also https://github.com/fabianp/pysofia for a more efficient implementation
    of RankSVM using stochastic gradient descent methdos.
    """

    import itertools
  8. Fabian Pedregosa revised this gist May 24, 2012. 1 changed file with 11 additions and 40 deletions.
    51 changes: 11 additions & 40 deletions ranking.py
    Original file line number Diff line number Diff line change
    @@ -78,15 +78,16 @@ class RankSVM(svm.SVC):
    See object :ref:`svm.SVC` for a full description of parameters.
    """
    def __init__(self, C=1.0, degree=3,
    def __init__(self, weight_cb=None, C=1.0, degree=3,
    shrinking=True, probability=False,
    tol=1e-3, cache_size=200, scale_C=True):
    tol=1e-3, cache_size=200):

    self.weight_cb = weight_cb
    super(RankSVM, self).__init__(kernel='linear', degree=degree,
    tol=tol, C=C, shrinking=shrinking, probability=probability,
    cache_size=cache_size, scale_C=scale_C)
    cache_size=cache_size)

    def fit(self, X, y, sample_weight=True):
    def fit(self, X, y):
    """
    Fit a pairwise ranking model.
    @@ -103,8 +104,10 @@ def fit(self, X, y, sample_weight=True):
    self
    """
    X_trans, y_trans, diff = transform_pairwise(X, y)
    if sample_weight:
    super(RankSVM, self).fit(X_trans, y_trans, sample_weight=20 * np.abs(diff))
    if self.weight_cb is not None:
    assert hasattr(self.weight_cb, '__call__')
    diff = self.weight_cb(diff)
    super(RankSVM, self).fit(X_trans, y_trans, sample_weight=diff)
    else:
    super(RankSVM, self).fit(X_trans, y_trans)
    return self
    @@ -191,6 +194,7 @@ def score(self, X, y):

    def linear_rank(X, y, alphas):
    """
    Fit athe "value-regularized" linear method described in the paper
    "On the consistency of Ranking Algorithms", Duchi et al.
    @@ -226,10 +230,6 @@ def test_linear_rank():


    class LinearRankCV(linear_model.base.LinearModel):
    """
    value-regularized linear method described in the paper
    "On the consistency of Ranking Algorithms", Duchi et al.
    """
    def __init__(self, alphas):
    self.alphas = alphas
    self.fit_intercept = True
    @@ -271,33 +271,4 @@ def transform(self, X):
    # as showcase, we will create some non-linear data
    # and print the performance of ranking vs linear regression

    np.random.seed(0)
    n_samples, n_features = 300, 5
    true_coef = np.random.randn(n_features)
    X = np.random.randn(n_samples, n_features)
    noise = np.random.randn(n_samples) / np.linalg.norm(true_coef)
    y = np.dot(X, true_coef)
    y = np.sqrt(y - np.min(y)) # add non-linearities
    y += .1 * noise # add noise
    Y = np.c_[y, np.mod(np.arange(n_samples), 5)] # add query fake id
    cv = cross_validation.KFold(n_samples, 5)
    train, test = iter(cv).next()

    # make a simple plot out of it
    import pylab as pl
    pl.scatter(np.dot(X, true_coef), y)
    pl.title('Data to be learned')
    pl.xlabel('<X, coef>')
    pl.ylabel('y')
    pl.show()

    # print the performance of ranking
    rank_svm = RankSVM().fit(X[train], Y[train])
    print 'Performance of ranking ', rank_svm.score(X[test], Y[test])

    # and that of linear regression
    ridge = linear_model.RidgeCV(fit_intercept=False)
    ridge.fit(X[train], y[train])
    X_test_trans, y_test_trans, diff = transform_pairwise(X[test], y[test])
    score = np.mean(np.sign(ridge.predict(X_test_trans)) == y_test_trans)
    print 'Performance of linear regression ', score
    pass
  9. Fabian Pedregosa revised this gist Apr 18, 2012. 1 changed file with 0 additions and 1 deletion.
    1 change: 0 additions & 1 deletion ranking.py
    Original file line number Diff line number Diff line change
    @@ -191,7 +191,6 @@ def score(self, X, y):

    def linear_rank(X, y, alphas):
    """
    Fit athe "value-regularized" linear method described in the paper
    "On the consistency of Ranking Algorithms", Duchi et al.
  10. Fabian Pedregosa revised this gist Apr 18, 2012. 1 changed file with 4 additions and 0 deletions.
    4 changes: 4 additions & 0 deletions ranking.py
    Original file line number Diff line number Diff line change
    @@ -227,6 +227,10 @@ def test_linear_rank():


    class LinearRankCV(linear_model.base.LinearModel):
    """
    value-regularized linear method described in the paper
    "On the consistency of Ranking Algorithms", Duchi et al.
    """
    def __init__(self, alphas):
    self.alphas = alphas
    self.fit_intercept = True
  11. Fabian Pedregosa revised this gist Apr 18, 2012. 1 changed file with 30 additions and 30 deletions.
    60 changes: 30 additions & 30 deletions ranking.py
    Original file line number Diff line number Diff line change
    @@ -268,33 +268,33 @@ def transform(self, X):
    # as showcase, we will create some non-linear data
    # and print the performance of ranking vs linear regression

    # np.random.seed(0)
    # n_samples, n_features = 300, 5
    # true_coef = np.random.randn(n_features)
    # X = np.random.randn(n_samples, n_features)
    # noise = np.random.randn(n_samples) / np.linalg.norm(true_coef)
    # y = np.dot(X, true_coef)
    # y = np.sqrt(y - np.min(y)) # add non-linearities
    # y += .1 * noise # add noise
    # Y = np.c_[y, np.mod(np.arange(n_samples), 5)] # add query fake id
    # cv = cross_validation.KFold(n_samples, 5)
    # train, test = iter(cv).next()
    #
    # # make a simple plot out of it
    # import pylab as pl
    # pl.scatter(np.dot(X, true_coef), y)
    # pl.title('Data to be learned')
    # pl.xlabel('<X, coef>')
    # pl.ylabel('y')
    # pl.show()
    #
    # # print the performance of ranking
    # rank_svm = RankSVM().fit(X[train], Y[train])
    # print 'Performance of ranking ', rank_svm.score(X[test], Y[test])
    #
    # # and that of linear regression
    # ridge = linear_model.RidgeCV(fit_intercept=False)
    # ridge.fit(X[train], y[train])
    # X_test_trans, y_test_trans = transform_pairwise(X[test], y[test])
    # score = np.mean(np.sign(ridge.predict(X_test_trans)) == y_test_trans)
    # print 'Performance of linear regression ', score
    np.random.seed(0)
    n_samples, n_features = 300, 5
    true_coef = np.random.randn(n_features)
    X = np.random.randn(n_samples, n_features)
    noise = np.random.randn(n_samples) / np.linalg.norm(true_coef)
    y = np.dot(X, true_coef)
    y = np.sqrt(y - np.min(y)) # add non-linearities
    y += .1 * noise # add noise
    Y = np.c_[y, np.mod(np.arange(n_samples), 5)] # add query fake id
    cv = cross_validation.KFold(n_samples, 5)
    train, test = iter(cv).next()

    # make a simple plot out of it
    import pylab as pl
    pl.scatter(np.dot(X, true_coef), y)
    pl.title('Data to be learned')
    pl.xlabel('<X, coef>')
    pl.ylabel('y')
    pl.show()

    # print the performance of ranking
    rank_svm = RankSVM().fit(X[train], Y[train])
    print 'Performance of ranking ', rank_svm.score(X[test], Y[test])

    # and that of linear regression
    ridge = linear_model.RidgeCV(fit_intercept=False)
    ridge.fit(X[train], y[train])
    X_test_trans, y_test_trans, diff = transform_pairwise(X[test], y[test])
    score = np.mean(np.sign(ridge.predict(X_test_trans)) == y_test_trans)
    print 'Performance of linear regression ', score
  12. Fabian Pedregosa revised this gist Apr 18, 2012. 1 changed file with 189 additions and 40 deletions.
    229 changes: 189 additions & 40 deletions ranking.py
    Original file line number Diff line number Diff line change
    @@ -10,6 +10,7 @@

    import itertools
    import numpy as np
    from scipy import linalg

    from sklearn import svm, linear_model, cross_validation

    @@ -40,51 +41,72 @@ def transform_pairwise(X, y):
    Data as pairs
    y_trans : array, shape (k,)
    Output class labels, where classes have values {-1, +1}
    diff: array
    target difference for each considered samples
    """
    X_new = []
    y_new = []
    diff = []
    y = np.asarray(y)
    if y.ndim == 1:
    y = np.c_[y, np.ones(y.shape[0])]
    comb = itertools.combinations(range(X.shape[0]), 2)
    for k, (i, j) in enumerate(comb):
    k = 0
    for (i, j) in comb:
    #if np.abs(y[i, 0] - y[j, 0]) <= 1. or y[i, 1] != y[j, 1]:
    if y[i, 0] == y[j, 0] or y[i, 1] != y[j, 1]:
    # skip if same target or different group
    continue
    X_new.append(X[i] - X[j])
    y_new.append(np.sign(y[i, 0] - y[j, 0]))
    diff.append(y[i, 0] - y[j, 0])
    y_new.append(np.sign(diff[-1]))
    # output balanced classes
    if y_new[-1] != (-1) ** k:
    y_new[-1] = - y_new[-1]
    X_new[-1] = - X_new[-1]
    return np.asarray(X_new), np.asarray(y_new).ravel()
    diff[-1] = - diff[-1]
    k += 1
    return np.asarray(X_new), np.asarray(y_new).ravel(), np.array(diff).ravel()


    class RankSVM(svm.LinearSVC):
    class RankSVM(svm.SVC):
    """Performs pairwise ranking with an underlying LinearSVC model
    Input should be a n-class ranking problem, this object will convert it
    into a two-class classification problem, a setting known as
    `pairwise ranking`.
    See object :ref:`svm.LinearSVC` for a full description of parameters.
    See object :ref:`svm.SVC` for a full description of parameters.
    """
    def __init__(self, C=1.0, degree=3,
    shrinking=True, probability=False,
    tol=1e-3, cache_size=200, scale_C=True):

    def fit(self, X, y):
    super(RankSVM, self).__init__(kernel='linear', degree=degree,
    tol=tol, C=C, shrinking=shrinking, probability=probability,
    cache_size=cache_size, scale_C=scale_C)

    def fit(self, X, y, sample_weight=True):
    """
    Fit a pairwise ranking model.
    Parameters
    ----------
    X : array, shape (n_samples, n_features)
    y : array, shape (n_samples,) or (n_samples, 2)
    sample_weights : boolean
    wether to consider sample weights in the ranking problem
    (= weighted loss function)
    Returns
    -------
    self
    """
    X_trans, y_trans = transform_pairwise(X, y)
    super(RankSVM, self).fit(X_trans, y_trans)
    X_trans, y_trans, diff = transform_pairwise(X, y)
    if sample_weight:
    super(RankSVM, self).fit(X_trans, y_trans, sample_weight=20 * np.abs(diff))
    else:
    super(RankSVM, self).fit(X_trans, y_trans)
    return self

    def predict(self, X):
    @@ -109,43 +131,170 @@ def predict(self, X):

    def score(self, X, y):
    """
    Because we transformed into a pairwise problem, chance level is at 0.5
    Because we transformed into a balanced pairwise problem, chance level is at 0.5
    """
    X_trans, y_trans = transform_pairwise(X, y)
    X_trans, y_trans, diff = transform_pairwise(X, y)
    return np.mean(super(RankSVM, self).predict(X_trans) == y_trans)

    class RankLogistic(linear_model.LogisticRegression):

    def fit(self, X, y, sample_weight=False):
    """
    Fit a pairwise ranking model.
    Parameters
    ----------
    X : array, shape (n_samples, n_features)
    y : array, shape (n_samples,) or (n_samples, 2)
    sample_weights : boolean
    wether to consider sample weights in the ranking problem
    (= weighted loss function)
    Returns
    -------
    self
    """
    X_trans, y_trans, diff = transform_pairwise(X, y)
    if sample_weight:
    super(RankLogistic, self).fit(X_trans, y_trans, sample_weight=diff)
    else:
    super(RankLogistic, self).fit(X_trans, y_trans)
    return self

    def predict(self, X):
    """
    Predict an ordering on X. For a list of n samples, this method
    returns a list from 0 to n-1 with the relative order of the rows of X.
    Parameters
    ----------
    X : array, shape (n_samples, n_features)
    Returns
    -------
    ord : array, shape (n_samples,)
    Returns a list of integers representing the relative order of
    the rows in X.
    """
    if hasattr(self, 'coef_'):
    np.argsort(np.dot(X, self.coef_.T))
    else:
    raise ValueError("Must call fit() prior to predict()")

    def score(self, X, y):
    """
    Because we transformed into a balanced pairwise problem, chance level is at 0.5
    """
    X_trans, y_trans, diff = transform_pairwise(X, y)
    return np.mean(super(RankLogistic, self).predict(X_trans) == y_trans)


    def linear_rank(X, y, alphas):
    """
    Fit athe "value-regularized" linear method described in the paper
    "On the consistency of Ranking Algorithms", Duchi et al.
    Parameters
    ----------
    alpha: l2-term regularization
    """

    alphas = np.array(alphas)
    U, s, Vt = linalg.svd(X, full_matrices=False)
    Xp, yp, d = transform_pairwise(X, y)
    #d = np.sign(d) * ((100 * d) ** 2)
    #omega = 1. / ( 1e3 * X.shape[0] ** 2)
    #omega = 1e-4
    v = np.dot(Vt, np.dot(d, Xp))
    return np.dot(v / ((s ** 2) + alphas[:, np.newaxis]), Vt)


    def test_linear_rank():
    X = np.random.randn(5, 5)
    y = np.random.randn(5)
    alphas = [.1, 1., 2.]
    W = linear_rank(X, y, alphas)
    Xp, yp, d = transform_pairwise(X, y)

    # check KKT conditions
    for i in range(3):
    right = np.dot(np.dot(X.T, X) + alphas[i] * np.eye(5), W[i])
    left = np.dot(Xp.T, d)
    assert np.allclose(right, left)
    print 'TESTS OK'



    class LinearRankCV(linear_model.base.LinearModel):
    def __init__(self, alphas):
    self.alphas = alphas
    self.fit_intercept = True

    def fit(self, X, y):
    X, y, X_mean, y_mean, X_std =\
    self._center_data(X, y, self.fit_intercept,
    False, True)
    cv = cross_validation.KFold(X.shape[0], 5)
    scores = np.zeros(len(self.alphas))
    for train, test in cv:
    Xp, yp, dp = transform_pairwise(X[train], y[train])
    W = linear_rank(X[train], y[train], self.alphas)
    Xt, yt, dt = transform_pairwise(X[test], y[test])
    if not yt.size:
    continue

    train_score = dp * np.dot(Xp, W.T).T
    assert train_score.shape == (len(self.alphas), Xp.shape[0])
    #print(np.mean(dp * np.dot(Xp, W.T).T > 0, 1))

    scores += np.mean(dt * np.dot(Xt, W.T).T > 0, 1)

    self.best_alpha = self.alphas[np.argmax(scores)]
    self.coef_ = linear_rank(X, y, [self.best_alpha])[0] # learn on whole dataset
    self._set_intercept(X_mean, y_mean, X_std)
    return self

    def score(self, X, y):
    Xt, yt, dt = transform_pairwise(X, y)
    return np.mean(np.sign(np.dot(Xt, self.coef_.T)) == yt)

    def transform(self, X):
    return np.dot(X, self.coef_)[:, np.newaxis]



    if __name__ == '__main__':
    # as showcase, we will create some non-linear data
    # and print the performance of ranking vs linear regression

    np.random.seed(0)
    n_samples, n_features = 300, 5
    true_coef = np.random.randn(n_features)
    X = np.random.randn(n_samples, n_features)
    noise = np.random.randn(n_samples) / np.linalg.norm(true_coef)
    y = np.dot(X, true_coef)
    y = np.sqrt(y - np.min(y)) # add non-linearities
    y += .1 * noise # add noise
    Y = np.c_[y, np.mod(np.arange(n_samples), 5)] # add query fake id
    cv = cross_validation.KFold(n_samples, 5)
    train, test = iter(cv).next()

    # make a simple plot out of it
    import pylab as pl
    pl.scatter(np.dot(X, true_coef), y)
    pl.title('Data to be learned')
    pl.xlabel('<X, coef>')
    pl.ylabel('y')
    pl.show()

    # print the performance of ranking
    rank_svm = RankSVM().fit(X[train], Y[train])
    print 'Performance of ranking ', rank_svm.score(X[test], Y[test])

    # and that of linear regression
    ridge = linear_model.RidgeCV(fit_intercept=False)
    ridge.fit(X[train], y[train])
    X_test_trans, y_test_trans = transform_pairwise(X[test], y[test])
    score = np.mean(np.sign(ridge.predict(X_test_trans)) == y_test_trans)
    print 'Performance of linear regression ', score
    # np.random.seed(0)
    # n_samples, n_features = 300, 5
    # true_coef = np.random.randn(n_features)
    # X = np.random.randn(n_samples, n_features)
    # noise = np.random.randn(n_samples) / np.linalg.norm(true_coef)
    # y = np.dot(X, true_coef)
    # y = np.sqrt(y - np.min(y)) # add non-linearities
    # y += .1 * noise # add noise
    # Y = np.c_[y, np.mod(np.arange(n_samples), 5)] # add query fake id
    # cv = cross_validation.KFold(n_samples, 5)
    # train, test = iter(cv).next()
    #
    # # make a simple plot out of it
    # import pylab as pl
    # pl.scatter(np.dot(X, true_coef), y)
    # pl.title('Data to be learned')
    # pl.xlabel('<X, coef>')
    # pl.ylabel('y')
    # pl.show()
    #
    # # print the performance of ranking
    # rank_svm = RankSVM().fit(X[train], Y[train])
    # print 'Performance of ranking ', rank_svm.score(X[test], Y[test])
    #
    # # and that of linear regression
    # ridge = linear_model.RidgeCV(fit_intercept=False)
    # ridge.fit(X[train], y[train])
    # X_test_trans, y_test_trans = transform_pairwise(X[test], y[test])
    # score = np.mean(np.sign(ridge.predict(X_test_trans)) == y_test_trans)
    # print 'Performance of linear regression ', score
  13. @agramfort agramfort revised this gist Mar 18, 2012. 1 changed file with 3 additions and 2 deletions.
    5 changes: 3 additions & 2 deletions ranking.py
    Original file line number Diff line number Diff line change
    @@ -127,6 +127,7 @@ def score(self, X, y):
    y = np.dot(X, true_coef)
    y = np.sqrt(y - np.min(y)) # add non-linearities
    y += .1 * noise # add noise
    Y = np.c_[y, np.mod(np.arange(n_samples), 5)] # add query fake id
    cv = cross_validation.KFold(n_samples, 5)
    train, test = iter(cv).next()

    @@ -139,8 +140,8 @@ def score(self, X, y):
    pl.show()

    # print the performance of ranking
    rank_svm = RankSVM().fit(X[train], y[train])
    print 'Performance of ranking ', rank_svm.score(X[test], y[test])
    rank_svm = RankSVM().fit(X[train], Y[train])
    print 'Performance of ranking ', rank_svm.score(X[test], Y[test])

    # and that of linear regression
    ridge = linear_model.RidgeCV(fit_intercept=False)
  14. @agramfort agramfort revised this gist Mar 18, 2012. 1 changed file with 63 additions and 59 deletions.
    122 changes: 63 additions & 59 deletions ranking.py
    Original file line number Diff line number Diff line change
    @@ -4,7 +4,8 @@
    Reference: "Large Margin Rank Boundaries for Ordinal Regression", R. Herbrich,
    T. Graepel, K. Obermayer.
    Author: Fabian Pedregosa <fabian@fseoane.net>
    Authors: Fabian Pedregosa <fabian@fseoane.net>
    Alexandre Gramfort <alexandre.gramfort@inria.fr>
    """

    import itertools
    @@ -13,12 +14,58 @@
    from sklearn import svm, linear_model, cross_validation


    class RankSVM(svm.LinearSVC):
    def transform_pairwise(X, y):
    """Transforms data into pairs with balanced labels for ranking
    Transforms a n-class ranking problem into a two-class classification
    problem. Subclasses implementing particular strategies for choosing
    pairs should override this method.
    In this method, all pairs are choosen, except for those that have the
    same target value. The output is an array of balanced classes, i.e.
    there are the same number of -1 as +1
    Parameters
    ----------
    X : array, shape (n_samples, n_features)
    The data
    y : array, shape (n_samples,) or (n_samples, 2)
    Target labels. If it's a 2D array, the second column represents
    the grouping of samples, i.e., samples with different groups will
    not be considered.
    Returns
    -------
    X_trans : array, shape (k, n_feaures)
    Data as pairs
    y_trans : array, shape (k,)
    Output class labels, where classes have values {-1, +1}
    """
    Performs pairwise ranking with an underlying LinearSVC model. Input should
    be a n-class ranking problem, this object will convert it into a two-class
    classification problem, a setting known as `pairwise ranking`.
    X_new = []
    y_new = []
    y = np.asarray(y)
    if y.ndim == 1:
    y = np.c_[y, np.ones(y.shape[0])]
    comb = itertools.combinations(range(X.shape[0]), 2)
    for k, (i, j) in enumerate(comb):
    if y[i, 0] == y[j, 0] or y[i, 1] != y[j, 1]:
    # skip if same target or different group
    continue
    X_new.append(X[i] - X[j])
    y_new.append(np.sign(y[i, 0] - y[j, 0]))
    # output balanced classes
    if y_new[-1] != (-1) ** k:
    y_new[-1] = - y_new[-1]
    X_new[-1] = - X_new[-1]
    return np.asarray(X_new), np.asarray(y_new).ravel()


    class RankSVM(svm.LinearSVC):
    """Performs pairwise ranking with an underlying LinearSVC model
    Input should be a n-class ranking problem, this object will convert it
    into a two-class classification problem, a setting known as
    `pairwise ranking`.
    See object :ref:`svm.LinearSVC` for a full description of parameters.
    """
    @@ -36,7 +83,7 @@ def fit(self, X, y):
    -------
    self
    """
    X_trans, y_trans = self.transform_pairwise(X, y)
    X_trans, y_trans = transform_pairwise(X, y)
    super(RankSVM, self).fit(X_trans, y_trans)
    return self

    @@ -64,65 +111,23 @@ def score(self, X, y):
    """
    Because we transformed into a pairwise problem, chance level is at 0.5
    """
    X_trans, y_trans = self.transform_pairwise(X, y)
    X_trans, y_trans = transform_pairwise(X, y)
    return np.mean(super(RankSVM, self).predict(X_trans) == y_trans)

    def transform_pairwise(self, X, y):
    """
    Transforms a n-class ranking problem into a two-class classification
    problem. Subclasses implementing particular strategies for choosing
    pairs should override this method.
    In this method, all pairs are choosen, except for those that have the
    same target value. The output is an array of balanced classes, i.e.
    there are the same number of -1 as +1
    Parameters
    ----------
    X : array, shape (n_samples, n_features)
    y : array, shape (n_samples,) or (n_samples, 2)
    it it's a 2D array, the second column represents the grouping
    of samples, i.e., samples with different groups will not be
    considered.
    Returns
    -------
    X_trans : array, shape (k, n_feaures)
    y_trans : array, shape (k,)
    Output class labels, where classes have values {-1, +1}
    """
    X_new = []
    y_new = []
    y = np.asarray(y)
    if y.ndim < 2:
    y = y[:, np.newaxis]
    if y.shape[1] < 2:
    y = np.c_[y, np.ones(y.shape[0])]
    comb = itertools.combinations(range(X.shape[0]), 2)
    for k, (i, j) in enumerate(comb):
    if y[i, 0] == y[j, 0] or y[i, 1] != y[j, 1]:
    # skip if same target or different group
    continue
    X_new.append(X[i] - X[j])
    y_new.append(np.sign(y[i, 0] - y[j, 0]))
    # output balanced classes
    if y_new[-1] != (-1) ** k:
    y_new[-1] = - y_new[-1]
    X_new[-1] = - X_new[-1]
    return np.asarray(X_new), np.asarray(y_new).ravel()


    if __name__ == '__main__':
    # as showcase, we will create some non-linear data
    # and print the performance of ranking vs linear regression

    np.random.seed(0)
    M, N = 5, 100
    true_coef, X = np.random.randn(M), np.random.randn(N, M)
    noise = np.random.randn(X.shape[0]) / np.linalg.norm(true_coef)
    n_samples, n_features = 300, 5
    true_coef = np.random.randn(n_features)
    X = np.random.randn(n_samples, n_features)
    noise = np.random.randn(n_samples) / np.linalg.norm(true_coef)
    y = np.dot(X, true_coef)
    y = np.sqrt(y - np.min(y)) + .1 * noise # add noise and non-linearities
    cv = cross_validation.KFold(X.shape[0], 5)
    y = np.sqrt(y - np.min(y)) # add non-linearities
    y += .1 * noise # add noise
    cv = cross_validation.KFold(n_samples, 5)
    train, test = iter(cv).next()

    # make a simple plot out of it
    @@ -134,13 +139,12 @@ def transform_pairwise(self, X, y):
    pl.show()

    # print the performance of ranking
    from sklearn import cross_validation
    rank_svm = RankSVM().fit(X[train], y[train])
    print 'Performance of ranking ', rank_svm.score(X[test], y[test])

    # and that of linear regression
    ridge = linear_model.Ridge(fit_intercept=False)
    ridge = linear_model.RidgeCV(fit_intercept=False)
    ridge.fit(X[train], y[train])
    X_test_trans, y_test_trans = rank_svm.transform_pairwise(X[test], y[test])
    X_test_trans, y_test_trans = transform_pairwise(X[test], y[test])
    score = np.mean(np.sign(ridge.predict(X_test_trans)) == y_test_trans)
    print 'Performance of linear regression ', score
  15. Fabian Pedregosa revised this gist Mar 18, 2012. 1 changed file with 13 additions and 5 deletions.
    18 changes: 13 additions & 5 deletions ranking.py
    Original file line number Diff line number Diff line change
    @@ -30,7 +30,7 @@ def fit(self, X, y):
    Parameters
    ----------
    X : array, shape (n_samples, n_features)
    y : array, shape (n_samples,)
    y : array, shape (n_samples,) or (n_samples, 2)
    Returns
    -------
    @@ -80,7 +80,10 @@ def transform_pairwise(self, X, y):
    Parameters
    ----------
    X : array, shape (n_samples, n_features)
    y : array, shape (n_samples,)
    y : array, shape (n_samples,) or (n_samples, 2)
    it it's a 2D array, the second column represents the grouping
    of samples, i.e., samples with different groups will not be
    considered.
    Returns
    -------
    @@ -90,13 +93,18 @@ def transform_pairwise(self, X, y):
    """
    X_new = []
    y_new = []
    y = np.asarray(y)
    if y.ndim < 2:
    y = y[:, np.newaxis]
    if y.shape[1] < 2:
    y = np.c_[y, np.ones(y.shape[0])]
    comb = itertools.combinations(range(X.shape[0]), 2)
    for k, (i, j) in enumerate(comb):
    if y[i] == y[j]:
    # skip if same target
    if y[i, 0] == y[j, 0] or y[i, 1] != y[j, 1]:
    # skip if same target or different group
    continue
    X_new.append(X[i] - X[j])
    y_new.append(np.sign(y[i] - y[j]))
    y_new.append(np.sign(y[i, 0] - y[j, 0]))
    # output balanced classes
    if y_new[-1] != (-1) ** k:
    y_new[-1] = - y_new[-1]
  16. Fabian Pedregosa revised this gist Mar 12, 2012. 1 changed file with 3 additions and 6 deletions.
    9 changes: 3 additions & 6 deletions ranking.py
    Original file line number Diff line number Diff line change
    @@ -58,7 +58,7 @@ def predict(self, X):
    if hasattr(self, 'coef_'):
    np.argsort(np.dot(X, self.coef_.T))
    else:
    raise ValueError("Model has no coefficients, must call fit() prior to predict()")
    raise ValueError("Must call fit() prior to predict()")

    def score(self, X, y):
    """
    @@ -67,8 +67,6 @@ def score(self, X, y):
    X_trans, y_trans = self.transform_pairwise(X, y)
    return np.mean(super(RankSVM, self).predict(X_trans) == y_trans)



    def transform_pairwise(self, X, y):
    """
    Transforms a n-class ranking problem into a two-class classification
    @@ -106,7 +104,6 @@ def transform_pairwise(self, X, y):
    return np.asarray(X_new), np.asarray(y_new).ravel()



    if __name__ == '__main__':
    # as showcase, we will create some non-linear data
    # and print the performance of ranking vs linear regression
    @@ -116,7 +113,7 @@ def transform_pairwise(self, X, y):
    true_coef, X = np.random.randn(M), np.random.randn(N, M)
    noise = np.random.randn(X.shape[0]) / np.linalg.norm(true_coef)
    y = np.dot(X, true_coef)
    y = np.sqrt(y - np.min(y)) + .1 * noise # add noise and non-linearities
    y = np.sqrt(y - np.min(y)) + .1 * noise # add noise and non-linearities
    cv = cross_validation.KFold(X.shape[0], 5)
    train, test = iter(cv).next()

    @@ -138,4 +135,4 @@ def transform_pairwise(self, X, y):
    ridge.fit(X[train], y[train])
    X_test_trans, y_test_trans = rank_svm.transform_pairwise(X[test], y[test])
    score = np.mean(np.sign(ridge.predict(X_test_trans)) == y_test_trans)
    print 'Performance of linear regression ', score
    print 'Performance of linear regression ', score
  17. Fabian Pedregosa revised this gist Mar 12, 2012. No changes.
  18. Fabian Pedregosa revised this gist Mar 12, 2012. 1 changed file with 38 additions and 38 deletions.
    76 changes: 38 additions & 38 deletions ranking.py
    Original file line number Diff line number Diff line change
    @@ -36,7 +36,7 @@ def fit(self, X, y):
    -------
    self
    """
    X_trans, y_trans = transform_pairwise(X, y)
    X_trans, y_trans = self.transform_pairwise(X, y)
    super(RankSVM, self).fit(X_trans, y_trans)
    return self

    @@ -64,46 +64,46 @@ def score(self, X, y):
    """
    Because we transformed into a pairwise problem, chance level is at 0.5
    """
    X_trans, y_trans = transform_pairwise(X, y)
    X_trans, y_trans = self.transform_pairwise(X, y)
    return np.mean(super(RankSVM, self).predict(X_trans) == y_trans)



    def transform_pairwise(X, y):
    """
    Transforms a n-class ranking problem into a two-class classification
    problem. Subclasses implementing particular strategies for choosing
    pairs should override this method.
    In this method, all pairs are choosen, except for those that have the
    same target value. The output is an array of balanced classes, i.e.
    there are the same number of -1 as +1
    Parameters
    ----------
    X : array, shape (n_samples, n_features)
    y : array, shape (n_samples,)
    Returns
    -------
    X_trans : array, shape (k, n_feaures)
    y_trans : array, shape (k,)
    Output class labels, where classes have values {-1, +1}
    """
    X_new = []
    y_new = []
    comb = itertools.combinations(range(X.shape[0]), 2)
    for k, (i, j) in enumerate(comb):
    if y[i] == y[j]:
    # skip if same target
    continue
    X_new.append(X[i] - X[j])
    y_new.append(np.sign(y[i] - y[j]))
    # output balanced classes
    if y_new[-1] != (-1) ** k:
    y_new[-1] = - y_new[-1]
    X_new[-1] = - X_new[-1]
    return np.asarray(X_new), np.asarray(y_new).ravel()
    def transform_pairwise(self, X, y):
    """
    Transforms a n-class ranking problem into a two-class classification
    problem. Subclasses implementing particular strategies for choosing
    pairs should override this method.
    In this method, all pairs are choosen, except for those that have the
    same target value. The output is an array of balanced classes, i.e.
    there are the same number of -1 as +1
    Parameters
    ----------
    X : array, shape (n_samples, n_features)
    y : array, shape (n_samples,)
    Returns
    -------
    X_trans : array, shape (k, n_feaures)
    y_trans : array, shape (k,)
    Output class labels, where classes have values {-1, +1}
    """
    X_new = []
    y_new = []
    comb = itertools.combinations(range(X.shape[0]), 2)
    for k, (i, j) in enumerate(comb):
    if y[i] == y[j]:
    # skip if same target
    continue
    X_new.append(X[i] - X[j])
    y_new.append(np.sign(y[i] - y[j]))
    # output balanced classes
    if y_new[-1] != (-1) ** k:
    y_new[-1] = - y_new[-1]
    X_new[-1] = - X_new[-1]
    return np.asarray(X_new), np.asarray(y_new).ravel()



    @@ -136,6 +136,6 @@ def transform_pairwise(X, y):
    # and that of linear regression
    ridge = linear_model.Ridge(fit_intercept=False)
    ridge.fit(X[train], y[train])
    X_test_trans, y_test_trans = transform_pairwise(X[test], y[test])
    X_test_trans, y_test_trans = rank_svm.transform_pairwise(X[test], y[test])
    score = np.mean(np.sign(ridge.predict(X_test_trans)) == y_test_trans)
    print 'Performance of linear regression ', score
  19. Fabian Pedregosa created this gist Mar 12, 2012.
    141 changes: 141 additions & 0 deletions ranking.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,141 @@
    """
    Implementation of pairwise ranking using scikit-learn LinearSVC
    Reference: "Large Margin Rank Boundaries for Ordinal Regression", R. Herbrich,
    T. Graepel, K. Obermayer.
    Author: Fabian Pedregosa <fabian@fseoane.net>
    """

    import itertools
    import numpy as np

    from sklearn import svm, linear_model, cross_validation


    class RankSVM(svm.LinearSVC):
    """
    Performs pairwise ranking with an underlying LinearSVC model. Input should
    be a n-class ranking problem, this object will convert it into a two-class
    classification problem, a setting known as `pairwise ranking`.
    See object :ref:`svm.LinearSVC` for a full description of parameters.
    """

    def fit(self, X, y):
    """
    Fit a pairwise ranking model.
    Parameters
    ----------
    X : array, shape (n_samples, n_features)
    y : array, shape (n_samples,)
    Returns
    -------
    self
    """
    X_trans, y_trans = transform_pairwise(X, y)
    super(RankSVM, self).fit(X_trans, y_trans)
    return self

    def predict(self, X):
    """
    Predict an ordering on X. For a list of n samples, this method
    returns a list from 0 to n-1 with the relative order of the rows of X.
    Parameters
    ----------
    X : array, shape (n_samples, n_features)
    Returns
    -------
    ord : array, shape (n_samples,)
    Returns a list of integers representing the relative order of
    the rows in X.
    """
    if hasattr(self, 'coef_'):
    np.argsort(np.dot(X, self.coef_.T))
    else:
    raise ValueError("Model has no coefficients, must call fit() prior to predict()")

    def score(self, X, y):
    """
    Because we transformed into a pairwise problem, chance level is at 0.5
    """
    X_trans, y_trans = transform_pairwise(X, y)
    return np.mean(super(RankSVM, self).predict(X_trans) == y_trans)



    def transform_pairwise(X, y):
    """
    Transforms a n-class ranking problem into a two-class classification
    problem. Subclasses implementing particular strategies for choosing
    pairs should override this method.
    In this method, all pairs are choosen, except for those that have the
    same target value. The output is an array of balanced classes, i.e.
    there are the same number of -1 as +1
    Parameters
    ----------
    X : array, shape (n_samples, n_features)
    y : array, shape (n_samples,)
    Returns
    -------
    X_trans : array, shape (k, n_feaures)
    y_trans : array, shape (k,)
    Output class labels, where classes have values {-1, +1}
    """
    X_new = []
    y_new = []
    comb = itertools.combinations(range(X.shape[0]), 2)
    for k, (i, j) in enumerate(comb):
    if y[i] == y[j]:
    # skip if same target
    continue
    X_new.append(X[i] - X[j])
    y_new.append(np.sign(y[i] - y[j]))
    # output balanced classes
    if y_new[-1] != (-1) ** k:
    y_new[-1] = - y_new[-1]
    X_new[-1] = - X_new[-1]
    return np.asarray(X_new), np.asarray(y_new).ravel()



    if __name__ == '__main__':
    # as showcase, we will create some non-linear data
    # and print the performance of ranking vs linear regression

    np.random.seed(0)
    M, N = 5, 100
    true_coef, X = np.random.randn(M), np.random.randn(N, M)
    noise = np.random.randn(X.shape[0]) / np.linalg.norm(true_coef)
    y = np.dot(X, true_coef)
    y = np.sqrt(y - np.min(y)) + .1 * noise # add noise and non-linearities
    cv = cross_validation.KFold(X.shape[0], 5)
    train, test = iter(cv).next()

    # make a simple plot out of it
    import pylab as pl
    pl.scatter(np.dot(X, true_coef), y)
    pl.title('Data to be learned')
    pl.xlabel('<X, coef>')
    pl.ylabel('y')
    pl.show()

    # print the performance of ranking
    from sklearn import cross_validation
    rank_svm = RankSVM().fit(X[train], y[train])
    print 'Performance of ranking ', rank_svm.score(X[test], y[test])

    # and that of linear regression
    ridge = linear_model.Ridge(fit_intercept=False)
    ridge.fit(X[train], y[train])
    X_test_trans, y_test_trans = transform_pairwise(X[test], y[test])
    score = np.mean(np.sign(ridge.predict(X_test_trans)) == y_test_trans)
    print 'Performance of linear regression ', score