Created
September 12, 2011 01:48
-
-
Save marcelcaraciolo/1210432 to your computer and use it in GitHub Desktop.
spearman coefficient
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def spearman_coefficient(X, Y):
    """
    Compute the Spearman rank correlation between each row of X and each
    row of Y.

    Like the Pearson coefficient, but compares the relative ranking of the
    preference values instead of the preference values themselves. Each
    row's (item, preference) pairs are sorted by preference, and only the
    items that appear in both rows take part in the comparison; ranks are
    assigned among those shared items, the least preferred item first.

    Parameters
    ----------
    X: array of shape (n_samples_1, n_features)
        Each element is an ``(item_key, preference)`` pair.

    Y: array of shape (n_samples_2, n_features)
        Each element is an ``(item_key, preference)`` pair.

    Returns
    -------
    correlations: array of shape (n_samples_1, n_samples_2)
        ``correlations[i, j]`` is the coefficient between ``X[i]`` and
        ``Y[j]``. It is 0.0 when the two rows share fewer than two items,
        because the coefficient is undefined in that case.

    Raises
    ------
    ValueError
        If X and Y do not have the same number of features.

    Notes
    -----
    The inputs are never modified; sorting is done on copies.
    Uses the classic ``1 - 6 * sum(d^2) / (n * (n^2 - 1))`` formula, so
    tied preference values are not averaged: they are ranked in whatever
    order the structured sort on the preference field leaves them.

    Examples
    --------
    >>> from scikits.crab.metrics.pairwise import spearman_coefficient
    >>> X = [[('a',2.5),('b', 3.5), ('c',3.0), ('d',3.5)],[ ('e', 2.5),
    ...      ('f', 3.0),('g', 2.5), ('h', 4.0)] ]
    >>> # correlation between rows of X
    >>> spearman_coefficient(X, X)
    array([[ 1.,  0.],
           [ 0.,  1.]])
    >>> spearman_coefficient(X, [[('a',2.5),('b', 3.5), ('c',3.0), ('k',3.5)]])
    array([[ 1.],
           [ 0.]])
    """
    pref_dtype = [('x', 'S30'), ('y', float)]
    same_input = X is Y
    if same_input:
        X = Y = np.asanyarray(X, dtype=pref_dtype)
    else:
        X = np.asanyarray(X, dtype=pref_dtype)
        Y = np.asanyarray(Y, dtype=pref_dtype)

    if X.shape[1] != Y.shape[1]:
        raise ValueError("Incompatible dimension for X and Y matrices")

    # np.sort returns a sorted copy, so an ndarray passed in by the caller
    # is left untouched (ndarray.sort would reorder it in place).
    X = np.sort(X, order='y')
    Y = X if same_input else np.sort(Y, order='y')

    # The key order of each Y row does not depend on the X row being
    # compared, so extract it once instead of once per (X row, Y row) pair.
    y_key_rows = [[key for key, _ in row] for row in Y]

    result = []
    for row_x in X:
        x_keys = [key for key, _ in row_x]
        result.append([_spearman_from_orderings(x_keys, y_keys)
                       for y_keys in y_key_rows])
    return np.asanyarray(result)


def _spearman_from_orderings(x_keys, y_keys):
    """Spearman coefficient of two key lists already sorted by preference."""
    y_members = set(y_keys)
    shared_in_x_order = [key for key in x_keys if key in y_members]
    n = len(shared_in_x_order)
    if n < 2:
        # With 0 or 1 shared items the denominator n * (n^2 - 1) is zero
        # and the coefficient is undefined; report "no correlation".
        return 0.0

    # Rank the shared items by their position among the *shared* items of
    # Y. Using positions in the full Y row would let items that only Y
    # contains shift the ranks and corrupt the coefficient.
    shared = set(shared_in_x_order)
    y_rank = {}
    for key in y_keys:
        if key in shared and key not in y_rank:
            y_rank[key] = len(y_rank)

    sum_diff_sq = 0.0
    for x_rank, key in enumerate(shared_in_x_order):
        sum_diff_sq += float(x_rank - y_rank[key]) ** 2
    return 1.0 - (6.0 * sum_diff_sq) / (n * (n * n - 1))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment