Created
September 24, 2011 21:06
-
-
Save marcelcaraciolo/1239862 to your computer and use it in GitHub Desktop.
spearman_numpy_cython.pyx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime
import random
import sys

import numpy as np
cimport numpy as np
def _rank_dists(np.ndarray ranks1, np.ndarray ranks2):
    """Return the rank differences for the keys present in both arrays.

    Both arguments are indexed by the field names ``'keys'`` and ``'ranks'``,
    i.e. they are expected to be structured arrays with those two fields.

    The result is ``ranks1['ranks'] - ranks2['ranks']`` computed key by key,
    with elements ordered by ascending key.  It is empty when the two arrays
    share no key.

    NOTE(review): assumes keys are unique within each array; duplicated keys
    would make the two filtered arrays differ in length -- confirm with
    callers.
    """
    # Keep only the records whose key also appears in the other array.
    # (np.in1d is deprecated in NumPy 2.0 in favour of np.isin; it is kept
    # here for compatibility with older NumPy releases.)
    common1 = ranks1[np.in1d(ranks1['keys'], ranks2['keys'])]
    common2 = ranks2[np.in1d(ranks2['keys'], ranks1['keys'])]

    # Align the two arrays on the key so that position i refers to the same
    # key in both.  Sorting on 'ranks' instead would pair up unrelated keys
    # and make any two permutations of the same ranks look identical.
    common1 = np.sort(common1, order=['keys'])
    common2 = np.sort(common2, order=['keys'])

    # Per-key rank differences.
    return common1['ranks'] - common2['ranks']
#@profile | |
def spearman_correlation(ranks1, ranks2):
    """Return the Spearman correlation coefficient for two rankings.

    Each ranking is either a dict mapping key -> rank or a sequence of
    ``(key, rank)`` tuples; keys are stored as integers and ranks as floats.
    The coefficient ranges from -1.0 (ranks are opposite) to 1.0 (ranks are
    identical), and is only calculated for keys in both rankings (for
    meaningful results, remove keys present in only one list before ranking).

    Returns 0.0 when the rankings share fewer than two keys, because the
    coefficient is undefined in that case.
    """
    rank_dtype = [('keys', int), ('ranks', float)]

    # np.array cannot build a structured array from a dict directly, so
    # expand dicts into (key, rank) pairs first.
    if isinstance(ranks1, dict):
        ranks1 = list(ranks1.items())
    if isinstance(ranks2, dict):
        ranks2 = list(ranks2.items())

    ranks1 = np.array(ranks1, dtype=rank_dtype)
    ranks2 = np.array(ranks2, dtype=rank_dtype)

    diffs = _rank_dists(ranks1, ranks2)
    n_diffs = diffs.size

    # For n == 1 the denominator n * (n^2 - 1) is zero; NumPy scalar division
    # would then yield nan/inf instead of raising, so guard explicitly.
    if n_diffs < 2:
        return 0.0

    # Sum of squared rank differences.
    sum_sq = np.sum(diffs * diffs)
    return 1 - (6 * sum_sq / (n_diffs * (n_diffs * n_diffs - 1)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment