Skip to content

Instantly share code, notes, and snippets.

@marcelcaraciolo
Created September 24, 2011 21:06
Show Gist options
  • Save marcelcaraciolo/1239862 to your computer and use it in GitHub Desktop.
Save marcelcaraciolo/1239862 to your computer and use it in GitHub Desktop.
spearman_numpy_cython.pyx
import datetime
import random
import sys

import numpy as np
cimport numpy as np
def _rank_dists(np.ndarray ranks1, np.ndarray ranks2):
    """Return the per-key rank differences between two rankings.

    Both arguments are structured arrays with a ``'keys'`` field and a
    ``'ranks'`` field. Only keys present in both arrays contribute.

    Returns an array holding ``rank_in_ranks1 - rank_in_ranks2`` for every
    shared key, in ascending key order.

    NOTE: keys are assumed to be unique within each array; duplicated keys
    would leave the two selections misaligned or of different lengths.
    """
    # Keep only the records whose key also occurs in the other ranking.
    shared1 = ranks1[np.in1d(ranks1['keys'], ranks2['keys'])]
    shared2 = ranks2[np.in1d(ranks2['keys'], ranks1['keys'])]
    # Sort both selections by key so that position i refers to the same key
    # in each array. Sorting by rank instead would pair up unrelated items
    # and report zero distance for any two permutations of the same ranks.
    shared1 = np.sort(shared1, order=['keys'])
    shared2 = np.sort(shared2, order=['keys'])
    # Element-wise difference of the now key-aligned ranks.
    return shared1['ranks'] - shared2['ranks']
#@profile
def spearman_correlation(ranks1, ranks2):
    """Return the Spearman correlation coefficient for two rankings.

    Each ranking is a dict mapping key -> rank, or a sequence of
    ``(key, rank)`` tuples. Keys are stored as integers and ranks as floats.
    The coefficient ranges from -1.0 (ranks are opposite) to 1.0 (ranks are
    identical) and is only calculated for keys present in both rankings
    (for meaningful results, remove keys present in only one list before
    ranking).

    Returns 0.0 when fewer than two keys are shared, because the coefficient
    is undefined in that case.
    """
    rank_dtype = [('keys', int), ('ranks', float)]
    # A dict cannot be converted to a structured array directly; turn it
    # into the equivalent sequence of (key, rank) tuples first.
    if isinstance(ranks1, dict):
        ranks1 = list(ranks1.items())
    if isinstance(ranks2, dict):
        ranks2 = list(ranks2.items())
    ranks1 = np.array(ranks1, dtype=rank_dtype)
    ranks2 = np.array(ranks2, dtype=rank_dtype)

    diffs = _rank_dists(ranks1, ranks2)
    n_diffs = diffs.size
    # With 0 or 1 shared keys the denominator n * (n^2 - 1) is zero. NumPy
    # would yield nan/inf instead of raising, so guard explicitly.
    if n_diffs < 2:
        return 0.0
    sum_sq = np.sum(diffs * diffs)
    return 1 - (6 * sum_sq / (n_diffs * (n_diffs * n_diffs - 1)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment