Created
September 24, 2011 20:49
-
-
Save marcelcaraciolo/1239843 to your computer and use it in GitHub Desktop.
spearman_scipy.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime | |
import sys | |
import random | |
import numpy as np | |
from scipy.stats import spearmanr | |
def _as_rank_array(ranks):
    """Return *ranks* as a 1-D structured array with 'keys' and 'ranks' fields.

    *ranks* may be a dict mapping key -> rank, a sequence of (key, rank)
    pairs, or an array NumPy can already convert to the target dtype.
    Keys are stored as integers and ranks as floats.
    """
    dtype = [('keys', int), ('ranks', float)]
    if isinstance(ranks, np.ndarray):
        return np.array(ranks, dtype=dtype)
    if isinstance(ranks, dict):
        ranks = ranks.items()
    # NumPy only fills structured records field-by-field from tuples; a
    # list such as [key, rank] would be broadcast into every field instead.
    return np.array([tuple(pair) for pair in ranks], dtype=dtype)


def _rank_dists(ranks1, ranks2):
    """Return the ranks from both arrays for the keys they have in common.

    Both arguments are structured arrays with 'keys' and 'ranks' fields.
    The two returned arrays have the same length and are aligned by key:
    position i in each refers to the same key. Results follow the order in
    which the shared keys appear in ranks1. If a key is repeated within one
    array, only its first occurrence is used.
    """
    # intersect1d reports, for every shared key, where it sits in each
    # array, so the two rank vectors can be paired key-by-key even when
    # the rankings list their keys in different orders.
    _, idx1, idx2 = np.intersect1d(
        ranks1['keys'], ranks2['keys'], return_indices=True
    )
    # intersect1d sorts by key value; restore the order of ranks1.
    order = np.argsort(idx1)
    return ranks1['ranks'][idx1[order]], ranks2['ranks'][idx2[order]]


#@profile
def spearman_correlation(ranks1, ranks2):
    """Return the Spearman correlation coefficient for two rankings.

    Each ranking is a dict mapping key -> rank or a sequence of
    (key, rank) pairs; keys must be integers. The coefficient ranges from
    -1.0 (ranks are opposite) to 1.0 (ranks are identical) and is computed
    only over keys present in both rankings, matched by key regardless of
    the order in which each ranking lists them (for meaningful results,
    remove keys present in only one list before ranking).

    Returns NaN when fewer than two keys are shared, because a correlation
    is undefined for fewer than two observations.
    """
    ranks1 = _as_rank_array(ranks1)
    ranks2 = _as_rank_array(ranks2)
    d1, d2 = _rank_dists(ranks1, ranks2)
    if len(d1) < 2:
        return float('nan')
    return spearmanr(d1, d2)[0]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment