Skip to content

Instantly share code, notes, and snippets.

@marcelcaraciolo
Created September 24, 2011 20:49
Show Gist options
  • Save marcelcaraciolo/1239843 to your computer and use it in GitHub Desktop.
Save marcelcaraciolo/1239843 to your computer and use it in GitHub Desktop.
spearman_scipy.py
import datetime
import sys
import random
import numpy as np
from scipy.stats import spearmanr
def _rank_dists(ranks1, ranks2):
"""Finds the values in ranks1 and ranks2 for keys
present in both arrays.
"""
#Find keys in both arrays
d = ranks1[np.in1d(ranks1['keys'], ranks2['keys'])]
d2 = ranks2[np.in1d(ranks2['keys'], ranks1['keys'])]
return d['ranks'], d2['ranks']
#@profile
def _as_rank_array(ranks):
    """Convert a dict or a sequence of (key, rank) pairs to a structured array
    with an integer 'keys' field and a float 'ranks' field."""
    if isinstance(ranks, dict):
        ranks = ranks.items()
    # NumPy fills the fields of a structured dtype positionally only from
    # tuples; a list pair such as [key, rank] would instead be broadcast into
    # both fields, so normalise every pair to a tuple first.
    pairs = [tuple(pair) for pair in ranks]
    return np.array(pairs, dtype=[('keys', int), ('ranks', float)])


def spearman_correlation(ranks1, ranks2):
    """Return the Spearman correlation coefficient for two rankings.

    Each ranking is a dict mapping key -> rank or a sequence of (key, rank)
    pairs; keys must be convertible to int and ranks to float. The
    coefficient ranges from -1.0 (ranks are opposite) to 1.0 (ranks are
    identical), and is only calculated for keys in both rankings (for
    meaningful results, remove keys present in only one list before
    ranking). Selection of the shared keys is delegated to ``_rank_dists``.
    """
    ranks1 = _as_rank_array(ranks1)
    ranks2 = _as_rank_array(ranks2)
    d1, d2 = _rank_dists(ranks1, ranks2)
    # spearmanr returns (correlation, p-value); only the correlation is used.
    return spearmanr(d1, d2)[0]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment