"""Information Retrieval metrics
Useful Resources:
http://www.cs.utexas.edu/~mooney/ir-course/slides/Evaluation.ppt
http://www.nii.ac.jp/TechReports/05-014E.pdf
http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
http://hal.archives-ouvertes.fr/docs/00/72/67/60/PDF/07-busa-fekete.pdf
Learning to Rank for Information Retrieval (Tie-Yan Liu)
"""
import numpy as np


def mean_reciprocal_rank(rs):
    """Score is reciprocal of the rank of the first relevant item

    First element is 'rank 1'. Relevance is binary (nonzero is relevant).

    Example from http://en.wikipedia.org/wiki/Mean_reciprocal_rank
    >>> rs = [[0, 0, 1], [0, 1, 0], [1, 0, 0]]
    >>> mean_reciprocal_rank(rs)
    0.61111111111111105
    >>> rs = np.array([[0, 0, 0], [0, 1, 0], [1, 0, 0]])
    >>> mean_reciprocal_rank(rs)
    0.5
    >>> rs = [[0, 0, 0, 1], [1, 0, 0], [1, 0, 0]]
    >>> mean_reciprocal_rank(rs)
    0.75

    Args:
        rs: Iterator of relevance scores (list or numpy) in rank order
            (first element is the first item)

    Returns:
        Mean reciprocal rank
    """
    # Indices of the relevant items for each query (empty if none)
    rs = (np.asarray(r).nonzero()[0] for r in rs)
    # 1 / rank of the first relevant item, or 0 if the query has none
    return np.mean([1. / (r[0] + 1) if r.size else 0. for r in rs])
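

# Worked arithmetic for the first doctest above: the first relevant item
# sits at rank 3, rank 2, and rank 1 respectively, so
#   MRR = (1/3 + 1/2 + 1/1) / 3 = (11/6) / 3 = 11/18 = 0.6111...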


def r_precision(r):
    """Score is precision after all relevant documents have been retrieved,
    i.e. precision at the rank of the last relevant item in r.

    Relevance is binary (nonzero is relevant).

    >>> r = [0, 0, 1]
    >>> r_precision(r)
    0.33333333333333331
    >>> r = [0, 1, 0]
    >>> r_precision(r)
    0.5
    >>> r = [1, 0, 0]
    >>> r_precision(r)
    1.0

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)

    Returns:
        R Precision
    """
    r = np.asarray(r) != 0
    z = r.nonzero()[0]
    if not z.size:
        return 0.
    # Precision over the prefix that ends at the last relevant item
    return np.mean(r[:z[-1] + 1])
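

# Note: r_precision above measures precision at the rank of the last
# relevant item observed in r, which equals classic R-precision only when
# r contains every relevant document for the query. A minimal sketch of
# strict R-precision, assuming the caller supplies n_relevant (a
# hypothetical extra parameter: the total number of relevant documents):
def r_precision_strict(r, n_relevant):
    """Precision at rank R, where R = n_relevant (total relevant docs)."""
    r = np.asarray(r) != 0
    if n_relevant <= 0:
        return 0.
    # Documents beyond len(r) count as non-relevant
    return np.sum(r[:n_relevant]) / float(n_relevant)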


def precision_at_k(r, k):
    """Score is precision @ k

    Relevance is binary (nonzero is relevant).

    >>> r = [0, 0, 1]
    >>> precision_at_k(r, 1)
    0.0
    >>> precision_at_k(r, 2)
    0.0
    >>> precision_at_k(r, 3)
    0.33333333333333331
    >>> precision_at_k(r, 4)
    Traceback (most recent call last):
      File "<stdin>", line 1, in ?
    ValueError: Relevance score length < k

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider

    Returns:
        Precision @ k

    Raises:
        ValueError: len(r) must be >= k
    """
    assert k >= 1
    r = np.asarray(r)[:k] != 0
    if r.size != k:
        raise ValueError('Relevance score length < k')
    return np.mean(r)


def average_precision(r):
    """Score is average precision (area under PR curve)

    Relevance is binary (nonzero is relevant).

    >>> r = [1, 1, 0, 1, 0, 1, 0, 0, 0, 1]
    >>> delta_r = 1. / sum(r)
    >>> sum([sum(r[:x + 1]) / (x + 1.) * delta_r for x, y in enumerate(r) if y])
    0.7833333333333333
    >>> average_precision(r)
    0.78333333333333333

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)

    Returns:
        Average precision
    """
    r = np.asarray(r) != 0
    # Precision @ k at every rank k that holds a relevant item
    out = [precision_at_k(r, k + 1) for k in range(r.size) if r[k]]
    if not out:
        return 0.
    return np.mean(out)
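

# The loop above calls precision_at_k once per relevant rank, which is
# O(n^2) overall. A minimal vectorized sketch of the same quantity (an
# assumed alternative, not part of the original gist):
def average_precision_fast(r):
    """Same score as average_precision, computed with one cumulative sum."""
    r = np.asarray(r) != 0
    if not r.any():
        return 0.
    # precision @ k for every k, then averaged over the relevant positions
    precision_at_each_k = np.cumsum(r) / np.arange(1., r.size + 1.)
    return np.mean(precision_at_each_k[r])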


def mean_average_precision(rs):
    """Score is mean average precision

    Relevance is binary (nonzero is relevant).

    >>> rs = [[1, 1, 0, 1, 0, 1, 0, 0, 0, 1]]
    >>> mean_average_precision(rs)
    0.78333333333333333
    >>> rs = [[1, 1, 0, 1, 0, 1, 0, 0, 0, 1], [0]]
    >>> mean_average_precision(rs)
    0.39166666666666666

    Args:
        rs: Iterator of relevance scores (list or numpy) in rank order
            (first element is the first item)

    Returns:
        Mean average precision
    """
    return np.mean([average_precision(r) for r in rs])


def dcg_at_k(r, k, method=0):
    """Score is discounted cumulative gain (dcg)

    Relevance is positive real values. Can use binary
    as the previous methods.

    There is a typographical error in the formula in the slides referenced
    by the original version of this function:
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    log2(i) should be log2(i + 1).
    The formulas here are derived from
    https://en.wikipedia.org/wiki/Discounted_cumulative_gain#Discounted_Cumulative_Gain
    and return the same results when r contains only binary values.

    >>> r = [3, 2, 3, 0, 1, 2]
    >>> dcg_at_k(r, 1)
    3.0
    >>> dcg_at_k(r, 1, method=1)
    7.0
    >>> dcg_at_k(r, 2)
    4.2618595071429155
    >>> dcg_at_k(r, 2, method=1)
    8.8927892607143733
    >>> dcg_at_k(r, 6)
    6.8611266885935018
    >>> dcg_at_k(r, 6, method=1)
    13.848263629272981

    Args:
        r: Relevance scores (list or numpy array) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then sum rel_i / log2(i + 1) [not log2(i)]
            If 1 then sum (2^rel_i - 1) / log2(i + 1)

    Returns:
        Discounted cumulative gain
    """
    # np.asfarray was removed in NumPy 2.0; asarray with dtype=float is
    # the equivalent conversion
    r = np.asarray(r, dtype=float)[:k]
    if r.size:
        # Discounts are log2(2), log2(3), ..., log2(k + 1)
        discounts = np.log2(np.arange(2, r.size + 2))
        if method == 0:
            return np.sum(r / discounts)
        elif method == 1:
            return np.sum((np.power(2, r) - 1) / discounts)
        raise ValueError('method must be 0 or 1.')
    return 0.
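

# Worked example of the two gain/discount variants, using the doctest
# ranking r = [3, 2, 3, 0, 1, 2] at k = 2:
#   method=0: DCG@2 = 3 / log2(2) + 2 / log2(3) = 3.0 + 1.26186 = 4.26186
#   method=1: DCG@2 = (2**3 - 1) / log2(2) + (2**2 - 1) / log2(3)
#                   = 7.0 + 1.89279 = 8.89279
# With binary relevance, 2**rel - 1 equals rel, so both methods agree.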


def ndcg_at_k(r, k, method=0):
    """Score is normalized discounted cumulative gain (ndcg)

    Relevance is positive real values. Can use binary
    as the previous methods.

    >>> r = [3, 2, 3, 0, 1, 2]
    >>> ndcg_at_k(r, 1)
    1.0
    >>> ndcg_at_k(r, 1, method=1)
    1.0
    >>> ndcg_at_k(r, 2)
    0.87104906425515294
    >>> ndcg_at_k(r, 2, method=1)
    0.77894125300883343
    >>> ndcg_at_k(r, 6)
    0.96080819433606168
    >>> ndcg_at_k(r, 6, method=1)
    0.94881074856789849

    Args:
        r: Relevance scores (list or numpy array) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then sum rel_i / log2(i + 1) [not log2(i)]
            If 1 then sum (2^rel_i - 1) / log2(i + 1)

    Returns:
        Normalized discounted cumulative gain
    """
    # Ideal DCG: the same scores re-sorted into the best possible order
    dcg_max = dcg_at_k(sorted(r, reverse=True), k, method)
    if not dcg_max:
        return 0.
    return dcg_at_k(r, k, method) / dcg_max
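

# Caveat: ndcg_at_k normalizes by the ideal reordering of the scores that
# appear in r. If r is a truncated result list, the true ideal DCG should
# come from all relevance judgments for the query. A minimal sketch,
# assuming all_rels (a hypothetical argument) holds every judged relevance
# value for the query:
def ndcg_at_k_full(r, k, all_rels, method=0):
    """NDCG@k normalized against the full set of relevance judgments."""
    dcg_max = dcg_at_k(sorted(all_rels, reverse=True), k, method)
    if not dcg_max:
        return 0.
    return dcg_at_k(r, k, method) / dcg_max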


if __name__ == "__main__":
    import doctest
    doctest.testmod()