Ranking Metrics
"""Information Retrieval metrics | |
Useful Resources: | |
http://www.cs.utexas.edu/~mooney/ir-course/slides/Evaluation.ppt | |
http://www.nii.ac.jp/TechReports/05-014E.pdf | |
http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf | |
http://hal.archives-ouvertes.fr/docs/00/72/67/60/PDF/07-busa-fekete.pdf | |
Learning to Rank for Information Retrieval (Tie-Yan Liu) | |
""" | |
import numpy as np | |

def mean_reciprocal_rank(rs):
    """Score is reciprocal of the rank of the first relevant item

    First element is 'rank 1'. Relevance is binary (nonzero is relevant).

    Example from http://en.wikipedia.org/wiki/Mean_reciprocal_rank
    >>> rs = [[0, 0, 1], [0, 1, 0], [1, 0, 0]]
    >>> mean_reciprocal_rank(rs)
    0.61111111111111105
    >>> rs = np.array([[0, 0, 0], [0, 1, 0], [1, 0, 0]])
    >>> mean_reciprocal_rank(rs)
    0.5
    >>> rs = [[0, 0, 0, 1], [1, 0, 0], [1, 0, 0]]
    >>> mean_reciprocal_rank(rs)
    0.75

    Args:
        rs: Iterator of relevance scores (list or numpy) in rank order
            (first element is the first item)

    Returns:
        Mean reciprocal rank
    """
    rs = (np.asarray(r).nonzero()[0] for r in rs)
    return np.mean([1. / (r[0] + 1) if r.size else 0. for r in rs])
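
# Illustrative helper (editorial addition, not in the original gist): the
# metrics in this file all consume per-query relevance vectors. A minimal
# sketch, assuming you have a ranked list of document ids and a set of
# known-relevant ids; the name relevance_vector is hypothetical.
def relevance_vector(ranked_ids, relevant_ids):
    """Map a ranked list of ids to the 0/1 scores these metrics expect."""
    return [1 if doc_id in relevant_ids else 0 for doc_id in ranked_ids]

# Example: mean_reciprocal_rank([relevance_vector(['d3', 'd1'], {'d1'})]) == 0.5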

def r_precision(r):
    """Score is precision after all relevant documents have been retrieved

    Relevance is binary (nonzero is relevant).

    >>> r = [0, 0, 1]
    >>> r_precision(r)
    0.33333333333333331
    >>> r = [0, 1, 0]
    >>> r_precision(r)
    0.5
    >>> r = [1, 0, 0]
    >>> r_precision(r)
    1.0

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)

    Returns:
        R Precision
    """
    r = np.asarray(r) != 0
    z = r.nonzero()[0]
    if not z.size:
        return 0.
    return np.mean(r[:z[-1] + 1])
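
# Editorial note: classical R-Precision is precision at rank R, where R is the
# total number of documents relevant to the query, which cannot be inferred
# from r alone (the function above instead cuts off at the last relevant item
# in r). A minimal sketch of that classical variant, assuming the caller knows
# R; the name r_precision_at_R is hypothetical.
def r_precision_at_R(r, R):
    """Precision at rank R, given the query's total relevant count R."""
    assert R >= 1
    r = np.asarray(r)[:R] != 0
    if r.size < R:
        raise ValueError('Relevance score length < R')
    return np.mean(r)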

def precision_at_k(r, k):
    """Score is precision @ k

    Relevance is binary (nonzero is relevant).

    >>> r = [0, 0, 1]
    >>> precision_at_k(r, 1)
    0.0
    >>> precision_at_k(r, 2)
    0.0
    >>> precision_at_k(r, 3)
    0.33333333333333331
    >>> precision_at_k(r, 4)
    Traceback (most recent call last):
      File "<stdin>", line 1, in ?
    ValueError: Relevance score length < k

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)

    Returns:
        Precision @ k

    Raises:
        ValueError: len(r) must be >= k
    """
    assert k >= 1
    r = np.asarray(r)[:k] != 0
    if r.size != k:
        raise ValueError('Relevance score length < k')
    return np.mean(r)
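
# Illustrative aggregate (editorial addition, not in the original gist): mean
# precision@k across queries, mirroring mean_average_precision below. Assumes
# every ranking has at least k items, since precision_at_k raises ValueError
# otherwise.
def mean_precision_at_k(rs, k):
    """Mean of precision_at_k across queries."""
    return np.mean([precision_at_k(r, k) for r in rs])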

def average_precision(r):
    """Score is average precision (area under PR curve)

    Relevance is binary (nonzero is relevant).

    >>> r = [1, 1, 0, 1, 0, 1, 0, 0, 0, 1]
    >>> delta_r = 1. / sum(r)
    >>> sum([sum(r[:x + 1]) / (x + 1.) * delta_r for x, y in enumerate(r) if y])
    0.7833333333333333
    >>> average_precision(r)
    0.78333333333333333

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)

    Returns:
        Average precision
    """
    r = np.asarray(r) != 0
    out = [precision_at_k(r, k + 1) for k in range(r.size) if r[k]]
    if not out:
        return 0.
    return np.mean(out)
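
# Worked example (editorial): for r = [1, 0, 1] the relevant ranks are 1 and 3,
# with precision@1 = 1.0 and precision@3 = 2/3, so average_precision([1, 0, 1])
# is (1.0 + 2/3) / 2, i.e. 5/6 ~= 0.8333.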

def mean_average_precision(rs):
    """Score is mean average precision

    Relevance is binary (nonzero is relevant).

    >>> rs = [[1, 1, 0, 1, 0, 1, 0, 0, 0, 1]]
    >>> mean_average_precision(rs)
    0.78333333333333333
    >>> rs = [[1, 1, 0, 1, 0, 1, 0, 0, 0, 1], [0]]
    >>> mean_average_precision(rs)
    0.39166666666666666

    Args:
        rs: Iterator of relevance scores (list or numpy) in rank order
            (first element is the first item)

    Returns:
        Mean average precision
    """
    return np.mean([average_precision(r) for r in rs])

def dcg_at_k(r, k, method=0):
    """Score is discounted cumulative gain (dcg)

    Relevance is positive real values. Can use binary
    as the previous methods.

    There is a typographical error in the formula referenced in the original
    definition of this function:
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    log2(i) should be log2(i + 1)
    The formulas here are derived from
    https://en.wikipedia.org/wiki/Discounted_cumulative_gain#Discounted_Cumulative_Gain
    Methods 0 and 1 return the same results when r contains only binary values.

    >>> r = [3, 2, 3, 0, 1, 2]
    >>> dcg_at_k(r, 1)
    3.0
    >>> dcg_at_k(r, 1, method=1)
    7.0
    >>> dcg_at_k(r, 2)
    4.2618595071429155
    >>> dcg_at_k(r, 2, method=1)
    8.8927892607143733
    >>> dcg_at_k(r, 6)
    6.8611266885935018
    >>> dcg_at_k(r, 6, method=1)
    13.848263629272981

    Args:
        r: Relevance scores (list or numpy array) in rank order
            (first element is the most relevant item)
        k: Number of results to consider
        method: If 0 then sum rel_i / log2(i + 1) [not log2(i)]
            If 1 then sum (2^rel_i - 1) / log2(i + 1)

    Returns:
        Discounted cumulative gain
    """
    # np.asfarray was removed in NumPy 2.0; np.asarray with dtype=float is the
    # equivalent, version-safe spelling.
    r = np.asarray(r, dtype=float)[:k]
    if r.size:
        if method == 0:
            return np.sum(r / np.log2(np.arange(2, r.size + 2)))
        elif method == 1:
            return np.sum((np.power(2, r) - 1) / np.log2(np.arange(2, r.size + 2)))
        else:
            raise ValueError('method must be 0 or 1.')
    return 0.
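
# Worked example (editorial): for r = [3, 2], method 0 gives
# 3 / log2(2) + 2 / log2(3) = 3.0 + 1.2619 ~= 4.2619 (the dcg_at_k(r, 2)
# doctest above), while method 1 gives
# (2**3 - 1) / log2(2) + (2**2 - 1) / log2(3) = 7.0 + 1.8928 ~= 8.8928.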

def ndcg_at_k(r, k, method=0):
    """Score is normalized discounted cumulative gain (ndcg)

    Relevance is positive real values. Can use binary
    as the previous methods.

    >>> r = [3, 2, 3, 0, 1, 2]
    >>> ndcg_at_k(r, 1)
    1.0
    >>> ndcg_at_k(r, 1, method=1)
    1.0
    >>> ndcg_at_k(r, 2)
    0.87104906425515294
    >>> ndcg_at_k(r, 2, method=1)
    0.77894125300883343
    >>> ndcg_at_k(r, 6)
    0.96080819433606168
    >>> ndcg_at_k(r, 6, method=1)
    0.94881074856789849

    Args:
        r: Relevance scores (list or numpy array) in rank order
            (first element is the most relevant item)
        k: Number of results to consider
        method: If 0 then sum rel_i / log2(i + 1) [not log2(i)]
            If 1 then sum (2^rel_i - 1) / log2(i + 1)

    Returns:
        Normalized discounted cumulative gain
    """
    dcg_max = dcg_at_k(sorted(r, reverse=True), k, method)
    if not dcg_max:
        return 0.
    return dcg_at_k(r, k, method) / dcg_max
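
# Illustrative aggregate (editorial addition, not in the original gist): mean
# NDCG@k across queries, following the same pattern as mean_average_precision
# above.
def mean_ndcg_at_k(rs, k, method=0):
    """Mean of ndcg_at_k across queries."""
    return np.mean([ndcg_at_k(r, k, method) for r in rs])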

if __name__ == "__main__":
    import doctest
    doctest.testmod()
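To run the bundled doctests, execute the file directly; passing -v on the command line makes doctest.testmod() report each example as it runs.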