Last active
January 10, 2023 04:31
-
-
Save kell18/beee8a6c72879d56b7d61b520f8a2b65 to your computer and use it in GitHub Desktop.
NDCG for recommender systems
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from math import log | |
import unittest | |
def dcg_at_k(scores): | |
assert scores | |
return scores[0] + sum(sc / log(ind, 2) for sc, ind in zip(scores[1:], range(2, len(scores)+1))) | |
def ndcg_at_k(predicted_scores, user_scores): | |
assert len(predicted_scores) == len(user_scores) | |
idcg = dcg_at_k(sorted(user_scores, reverse=True)) | |
return (dcg_at_k(predicted_scores) / idcg) if idcg > 0.0 else 0.0 | |
class TestMetrics(unittest.TestCase): | |
def test_dcg_small(self): | |
scores = [3, 2] | |
self.assertAlmostEqual(dcg_at_k(scores), 5.0) | |
def test_dcg_large(self): | |
scores = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0] | |
self.assertAlmostEqual(dcg_at_k(scores), 9.6051177391888114) | |
def test_ndcg(self): | |
predicted1 = [.4, .1, .8] | |
predicted2 = [.0, .1, .4] | |
predicted3 = [.4, .1, .0] | |
actual = [.8, .4, .1, .0] | |
self.assertAlmostEqual(ndcg_at_k(predicted1, actual[:3]), 0.795, 3) | |
self.assertAlmostEqual(ndcg_at_k(predicted2, actual[:3]), 0.279, 3) | |
self.assertAlmostEqual(ndcg_at_k(predicted3, actual[:3]), 0.396, 3) |
Can you give me an idea of how to use your function if I have a vector of binary (ground truth) labels and then an output from an ALS model, for example: [ 1.09253478e-01 1.97033856e-01 5.51080274e-01 ..., 1.77992064e-03 1.90066773e-12 1.74711004e-04]
When evaluation my model using AUC, I can just feed in the binary ground truth vector and the output from my ALS model as the predicted scores as is, but I am wondering how this would work with your model if I am considering, for example, k=10 recommendations and would like to use NDCG to evaluate the output.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is a better implementation than https://gist.github.com/bwhite/3726239 because the ideal ranking should include all items in the collection, not only the retrieved items.