Skip to content

Instantly share code, notes, and snippets.

@kawa-kokosowa
Created November 23, 2017 11:01
Show Gist options
  • Save kawa-kokosowa/59da8e964bdc26f46ee3e3164c6a8b4d to your computer and use it in GitHub Desktop.
Save kawa-kokosowa/59da8e964bdc26f46ee3e3164c6a8b4d to your computer and use it in GitHub Desktop.
Jaccard Similarity of Bags (python3; builtin only)
"""Jaccard Similarity of Bags
Written under the artificial restriction of using only what ships with Python 3.
"""
import doctest
def multiset_intersection_cardinality(x: list, y: list) -> int:
    """Return the number of elements in the multiset intersection of x and y.

    Each value is counted as many times as it occurs in *both* bags, i.e.
    the minimum of its multiplicities in ``x`` and ``y``. Elements are
    matched with ``==`` and do not need to be hashable.

    Args:
        x: First bag; any sized iterable (list, tuple, ...).
        y: Second bag; any sized iterable.

    Returns:
        The cardinality of the multiset intersection.

    Example:
        >>> multiset_intersection_cardinality([1, 1, 1, 2], [1, 1, 2, 2, 3])
        3
    """
    # Iterate the shorter bag and consume matches from the longer one.
    fewest, most = (x, y) if len(x) < len(y) else (y, x)
    # Work on a private list so the caller's bag is never mutated; list()
    # (rather than .copy()) also accepts tuples and other sequences.
    remaining = list(most)
    cardinality = 0
    for value in fewest:
        try:
            # Consume one occurrence so it cannot be matched twice.
            remaining.remove(value)
        except ValueError:
            # No unmatched occurrence of value is left in the longer bag.
            continue
        cardinality += 1
    return cardinality
def multiset_union_cardinality(x: list, y: list) -> int:
    """Return the number of elements in the bag union of x and y.

    The bag union used here is the multiset sum: every occurrence from
    both bags is kept, so an element present in both is counted once per
    occurrence in each bag rather than once overall.

    Args:
        x: First bag; any sized collection.
        y: Second bag; any sized collection.

    Returns:
        ``len(x) + len(y)``.

    Example:
        >>> multiset_union_cardinality([1, 1, 1, 2], [1, 1, 2, 2, 3])
        9
    """
    return len(x) + len(y)
def jaccard_similarity_bags(x: list, y: list) -> float:
    """Get the Jaccard similarity of two bags (aka multisets).

    The result is the multiset intersection cardinality divided by the
    bag-union cardinality. Because the union counts every occurrence from
    both bags, two identical non-empty bags score 0.5, not 1.0.

    Args:
        x: First bag.
        y: Second bag.

    Returns:
        The similarity as a float. Two empty bags have nothing in common
        and yield 0.0 instead of raising ``ZeroDivisionError``.

    Example:
        >>> jaccard_similarity_bags([1,1,1,2], [1,1,2,2,3])
        0.3333333333333333
        >>> jaccard_similarity_bags([1,1,1,2], [1,2,3,4])
        0.25
        >>> jaccard_similarity_bags([1,1,2,2,3], [1,2,3,4])
        0.3333333333333333
        >>> jaccard_similarity_bags([], [])
        0.0
    """
    union_cardinality = multiset_union_cardinality(x, y)
    if union_cardinality == 0:
        # Both bags are empty: avoid dividing by zero.
        return 0.0
    intersection_cardinality = multiset_intersection_cardinality(x, y)
    return intersection_cardinality / union_cardinality
if __name__ == "__main__":
    # Run the doctest examples embedded in this module's docstrings.
    doctest.testmod()
@kawa-kokosowa
Copy link
Author

The docstring for multiset_union_cardinality is wrong

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment