-
-
Save danielhao5/fdda6391d6cc10e9c92b4e2bbe0d98b9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import perfplot | |
from collections import defaultdict, Counter | |
from itertools import chain | |
from pyroaring import BitMap | |
def setup(k): | |
lss = [f"list{i}" for i in range(1, k + 1)] | |
labels = [f"label{i}" for i in range(1, 101)] | |
data = {ls: random.sample(labels, k=10) for ls in lss} | |
return data | |
def baseline(data): | |
data = {key: set(value) for key, value in data.items()} | |
output = {key1: {key2: 0 for key2 in data.keys()} for key1 in data.keys()} | |
for key1, value1 in data.items(): | |
for key2, value2 in data.items(): | |
for element in value1: | |
if element in value2: | |
count = output[key1][key2] | |
output[key1][key2] = count + 1 | |
return output | |
def inverted_index(data): | |
index = defaultdict(list) | |
for key, values in data.items(): | |
for value in values: | |
index[value].append(key) | |
result = {key: Counter(chain.from_iterable(index[label] for label in labels)) for key, labels in data.items()} | |
return {key: {k: value.get(k, 0) for k in data} for key, value in result.items()} | |
def roaring_bitmap(data): | |
# all labels | |
labels = set().union(*data.values()) | |
# lookup mapping to an integer | |
lookup = {key: value for value, key in enumerate(labels)} | |
roaring_data = {key: BitMap(lookup[v] for v in value) for key, value in data.items()} | |
result = defaultdict(dict) | |
for k_out, outer in roaring_data.items(): | |
for k_in, inner in roaring_data.items(): | |
result[k_out][k_in] = len(outer & inner) | |
return result | |
def equality_check(a, b): | |
for k_out, d in a.items(): | |
for k_in, value in d.items(): | |
try: | |
if b[k_out][k_in] != value: | |
return False | |
except KeyError: | |
return False | |
return True | |
if __name__ == "__main__": | |
perfplot.show( | |
setup=setup, | |
n_range=[length for length in range(100, 5000, 100)], | |
kernels=[baseline, inverted_index, roaring_bitmap], | |
labels=["baseline", "inverted_index", "roaring_bitmap"], | |
xlabel="len(data)", | |
equality_check=equality_check, | |
relative_to=0, | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment