Skip to content

Instantly share code, notes, and snippets.

@rt121212121
Last active October 29, 2024 17:56
Show Gist options
  • Save rt121212121/d8991a89497cc7a1962bd28fd437d1a4 to your computer and use it in GitHub Desktop.
Save rt121212121/d8991a89497cc7a1962bd28fd437d1a4 to your computer and use it in GitHub Desktop.
Cuckoo filter testing
from collections import defaultdict
import os
import bsvcuckoo
LOOKUP_COUNT = 1000000
ITERATIONS = 100
MAX_KICK_COUNT = 1
with open("test.csv", "w") as f:
print("maximum entries, added entries, minimum, maximum, average", file=f)
maximum_entries = 16000
while maximum_entries <= 512000:
false_positive_map: dict[int, list[int]] = defaultdict(list)
for i in range(ITERATIONS):
print(f"{i}, ", end="", flush=True)
filter = bsvcuckoo.CuckooFilter(maximum_entries, MAX_KICK_COUNT, 1644963952)
addition_count = 2000
last_addition_count = 0
while addition_count <= maximum_entries:
for i in range(last_addition_count, addition_count):
k = os.urandom(32)
filter.add(k)
false_positives = 0
for j in range(LOOKUP_COUNT):
k = os.urandom(32)
if filter.contains(k) == 0:
false_positives += 1
false_positive_map[addition_count].append(false_positives)
last_addition_count = addition_count
addition_count *= 2
print()
for addition_count, false_positive_counts in false_positive_map.items():
print(f"{maximum_entries:15}, {addition_count:13}, {min(false_positive_counts):7}, "
f"{max(false_positive_counts):7}, {sum(false_positive_counts)/ITERATIONS:7.1f}",
file=f)
maximum_entries *= 2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment