Last active
April 4, 2023 15:30
-
-
Save earonesty/cb9b18cf2f27aa533a48b0b7942d1cd7 to your computer and use it in GitHub Desktop.
UniqueQueue
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class TestUniqueQueue(unittest.TestCase): | |
def test_basic(self): | |
q = util.UniqueQueue() | |
for i in range(100): | |
q.put(i) | |
res = set(q) | |
for i in range(100): | |
self.assertIn(i, res) | |
def test_dupes(self): | |
q = util.UniqueQueue() | |
q.put("hello") | |
q.put("hello") | |
q.put("world") | |
res = list(q) | |
self.assertEqual(len(res), 2) | |
def test_custom_key(self): | |
q = util.UniqueQueue(key=lambda a: a.split(".")[0]) | |
q.put("hello.1") | |
q.put("hello.2") | |
q.put("world.3") | |
res = list(q) | |
assert res == ["hello.2", "world.3"] | |
def test_multithreaded_wait(self): | |
q = util.UniqueQueue() | |
def producer(): | |
nonlocal prod_ran | |
prod_ran = True | |
q.put("g") | |
# Run the test many times to increase odds of running into race condition | |
for _ in range(1000): | |
prod_ran = False | |
t = threading.Thread(target=producer, daemon=True) | |
t.start() | |
q.wait_for_value() | |
# This should fail if producer() did not run | |
self.assertTrue(prod_ran) | |
self.assertEqual(list(q), ["g"]) | |
t.join() | |
def test_multithreaded(self): | |
import string | |
# We're going to have 4 threads adding values | |
num_producers = 4 | |
num_running = 0 | |
b = threading.Barrier(num_producers + 1) | |
lock = threading.Lock() | |
q = util.UniqueQueue() | |
# Function to add a bunch of values to the queue | |
def adder(val: str): | |
nonlocal q, b, lock, num_running | |
# Increment the count of running producers | |
with lock: | |
num_running += 1 | |
# Wait for other threads to catch up | |
b.wait() | |
for i in range(1000): | |
q.put(f"{val}_{i}") | |
with lock: | |
num_running -= 1 | |
# Assign a letter per producer | |
letters = [string.ascii_lowercase[i] for i in range(num_producers)] | |
# Spin up a thread per producer | |
threads = [threading.Thread(target=adder, args=(letter,), daemon=True) for letter in letters] | |
for t in threads: | |
t.start() | |
results = [] | |
# Synchronization point | |
b.wait() | |
while True: | |
# Add all new values to the list of results | |
results.extend(q) | |
with lock: | |
# The other threads have exited cleanly, so add any straggling values | |
if num_running == 0: | |
results.extend(q) | |
break | |
# This shouldn't be waiting for long | |
for t in threads: | |
t.join() | |
expected = [] | |
for i in range(1000): | |
for letter in letters: | |
expected.append(f"{letter}_{i}") | |
self.assertEqual(sorted(results), sorted(expected)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from queue import Queue | |
class UniqueQueue(Queue): | |
"""A simple multi-producer, single-consumer queue that only stores a given element once. | |
This derives from Queue, and inherits much of the safety/functionality of Queue. | |
There is a potential race condition during iteration--when entries are being added, the consumer may miss the | |
very latest of them if there is contention. This class should not be used in cases where this matters. | |
""" | |
def __init__(self, maxsize=0, key=None): | |
self.key = key | |
self.queue = set() if key is None else {} | |
super().__init__(maxsize) | |
def _init(self, maxsize): | |
self.queue = set() if self.key is None else {} | |
def _qsize(self): | |
return len(self.queue) | |
def _put(self, item): | |
if self.key is None: | |
self.queue.add(item) | |
else: | |
self.queue[self.key(item)] = item | |
def clear(self): | |
"""Remove all elements from the queue""" | |
with self.mutex: | |
self.queue.clear() | |
self.not_full.notify_all() | |
def _get(self): | |
try: | |
if self.key is None: | |
return self.queue.pop() | |
else: | |
k = next(iter(self.queue)) | |
return self.queue.pop(k) | |
except (KeyError, StopIteration): | |
raise IndexError | |
def __iter__(self) -> Iterable[T]: | |
"""Iterate over the elements of the queue. | |
This will return as soon as there are no more values. It may thus be desirable to use it in a loop, as producer | |
threads can actively be adding content that will be missed otherwise. | |
""" | |
while self.queue: | |
yield self.get() | |
def wait_for_value(self): | |
"""Block until the set has at least one element.""" | |
# We don't need to lock because we're only going to support *one* consumer at once | |
with self.not_empty: | |
self.not_empty.wait_for(lambda: bool(self.queue)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment