Skip to content

Instantly share code, notes, and snippets.

@luuil
Last active November 9, 2021 04:02
Show Gist options
  • Save luuil/4b01c2e6ce4ea0b1a0f7f29d94d537ac to your computer and use it in GitHub Desktop.
Save luuil/4b01c2e6ce4ea0b1a0f7f29d94d537ac to your computer and use it in GitHub Desktop.
Find k largest or smallest(reverse is True) elements from stream data using built-in sorted function.
import numpy as np
class TopK(object):
def __init__(self, k, key=None, reverse=False) -> None:
""" Find k largest or smallest(reverse is True) elements from stream data.
"""
super(TopK, self).__init__()
self._heap = list()
self._k = k
self._key = key if key is not None else lambda x: x
self._reverse = not reverse
self._cmp_op = np.less if reverse else np.greater
self._minmax = max if reverse else min
def feed(self, element):
if len(self._heap) < self._k:
self._heap.append(element)
else:
top_idx = self._minmax(range(self._k), key=lambda i: self._key(self._heap[i]))
if self._cmp_op(self._key(element), self._key(self._heap[top_idx])):
self._heap[top_idx] = element
def getop(self):
return sorted(self._heap, key=self._key, reverse=self._reverse)
################################## main ##################################
def main_topk():
arr = np.random.uniform(0, 1, 1000)
arr = [(f'dummy_{i}', v) for i, v in enumerate(arr)]
min1 = sorted(arr, key=lambda x: x[1], reverse=False)[:10]
max1 = sorted(arr, key=lambda x: x[1], reverse=True)[:10]
maxk = TopK(10, key=lambda x: x[1], reverse=False)
mink = TopK(10, key=lambda x: x[1], reverse=True)
for v in arr:
mink.feed(v)
maxk.feed(v)
min2 = mink.getop()
max2 = maxk.getop()
print(f'{min1}\n{min2}\n{min1==min2}')
print(f'{max1}\n{max2}\n{max1==max2}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment