Last active
November 9, 2021 04:02
-
-
Save luuil/4b01c2e6ce4ea0b1a0f7f29d94d537ac to your computer and use it in GitHub Desktop.
Find k largest or smallest(reverse is True) elements from stream data using built-in sorted function.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
class TopK(object): | |
def __init__(self, k, key=None, reverse=False) -> None: | |
""" Find k largest or smallest(reverse is True) elements from stream data. | |
""" | |
super(TopK, self).__init__() | |
self._heap = list() | |
self._k = k | |
self._key = key if key is not None else lambda x: x | |
self._reverse = not reverse | |
self._cmp_op = np.less if reverse else np.greater | |
self._minmax = max if reverse else min | |
def feed(self, element): | |
if len(self._heap) < self._k: | |
self._heap.append(element) | |
else: | |
top_idx = self._minmax(range(self._k), key=lambda i: self._key(self._heap[i])) | |
if self._cmp_op(self._key(element), self._key(self._heap[top_idx])): | |
self._heap[top_idx] = element | |
def getop(self): | |
return sorted(self._heap, key=self._key, reverse=self._reverse) | |
################################## main ################################## | |
def main_topk(): | |
arr = np.random.uniform(0, 1, 1000) | |
arr = [(f'dummy_{i}', v) for i, v in enumerate(arr)] | |
min1 = sorted(arr, key=lambda x: x[1], reverse=False)[:10] | |
max1 = sorted(arr, key=lambda x: x[1], reverse=True)[:10] | |
maxk = TopK(10, key=lambda x: x[1], reverse=False) | |
mink = TopK(10, key=lambda x: x[1], reverse=True) | |
for v in arr: | |
mink.feed(v) | |
maxk.feed(v) | |
min2 = mink.getop() | |
max2 = maxk.getop() | |
print(f'{min1}\n{min2}\n{min1==min2}') | |
print(f'{max1}\n{max2}\n{max1==max2}') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment