LRU-K implementation in Python
""" | |
LRU-K evicts the page whose K-th most recent access is furthest in the past. | |
This is a basic implementation of LRU-2, which evicts entries according to the time | |
of their penultimate access. The main benefit of this approach is to prevent | |
a problem when the items being checked exceeds the number of items in the cache. A | |
classic LRU will evict and repopulate the cache for every call. LRU-2 reduces the | |
likelihood of this, but not preferring the MRU item to be retained. | |
LRU-K should be used in conjunction with eviction limits per query - this appears to | |
broadly be the solution used by Postgres. This can be supported by the calling | |
function using the return from the .set call to determine if an item was evicted from | |
the cache. | |
This can also be used as the index for an external cache (for example in plasma), where | |
the .set returns the evicted item which the calling function can then evict from the | |
external cache. | |
""" | |
import time

import numpy
class lru_2():
    def __init__(self, **kwargs):
        """
        Parameters:
            size: int (optional)
                The maximum number of items maintained in the cache.
        """
        self._size = int(kwargs.get("size", 50))
        self._cache = {}
        self._hits = 0
        self._misses = 0
    def get(self, key):
        """
        Parameters:
            key: any (hashable)
                The key to reference the cached item
        """
        # we're disposing of access_2 (the penultimate access), recording this access
        # as the latest and making access_1 the new penultimate access
        if key in self._cache:
            (value, access_1, _) = self._cache[key]
            # use the same monotonic clock as .set so access times are comparable
            self._cache[key] = (value, time.monotonic_ns(), access_1)
            self._hits += 1
            return value
        self._misses += 1
        return None
    def set(self, key, value):
        """
        Parameters:
            key: any (hashable)
                The key to reference the cached item
            value: any
                The value to hold in the cache
        """
        # if we're already in the cache - do nothing
        if key in self._cache:
            return None
        # create an initial entry for the new item
        clock = time.monotonic_ns()
        self._cache[key] = (value, clock, clock)
        # If we're full, we want to remove an item from the cache.
        # We choose the item to remove based on the penultimate access for that item.
        if len(self._cache) > self._size:
            keys = tuple(self._cache.keys())
            # materialize the access times - numpy.argmin doesn't consume a generator
            accesses = tuple(c[2] for c in self._cache.values())
            least_recently_used = numpy.argmin(accesses)
            evicted_key = keys[least_recently_used]
            self._cache.pop(evicted_key)
            return evicted_key
        return None
    @property
    def stats(self):
        # return hits, misses
        return (self._hits, self._misses)
if __name__ == "__main__":
    cache = lru_2(size=95)
    t = time.time_ns()
    for i in range(500000):
        new_value = int(numpy.random.uniform(0, 100))
        cache.get(new_value)
        cache.set(new_value, new_value)
    print((time.time_ns() - t) / 1e9)
    print(f"Hit ratio: {int(cache.stats[0] / cache.stats[1])}:1")