Skip to content

Instantly share code, notes, and snippets.

@joocer
Created September 14, 2022 14:05
Show Gist options
  • Save joocer/af73660cb2f09d2f485083ded80d0f0f to your computer and use it in GitHub Desktop.
Save joocer/af73660cb2f09d2f485083ded80d0f0f to your computer and use it in GitHub Desktop.
LRU-K implementation in Python
"""
LRU-K evicts the page whose K-th most recent access is furthest in the past.
This is a basic implementation of LRU-2, which evicts entries according to the time
of their penultimate access. The main benefit of this approach is to prevent
a problem when the items being checked exceeds the number of items in the cache. A
classic LRU will evict and repopulate the cache for every call. LRU-2 reduces the
likelihood of this, but not preferring the MRU item to be retained.
LRU-K should be used in conjunction with eviction limits per query - this appears to
broadly be the solution used by Postgres. This can be supported by the calling
function using the return from the .set call to determine if an item was evicted from
the cache.
This can also be used as the index for an external cache (for example in plasma), where
the .set returns the evicted item which the calling function can then evict from the
external cache.
"""
import time
import numpy
class lru_2():
def __init__(self, **kwargs):
"""
Parameters:
size: int (optional)
The maximim number of items maintained in the cache.
"""
self._size = int(kwargs.get("size", 50))
self._cache = {}
self._hits = 0
self._misses = 0
def get(self, key):
"""
Parameters:
key: any (hashable)
The key to reference the cached item
"""
# we're disposing of access_2 (the penultimate access), recording this access
# as the latest and making the access_1 the new penultimate access
(value, access_1, _) = self._cache.get(key, (None, None, None))
if value:
self._cache[key] = (value, time.time_ns(), access_1)
self._hits += 1
return value
self._misses += 1
return None
def set(self, key, value):
"""
Parameters:
key: any (hashable)
The key to reference the cached item
value: any
The value to hold in the cache
"""
# if we're already in the cache - do nothing
if key in self._cache:
return None
# create an initial entry for the new item
clock = time.monotonic_ns()
self._cache[key] = (value, clock, clock)
# If we're full, we want to remove an item from the cache.
# We choose the item to remove based on the penultimate access for that item.
if len(self._cache) > self._size:
keys = tuple(self._cache.keys())
accesses = (c[2] for c in self._cache.values())
least_recently_used = numpy.argmin(accesses)
evicted_key = keys[least_recently_used]
self._cache.pop(evicted_key)
return evicted_key
return None
@property
def stats(self):
# return hits, misses
return (self._hits, self._misses)
if __name__ == "__main__":
cache = lru_2(size=95)
t = time.time_ns()
for i in range(500000):
new_value = int(numpy.random.uniform(0, 100))
cache.get(new_value)
cache.set(new_value, new_value)
print((time.time_ns() - t) / 1e9)
print(f"Hit ratio: {int(cache.stats[0] / cache.stats[1])}:1")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment