@flavio-fernandes
Last active June 7, 2019 09:58
Google interview challenge program with Joshua
#!/usr/bin/env python3
# Joshua challenge program
#
# Write a cache API that:
#
# - Is in-memory storage;
# - Holds up to n bytes, based on strings for key and data;
# - Uses LRU to evict entries to make room for new entries;
# - Assume key and data are strings of variable lengths.
#
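# A minimal usage sketch of the intended API (illustrative only; it assumes
# the Cache class defined below, and sizes follow _calculatedataEntrySize
# rather than real object footprints):
#
#   cache = Cache(64)
#   cache.set("greeting", "hello")
#   found, value = cache.get("greeting")   # -> (True, "hello")
#   found, value = cache.get("missing")    # -> (False, None)
#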
class LruEntry():
    """Node in the doubly linked list that tracks usage recency."""
    def __init__(self, key=None, prev=None, next=None):
        self.key = key
        self.prev = prev
        self.next = next

class DataEntry():
    """Cached value plus its accounted size and its node in the LRU list."""
    def __init__(self, data=None, memorySize=None, lruEntry=None):
        self.data = data
        self.memorySize = memorySize
        self.lruEntry = lruEntry

class Cache():
    def __init__(self, maxMemory):
        self.entries = {}  # key -> DataEntry
        self.memoryAvailable = maxMemory
        self.lruHead = self.lruTail = None  # head = most recent, tail = least recent
    def set(self, key, data):
        """
        Add or refresh entry in cache
        """
        # Avoid dealing with double counting by simply removing entry that may
        # exist under the same key. (TODO: this could be made smarter)
        self._removeDataEntry(key)
        memorySize = self._calculatedataEntrySize(key, data)
        if self.memoryAvailable < memorySize:
            self._evictEntries(memorySize - self.memoryAvailable)
        if self.memoryAvailable < memorySize:
            # If we still do not have enough memory available, silently drop
            # the entry and do nothing. (TODO: or maybe assert?)
            return
        self.memoryAvailable -= memorySize
        lruEntry = LruEntry(key, None, None)
        self.entries[key] = DataEntry(data, memorySize, lruEntry)
        self._addLruToDblLinkedList(lruEntry)
    def get(self, key):
        """
        Lookup cache entry
        """
        if key not in self.entries:
            return (False, None)
        dataEntry = self.entries[key]
        self._refreshLru(dataEntry.lruEntry)
        return (True, dataEntry.data)
    @staticmethod
    def _calculatedataEntrySize(key, data):
        # Estimate memory footprint by accounting for the key in both the lru
        # list and the entries dict, in addition to the length of data. For
        # simplicity, this does not account for the other pointer-like metadata.
        return 2 * len(key) + len(data)
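    # Worked example of the accounting above: for key "1" and data "one",
    # the charge is 2 * len("1") + len("one") = 2 + 3 = 5 "bytes", which is
    # the oneSize value computed in main() below.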
    def _evictEntries(self, memoryNeeded):
        availableNeeded = self.memoryAvailable + memoryNeeded
        # Delete data entries based on lru until enough memory becomes available
        # or we delete everything.
        while self.lruTail and self.memoryAvailable < availableNeeded:
            self._removeDataEntry(self.lruTail.key)
    def _removeDataEntry(self, key):
        dataEntry = self.entries.get(key)
        if not dataEntry:
            return  # noop
        self._removeLruFromDblLinkedList(dataEntry.lruEntry)
        self.memoryAvailable += dataEntry.memorySize
        del self.entries[key]
    def _addLruToDblLinkedList(self, lruEntry):
        assert lruEntry.prev is None
        assert lruEntry.next is None
        if self.lruTail is None:
            assert self.lruHead is None
            self.lruTail = lruEntry
        else:
            assert self.lruHead is not None
            self.lruHead.prev = lruEntry
            lruEntry.next = self.lruHead
        self.lruHead = lruEntry
    def _removeLruFromDblLinkedList(self, lruEntry):
        if self.lruHead is lruEntry:
            self.lruHead = lruEntry.next
        if self.lruTail is lruEntry:
            self.lruTail = lruEntry.prev
        if lruEntry.next:
            assert lruEntry.next.prev is lruEntry
            lruEntry.next.prev = lruEntry.prev
        if lruEntry.prev:
            assert lruEntry.prev.next is lruEntry
            lruEntry.prev.next = lruEntry.next
        lruEntry.prev = lruEntry.next = None
    def _refreshLru(self, lruEntry):
        # Noop if lruEntry already is refreshed (i.e. head in lru).
        if self.lruHead is not lruEntry:
            self._removeLruFromDblLinkedList(lruEntry)
            self._addLruToDblLinkedList(lruEntry)
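
# Design note: the hand-rolled doubly linked list above keeps get/set at O(1).
# The same LRU ordering could also be maintained with collections.OrderedDict,
# which tracks insertion order; a rough, untested sketch (not part of this
# challenge solution) would look like:
#
#   from collections import OrderedDict
#   lru = OrderedDict()              # key -> DataEntry
#   lru.move_to_end(key)             # on get(): mark key as most recently used
#   lru.popitem(last=False)          # on eviction: drop the least recently used
#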
# =-=-=-=-=-
# Driver test to exercise Cache routines.
#
def main():
    oneSize = Cache._calculatedataEntrySize("1", "one")
    twoSize = Cache._calculatedataEntrySize("2", "two")
    cacheMemSize = oneSize + twoSize
    c = Cache(cacheMemSize)
    assert c.memoryAvailable == cacheMemSize
    # Insert an entry and make sure that is okay.
    c.set("1", "one")
    oneFound, one = c.get("1")
    assert oneFound
    assert one == "one"
    assert oneSize == 2 * len("1") + len(one)
    assert c.memoryAvailable == cacheMemSize - oneSize
    print("1 => {}".format(one))
    # Insert another entry and make sure that is okay.
    c.set("2", "two")
    twoFound, two = c.get("2")
    assert twoFound
    assert two == "two"
    assert c.memoryAvailable == cacheMemSize - (oneSize + twoSize)
    print("2 => {}".format(two))
    # Insert another entry and make sure "one" gets evicted to make room.
    c.set("3", "x")
    xFound, x = c.get("3")
    assert xFound
    assert x == "x"
    xSize = c._calculatedataEntrySize("3", x)
    assert c.memoryAvailable == cacheMemSize - (twoSize + xSize)
    print("3 => {}".format(x))
    oneFound, one = c.get("1")
    assert not oneFound
    assert one is None
    twoFound, two = c.get("2")
    assert twoFound
    assert two == "two"
    # Re-insert 3 and make sure x becomes xxx.
    c.set("3", "xxx")
    xxxFound, xxx = c.get("3")
    assert xxxFound
    assert xxx == "xxx"
    xxxSize = c._calculatedataEntrySize("3", xxx)
    assert c.memoryAvailable == cacheMemSize - (twoSize + xxxSize)
    print("3 => {}".format(xxx))
    # Explicitly remove xxx.
    c._removeDataEntry("3")
    xxxFound, xxx = c.get("3")
    assert not xxxFound
    assert xxx is None
    # Try to insert something that just won't fit. This has the side effect of
    # evicting everything.
    c.set("TOO_BIG", "B" * cacheMemSize)
    assert c.memoryAvailable == cacheMemSize
    assert not c.entries
    assert not c.lruHead
    assert not c.lruTail
    print("Happy happy, joy joy!!!")

if __name__ == "__main__":
    main()