Skip to content

Instantly share code, notes, and snippets.

@pythonhacker
Last active April 28, 2020 09:07
Show Gist options
  • Save pythonhacker/a10df1c015b81a872946ee40de7a8a82 to your computer and use it in GitHub Desktop.
Save pythonhacker/a10df1c015b81a872946ee40de7a8a82 to your computer and use it in GitHub Desktop.
"""
A simple get function which caches responses for a URL
using an LFU cache
"""
import requests
from collections import Counter
class LFUCache(object):
    """Bounded mapping that evicts its least frequently used entries.

    Every stored key has a use count kept in ``self.counter``. The count
    is bumped on each write *and* on each successful read, so entries that
    are read often survive eviction. When a new key arrives and the cache
    is full, the ``watermark`` entries with the lowest counts are dropped
    in one batch.

    Not synchronized: ideally reads should be blocked while an eviction
    is in progress, but no locking is done here.
    """

    def __init__(self, maxsize=128):
        """Create an empty cache.

        :param maxsize: number of entries at which inserting a new key
            triggers an eviction.
        """
        self.cache = {}
        self.maxsize = maxsize
        # Each eviction removes 5% of maxsize, but always at least one entry.
        self.watermark = max(1, int(0.05 * maxsize))
        self.counter = Counter()

    def __contains__(self, key):
        """Return True if ``key`` is currently stored (does not count as a use)."""
        return key in self.cache

    def __setitem__(self, key, val):
        """Store ``val`` under ``key``, evicting first if a new key would overflow.

        Updating an existing key never evicts anything.
        """
        # ``>=`` rather than ``==`` so the bound still holds when maxsize is 0
        # or the cache was pushed past the limit by other means.
        if key not in self.cache and len(self.cache) >= self.maxsize:
            self.evict()
        self.cache[key] = val
        self.counter[key] += 1

    def __getitem__(self, key):
        """Return the value stored under ``key``, or None if it is absent.

        A hit increments the key's use count; a miss leaves the counter
        untouched so that unknown keys never become eviction candidates.
        """
        try:
            val = self.cache[key]
        except KeyError:
            # Kept for backward compatibility: a miss yields None, not KeyError.
            return None
        self.counter[key] += 1
        return val

    def evict(self):
        """Drop the ``watermark`` entries with the lowest use counts."""
        # most_common() orders by descending count, so the tail of the list
        # holds the least frequently used keys.
        victims = [key for key, _ in self.counter.most_common()[-self.watermark:]]
        for key in victims:
            # pop() tolerates a key that is tracked in the counter but was
            # already removed from the cache.
            self.cache.pop(key, None)
            del self.counter[key]
def get(url, cache=LFUCache()):
    """Fetch ``url`` with ``requests``, reusing a cached response when present.

    :param url: address to request.
    :param cache: mapping-like store supporting ``in``, item read and item
        write; defaults to a single ``LFUCache`` shared by all calls.
    :return: the cached response if ``url`` was seen before, otherwise the
        freshly fetched response (which is stored before being returned).
    """
    # CAVEAT: the default ``cache`` is built once, when the function is
    # defined, and that one instance is intentionally reused across calls.
    # Use a decorator for "production" code.
    if url not in cache:
        # Miss: hit the network, remember the result, hand it back.
        reply = requests.get(url, timeout=15)
        cache[url] = reply
        return reply
    return cache[url]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment