Last active
April 28, 2020 09:07
-
-
Save pythonhacker/a10df1c015b81a872946ee40de7a8a82 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
A simple get function which caches responses for a URL | |
using a LFU cache | |
""" | |
import requests | |
from collections import Counter | |
class LFUCache(object):
    """Least-frequently-used cache holding a bounded number of entries.

    Each store (``cache[key] = val``) and each successful lookup
    (``cache[key]``) increments the key's use count. When a new key is
    stored while the cache is full, the ``watermark`` keys with the lowest
    use counts are evicted in a single batch.
    """

    def __init__(self, maxsize=128):
        """Create an empty cache.

        Args:
            maxsize: Number of entries at which storing a new key triggers
                an eviction pass.
        """
        self.cache = {}
        self.maxsize = maxsize
        # Each eviction pass drops 5% of maxsize, but always at least one
        # entry so that a pass makes room even for small caches.
        self.watermark = max(1, int(0.05 * maxsize))
        # Per-key use counts; kept in step with the keys of self.cache.
        self.counter = Counter()

    def __contains__(self, key):
        """Return True if *key* is currently cached (does not count as a use)."""
        return key in self.cache

    def __setitem__(self, key, val):
        """Store *val* under *key*, evicting rarely used entries if full."""
        # Only a brand-new key can grow the cache; updating an existing key
        # never needs an eviction. ``>=`` (rather than ``==``) keeps the
        # bound effective even if the cache is already at or over maxsize.
        if key not in self.cache and len(self.cache) >= self.maxsize:
            self.evict()
        self.cache[key] = val
        self.counter[key] += 1

    def __getitem__(self, key):
        """Return the value cached for *key*, or None if it is not cached.

        A successful lookup counts as a use of *key*; a miss records nothing.
        """
        if key in self.cache:
            self.counter[key] += 1
            return self.cache[key]
        return None

    def evict(self):
        """Evict the ``watermark`` least frequently used entries."""
        # Ideally should block reads when doing this.
        # sorted() is stable, so among keys with equal counts the ones that
        # entered the counter first (the oldest) are evicted first.
        victims = sorted(self.counter, key=self.counter.get)[:self.watermark]
        for key in victims:
            # pop() tolerates a key that is counted but no longer cached.
            self.cache.pop(key, None)
            del self.counter[key]
def get(url, cache=LFUCache()):
    """Fetch *url* over HTTP, reusing a previously cached response.

    Args:
        url: The URL to request.
        cache: Mapping-like store of responses keyed by URL. The default
            instance is created once, when this function is defined, and
            is therefore shared by every call that omits the argument.

    Returns:
        The cached response for *url* if there is one; otherwise the
        response of a fresh ``requests.get`` call, which is cached first.
    """
    # CAVEAT: relies on the mutable-default-argument side effect on purpose
    # so the cache persists between calls. Prefer a decorator for
    # "production" code.
    if url in cache:
        return cache[url]

    # Cache miss: fetch (15 second timeout) and remember the response.
    fetched = requests.get(url, timeout=15)
    cache[url] = fetched
    return fetched
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment