
@MercuryRising
Created October 13, 2012 20:06
Caching Files
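Benchmarks a few ways of repeatedly serving file contents: reading from disk every time, caching in Redis (through a generator that re-reads once the key expires, and through a plain function call), and caching in an in-process Python dict.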
import redis
import time
import random


def load_file(fp, fpKey, r, expiry):
    """Read a file from disk and cache it in Redis under fpKey with a TTL."""
    with open(fp, "rb") as f:
        data = f.read()
    # Pipeline the SET and EXPIRE so they reach Redis in a single round trip.
    p = r.pipeline()
    p.set(fpKey, data)
    p.expire(fpKey, expiry)
    p.execute()
    return data
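
# A recent redis-py can also set the value and the TTL in one command via the
# ex= keyword; a minimal variant sketch (load_file_ex is not part of the
# original script, and assumes a redis-py release whose set() accepts ex=):
def load_file_ex(fp, fpKey, r, expiry):
    with open(fp, "rb") as f:
        data = f.read()
    r.set(fpKey, data, ex=expiry)
    return data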

# For use with a file list: pre-cache everything, then serve random cached
# entries, re-reading one file from disk whenever the expiry window lapses
def cache_or_get_gen_files(fps, expiry=300, r=redis.Redis(db=5)):
    fpKeys = []
    for fp in fps:
        fpKey = "cached:" + fp
        fpKeys.append(fpKey)
        load_file(fp, fpKey, r, expiry)
    while True:
        # Refresh one file so the cache stays warm
        i = random.randrange(len(fps))
        yield load_file(fps[i], fpKeys[i], r, expiry)
        t = time.time()
        while time.time() - t < expiry:
            yield r.get(random.choice(fpKeys))

def cache_or_get_gen(fp, expiry=300, r=redis.Redis(db=5)):
    fpKey = "cached:" + fp
    while True:
        # Read from disk (re-caching in Redis), then serve from Redis
        # until the expiry window has passed
        yield load_file(fp, fpKey, r, expiry)
        t = time.time()
        while time.time() - t < expiry:
            yield r.get(fpKey)
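
# Usage sketch (hypothetical path; assumes a local Redis server with db 5):
#   gen = cache_or_get_gen("some/file.txt")
#   data = next(gen)   # first next() reads from disk and caches in Redis
#   data = next(gen)   # later calls inside the expiry window come from Redis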

def cache_or_get(fp, expiry=300, r=redis.Redis(db=5)):
    fpKey = "cached:" + fp
    data = r.get(fpKey)
    if data:
        return data
    else:
        with open(fp, "rb") as f:
            data = f.read()
        p = r.pipeline()
        p.set(fpKey, data)
        p.expire(fpKey, expiry)
        p.execute()
        return data
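
# Usage sketch (hypothetical path). A cache hit returns the raw bytes from
# Redis, the same type f.read() returns on a miss:
#   data = cache_or_get("some/file.txt", expiry=300)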

def mem_cache(fp):
    # Cache the file's lines in a plain Python dict local to this generator
    files = {}
    while True:
        if files.get(fp):
            yield files[fp]
        else:
            with open(fp, "rb") as f:
                data = f.readlines()
            yield data
            files[fp] = data
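
# Note: the dict lives inside the generator, so cache hits only happen when
# you keep pulling from the same generator object. Usage sketch (hypothetical
# path):
#   gen = mem_cache("some/file.txt")
#   lines = next(gen)   # first next() reads the file from disk
#   lines = next(gen)   # later calls yield the cached list of lines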

def stressTest(fps, trials=40000):
    r = redis.Redis(db=5)

    # Baseline: read the files straight from disk `trials` times
    a = time.time()
    for x in range(trials):
        with open(random.choice(fps), "rb") as f:
            data = f.read()
    b = time.time()
    readAvg = trials/(b-a)

    # Generator version
    # Read the file once, cache it, then keep pulling from the same generator
    a = time.time()
    gen = cache_or_get_gen(random.choice(fps), r=r)
    for x in range(trials):
        data = next(gen)
    b = time.time()
    cachedAvgGen = trials/(b-a)

    # Non generator version
    # Read the file, cache it, fetch it with a fresh function call each time
    a = time.time()
    for x in range(trials):
        data = cache_or_get(random.choice(fps), r=r)
    b = time.time()
    cachedAvg = trials/(b-a)

    # Read file, cache it in a python object; reuse one generator per file so
    # the in-process dict cache actually gets hits
    gens = {fp: mem_cache(fp) for fp in fps}
    a = time.time()
    for x in range(trials):
        data = next(gens[random.choice(fps)])
    b = time.time()
    memCachedAvg = trials/(b-a)

    print "Total number of files: %s" % len(fps)
    print "%s file reads: %.2f reads/second\n" % (trials, readAvg)

    print "Yielding from generators for data:"
    print "single redis instance: %.2f reads/second (%.2f percent)" % (cachedAvgGen, 100*(cachedAvgGen-readAvg)/readAvg)
    print "\nFunction calls to get data:"
    print "single redis instance: %.2f reads/second (%.2f percent)" % (cachedAvg, 100*(cachedAvg-readAvg)/readAvg)
    print "\npython cached object: %.2f reads/second (%.2f percent)" % (memCachedAvg, 100*(memCachedAvg-readAvg)/readAvg)

if __name__ == "__main__":
    numFiles = 700
    # Input: a file that contains a list of file names (you could do this any other way too)
    pathToFileList = ''
    with open(pathToFileList, "rb") as f:
        files = f.readlines()
    # The file list I have has way too many files in it, so sample a subset
    fil = [random.choice(files) for x in range(numFiles)]
    fps = []
    for fp in fil:
        fp = "FilePathBase/%s" % fp.strip()
        fps.append(fp)
    stressTest(fps)