Created
January 14, 2016 12:54
-
-
Save FrancescAlted/8e87c8762a49cf5fc897 to your computer and use it in GitHub Desktop.
A demonstration of a simple key-value store using numpy (.npz) and bcolz (ctable)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Benchmark to compare the times for storing numpy arrays in an on-disk key-value store.
# The main point is to compare numpy serialization (.npz files) vs a bcolz (ctable) approach.
from __future__ import print_function | |
import sys | |
import os | |
import os.path | |
import subprocess | |
import getopt | |
import shutil | |
from time import time | |
import numpy as np | |
import bcolz | |
from bcolz.py2help import xrange | |
# Defaults for the command-line options; the __main__ section overrides
# them from the parsed arguments.
keystore_dir = None    # no default: must be given with -d (its contents get wiped)
flavor = "numpy"       # storage backend: "numpy" or "bcolz"
nkeys = 100            # number of keys written to the store
max_entries = 100000   # upper bound on array length; an int, like the value parsed from -m
cname = "blosclz"      # compressor name handed to bcolz.cparams
clevel = 9             # compression level; 0 makes the numpy flavor use np.savez

# A time reference, updated by enter() and show_time()
tref = 0
class KeyStore(object):
    """On-disk key-value store holding a pair of numpy arrays per key.

    Two storage flavors are supported:

    * ``"numpy"``: each entry is a ``<key>.npz`` file in the store directory.
    * ``"bcolz"``: each entry is a bcolz ctable rooted at ``<key>`` in the
      store directory.

    Values are ``(arr1, arr2)`` tuples; keys are strings usable as file names.
    """

    FLAVORS = ("numpy", "bcolz")

    def __init__(self, flavor, keystore_dir, clevel=None, cname=None):
        """Create an empty store rooted at `keystore_dir`.

        BEWARE: if `keystore_dir` already exists it is removed together with
        all of its contents before the fresh directory is created.

        Parameters
        ----------
        flavor : str
            Either ``"numpy"`` or ``"bcolz"``.
        keystore_dir : str
            Directory that will hold the entries.
        clevel, cname : optional
            Compression level and compressor name.  When left as None the
            module-level ``clevel`` / ``cname`` settings are read at write
            time, which is what the benchmark script relies on.

        Raises
        ------
        ValueError
            If `flavor` is not a supported flavor.
        """
        # Validate before touching the disk so that a typo in the flavor
        # does not wipe an existing directory.
        if flavor not in self.FLAVORS:
            raise ValueError(
                "unknown flavor %r; expected one of %r" % (flavor, self.FLAVORS))
        if os.path.exists(keystore_dir):
            shutil.rmtree(keystore_dir)
        os.mkdir(keystore_dir)
        self.keystore_dir = keystore_dir
        self.flavor = flavor
        self.clevel = clevel
        self.cname = cname

    def _entry_path(self, key):
        """Return the on-disk path of the entry for `key`."""
        keypath = os.path.join(self.keystore_dir, key)
        if self.flavor == "numpy":
            keypath += ".npz"
        return keypath

    def _get_clevel(self):
        """Return the explicit compression level, else the module-level one."""
        return clevel if self.clevel is None else self.clevel

    def _get_cname(self):
        """Return the explicit compressor name, else the module-level one."""
        return cname if self.cname is None else self.cname

    def __getitem__(self, key):
        """Return the ``(arr1, arr2)`` tuple stored under `key`.

        Raises KeyError (carrying the key) when there is no such entry.
        """
        keypath = self._entry_path(key)
        if not os.path.exists(keypath):
            raise KeyError(key)
        if self.flavor == "numpy":
            # NpzFile keeps the archive open; the context manager closes it
            # once both arrays have been read into memory.
            with np.load(keypath) as diskobj:
                return (diskobj['arr1'][:], diskobj['arr2'][:])
        diskobj = bcolz.ctable(rootdir=keypath)
        return (diskobj['arr1'][:], diskobj['arr2'][:])

    def __setitem__(self, key, arrs):
        """Store the ``(arr1, arr2)`` tuple under `key`, replacing any
        previous entry for that key only."""
        keypath = self._entry_path(key)
        # Remove just this entry: a bcolz entry is a directory, a numpy
        # entry is a single file.
        if os.path.isdir(keypath):
            shutil.rmtree(keypath)
        elif os.path.exists(keypath):
            os.remove(keypath)
        arr1, arr2 = arrs
        if self.flavor == "numpy":
            # keypath already ends in ".npz", so numpy does not append it again
            if self._get_clevel() > 0:
                np.savez_compressed(keypath, arr1=arr1, arr2=arr2)
            else:
                np.savez(keypath, arr1=arr1, arr2=arr2)
        elif self.flavor == "bcolz":
            bcolz.ctable(columns=(arr1, arr2),
                         names=("arr1", "arr2"),
                         rootdir=keypath,
                         cparams=bcolz.cparams(clevel=self._get_clevel(),
                                               cname=self._get_cname()))
def show_time(explain):
    """Print the seconds elapsed since the last time reference.

    `explain` is a short label shown next to the timing.  The module-level
    time reference ``tref`` is then moved to the current time, so successive
    calls report consecutive intervals.
    """
    global tref
    now = time()
    elapsed = now - tref
    tref = now
    print("Time (%20s) --> %.3f" % (explain, elapsed))
def enter():
    """Start a timed section by storing the current time in ``tref``."""
    global tref
    started_at = time()
    tref = started_at
def after_create(mess=""):
    """Report the time taken by the creation phase.

    A non-empty `mess` is appended to the "creation" label after a comma.
    """
    # An empty message is appended unchanged, so the label stays "creation"
    suffix = ", " + mess if mess else mess
    show_time("creation" + suffix)
def after_query(mess=""):
    """Report the time taken by the query phase.

    A non-empty `mess` is appended to the "query" label after a comma.
    """
    # An empty message is appended unchanged, so the label stays "query"
    suffix = ", " + mess if mess else mess
    show_time("query" + suffix)
def test_flavor(nqueries=100):
    """Build a key-value store of random arrays, then time random lookups.

    The store is configured from the module-level settings ``flavor``,
    ``keystore_dir``, ``nkeys`` and ``max_entries``.  Creation and query
    times are printed through after_create() / after_query().

    Parameters
    ----------
    nqueries : int
        Number of randomly chosen keys to read back (default 100).

    Returns
    -------
    int
        Total number of array elements returned by the lookups.
    """
    enter()
    print("Building database. Wait please...")
    keystore = KeyStore(flavor, keystore_dir)
    # max_entries may be a float (e.g. 1e5); randint wants integer bounds
    upper = int(max_entries)
    for key in range(nkeys):
        # randint(0, 0) raises, so an upper bound of 0 means empty arrays
        nentries = np.random.randint(0, upper) if upper > 0 else 0
        arr1 = np.random.randint(0, 1000, nentries)
        arr2 = 1e9 + np.random.rand(nentries)
        keystore[str(key)] = (arr1, arr2)
    after_create()

    # Query `nqueries` arbitrary keys (nothing to query in an empty store)
    keys = np.random.randint(0, nkeys, nqueries) if nkeys > 0 else []
    print("Retrieving %d keys in arbitrary order..." % nqueries)
    elem_out = 0
    for key in keys:
        out = keystore[str(key)]
        elem_out += len(out[0]) + len(out[1])
    after_query()
    return elem_out
if __name__ == "__main__":
    # Command-line entry point: parse the options, then run the benchmark
    # for the selected flavor.

    # Built before parsing, so the defaults shown are the built-in ones.
    usage = """\
usage: %s [-f flavor] [-d dir] [-k nkeys] [-m max_entries] [-c cname] [-l clevel]
            -f select the flavor: '%s' (def.), 'bcolz'
            -d The directory for doing the bench (def: '%s')
            -k the number of keys in store (def. '%d')
            -m the maximum number of elements in arrays (def. '%d')
            -c the compressor name (def. '%s')
            -l the compression level (def. %d)
            \n""" % (sys.argv[0], flavor, keystore_dir, nkeys, max_entries, cname, clevel)

    try:
        opts, pargs = getopt.getopt(sys.argv[1:], 'sf:d:k:m:c:l:')
    except getopt.GetoptError:
        sys.stderr.write(usage)
        sys.exit(1)

    # Get the options
    try:
        for opt, value in opts:
            if opt == '-s':
                # Accepted for backward compatibility; nothing reads it yet
                show = True
            elif opt == '-f':
                flavor = value
            elif opt == '-d':
                keystore_dir = value
            elif opt == '-k':
                nkeys = int(value)
            elif opt == '-m':
                max_entries = int(value)
            elif opt == '-c':
                cname = value
            elif opt == '-l':
                clevel = int(value)
    except ValueError:
        # A numeric option was given a non-integer value
        sys.stderr.write(usage)
        sys.exit(1)

    if not keystore_dir:
        raise ValueError(
            "Please specify into which directory the keystore will go. "
            "BEWARE: all its contents will be nuked!")

    np.random.seed(12)  # so as to get reproducible results

    if flavor == "numpy":
        mess = "numpy (via .npz files)"
    elif flavor == "bcolz":
        mess = "bcolz (via ctable(clevel=%d, cname='%s'))" % (clevel, cname)
    else:
        raise ValueError(
            "unknown flavor %r: expected 'numpy' or 'bcolz'" % (flavor,))
    print("########## Checking method: %s ############" % mess)

    out = test_flavor()
    print("Number of elements out of getitem:", out)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment