Skip to content

Instantly share code, notes, and snippets.

@achalddave
Last active April 24, 2016 19:53
Show Gist options
  • Save achalddave/8e9490efd348cdcabf21232358ec55eb to your computer and use it in GitHub Desktop.
Save achalddave/8e9490efd348cdcabf21232358ec55eb to your computer and use it in GitHub Desktop.
import argparse
import logging
import random
import os
import lmdb
from tqdm import tqdm
# Configure the root logger: messages are prefixed with an HH:MM:SS timestamp
# followed by the millisecond part, and everything at INFO or above is shown.
logging.basicConfig(
    datefmt='%H:%M:%S',
    format='%(asctime)s.%(msecs).03d: %(message)s',
)
logging.root.setLevel(logging.INFO)
# Keep `@profile` usable when the script is not run under kernprof: if no
# `profile` name is already available, define a decorator that returns the
# decorated object unchanged.
try:
    profile
except NameError:
    def profile(func):
        return func
def mkdir_p(path):
    """Create `path` and any missing parent directories, like `mkdir -p`.

    Adapted from <http://stackoverflow.com/a/600612/1291812>.

    Args:
        path: Directory to create. It is not an error if it already exists.

    Raises:
        OSError: If creation fails and `path` is not an existing directory
            (for example, a regular file is in the way).
    """
    try:
        os.makedirs(path)
    except OSError:
        # An already-existing directory is the success case. Checking the
        # filesystem directly (instead of comparing against errno.EEXIST)
        # avoids relying on the `errno` module, which is not imported at the
        # top of this file and previously caused a NameError here.
        if not os.path.isdir(path):
            raise
def _key_to_path(directory, key):
    """Return the path of the file under `directory` holding `key`'s value."""
    if not isinstance(key, str):
        # On Python 3 the keys are bytes; decode them so the file is named
        # after the key itself rather than after its "b'...'" repr.
        key = os.fsdecode(key)
    return '{}/{}'.format(directory, key)


@profile
def main():
    """Compare random-order reads from an LMDB with reads from plain files.

    Command-line arguments:
        lmdb_path: Path of the LMDB to read.
        tmpdir: Directory in which every value is stored as a file named
            after its key (created if missing).

    The script (1) collects all keys from the LMDB, (2) writes each value to
    `tmpdir/key`, (3) shuffles the keys, then (4) fetches every key from the
    LMDB and (5) reads every file back, with each loop wrapped in tqdm.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('lmdb_path')
    parser.add_argument('tmpdir',
                        help='LMDB values will be stored in tmpdir/key')
    args = parser.parse_args()

    lmdb_path = args.lmdb_path

    logging.info('Reading keys from LMDB.')
    with lmdb.open(lmdb_path, readonly=True) as lmdb_environment:
        with lmdb_environment.begin() as transaction:
            cursor = transaction.cursor()
            keys = [key for (key, _) in cursor]
            # Copy keys; we do this in a separate line so we can profile the
            # above line separately. bytes() (rather than str()) keeps the
            # keys usable for lookups on Python 3, where str() would turn
            # b'k' into the text "b'k'"; on Python 2 bytes is str.
            keys = [bytes(key) for key in keys]
    logging.info('Read keys from LMDB.')

    logging.info('Writing key-values to disk as files.')
    mkdir_p(args.tmpdir)
    with lmdb.open(lmdb_path, readonly=True) as lmdb_environment:
        with lmdb_environment.begin() as transaction:
            cursor = transaction.cursor()
            for key in tqdm(keys):
                with open(_key_to_path(args.tmpdir, key), 'wb') as f:
                    f.write(cursor.get(key))
    logging.info('Wrote key-values to disk as files.')

    # Randomly shuffle the keys, then search for them in the database.
    random.shuffle(keys)

    logging.info('Reading keys in random order from LMDB.')
    with lmdb.open(lmdb_path, readonly=True) as lmdb_environment:
        with lmdb_environment.begin() as transaction:
            cursor = transaction.cursor()
            for key in tqdm(keys):
                cursor.get(key)

    logging.info('Reading keys in random order from file system.')
    for key in tqdm(keys):
        filename = _key_to_path(args.tmpdir, key)
        # Read in binary mode to match how the files were written: text mode
        # would decode the raw values (and may fail on non-text data).
        with open(filename, 'rb') as f:
            f.read()
    logging.info('Finished reading keys.')
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment