Last active
April 24, 2016 19:53
-
-
Save achalddave/8e9490efd348cdcabf21232358ec55eb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import errno
import logging
import os
import random

import lmdb
from tqdm import tqdm
# Install a root handler that stamps each message with a wall-clock time
# (hours:minutes:seconds plus milliseconds), then let INFO-level messages
# through on the root logger.
logging.basicConfig(
    datefmt='%H:%M:%S',
    format='%(asctime)s.%(msecs).03d: %(message)s',
)
logging.getLogger().setLevel(logging.INFO)
# kernprof makes a `profile` decorator available when it runs the script.
# When the name is missing (a plain `python` run), fall back to a
# pass-through so `@profile` can stay on functions unconditionally.
try:
    profile
except NameError:
    def profile(x):
        """Return ``x`` unchanged; no-op stand-in for kernprof's decorator."""
        return x
def mkdir_p(path):
    """Create ``path`` and any missing parent directories, like ``mkdir -p``.

    Taken from <http://stackoverflow.com/a/600612/1291812>.

    Args:
        path: Directory to create.

    Raises:
        OSError: If creation fails for any reason other than ``path``
            already existing as a directory.
    """
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        # An already-existing directory is the success case. Anything else
        # (for example a regular file occupying the path) is a real error
        # and is re-raised unchanged.
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise
@profile
def main():
    """Compare random-order reads from LMDB with reads of per-key files.

    Command-line arguments:
        lmdb_path: Path of the LMDB database to read.
        tmpdir: Directory that receives one file per LMDB key, named after
            the key and holding that key's value. Created if missing.

    Phases, each announced through ``logging.info``:
        1. Collect every key stored in the database.
        2. Write each key's value to ``tmpdir/key``.
        3. Shuffle the keys and look each one up in LMDB.
        4. Read the per-key files back in the same shuffled order.

    The work is kept in this single ``@profile``-decorated function, and
    some steps are deliberately on their own lines, so that a line profiler
    can attribute time to them separately.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('lmdb_path')
    parser.add_argument('tmpdir',
                        help='LMDB values will be stored in tmpdir/key')
    args = parser.parse_args()

    lmdb_path = args.lmdb_path

    logging.info('Reading keys from LMDB.')
    with lmdb.open(lmdb_path, readonly=True) as lmdb_environment:
        with lmdb_environment.begin() as transaction:
            cursor = transaction.cursor()
            keys = [key for (key, _) in cursor]
            # Copy keys; we do this in a separate line so we can profile the
            # above line separately.
            # NOTE(review): on Python 3, str() of a bytes key yields its
            # repr ("b'...'"), which would break the lookups below. This
            # script appears to target Python 2 -- confirm before porting.
            keys = [str(key) for key in keys]
    logging.info('Read keys from LMDB.')

    logging.info('Writing key-values to disk as files.')
    mkdir_p(args.tmpdir)
    # The environment is reopened for every phase rather than shared.
    with lmdb.open(lmdb_path, readonly=True) as lmdb_environment:
        with lmdb_environment.begin() as transaction:
            cursor = transaction.cursor()
            for key in tqdm(keys):
                with open('{}/{}'.format(args.tmpdir, key), 'wb') as f:
                    f.write(cursor.get(key))
    logging.info('Wrote key-values to disk as files.')

    # Randomly shuffle the keys, then search for them in the database.
    random.shuffle(keys)

    logging.info('Reading keys in random order from LMDB.')
    with lmdb.open(lmdb_path, readonly=True) as lmdb_environment:
        with lmdb_environment.begin() as transaction:
            cursor = transaction.cursor()
            for key in tqdm(keys):
                cursor.get(key)

    logging.info('Reading keys in random order from file system.')
    for key in tqdm(keys):
        filename = '{}/{}'.format(args.tmpdir, key)
        # The files were written in binary mode, so read them back the same
        # way: no newline translation or text decoding is applied, and the
        # bytes read are exactly the bytes stored.
        with open(filename, 'rb') as f:
            f.read()
    logging.info('Finished reading keys.')
# Run the benchmark only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment