Skip to content

Instantly share code, notes, and snippets.

@achalddave
Last active April 24, 2016 19:53

Revisions

  1. achalddave revised this gist Apr 24, 2016. 1 changed file with 58 additions and 0 deletions.
    58 changes: 58 additions & 0 deletions timings.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,58 @@
    Timer unit: 1e-06 s

    Total time: 3852.08 s
    File: time_lmdb.py
    Function: main at line 30

    Line # Hits Time Per Hit % Time Line Contents
    ==============================================================
    30 @profile
    31 def main():
    32 1 2 2.0 0.0 parser = argparse.ArgumentParser(
    33 1 1 1.0 0.0 description=__doc__,
    34 1 1482 1482.0 0.0 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    35 1 59 59.0 0.0 parser.add_argument('lmdb_path')
    36 1 1 1.0 0.0 parser.add_argument('tmpdir',
    37 1 48 48.0 0.0 help='LMDB values will be stored in tmpdir/key')
    38
    39 1 550 550.0 0.0 args = parser.parse_args()
    40 1 1 1.0 0.0 lmdb_path = args.lmdb_path
    41
    42 1 144 144.0 0.0 logging.info('Reading keys from LMDB.')
    43 1 63 63.0 0.0 with lmdb.open(lmdb_path, readonly=True) as lmdb_environment:
    44 1 4 4.0 0.0 with lmdb_environment.begin() as transaction:
    45 1 4 4.0 0.0 cursor = transaction.cursor()
    46 138135 354683674 2567.7 9.2 keys = [key for (key, _) in cursor]
    47 # Copy keys; we do this in a separate line so we can profile the
    48 # above line separately.
    49 138135 610476 4.4 0.0 keys = [str(key) for key in keys]
    50 1 301 301.0 0.0 logging.info('Read keys from LMDB.')
    51
    52 1 141 141.0 0.0 logging.info('Writing key-values to disk as files.')
    53 1 10310 10310.0 0.0 mkdir_p(args.tmpdir)
    54 1 48 48.0 0.0 with lmdb.open(lmdb_path, readonly=True) as lmdb_environment:
    55 1 3 3.0 0.0 with lmdb_environment.begin() as transaction:
    56 1 2 2.0 0.0 cursor = transaction.cursor()
    57 138135 1400936 10.1 0.0 for key in tqdm(keys):
    58 138134 7496652 54.3 0.2 with open('{}/{}'.format(args.tmpdir, key), 'wb') as f:
    59 138134 645682628 4674.3 16.8 f.write(cursor.get(key))
    60 1 252 252.0 0.0 logging.info('Wrote key-values to disk as files.')
    61
    62 # Randomly shuffle the keys, then search for them in the database.
    63 1 117269 117269.0 0.0 random.shuffle(keys)
    64
    65 1 178 178.0 0.0 logging.info('Reading keys in random order from LMDB.')
    66 1 67 67.0 0.0 with lmdb.open(lmdb_path, readonly=True) as lmdb_environment:
    67 1 5 5.0 0.0 with lmdb_environment.begin() as transaction:
    68 1 2 2.0 0.0 cursor = transaction.cursor()
    69 138135 2275492 16.5 0.1 for key in tqdm(keys):
    70 138134 1316020659 9527.1 34.2 cursor.get(key)
    71
    72 1 265 265.0 0.0 logging.info('Reading keys in random order from file system.')
    73 138135 2024011 14.7 0.1 for key in tqdm(keys):
    74 138134 349713 2.5 0.0 filename = '{}/{}'.format(args.tmpdir, key)
    75 138134 3150209 22.8 0.1 with open(filename) as f:
    76 138134 1518250676 10991.1 39.4 f.read()
    77
    78 1 229 229.0 0.0 logging.info('Finished reading keys.')

  2. achalddave revised this gist Apr 24, 2016. 1 changed file with 82 additions and 1 deletion.
    83 changes: 82 additions & 1 deletion time_lmdb.py
    Original file line number Diff line number Diff line change
    @@ -1 +1,82 @@
    # Empty file.
    import argparse
    import logging
    import random
    import os

    import lmdb
    from tqdm import tqdm

    # Show INFO-level messages, each prefixed with a wall-clock time that
    # includes milliseconds.
    logging.getLogger().setLevel(logging.INFO)
    logging.basicConfig(
        format='%(asctime)s.%(msecs).03d: %(message)s',
        datefmt='%H:%M:%S',
    )

    # The script decorates main() with @profile. When the name is not already
    # defined (i.e. kernprof isn't being used), fall back to a decorator that
    # returns the function unchanged.
    try:
        profile
    except NameError:
        def profile(func):
            """Return ``func`` untouched; stand-in for a real profiler."""
            return func


    def mkdir_p(path):
        """Create ``path`` and any missing parent directories (``mkdir -p``).

        Taken from <http://stackoverflow.com/a/600612/1291812>

        Args:
            path: Directory to create.

        Raises:
            OSError: If creation fails for any reason other than ``path``
                already existing as a directory.
        """
        # Imported locally: the EEXIST check below needs errno, and without
        # it the "directory already exists" path died with a NameError.
        import errno

        try:
            os.makedirs(path)
        except OSError as exc:  # Python >2.5
            # An already-existing directory is the one tolerated failure;
            # anything else (e.g. a regular file sitting at ``path``)
            # propagates to the caller.
            if not (exc.errno == errno.EEXIST and os.path.isdir(path)):
                raise

    @profile
    def main():
        """Time random-order reads from an LMDB against reads from plain files.

        Command-line arguments:
            lmdb_path: Path of the LMDB to read.
            tmpdir: Directory that receives one file per key, named after the
                key and holding that key's value.

        Steps, in order:
            1. Collect every key by iterating a cursor over the LMDB.
            2. Write each value to ``tmpdir/key``.
            3. Shuffle the keys and fetch each one from the LMDB.
            4. Read each file back in the same shuffled order.

        Everything stays inline in this one function on purpose: it carries
        the ``@profile`` decorator, so the per-line timings cover each step
        only while the steps live in this body.
        """
        parser = argparse.ArgumentParser(
            description=__doc__,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        parser.add_argument('lmdb_path')
        parser.add_argument('tmpdir',
                            help='LMDB values will be stored in tmpdir/key')

        args = parser.parse_args()
        lmdb_path = args.lmdb_path

        logging.info('Reading keys from LMDB.')
        with lmdb.open(lmdb_path, readonly=True) as lmdb_environment:
            with lmdb_environment.begin() as transaction:
                cursor = transaction.cursor()
                keys = [key for (key, _) in cursor]
                # Copy keys; we do this in a separate line so we can profile
                # the above line separately.
                # NOTE(review): str(key) presumably relies on Python 2
                # byte-string keys -- confirm before running on Python 3.
                keys = [str(key) for key in keys]
        logging.info('Read keys from LMDB.')

        logging.info('Writing key-values to disk as files.')
        mkdir_p(args.tmpdir)
        with lmdb.open(lmdb_path, readonly=True) as lmdb_environment:
            with lmdb_environment.begin() as transaction:
                cursor = transaction.cursor()
                for key in tqdm(keys):
                    with open('{}/{}'.format(args.tmpdir, key), 'wb') as f:
                        f.write(cursor.get(key))
        logging.info('Wrote key-values to disk as files.')

        # Randomly shuffle the keys, then search for them in the database.
        random.shuffle(keys)

        logging.info('Reading keys in random order from LMDB.')
        with lmdb.open(lmdb_path, readonly=True) as lmdb_environment:
            with lmdb_environment.begin() as transaction:
                cursor = transaction.cursor()
                for key in tqdm(keys):
                    cursor.get(key)

        logging.info('Reading keys in random order from file system.')
        for key in tqdm(keys):
            filename = '{}/{}'.format(args.tmpdir, key)
            # The files were written in binary mode, so read them back the
            # same way: text mode would add decoding / newline translation
            # to the timed read and can fail outright on non-text values.
            with open(filename, 'rb') as f:
                f.read()

        logging.info('Finished reading keys.')

    # Run the benchmark only when executed as a script, not when imported.
    if __name__ == '__main__':
        main()
  3. achalddave created this gist Apr 24, 2016.
    1 change: 1 addition & 0 deletions time_lmdb.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1 @@
    # Empty file.