Last active
April 24, 2016 19:53
-
-
Save achalddave/8e9490efd348cdcabf21232358ec55eb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import errno
import logging
import os
import random

import lmdb
from tqdm import tqdm
# Show INFO-level messages, each prefixed with an HH:MM:SS timestamp followed
# by a zero-padded millisecond field.
logging.root.setLevel(logging.INFO)
logging.basicConfig(
    datefmt='%H:%M:%S',
    format='%(asctime)s.%(msecs).03d: %(message)s',
)
# kernprof injects a global `profile` decorator when it runs a script. When
# the script is run without kernprof, fall back to a decorator that returns
# the function unchanged so that @profile annotations remain valid.
try:
    profile
except NameError:
    def profile(func):
        """Return `func` unchanged (stand-in for kernprof's decorator)."""
        return func
def mkdir_p(path):
    """Create directory `path` and any missing parents, like ``mkdir -p``.

    Taken from <http://stackoverflow.com/a/600612/1291812>.

    Args:
        path: Directory to create.

    Raises:
        OSError: If creation fails for any reason other than `path` already
            existing as a directory (for example, `path` exists but is a
            regular file).
    """
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        # Swallow the error only when the directory is already there; every
        # other failure is propagated to the caller.
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise
def _value_path(tmpdir, key):
    """Return the path of the file under `tmpdir` that holds `key`'s value.

    Keys are kept as byte strings so they can be passed back to LMDB. When
    `key` is not already a native `str` (i.e. `bytes` on Python 3) it is
    decoded as UTF-8 so the file is named after the key text rather than its
    ``b'...'`` repr.
    """
    name = key if isinstance(key, str) else key.decode('utf-8')
    return '{}/{}'.format(tmpdir, name)


@profile
def main():
    """Time random-order reads from LMDB against reads from plain files.

    Steps, each announced through `logging.info`:
      1. Read every key from the LMDB at `lmdb_path`.
      2. Write each value to its own file, `tmpdir/key`.
      3. Shuffle the keys and look each one up in LMDB.
      4. Read each value's file back, in the same shuffled order.

    The function is decorated with @profile so per-line timings can be
    collected when run under kernprof.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('lmdb_path')
    parser.add_argument('tmpdir',
                        help='LMDB values will be stored in tmpdir/key')

    args = parser.parse_args()
    lmdb_path = args.lmdb_path

    logging.info('Reading keys from LMDB.')
    with lmdb.open(lmdb_path, readonly=True) as lmdb_environment:
        with lmdb_environment.begin() as transaction:
            cursor = transaction.cursor()
            keys = [key for (key, _) in cursor]
            # Copy keys; we do this in a separate line so we can profile the
            # above line separately. bytes() is the same as str() on
            # Python 2, and on Python 3 it keeps the keys as byte strings so
            # they can be handed back to cursor.get() below.
            keys = [bytes(key) for key in keys]
    logging.info('Read keys from LMDB.')

    logging.info('Writing key-values to disk as files.')
    mkdir_p(args.tmpdir)
    with lmdb.open(lmdb_path, readonly=True) as lmdb_environment:
        with lmdb_environment.begin() as transaction:
            cursor = transaction.cursor()
            for key in tqdm(keys):
                with open(_value_path(args.tmpdir, key), 'wb') as f:
                    f.write(cursor.get(key))
    logging.info('Wrote key-values to disk as files.')

    # Randomly shuffle the keys, then search for them in the database.
    random.shuffle(keys)

    logging.info('Reading keys in random order from LMDB.')
    with lmdb.open(lmdb_path, readonly=True) as lmdb_environment:
        with lmdb_environment.begin() as transaction:
            cursor = transaction.cursor()
            for key in tqdm(keys):
                cursor.get(key)

    logging.info('Reading keys in random order from file system.')
    for key in tqdm(keys):
        filename = _value_path(args.tmpdir, key)
        # The files were written in binary mode; read them back the same way
        # so no text decoding or newline translation is applied to the data.
        with open(filename, 'rb') as f:
            f.read()

    logging.info('Finished reading keys.')
# Run the benchmark only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Timer unit: 1e-06 s | |
Total time: 3852.08 s | |
File: time_lmdb.py | |
Function: main at line 30 | |
Line # Hits Time Per Hit % Time Line Contents | |
============================================================== | |
30 @profile | |
31 def main(): | |
32 1 2 2.0 0.0 parser = argparse.ArgumentParser( | |
33 1 1 1.0 0.0 description=__doc__, | |
34 1 1482 1482.0 0.0 formatter_class=argparse.ArgumentDefaultsHelpFormatter) | |
35 1 59 59.0 0.0 parser.add_argument('lmdb_path') | |
36 1 1 1.0 0.0 parser.add_argument('tmpdir', | |
37 1 48 48.0 0.0 help='LMDB values will be stored in tmpdir/key') | |
38 | |
39 1 550 550.0 0.0 args = parser.parse_args() | |
40 1 1 1.0 0.0 lmdb_path = args.lmdb_path | |
41 | |
42 1 144 144.0 0.0 logging.info('Reading keys from LMDB.') | |
43 1 63 63.0 0.0 with lmdb.open(lmdb_path, readonly=True) as lmdb_environment: | |
44 1 4 4.0 0.0 with lmdb_environment.begin() as transaction: | |
45 1 4 4.0 0.0 cursor = transaction.cursor() | |
46 138135 354683674 2567.7 9.2 keys = [key for (key, _) in cursor] | |
47 # Copy keys; we do this in a separate line so we can profile the | |
48 # above line separately. | |
49 138135 610476 4.4 0.0 keys = [str(key) for key in keys] | |
50 1 301 301.0 0.0 logging.info('Read keys from LMDB.') | |
51 | |
52 1 141 141.0 0.0 logging.info('Writing key-values to disk as files.') | |
53 1 10310 10310.0 0.0 mkdir_p(args.tmpdir) | |
54 1 48 48.0 0.0 with lmdb.open(lmdb_path, readonly=True) as lmdb_environment: | |
55 1 3 3.0 0.0 with lmdb_environment.begin() as transaction: | |
56 1 2 2.0 0.0 cursor = transaction.cursor() | |
57 138135 1400936 10.1 0.0 for key in tqdm(keys): | |
58 138134 7496652 54.3 0.2 with open('{}/{}'.format(args.tmpdir, key), 'wb') as f: | |
59 138134 645682628 4674.3 16.8 f.write(cursor.get(key)) | |
60 1 252 252.0 0.0 logging.info('Wrote key-values to disk as files.') | |
61 | |
62 # Randomly shuffle the keys, then search for them in the database. | |
63 1 117269 117269.0 0.0 random.shuffle(keys) | |
64 | |
65 1 178 178.0 0.0 logging.info('Reading keys in random order from LMDB.') | |
66 1 67 67.0 0.0 with lmdb.open(lmdb_path, readonly=True) as lmdb_environment: | |
67 1 5 5.0 0.0 with lmdb_environment.begin() as transaction: | |
68 1 2 2.0 0.0 cursor = transaction.cursor() | |
69 138135 2275492 16.5 0.1 for key in tqdm(keys): | |
70 138134 1316020659 9527.1 34.2 cursor.get(key) | |
71 | |
72 1 265 265.0 0.0 logging.info('Reading keys in random order from file system.') | |
73 138135 2024011 14.7 0.1 for key in tqdm(keys): | |
74 138134 349713 2.5 0.0 filename = '{}/{}'.format(args.tmpdir, key) | |
75 138134 3150209 22.8 0.1 with open(filename) as f: | |
76 138134 1518250676 10991.1 39.4 f.read() | |
77 | |
78 1 229 229.0 0.0 logging.info('Finished reading keys.') | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment