Last active
April 13, 2020 23:08
-
-
Save nickva/e87aa2f7f896805bfee36a044a3800b0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# | |
# Script to benchmark CouchDB compaction. It creates a db, populates it with | |
# various types of data based on provided parameters, then times the | |
# compaction. | |
# | |
# By default this script uses the 15986 (local) port so it creates and operates | |
# on one shard only. | |
# | |
# Specifying a parameter multiple times runs the script with all possible
# combinations of that parameter's values along with all possible combinations
# of the other parameters. For example:
# | |
# $ ./compact_bench.py -r 1 -r 10 -r 50 -r 100 -n 100000
# | |
# **************** num=100000,revisions=1 **************** | |
# Updating : 35.3s docs/s:2831 revs/s:2831 fsize:121258183 | |
# Compacting : 10.2s docs/s:9814 revs/s:9814 fsize:21127367 | |
# | |
# **************** num=100000,revisions=10 **************** | |
# Updating : 47.4s docs/s:2111 revs/s:21114 fsize:170561735 | |
# Compacting : 17.6s docs/s:5687 revs/s:56871 fsize:52142279 | |
# | |
# **************** num=100000,revisions=50 **************** | |
# Updating : 95.6s docs/s:1046 revs/s:52311 fsize:358736071 | |
# Compacting : 30.6s docs/s:3270 revs/s:163530 fsize:176410823 | |
# | |
# **************** num=100000,revisions=100 **************** | |
# Updating : 163.2s docs/s:612 revs/s:61260 fsize:594022599 | |
# Compacting : 44.8s docs/s:2232 revs/s:223206 fsize:321802439 | |
import argparse | |
import sys | |
import couchdb | |
import random | |
import string | |
import uuid | |
import time | |
import copy | |
import itertools | |
# Default server URL (credentials included) used when -u/--url is not given.
# NOTE(review): the host part looks like it was mangled by e-mail obfuscation
# in the copy this file was taken from -- restore the real
# user:password@host value before relying on this default.
URL = 'http://adm:[email protected]:15984'

# Default database name used when -d/--dbname is not given.
DBNAME = 'cbenchdb'

# Benchmark parameters, one tuple per parameter:
#   (long option / attribute name, short option letter, default, help text)
# Each entry becomes a repeatable command-line option; the Python type of the
# default decides how values given on the command line are parsed.
PARAMS = [
    ('num', 'n', 10000, "Number of documents"),
    ('batch_size', 'b', 1000, "Batch size"),
    ('size', 's', 1, "Emit value size"),
    ('revisions', 'r', 1, "Number of revisions / doc"),
    ('min_id_size', 'm', 1, "Minimum ID size"),
    ('random_ids', 'x', True, "Use random IDs?"),
    ('alphabet', 'a', '', "Data generation alphabet"),
    ('attachment_size', 't', 0, "Attachment size"),
]
def main(args):
    """Run the benchmark once for every combination of parameter values.

    ``args`` is the namespace returned by ``_args()``. Every PARAMS entry is
    stored on it as a list: the values given on the command line, or an empty
    list when the option was not used. An empty list is replaced by the
    parameter's default. ``run()`` is then called once per element of the
    cartesian product of all value lists, with each parameter set to a single
    scalar value on a copy of ``args``.

    The label passed to ``run()`` lists only the parameters that were given
    explicitly on the command line.
    """
    param_names = [name for (name, _, _, _) in PARAMS]
    defaults = dict((name, default) for (name, _, default, _) in PARAMS)

    is_default = set()
    values_by_name = {}
    for name, value in args._get_kwargs():
        if not isinstance(value, list):
            # Plain options (url, dbname, ...) are not benchmark parameters.
            continue
        if not value:
            value = [defaults[name]]
            is_default.add(name)
        values_by_name[name] = value

    # Keep the value lists in PARAMS order so they line up with param_names.
    param_values = [values_by_name[name] for name in param_names]

    for combo in itertools.product(*param_values):
        # Materialize the pairs: they are iterated twice below, and on
        # Python 3 a bare zip() would already be exhausted the second time.
        pairs = list(zip(param_names, combo))
        paramstr = ",".join("%s=%s" % (name, value) for (name, value) in pairs
                            if name not in is_default)
        run_args = copy.copy(args)
        for name, value in pairs:
            setattr(run_args, name, value)
        run(run_args, paramstr)
def get_ids(args):
    """Return the list of ``args.num`` document IDs for this run.

    The random generator is re-seeded with a fixed value first, so the same
    parameters always produce the same sequence of IDs.
    """
    random.seed(42)
    # range() rather than xrange() so this also runs on Python 3.
    return [_id(i, args) for i in range(args.num)]
def update_docs(db, args, ids):
    """Write one generated document per ID to ``db`` in batches, then report.

    The first ``args.num`` entries of ``ids`` are used, ``args.batch_size``
    documents per ``db.update()`` call; the final batch may be shorter. The
    elapsed time and resulting stats are printed through ``_show_info()``.

    Raises ValueError if ``args.batch_size`` is smaller than 1.
    """
    t0 = time.time()
    n = args.num
    b = args.batch_size
    if b < 1:
        raise ValueError("batch_size must be >= 1, got %r" % (b,))

    # Written without a newline; _show_info() completes the line.
    sys.stdout.write(" Updating : ")
    sys.stdout.flush()

    # Stepping by the batch size covers full batches and the remainder in one
    # loop, and never sends an empty bulk request.
    for start in range(0, n, b):
        batch_ids = ids[start:min(start + b, n)]
        docs = [_doc(doc_id, args) for doc_id in batch_ids]
        # new_edits=False is passed through to the bulk update; presumably so
        # the server stores the revision history supplied by _doc() as given.
        db.update(docs, new_edits=False)

    _show_info(args, time.time() - t0, db)
def populate_db(db, args):
    """Generate the document IDs for this run and write the documents to db."""
    update_docs(db, args, get_ids(args))
def compact(db, args):
    """Start compaction of ``db``, wait for it to finish and print the stats.

    When ``args.wait_input_before_compaction`` is set, the user is prompted
    to press enter before compaction is started.

    Raises AssertionError if ``db.compact()`` does not report success.
    """
    # Written without a newline; _show_info() completes the line.
    sys.stdout.write(" Compacting :")
    sys.stdout.flush()
    if args.wait_input_before_compaction:
        try:
            read_line = raw_input  # Python 2
        except NameError:
            read_line = input  # Python 3
        read_line(" --- Press enter to start compaction ---")
    else:
        # Separator between the label and the stats on the same line.
        sys.stdout.write(" ")

    # Call compact() outside of an assert statement: asserts are stripped
    # under "python -O", which would silently skip starting the compaction.
    started = db.compact()
    if not started:
        raise AssertionError("Compaction failed to start")

    dt = wait_compaction_done(db)
    _show_info(args, dt, db)
def wait_compaction_done(db):
    """Poll ``db.info()`` until compaction is no longer reported as running.

    The database info is fetched every 0.25 seconds; a missing
    ``compact_running`` key counts as "not running". Returns the number of
    seconds spent waiting, measured from the moment this function is entered.
    """
    started_at = time.time()
    while db.info().get('compact_running', False):
        time.sleep(0.25)
    return time.time() - started_at
def run(args, paramstr):
    """Execute one benchmark run: recreate the db, populate it, compact it.

    ``args`` carries scalar values for all benchmark parameters plus ``url``
    and ``dbname``; ``paramstr`` is the label printed in the run's banner.
    An existing database named ``args.dbname`` is deleted first.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("%s %s %s" % ("*" * 16, paramstr, "*" * 16))

    server = couchdb.Server(args.url)
    print("Version: %s" % server.version())

    # Always start from an empty database.
    if args.dbname in server:
        server.delete(args.dbname)
    server.create(args.dbname)
    db = server[args.dbname]

    populate_db(db, args)
    compact(db, args)
    sys.stdout.flush()
def _show_info(args, dt, db): | |
docrate = int(args.num / dt) | |
revrate = int(args.num * args.revisions / dt) | |
fsize = db.info()['sizes']['file'] | |
print "%.1fs docs/s:%s revs/s:%s fsize:%s " % (dt, docrate, revrate, fsize) | |
def _doc(_id, args):
    """Build one document body for a bulk update.

    The document gets ``_id`` as its ID, a random string of ``args.size``
    characters under ``'v'``, and a revision history of ``args.revisions``
    random revision ids whose newest entry is also used for ``_rev``. When
    ``args.attachment_size`` is positive, an inline base64-encoded attachment
    named ``'att'`` built from that many random characters is added.
    """
    # Local import so this block does not depend on the file's import list.
    import base64

    # Keep the order of random draws (value, revisions, attachment) stable so
    # seeded runs stay reproducible.
    data = _data(args)
    revs = [_rand_id(6) for _ in range(args.revisions)]
    doc = {
        '_id': _id,
        'v': data,
        '_rev': str(len(revs)) + '-' + revs[0],
        '_revisions': {
            'start': len(revs),
            'ids': revs
        }
    }
    if args.attachment_size > 0:
        alphabet = _data_alphabet(args)
        att_data = ''.join(random.choice(alphabet)
                           for _ in range(args.attachment_size))
        # str.encode('base64') exists only on Python 2 and inserts line
        # breaks into the output; base64.b64encode works on both major
        # versions and yields a single unbroken string.
        raw = att_data if isinstance(att_data, bytes) else att_data.encode('utf-8')
        doc['_attachments'] = {
            'att': {
                'content_type': 'app/binary',
                'data': base64.b64encode(raw).decode('ascii')
            }
        }
    return doc
_DEFAULT_ALPHABET = string.ascii_letters + string.digits | |
def _data_alphabet(args): | |
if args.alphabet: | |
return args.alphabet | |
else: | |
return _DEFAULT_ALPHABET | |
def _data(args):
    """Return a random string of ``args.size`` characters.

    Characters are drawn with ``random.choice`` from the alphabet selected by
    ``_data_alphabet(args)``.
    """
    alphabet = _data_alphabet(args)
    # range() rather than xrange() so this also runs on Python 3.
    return ''.join(random.choice(alphabet) for _ in range(args.size))
def _rand_id(size): | |
fmt = "%0" + str(2 * size) + "x" | |
return fmt % random.getrandbits(size * 8) | |
def _id(i, args): | |
if args.random_ids: | |
_id = _rand_id(max(args.min_id_size, 16)) | |
else: | |
_id = '%06d' % i | |
extend = args.min_id_size - len(_id) | |
if extend > 0: | |
_id = _id + 'x' * extend | |
return _id | |
def _str2bool(val): | |
val = val.lower() | |
if val in ['true', 't', 'yes', 'yep']: | |
return True | |
else: | |
return False | |
def _args():
    """Build the command-line parser and return the parsed arguments.

    Besides the fixed options (server URL, database name, wait-before-
    compaction flag), every PARAMS entry is added as a repeatable option
    whose values are collected into a list. Values are parsed with the type
    of the parameter's default, except booleans, which go through
    ``_str2bool``.
    """
    parser = argparse.ArgumentParser(
        description="Make a db, add some docs and then compact")
    parser.add_argument('-u', '--url', default=URL, help="Server URL")
    parser.add_argument('-d', '--dbname', default=DBNAME, help="DB name")
    parser.add_argument(
        '-w', '--wait-input-before-compaction',
        action="store_true",
        default=False,
        help="Pause and wait for keypress before compaction")

    for long_name, short_name, default_value, help_text in PARAMS:
        if isinstance(default_value, bool):
            # bool("false") would be True, so booleans need a real parser.
            value_type = _str2bool
        else:
            value_type = type(default_value)
        parser.add_argument(
            '-' + short_name, '--' + long_name,
            type=value_type,
            action="append",
            default=[],
            help=help_text)

    return parser.parse_args()
# Script entry point: parse the command line and run every parameter
# combination. Nothing runs when the file is imported as a module.
if __name__ == '__main__':
    main(_args())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment