Skip to content

Instantly share code, notes, and snippets.

@nickva
Created April 29, 2026 06:04
Show Gist options
  • Select an option

  • Save nickva/0c00947fc7405d260e1d5226f7e9c104 to your computer and use it in GitHub Desktop.

Select an option

Save nickva/0c00947fc7405d260e1d5226f7e9c104 to your computer and use it in GitHub Desktop.
Build a CouchDB view and time it
#!/usr/bin/env python3
#
# view_bench.py --ndocs 100000 --doc-size 1500
#
import time
import argparse
import requests
# Default per-request timeout handed to `requests` (seconds).
TIMEOUT = 120
# Credentials assigned to the requests session's `auth` attribute.
AUTH = ('adm', 'pass')
# Default server base URL; can be overridden with --url.
URL = 'http://localhost:15984'
# Default database name; can be overridden with --db.
DBNAME = 'db'
# Sent as the `q` query parameter when the database is created
# (presumably the shard count -- confirm against the CouchDB docs).
Q = '2'
# JavaScript statement(s) placed inside the generated map function body.
MAP_FUN_BODY = "emit(doc._id, null);"
def make_doc(counter: int, body_size: int) -> dict:
    """Build one synthetic document of roughly ``body_size`` encoded bytes.

    Args:
        counter: Sequence number; zero-padded to at least 10 digits and used
            as the document ``_id``.
        body_size: Approximate target size of the JSON-encoded document.

    Returns:
        A dict with ``_id`` and, when there is room for padding, a ``body``
        string made of ``'x'`` characters.
    """
    doc = {'_id': f'{counter:010d}'}
    # 30 bytes is ~ padding for json bits and id
    pad = max(0, body_size - 30)
    if pad:
        # Only add the filler field when there is something to fill; tiny
        # target sizes yield an id-only document.
        doc['body'] = 'x' * pad
    return doc
class Server:
    """Small CouchDB HTTP helper built on a shared ``requests.Session``.

    Paths passed to the verb helpers are relative to the server URL. Unless
    noted otherwise, helpers raise ``requests.HTTPError`` on a 4xx/5xx
    response and return the decoded JSON body.

    The instance also supports ``name in server`` (HEAD on the database) and
    iteration over database names (``_all_dbs``).
    """

    def __init__(self, url=URL, auth=AUTH, timeout=TIMEOUT):
        """Create the session.

        Args:
            url: Server base URL; trailing slashes are stripped.
            auth: Value assigned to the session's ``auth`` attribute.
            timeout: Default timeout applied to every request that does not
                specify its own; ``None`` disables the default.
        """
        self.sess = requests.Session()
        self.sess.auth = auth
        self.url = url.rstrip('/')
        self.timeout = timeout

    def _apply_timeout(self, kw):
        """Insert the instance default timeout into ``kw`` if none is set."""
        if self.timeout is not None and 'timeout' not in kw:
            kw['timeout'] = self.timeout
        return kw

    def _request(self, method, path, **kw):
        """Send ``method`` to ``path``, check the status, return the JSON body."""
        kw = self._apply_timeout(kw)
        r = self.sess.request(method, f'{self.url}/{path}', **kw)
        r.raise_for_status()
        return r.json()

    def get(self, path='', **kw):
        """GET ``path`` and return the decoded JSON body."""
        return self._request('GET', path, **kw)

    def post(self, path, **kw):
        """POST to ``path`` and return the decoded JSON body."""
        return self._request('POST', path, **kw)

    def put(self, path, **kw):
        """PUT to ``path`` and return the decoded JSON body."""
        return self._request('PUT', path, **kw)

    def delete(self, path, **kw):
        """DELETE ``path`` and return the decoded JSON body."""
        return self._request('DELETE', path, **kw)

    def head(self, path, **kw):
        """HEAD ``path`` and return the status code without raising on errors."""
        kw = self._apply_timeout(kw)
        r = self.sess.head(f'{self.url}/{path}', **kw)
        return r.status_code

    def version(self):
        """Return the ``version`` field of the server's root document."""
        return self.get()['version']

    def create_db(self, dbname, **kw):
        """Create ``dbname`` if it does not exist yet.

        Extra keyword arguments (e.g. ``params`` or ``timeout``) are passed to
        the PUT request. The instance default timeout applies unless the
        caller supplies one.

        Returns:
            True once the database is confirmed to exist.

        Raises:
            Exception: If the database is still missing after the PUT.
        """
        if dbname not in self:
            # No hard-coded timeout here: it would collide with a
            # caller-supplied ``timeout`` and ignore the instance default.
            self.put(dbname, **kw)
        if dbname not in self:
            raise Exception(f"{dbname} could not be created")
        return True

    def bulk_docs(self, dbname, docs, timeout=TIMEOUT):
        """POST ``docs`` to ``{dbname}/_bulk_docs`` using ``timeout``."""
        return self.post(f'{dbname}/_bulk_docs', json={'docs': docs},
                         timeout=timeout)

    def bulk_get(self, dbname, docs, timeout=TIMEOUT):
        """POST ``docs`` to ``{dbname}/_bulk_get`` using ``timeout``."""
        return self.post(f'{dbname}/_bulk_get', json={'docs': docs},
                         timeout=timeout)

    def compact(self, dbname, **kw):
        """POST an empty JSON object to ``{dbname}/_compact``.

        Routed through :meth:`post` so the default timeout applies and the
        request cannot block indefinitely.
        """
        return self.post(f'{dbname}/_compact', json={}, **kw)

    def config_set(self, section, key, val):
        """Set ``section/key`` on the local node's config to the string ``val``.

        The value is sent JSON-encoded so quotes and backslashes inside
        ``val`` are escaped correctly.
        """
        url = f'_node/_local/_config/{section}/{key}'
        return self.put(url, json=val)

    def config_get(self, section, key):
        """Return the local node's config value for ``section/key``."""
        url = f'_node/_local/_config/{section}/{key}'
        return self.get(url)

    def __iter__(self):
        """Iterate over the database names returned by ``_all_dbs``."""
        dbs = self.get('_all_dbs')
        return iter(dbs)

    def __str__(self):
        return f"<Server:{self.url}>"

    def __contains__(self, dbname):
        """Return whether ``dbname`` exists, based on a HEAD request.

        Raises:
            Exception: If the status code is neither 200 nor 404.
        """
        res = self.head(dbname)
        if res == 200:
            return True
        if res == 404:
            return False
        raise Exception(f"Unexpected head status code {res}")
# Name of the design document (stored at `_design/<DDOC>`).
DDOC = 'ddoc'
# Name of the single view defined inside that design document.
VIEW = 'view'
def add_view(srv, db):
    """Store the benchmark design document in database ``db``.

    The document is written to ``_design/<DDOC>`` and defines one view,
    ``VIEW``, whose map function wraps ``MAP_FUN_BODY``. ``autoupdate`` is
    set to false in the document.

    Args:
        srv: Object exposing ``put(path, **kw)`` (a ``Server``).
        db: Database name.
    """
    srv.put(f'{db}/_design/{DDOC}', json={
        "views": {
            VIEW: {
                "map": f"function(doc){{ {MAP_FUN_BODY} }}",
            },
        },
        "autoupdate": False,
    })
def wait_build(srv, db, ndocs, poll=0.2, max_wait=1800):
    """Trigger the view build and return how long it took, in milliseconds.

    A ``stale=update_after`` query is sent to start the build, then the
    design document's ``_info`` endpoint is polled until ``view_index``
    reports ``updater_running`` as false and ``update_seq`` of at least
    ``ndocs``.

    Args:
        srv: Object exposing ``get(path, **kw)`` (a ``Server``).
        db: Database name.
        ndocs: Minimum ``update_seq`` that counts as a finished build.
        poll: Seconds to sleep between ``_info`` polls.
        max_wait: Seconds to keep polling before giving up.

    Returns:
        Elapsed time in whole milliseconds, measured from just before the
        trigger request.

    Raises:
        requests.HTTPError: If the trigger or an ``_info`` request fails.
        RuntimeError: If the build is not finished within ``max_wait``.
    """
    t0 = time.monotonic()
    # Go through srv.get so an error status on the trigger (missing design
    # doc, broken map function, ...) fails immediately instead of being
    # silently ignored.
    srv.get(
        f'{db}/_design/{DDOC}/_view/{VIEW}',
        params={'limit': 1, 'stale': 'update_after'},
    )
    deadline = time.monotonic() + max_wait
    while time.monotonic() < deadline:
        info = srv.get(f'{db}/_design/{DDOC}/_info')['view_index']
        if (
            info.get('updater_running') is False
            and info.get('update_seq', 0) >= ndocs
        ):
            return int((time.monotonic() - t0) * 1000)
        time.sleep(poll)
    raise RuntimeError("build did not complete in time")
def main(args):
    """Recreate the database, load the documents, build the view and time it.

    Args:
        args: Parsed command-line namespace with ``url``, ``db``, ``ndocs``,
            ``doc_size`` and ``batch`` attributes.

    Raises:
        ValueError: If ``args.batch`` is not a positive integer.
        RuntimeError: If the server reports per-document errors while
            loading, or the view build does not finish in time.
    """
    dbname = args.db
    ndocs = args.ndocs
    batch = args.batch
    if batch < 1:
        # A zero step crashes range(); a negative one would load nothing and
        # leave the build wait polling until it times out.
        raise ValueError(f"--batch must be a positive integer, got {batch}")
    print(f"URL: {args.url} db: {dbname} docs: {ndocs} doc_size: ~{args.doc_size} B batch: {batch}")
    s = Server(url=args.url, timeout=TIMEOUT)
    # Membership uses a single HEAD request instead of listing every database.
    if dbname in s:
        s.delete(dbname)
    s.create_db(dbname, params={'q': Q})
    print("Loading docs...")
    t0 = time.monotonic()
    for i in range(0, ndocs, batch):
        end = min(i + batch, ndocs)
        results = s.bulk_docs(dbname, [make_doc(j, args.doc_size) for j in range(i, end)])
        # _bulk_docs can succeed at the HTTP level while rejecting individual
        # documents; a short load would make the build wait never complete.
        failed = [r for r in results if isinstance(r, dict) and 'error' in r]
        if failed:
            raise RuntimeError(
                f"{len(failed)} docs failed to load in batch {i}-{end}; first error: {failed[0]}"
            )
    print(f"Load done in {time.monotonic()-t0:.1f}s")
    add_view(s, dbname)
    print(f"View URL: {dbname}/_design/{DDOC}/_view/{VIEW}")
    print("Timing build...")
    elapsed = wait_build(s, dbname, ndocs)
    # elapsed is in milliseconds; guard against a zero reading.
    rate = ndocs / (elapsed / 1000) if elapsed else 0
    print(f"Build time: {elapsed} ms ({elapsed/1000:.1f} s) for {ndocs} docs ({rate:,.0f} docs/s)")
if __name__ == '__main__':
    # The script has only '#' header comments and no module docstring, so
    # __doc__ would be None; give --help an explicit description instead.
    p = argparse.ArgumentParser(description='Build a CouchDB view and time it')
    p.add_argument('--url', default=URL)
    p.add_argument('--db', default=DBNAME)
    p.add_argument('--ndocs', type=int, default=500000,
                   help='Total number of docs to load (default: 500000)')
    p.add_argument('--doc-size', type=int, default=1024,
                   help='Approximate JSON-encoded doc size in bytes (default: 1024)')
    p.add_argument('--batch', type=int, default=500,
                   help='Bulk-docs batch size (default: 500)')
    main(p.parse_args())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment