Created
April 29, 2026 06:04
-
-
Save nickva/0c00947fc7405d260e1d5226f7e9c104 to your computer and use it in GitHub Desktop.
Build a CouchDB view and time it
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # | |
| # view_bench.py --ndocs 100000 --doc-size 1500 | |
| # | |
| import time | |
| import argparse | |
| import requests | |
# Benchmark defaults. URL and DBNAME can be overridden with --url / --db.
TIMEOUT: int = 120  # timeout value handed to requests for each HTTP call
AUTH: tuple = ("adm", "pass")  # (user, password) installed as the session auth
URL: str = "http://localhost:15984"
DBNAME: str = "db"
Q: str = "2"  # sent as the `q` query parameter when the database is created
MAP_FUN_BODY: str = "emit(doc._id, null);"  # spliced into `function(doc){ ... }`
def make_doc(counter, body_size):
    """Build one test document of roughly ``body_size`` JSON-encoded bytes.

    The ``_id`` is ``counter`` zero-padded to ten digits. A ``body`` field made
    of ``'x'`` characters is added only when ``body_size`` is larger than the
    30-byte allowance used below; otherwise the document has just an ``_id``.
    """
    doc = {'_id': f'{counter:010d}'}
    # 30 bytes is ~ padding for json bits and id
    filler = 'x' * max(0, body_size - 30)
    if filler:
        doc['body'] = filler
    return doc
class Server:
    """Small CouchDB HTTP client built on a single ``requests.Session``.

    Paths passed to the verb methods are relative to ``url``. ``get``, ``post``,
    ``put`` and ``delete`` raise ``requests.HTTPError`` on an error status and
    otherwise return the decoded JSON body. ``head`` returns the bare status
    code without raising.

    The instance also supports ``dbname in server`` (HEAD probe) and
    ``iter(server)`` (names from ``_all_dbs``).
    """

    def __init__(self, url=URL, auth=AUTH, timeout=TIMEOUT):
        self.sess = requests.Session()
        self.sess.auth = auth
        self.url = url.rstrip('/')
        # Default timeout for every request; None means "no default".
        self.timeout = timeout

    def _apply_timeout(self, kw):
        """Add the instance timeout to ``kw`` unless the caller supplied one."""
        if self.timeout is not None and 'timeout' not in kw:
            kw['timeout'] = self.timeout
        return kw

    def _json_request(self, method, path, **kw):
        """Send ``method`` to ``path``, check the status, return decoded JSON."""
        kw = self._apply_timeout(kw)
        r = self.sess.request(method, f'{self.url}/{path}', **kw)
        r.raise_for_status()
        return r.json()

    def get(self, path='', **kw):
        return self._json_request('GET', path, **kw)

    def post(self, path, **kw):
        return self._json_request('POST', path, **kw)

    def put(self, path, **kw):
        return self._json_request('PUT', path, **kw)

    def delete(self, path, **kw):
        return self._json_request('DELETE', path, **kw)

    def head(self, path, **kw):
        """Return the HTTP status code of a HEAD request (never raises on 4xx/5xx)."""
        kw = self._apply_timeout(kw)
        r = self.sess.head(f'{self.url}/{path}', **kw)
        return r.status_code

    def version(self):
        """Return the ``version`` field of the server's root document."""
        return self.get()['version']

    def create_db(self, dbname, **kw):
        """Create ``dbname`` if it does not exist yet.

        Extra keyword arguments are forwarded to the PUT request. Returns
        ``True`` when the database already existed and ``None`` after creating
        it. Raises ``Exception`` if the database is still missing after the PUT.
        """
        if dbname in self:
            return True
        # setdefault instead of a hard-coded keyword, so that a caller-supplied
        # timeout no longer collides with ours (duplicate-keyword TypeError).
        kw.setdefault('timeout', TIMEOUT)
        self.put(dbname, **kw)
        if dbname not in self:
            raise Exception(f"{dbname} could not be created")

    def bulk_docs(self, dbname, docs, timeout=TIMEOUT):
        """POST ``docs`` to ``_bulk_docs``; ``timeout`` applies to this request."""
        return self.post(f'{dbname}/_bulk_docs', json={'docs': docs},
                         timeout=timeout)

    def bulk_get(self, dbname, docs, timeout=TIMEOUT):
        """POST ``docs`` to ``_bulk_get``; ``timeout`` applies to this request."""
        return self.post(f'{dbname}/_bulk_get', json={'docs': docs},
                         timeout=timeout)

    def compact(self, dbname, **kw):
        """POST to ``_compact`` for ``dbname`` and return the decoded response."""
        # Routed through post() so the instance timeout applies here as well.
        return self.post(f'{dbname}/_compact', json={}, **kw)

    def config_set(self, section, key, val):
        """Set ``section/key`` to the string ``val`` on the local node."""
        url = f'_node/_local/_config/{section}/{key}'
        # json= lets requests encode the value, so quotes and backslashes in
        # val are escaped instead of producing a malformed body.
        return self.put(url, json=val)

    def config_get(self, section, key):
        """Return the value stored at ``section/key`` on the local node."""
        url = f'_node/_local/_config/{section}/{key}'
        return self.get(url)

    def __iter__(self):
        dbs = self.get('_all_dbs')
        return iter(dbs)

    def __str__(self):
        return f"<Server:{self.url}>"

    def __contains__(self, dbname):
        """Report whether ``dbname`` exists, based on a HEAD request.

        200 means present and 404 means absent; any other status raises
        ``Exception``.
        """
        res = self.head(dbname)
        if res == 200:
            return True
        if res == 404:
            return False
        raise Exception(f"Unexpected head status code {res}")
# Names of the design document and of the view inside it that the benchmark
# creates and then queries.
DDOC: str = "ddoc"
VIEW: str = "view"
def add_view(srv, db):
    """PUT the benchmark design document into database ``db``.

    The document defines a single view named ``VIEW`` whose map function wraps
    ``MAP_FUN_BODY``, and sets ``autoupdate`` to false. The server's response is
    discarded; nothing is returned.
    """
    ddoc_path = f'{db}/_design/{DDOC}'
    map_source = f"function(doc){{ {MAP_FUN_BODY} }}"
    ddoc_body = {
        "views": {VIEW: {"map": map_source}},
        # NOTE(review): presumably disabled so the index is only built when the
        # benchmark explicitly queries the view; confirm against CouchDB docs.
        "autoupdate": False,
    }
    srv.put(ddoc_path, json=ddoc_body)
def wait_build(srv, db, ndocs, poll=0.2, max_wait=1800):
    """Trigger a build of the benchmark view and wait until it has finished.

    The view is queried once with ``stale=update_after`` to start the build.
    The design document's ``_info`` is then polled every ``poll`` seconds until
    the indexer reports it is not running and its ``update_seq`` has reached
    ``ndocs``.

    Args:
        srv: ``Server`` instance to talk to.
        db: database name holding the design document.
        ndocs: minimum ``update_seq`` the index must report to count as built.
        poll: seconds to sleep between ``_info`` polls.
        max_wait: seconds to keep polling after the trigger request returns.

    Returns:
        Elapsed time in whole milliseconds, measured from just before the
        trigger request until completion was observed.

    Raises:
        requests.HTTPError: the trigger request or an ``_info`` poll returned
            an error status.
        RuntimeError: the build did not complete within ``max_wait`` seconds.
    """
    t0 = time.monotonic()
    resp = srv.sess.get(
        f'{srv.url}/{db}/_design/{DDOC}/_view/{VIEW}',
        params={'limit': 1, 'stale': 'update_after'},
        timeout=srv.timeout,
    )
    # Surface a failed trigger (missing design doc, bad credentials, ...) right
    # away rather than going on to poll for a build that was never requested
    # successfully.
    resp.raise_for_status()
    deadline = time.monotonic() + max_wait
    while time.monotonic() < deadline:
        info = srv.get(f'{db}/_design/{DDOC}/_info')['view_index']
        if (
            info.get('updater_running') is False
            and info.get('update_seq', 0) >= ndocs
        ):
            return int((time.monotonic() - t0) * 1000)
        time.sleep(poll)
    raise RuntimeError("build did not complete in time")
def main(args):
    """Run the benchmark: recreate the database, load docs, build and time the view.

    ``args`` must provide ``url``, ``db``, ``ndocs``, ``doc_size`` and ``batch``
    (the attributes produced by this script's argument parser).

    Raises:
        ValueError: ``args.batch`` is not a positive integer.
    """
    dbname = args.db
    ndocs = args.ndocs
    batch = args.batch
    # Validate before touching the server: batch == 0 would otherwise fail in
    # range() only after the existing database had been deleted, and a negative
    # batch would load nothing and leave the build wait to run out its limit.
    if batch <= 0:
        raise ValueError(f"--batch must be a positive integer, got {batch}")
    print(f"URL: {args.url} db: {dbname} docs: {ndocs} doc_size: ~{args.doc_size} B batch: {batch}")
    s = Server(url=args.url, timeout=TIMEOUT)
    # Start from an empty database; `in` uses the Server's HEAD-based check
    # rather than listing every database on the node.
    if dbname in s:
        s.delete(dbname)
    s.create_db(dbname, params={'q': Q})
    print("Loading docs...")
    t0 = time.monotonic()
    for i in range(0, ndocs, batch):
        end = min(i + batch, ndocs)
        s.bulk_docs(dbname, [make_doc(j, args.doc_size) for j in range(i, end)])
    print(f"Load done in {time.monotonic()-t0:.1f}s")
    add_view(s, dbname)
    print(f"View URL: {dbname}/_design/{DDOC}/_view/{VIEW}")
    print("Timing build...")
    elapsed = wait_build(s, dbname, ndocs)
    # elapsed is in whole milliseconds and may be 0 for a trivial build.
    rate = ndocs / (elapsed / 1000) if elapsed else 0
    print(f"Build time: {elapsed} ms ({elapsed/1000:.1f} s) for {ndocs} docs ({rate:,.0f} docs/s)")
if __name__ == '__main__':
    # __doc__ is None when the module has no docstring, which would leave
    # --help without a description; fall back to a literal summary.
    p = argparse.ArgumentParser(
        description=__doc__ or 'Build a CouchDB view and time it')
    p.add_argument('--url', default=URL)
    p.add_argument('--db', default=DBNAME)
    p.add_argument('--ndocs', type=int, default=500000,
                   help='Total number of docs to load (default: 500000)')
    p.add_argument('--doc-size', type=int, default=1024,
                   help='Approximate JSON-encoded doc size in bytes (default: 1024)')
    p.add_argument('--batch', type=int, default=500,
                   help='Bulk-docs batch size (default: 500)')
    main(p.parse_args())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment