Skip to content

Instantly share code, notes, and snippets.

@sergray
Created February 21, 2012 19:41
Show Gist options
  • Save sergray/1878413 to your computer and use it in GitHub Desktop.
Save sergray/1878413 to your computer and use it in GitHub Desktop.
Python script for automated analysis of slow queries in mongodb
"""
Script for automated analysis of profiling data in MongoDB,
gathered by Mongo with db.setProfilingLevel(1).
See <http://www.mongodb.org/display/DOCS/Database+Profiler>
TODO: pass collection and database with profiling data in arguments
TODO: make thread-safe
TODO: handle map-reduce operations
"""
from collections import defaultdict
# Database whose profiling data is analysed.
MONGO_DB = 'test'
# MongoDB writes profiler output to this collection by default.
PROFILE_COLLECTION = 'system.profile'

# Global accumulator: (collection, query_fields) -> running statistics.
# Every new key starts from a fresh dict with zeroed counters/sums and
# unset (None) minimum/maximum values.
QSTATS = defaultdict(
    lambda: dict(
        count=0,
        millis_sum=0,
        millis_min=None,
        millis_max=None,
        nscanned_sum=0,
        nscanned_min=None,
        nscanned_max=None,
    )
)
def get_profile_collection():
    """Return the mongo collection containing profiling records.

    Connects to the default MongoDB server and returns the collection
    named PROFILE_COLLECTION from the database named MONGO_DB.
    """
    # `Connection` was removed in PyMongo 3.0 in favour of `MongoClient`
    # (available since 2.4); prefer the new class and fall back to the old
    # one so the script keeps working with either driver generation.
    try:
        from pymongo import MongoClient as client_class
    except ImportError:
        from pymongo import Connection as client_class
    client = client_class()
    return client[MONGO_DB][PROFILE_COLLECTION]
def extract_collection_query(prof_rec):
    """Return a tuple of collection name and list of dotted query field paths.

    For records whose namespace ends with ``$cmd`` the query is read from
    the ``command`` document and the collection is the value of the entry
    left after removing ``query`` and ``fields``.  For all other records
    the collection comes from the namespace and the fields from ``query``
    (unwrapping ``$query``); fields used in ``$orderby`` are reported with
    a trailing ``.$orderby`` component.

    The profiling record itself is not modified.

    Raises KeyError when the record lacks the ``ns``, ``command`` or
    ``query`` entry needed for its kind, or when the command document has
    nothing left to name the collection.
    """
    ns = prof_rec[u'ns']
    if ns.endswith(u'$cmd'):
        # Work on a shallow copy: popping from the original would strip
        # data out of the caller's record.
        cmd_info = dict(prof_rec[u'command'])
        qry_fields = extract_fields(cmd_info.pop(u'query', {}))
        # Not every command carries a projection, so tolerate its absence.
        cmd_info.pop(u'fields', None)
        # Whatever is left is taken as the {command name: collection} pair.
        _command, collection = cmd_info.popitem()
    else:
        # The namespace is "<database>.<collection>" and the collection part
        # may itself contain dots, so split on the first dot only.
        collection = ns.split(u'.', 1)[-1]
        query = prof_rec[u'query']
        if u'$query' in query:
            qry_fields = extract_fields(query[u'$query'])
        else:
            qry_fields = extract_fields(query)
        if u'$orderby' in query:
            # Tag sort fields so they are distinguishable from filter fields.
            qry_fields.extend(
                path + [u'$orderby']
                for path in extract_fields(query[u'$orderby'])
            )
    return (collection, [u'.'.join(path) for path in qry_fields])
def extract_fields(query, parent_fields=None):
    """Recursively descend a query prototype and return its field paths.

    Each path is a list of keys leading from the top of ``query`` to a
    value that is not a dict; ``parent_fields`` (a list of keys, optional)
    is prepended to every path.  Dict values are descended into, so an
    empty nested dict contributes no path.  Neither argument is modified.
    """
    prefix = parent_fields or []
    fields = []
    for key, value in query.items():
        path = prefix + [key]
        if isinstance(value, dict):
            fields.extend(extract_fields(value, path))
        else:
            fields.append(path)
    return fields
def _update_stats(col, qry_fields, prof_rec):
    """Fold one profiling record into the global QSTATS accumulator.

    The entry is keyed by ``(col, tuple(qry_fields))``.  Its ``count`` is
    always incremented; for each of ``millis`` and ``nscanned`` present in
    ``prof_rec`` the matching ``*_sum``, ``*_min`` and ``*_max`` values are
    updated.
    """
    stats = QSTATS[(col, tuple(qry_fields))]
    stats['count'] += 1
    for metric in ('millis', 'nscanned'):
        value = prof_rec.get(metric)
        # Only a missing value is skipped: zero is a legitimate measurement
        # and has to be able to become the recorded minimum.
        if value is None:
            continue
        stats[metric + '_sum'] += value
        min_key = metric + '_min'
        max_key = metric + '_max'
        if stats[min_key] is None or value < stats[min_key]:
            stats[min_key] = value
        if stats[max_key] is None or value > stats[max_key]:
            stats[max_key] = value
def show_stats():
    """Print one summary line for every entry of the global QSTATS.

    Each line shows the collection, the tuple of query fields, the number
    of records and the per-record averages of ``millis`` and ``nscanned``.
    The averages are reported as None for an entry whose count is zero.
    """
    for (col, fields), stats in QSTATS.items():
        count = stats['count']
        if count:
            # float() keeps the fractional part where "/" on two integers
            # would truncate (Python 2).
            avg_millis = float(stats['millis_sum']) / count
            avg_nscanned = float(stats['nscanned_sum']) / count
        else:
            avg_millis = avg_nscanned = None
        # A single parenthesised argument prints identically whether
        # `print` is a statement or a function.
        print("%s %s count=%d avg_millis=%r avg_nscanned=%r"
              % (col, fields, count, avg_millis, avg_nscanned))
def analyze_profiling_data():
    """Process all records in the profiling collection and gather statistics.

    Records from which no collection/query information can be extracted
    are skipped; every other record is folded into the statistics via
    _update_stats.
    """
    prof_col = get_profile_collection()
    for rec in prof_col.find():
        try:
            col, qry_fields = extract_collection_query(rec)
        except Exception:
            # Best effort: skip records of an unexpected shape.  Catching
            # Exception rather than everything lets KeyboardInterrupt and
            # SystemExit still stop the run.
            continue
        _update_stats(col, qry_fields, rec)
# Script entry point: collect the statistics first, then print the report.
if "__main__" == __name__:
    analyze_profiling_data()
    show_stats()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment