# Source: GitHub gist sergray/1878413
# Created: February 21, 2012 19:41
# Description: Python script for automated analysis of slow queries in MongoDB.
# Note: GitHub flagged this file as containing bidirectional Unicode text that
# may be interpreted or compiled differently than it appears; review it in an
# editor that reveals hidden Unicode characters.
""" | |
Script for automated analysis of profiling data in MongoDB, | |
gathered by Mongo with db.setProfilingLevel(1). | |
See <http://www.mongodb.org/display/DOCS/Database+Profiler> | |
TODO: pass collection and database with profiling data in arguments | |
TODO: make thread-safe | |
TODO: handle map-reduce operations | |
""" | |
from collections import defaultdict | |
# Name of the database that is read for profiling data.
MONGO_DB = 'test'
# Default name of the collection with profiling data.
PROFILE_COLLECTION = 'system.profile'

# Global mapping of (collection, query_fields) to their statistics data.
# A key looked up for the first time gets a fresh record with zeroed
# count/sums and unset (None) minima/maxima.
QSTATS = defaultdict(lambda: dict(
    count=0,
    millis_sum=0, millis_min=None, millis_max=None,
    nscanned_sum=0, nscanned_min=None, nscanned_max=None,
))
def get_profile_collection():
    """Return mongo collection containing profiling records.

    Opens a connection using the driver's default settings and returns the
    ``PROFILE_COLLECTION`` collection of the ``MONGO_DB`` database.
    """
    # Imported inside the function, so pymongo is only required when the
    # profiling data is actually fetched.
    try:
        # ``MongoClient`` superseded ``Connection`` in pymongo 2.4, and
        # ``Connection`` was removed entirely in pymongo 3.0.
        from pymongo import MongoClient as client_class
    except ImportError:
        # Drivers older than 2.4 only provide ``Connection``.
        from pymongo import Connection as client_class
    client = client_class()
    return client[MONGO_DB][PROFILE_COLLECTION]
def extract_collection_query(prof_rec):
    """Returns tuple of collection name and list of query fields.

    ``prof_rec`` is a profiling record: a mapping with an ``ns`` key and
    either a ``command`` key (when ``ns`` ends with ``$cmd``) or a ``query``
    key (all other namespaces). The record is not modified.

    The second element of the result is a list of dotted field paths as
    produced by ``extract_fields``. For plain queries, fields found under
    ``$orderby`` are included with a trailing ``.$orderby`` component.

    Raises ``KeyError`` when a required key is missing from the record, or
    when a command document contains nothing besides ``query``/``fields``.
    """
    ns = prof_rec[u'ns']
    if ns.endswith(u'$cmd'):
        cmd_info = prof_rec[u'command']
        qry_fields = extract_fields(cmd_info.get(u'query', {}))
        # Everything except the query and the (optional) projection is
        # expected to be the ``{command name: collection}`` pair. The entries
        # are read instead of popped so the caller's record stays intact,
        # and a missing ``fields`` key no longer aborts the extraction.
        # MongoDB command documents list the command name first, so with an
        # order-preserving mapping the first remaining value is the target
        # collection even when other options are present.
        targets = [value for key, value in cmd_info.items()
                   if key not in (u'query', u'fields')]
        if not targets:
            raise KeyError('command document does not name a collection')
        collection = targets[0]
    else:
        # A namespace is "<database>.<collection>" and the collection part
        # may itself contain dots, so split on the first dot only.
        collection = ns.split(u'.', 1)[-1]
        query = prof_rec[u'query']
        if u'$query' in query:
            # Wrapped form: the actual criteria live under ``$query``.
            qry_fields = extract_fields(query[u'$query'])
        else:
            qry_fields = extract_fields(query)
        if u'$orderby' in query:
            # Tag sort keys so they stay distinguishable from filter fields.
            qry_fields.extend(
                path + [u'$orderby']
                for path in extract_fields(query[u'$orderby'])
            )
    return (collection, [u'.'.join(path) for path in qry_fields])
def extract_fields(query, parent_fields=None):
    """Recursively descend query prototype and return list of field names.

    ``query`` is a dict; every value that is itself a dict is descended
    into. Each returned item is a list of keys describing the path from the
    top of ``query`` to a leaf, prefixed with ``parent_fields`` when given.
    A key whose value is an empty dict is reported as a leaf, so the field
    is not lost from the result.

    ``parent_fields`` is never modified.
    """
    prefix = list(parent_fields) if parent_fields else []
    fields = []
    for key, value in query.items():
        path = prefix + [key]
        nested = extract_fields(value, path) if isinstance(value, dict) else []
        if nested:
            fields.extend(nested)
        else:
            # Either a plain value or a sub-document without any keys.
            fields.append(path)
    return fields
def _update_stats(col, qry_fields, prof_rec):
    """Fold one profiling record into the global ``QSTATS`` mapping.

    The statistics record is keyed by ``(col, tuple(qry_fields))``. Its
    ``count`` is always incremented; the ``millis`` and ``nscanned`` values
    of ``prof_rec`` update the matching ``*_sum``, ``*_min`` and ``*_max``
    entries when the record provides them.
    """
    stats = QSTATS[(col, tuple(qry_fields))]
    stats['count'] += 1
    for metric in ('millis', 'nscanned'):
        value = prof_rec.get(metric)
        # Only a missing value is skipped. A genuine 0 (e.g. an operation
        # that scanned nothing) must still be able to become the minimum.
        if value is None:
            continue
        sum_key, min_key, max_key = (
            metric + '_sum', metric + '_min', metric + '_max')
        stats[sum_key] += value
        if stats[min_key] is None or value < stats[min_key]:
            stats[min_key] = value
        if stats[max_key] is None or value > stats[max_key]:
            stats[max_key] = value
def show_stats():
    """Print one summary line for every entry of ``QSTATS``.

    Each line shows the collection, the tuple of query fields, the number of
    recorded operations and the average ``millis`` and ``nscanned`` per
    operation. Averages are computed with true division, so they are not
    truncated to integers; an average is shown as ``None`` when the entry's
    count is zero.
    """
    for (col, fields), stats in QSTATS.items():
        # Work on a copy so the derived averages never leak into QSTATS.
        info = stats.copy()
        count = info['count']
        for metric in ('millis', 'nscanned'):
            total = info[metric + '_sum']
            if count and total is not None:
                info['avg_' + metric] = float(total) / count
            else:
                info['avg_' + metric] = None
        summary = (
            "count=%(count)d avg_millis=%(avg_millis)r "
            "avg_nscanned=%(avg_nscanned)r" % info
        )
        # A single parenthesised argument prints the same text under both
        # the Python 2 print statement and the Python 3 print function.
        print("%s %s %s" % (col, fields, summary))
def analyze_profiling_data():
    """Process all records in profiling collection and gather statistics.

    Every record returned by ``find()`` on the profiling collection is
    passed through ``extract_collection_query`` and then ``_update_stats``.
    Records that cannot be interpreted are skipped.
    """
    prof_col = get_profile_collection()
    for rec in prof_col.find():
        try:
            col, qry_fields = extract_collection_query(rec)
        except Exception:
            # Quick workaround, needs better handling: records with an
            # unexpected shape are skipped. ``Exception`` rather than a bare
            # ``except`` keeps Ctrl-C (KeyboardInterrupt) and SystemExit
            # able to stop a long-running analysis.
            continue
        _update_stats(col, qry_fields, rec)
if __name__ == "__main__":
    # Script entry point: gather the statistics first, then print them.
    analyze_profiling_data()
    show_stats()
# Sign up for free to join this conversation on GitHub.
# Already have an account? Sign in to comment.