Created
March 28, 2017 03:48
-
-
Save elliotchance/1fa316b559a4ed30dc6ecbc9bbbcde05 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import re | |
| import sys | |
| import operator | |
| def generic_sql(sql): | |
| sql = re.sub(r"'.*?'", "?", sql, 0, re.DOTALL) | |
| sql = re.sub(r'".*?"', "?", sql, 0, re.DOTALL) | |
| sql = re.sub(r"`(.*?)`", '\\1', sql, 0, re.DOTALL) | |
| sql = re.sub(r"\d[\d\.]*", "?", sql, 0, re.DOTALL) | |
| sql = re.sub(r"\(\?(,\s*\?\)*)*", "?", sql, 0, re.DOTALL) | |
| sql = re.sub(r"(\n|\s\s*)", " ", sql, 0, re.DOTALL) | |
| return sql.lower().strip(' ;\n\r') | |
| def extract_queries(log): | |
| regex = r"# Query_time: (?P<query_time>[\d\.]+) Lock_time: (?P<lock_time>[\d\.]+) Rows_sent: (?P<rows_sent>[\d\.]+) Rows_examined: (?P<rows_examined>\d+)(.*?)(\n#|$)" | |
| matches = re.finditer(regex, log, re.DOTALL) | |
| queries = {} | |
| for match in matches: | |
| sql = generic_sql(match.group(5)) | |
| if sql not in queries: | |
| queries[sql] = [] | |
| queries[sql].append(match.groupdict()) | |
| return queries | |
| def generate_summary(queries): | |
| summary = [] | |
| for query in queries: | |
| summary.append({ | |
| 'query': query, | |
| 'lock_time': 0, | |
| 'query_time': 0, | |
| 'rows_examined': 0, | |
| 'rows_sent': 0, | |
| 'count': 0 | |
| }) | |
| for t in queries[query]: | |
| summary[-1]['lock_time'] += float(t['lock_time']) | |
| summary[-1]['query_time'] += float(t['query_time']) | |
| summary[-1]['rows_examined'] += int(t['rows_examined']) | |
| summary[-1]['rows_sent'] += int(t['rows_sent']) | |
| summary[-1]['count'] += 1 | |
| return summary | |
| with open(sys.argv[1]) as log_file: | |
| queries = extract_queries(log_file.read()) | |
| summary = generate_summary(queries) | |
| summary.sort(key=lambda x: -x['query_time']) | |
| for s in summary: | |
| print('Count: %s Query_time: %s Lock_time: %s Rows_sent: %s Rows_examined: %s:' % ( | |
| s['count'], s['query_time'], s['lock_time'], s['rows_sent'], s['rows_examined'] | |
| )) | |
| print(s['query'] + "\n") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment