Created
July 25, 2016 14:12
-
-
Save primiano/2c429e8ba993c26264e66800f46fca0f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import json | |
import gzip | |
import collections | |
import os | |
import re | |
import sys | |
def sizeof_fmt(num):
    """Format a byte count as a human-readable string using binary units.

    The value is scaled by powers of 1024 so that it agrees with the
    ``KiB``/``MiB``/``GiB``/``TiB`` suffixes it is printed with.

    Args:
        num: Size in bytes (int or float; may be negative).

    Returns:
        A string such as ``'1.5 KiB'``: the scaled value with one decimal
        place followed by the unit. Values too large for the biggest unit
        are still expressed in ``TiB`` (e.g. ``'5000.0 TiB'``).
    """
    units = ('B', 'KiB', 'MiB', 'GiB', 'TiB')
    # Work in floats so the division never truncates (Python 2 int division).
    value = float(num)
    # Only step through the units that have a larger unit after them; the
    # last unit is the catch-all and must not be divided past.
    for unit in units[:-1]:
        if abs(value) < 1024.0:
            return '%3.1f %s' % (value, unit)
        value /= 1024.0
    return '%3.1f %s' % (value, units[-1])
def main():
    """Filter a trace file down to regular events plus the last few dumps.

    Reads the trace named by ``sys.argv[1]`` (plain JSON, or gzip-compressed
    when the name ends in ``.gz``) and writes ``<name>-filtered.json`` next
    to it, where ``<name>`` is the input path up to its first ``.json``.

    Filtering rules applied to ``trace['traceEvents']``:

    * events whose ``cat`` ends with ``memory-infra`` and whose ``ph`` is
      neither ``'v'`` nor ``'V'`` are dropped;
    * events with ``ph == 'v'`` are grouped by ``id`` into "global dumps";
      only the last ``NDUMPS`` groups are kept, preferring groups whose
      first event has ``level_of_detail == 'detailed'``;
    * every other event is kept unchanged.

    Progress and a per-phase histogram are printed to stdout. The process
    count in the summary is the number of distinct ``pid`` values seen
    across all events (NOTE(review): presumed intent; confirm).

    Returns:
        1 if no input path was given on the command line, 0 otherwise.
    """
    if len(sys.argv) < 2:
        print('Usage: %s trace.json[.gz]' % sys.argv[0])
        return 1

    in_path = sys.argv[1]
    out_path = in_path.split('.json')[0] + '-filtered.json'

    print('Loading trace (can take 1 min on a z620 for a 1GB trace)...')
    if in_path.lower().endswith('.gz'):
        fin = gzip.open(in_path, 'rb')
    else:
        fin = open(in_path, 'r')
    # The context manager closes the input even if parsing fails.
    with fin:
        trace = json.load(fin)
        num_bytes_read = fin.tell()
    print(' Done. Read ' + sizeof_fmt(num_bytes_read))

    phase_count = collections.defaultdict(int)
    pids_seen = set()  # distinct 'pid' values, reported in the summary
    out_events = []
    # Ordered so that "the last N dumps" means the last N seen in the trace.
    global_dumps = collections.OrderedDict()

    print('Filtering events')
    in_events = trace['traceEvents']
    in_events_len = len(in_events)
    for evt_count, evt in enumerate(in_events, 1):
        # Refresh the progress line every 512 events.
        if evt_count & 511 == 0:
            progress = '[%d / %d] %d %%' % (
                evt_count, in_events_len, evt_count * 100 // in_events_len)
            sys.stdout.write('\r%-80s' % progress)
            sys.stdout.flush()

        ph = evt.get('ph', '?')
        phase_count[ph] += 1
        if 'pid' in evt:
            pids_seen.add(evt['pid'])

        # Drop all diagnostic events (flow and async for runtime debugging).
        if ph not in ('v', 'V') and evt.get('cat', '').endswith('memory-infra'):
            continue

        if ph != 'v':
            out_events.append(evt)
            continue

        # 'v' events are held back and grouped by dump id; only some of the
        # groups are written out below.
        global_dumps.setdefault(evt['id'], []).append(evt)

    # Blank out the progress line (80 spaces) before printing the summary.
    print('\r%-80s\n' % '')
    print('Detected %d memory-infra global dumps' % len(global_dumps))
    # "or [0]" keeps max() from raising when the trace contains no dumps.
    max_procs = max([len(dump) for dump in global_dumps.values()] or [0])
    print('Max number of processes seen: %d' % max_procs)

    NDUMPS = 2
    print('Preserving the last %d dumps' % NDUMPS)
    detailed_dumps = []
    non_detailed_dumps = []
    for global_dump in global_dumps.values():
        # The level of detail is read from the first event of each group.
        level = global_dump[0]['args']['dumps'].get('level_of_detail')
        if level == 'detailed':
            detailed_dumps.append(global_dump)
        else:
            non_detailed_dumps.append(global_dump)

    # Prefer detailed dumps; top up with non-detailed ones if there are
    # fewer than NDUMPS detailed dumps.
    dumps_to_preserve = detailed_dumps[-NDUMPS:]
    missing = NDUMPS - len(dumps_to_preserve)
    if missing > 0:
        dumps_to_preserve += non_detailed_dumps[-missing:]
    for global_dump in dumps_to_preserve:
        out_events.extend(global_dump)

    print('\n')
    print('%d events, %d processes' % (in_events_len, len(pids_seen)))
    print('Events histogram (count)')
    print('------------------------')
    for phase, count in sorted(phase_count.items(), key=lambda item: item[1]):
        print('%s %d' % (phase, count))
    print('')

    print('Writing filtered trace to ' + out_path)
    with open(out_path, 'w') as fout:
        json.dump({'traceEvents': out_events}, fout)
        num_bytes_written = fout.tell()
    print('Wrote ' + sizeof_fmt(num_bytes_written))
    return 0
if __name__ == '__main__':
    # Run the filter and hand main()'s return value to the shell as the
    # process exit status.
    exit_status = main()
    sys.exit(exit_status)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment