Skip to content

Instantly share code, notes, and snippets.

@primiano
Created July 25, 2016 14:12
Show Gist options
  • Save primiano/2c429e8ba993c26264e66800f46fca0f to your computer and use it in GitHub Desktop.
Save primiano/2c429e8ba993c26264e66800f46fca0f to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import json
import gzip
import collections
import os
import re
import sys
def sizeof_fmt(num):
    """Format a byte count as a human-readable string using binary units.

    The value is scaled by powers of 1024 so that it agrees with the IEC
    unit labels (KiB, MiB, GiB, TiB) that are emitted.

    Args:
        num: Size in bytes (int or float; may be negative).

    Returns:
        A string such as '1.5 KiB'. Values too large for the biggest known
        unit are still expressed in that unit (e.g. '5120.0 TiB').
    """
    units = ['B'] + [prefix + 'iB' for prefix in 'KMGT']
    # Work in floating point so integer inputs keep their fractional part
    # when scaled down (true division on both Python 2 and Python 3).
    value = float(num)
    for unit in units[:-1]:
        if abs(value) < 1024.0:
            return '%3.1f %s' % (value, unit)
        value /= 1024.0
    # Only the largest unit is left; the value has been scaled exactly once
    # per smaller unit, so it is already expressed in this unit.
    return '%3.1f %s' % (value, units[-1])
def _load_trace(in_path):
    """Parse the JSON trace stored at in_path (optionally gzip-compressed).

    Returns:
        A (trace, position) tuple where position is what the file object's
        tell() reports once parsing has finished.
    """
    if in_path.lower().endswith('.gz'):
        fin = gzip.open(in_path, 'rb')
    else:
        fin = open(in_path, 'r')
    # try/finally so the handle is released even when the JSON is malformed.
    try:
        trace = json.load(fin)
        position = fin.tell()
    finally:
        fin.close()
    return trace, position


def _select_dumps_to_preserve(global_dumps, num_dumps):
    """Pick the last num_dumps global dumps, preferring detailed ones.

    Args:
        global_dumps: Ordered mapping of dump id -> list of 'v' events.
        num_dumps: Maximum number of global dumps to keep.

    Returns:
        A list of global dumps (each a list of events): the trailing
        detailed dumps, topped up with trailing non-detailed dumps when
        fewer than num_dumps detailed dumps exist.
    """
    if num_dumps <= 0:
        return []
    detailed_dumps = []
    non_detailed_dumps = []
    for global_dump in global_dumps.values():
        # The level of detail is read from the first event of each dump.
        level = global_dump[0]['args']['dumps'].get('level_of_detail')
        if level == 'detailed':
            detailed_dumps.append(global_dump)
        else:
            non_detailed_dumps.append(global_dump)
    preserved = detailed_dumps[-num_dumps:]
    missing = num_dumps - len(preserved)
    if missing > 0:
        preserved += non_detailed_dumps[-missing:]
    return preserved


def main():
    """Strip a trace down to its last few memory-infra dumps.

    Reads the trace named by sys.argv[1] (JSON, optionally .gz), drops
    memory-infra events whose phase is neither 'v' nor 'V', keeps every
    other non-'v' event, and of the 'v' events (grouped by their 'id')
    keeps only the last two global dumps, preferring detailed ones. The
    result is written next to the input as '<name>-filtered.json'.

    Returns:
        1 if no input path was supplied, 0 otherwise (suitable for
        sys.exit()).
    """
    if len(sys.argv) < 2:
        print('Usage: %s trace.json[.gz]' % sys.argv[0])
        return 1

    in_path = sys.argv[1]
    # Derive the output name from the file name only, so a '.json' that
    # appears in a directory component cannot truncate the path.
    in_dir, in_name = os.path.split(in_path)
    out_path = os.path.join(
        in_dir, in_name.split('.json')[0] + '-filtered.json')

    print('Loading trace (can take 1 min on a z620 for a 1GB trace)...')
    trace, num_bytes_read = _load_trace(in_path)
    print(' Done. Read ' + sizeof_fmt(num_bytes_read))

    phase_count = collections.defaultdict(int)
    pids_seen = set()
    out_events = []
    # Ordered so that "last N dumps" follows the order of first appearance.
    global_dumps = collections.OrderedDict()

    print('Filtering events')
    in_events = trace['traceEvents']
    in_events_len = len(in_events)
    for evt_count, evt in enumerate(in_events, 1):
        if evt_count & 511 == 0:
            # Refresh the single-line progress indicator every 512 events.
            progress = '[%d / %d] %d %%' % (
                evt_count, in_events_len, evt_count * 100 // in_events_len)
            sys.stdout.write('\r%-80s' % progress)
            sys.stdout.flush()

        ph = evt.get('ph', '?')
        phase_count[ph] += 1
        if 'pid' in evt:
            pids_seen.add(evt['pid'])

        # Drop all diagnostic events (flow and async for runtime debugging).
        if ph not in ('v', 'V') and evt.get('cat', '').endswith('memory-infra'):
            continue
        if ph != 'v':
            out_events.append(evt)
            continue
        # 'v' events are grouped by id; each group is one global dump.
        global_dumps.setdefault(evt['id'], []).append(evt)

    # Blank out the progress line before printing the summary.
    print('\r%-80s\n' % '')

    print('Detected %d memory-infra global dumps' % len(global_dumps))
    max_procs = 0
    if global_dumps:
        max_procs = max(len(dump) for dump in global_dumps.values())
    print('Max number of processes seen: %d' % max_procs)

    num_dumps = 2
    print('Preserving the last %d dumps' % num_dumps)
    for global_dump in _select_dumps_to_preserve(global_dumps, num_dumps):
        out_events += global_dump

    print('\n')
    print('%d events, %d processes' % (in_events_len, len(pids_seen)))
    print('Events histogram (count)')
    print('------------------------')
    for phase, count in sorted(phase_count.items(), key=lambda kv: kv[1]):
        print('%s %d' % (phase, count))
    print('')

    print('Writing filtered trace to ' + out_path)
    with open(out_path, 'w') as fout:
        json.dump({'traceEvents': out_events}, fout)
        num_bytes_written = fout.tell()
    print('Wrote ' + sizeof_fmt(num_bytes_written))
    return 0
# Script entry point: run main() and hand its return value to the
# interpreter as the process exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment