Skip to content

Instantly share code, notes, and snippets.

@andreas-wilm
Created January 25, 2019 01:53
Show Gist options
  • Save andreas-wilm/8342ea50934dd63ef7d33d0a48fbcb26 to your computer and use it in GitHub Desktop.
Save andreas-wilm/8342ea50934dd63ef7d33d0a48fbcb26 to your computer and use it in GitHub Desktop.
Incomplete five-minute hack to massage an NF trace.txt file
#!/bin/env python
import csv
import sys
import numpy
# Multipliers converting a memory amount in the given unit to GB.
# The MB and GB factors are the ones this script has always used (decimal
# scaling); the remaining units extend the same scale so that rows reported
# in other units no longer crash or silently reuse a stale factor.
# NOTE(review): the trace producer may use binary (1024-based) units --
# confirm before relying on exact GB figures.
RSS_UNIT_TO_GB = {
    'B': 1e-9,
    'KB': 1e-6,
    'MB': 0.001,
    'GB': 1.0,
    'TB': 1000.0,
}

# Percentiles reported after count, mean and median.
PERCENTILES = (75, 95, 99)


def parse_cpu(value):
    """Return a '%cpu' field such as '98.5%' as a float.

    A trailing '%' is optional. Returns None when the field holds no number
    (for example a '-' placeholder), so the caller can skip it.
    """
    try:
        return float(value.strip().rstrip('%'))
    except ValueError:
        return None


def parse_rss_gb(value):
    """Return an 'rss' field such as '512 MB' converted to GB.

    The field is expected to be '<number> <unit>' with a unit listed in
    RSS_UNIT_TO_GB. A bare number is assumed to be a byte count
    (TODO confirm against the trace format in use). Returns None when the
    field cannot be interpreted.
    """
    parts = value.split()
    if not parts or len(parts) > 2:
        return None
    try:
        amount = float(parts[0])
    except ValueError:
        return None
    unit = parts[1].upper() if len(parts) == 2 else 'B'
    factor = RSS_UNIT_TO_GB.get(unit)
    if factor is None:
        return None
    return amount * factor


def collect_stats(rows):
    """Group per-task values by process name.

    rows is an iterable of dicts with the keys 'exit', 'name', 'duration',
    '%cpu' and 'rss'. Only rows whose 'exit' is '0' are used. The process
    name is the first whitespace-separated token of 'name'.

    Returns a dict mapping name -> {'time': [...], 'cpu': [...], 'rss': [...]}
    where 'cpu' holds floats, 'rss' holds GB floats and 'time' holds the raw
    duration strings.
    """
    stats_per_name = {}
    for row in rows:
        if row['exit'] != '0':
            continue
        tokens = row['name'].split()
        if not tokens:
            continue
        stats = stats_per_name.setdefault(
            tokens[0], {'time': [], 'cpu': [], 'rss': []})

        # duration longer than realtime. including submission?
        # FIXME just using time string. therefore skipped on output
        stats['time'].append(row['duration'])

        cpu = parse_cpu(row['%cpu'])
        if cpu is not None:
            stats['cpu'].append(cpu)

        rss = parse_rss_gb(row['rss'])
        if rss is not None:
            stats['rss'].append(rss)
    return stats_per_name


def summary_lines(stats_per_name):
    """Yield one tab-separated summary line per (name, resource).

    Columns: name, resource, count, mean, median, then PERCENTILES.
    'time' is skipped because it is still a raw string, and resources with
    no usable values are skipped because the statistics are undefined.
    """
    for name, valdict in stats_per_name.items():
        for resource, values in valdict.items():
            if resource == 'time' or not values:
                continue
            fields = [name, resource, len(values),
                      numpy.mean(values), numpy.median(values)]
            fields.extend(numpy.percentile(values, q) for q in PERCENTILES)
            yield '\t'.join(str(x) for x in fields)


def main(argv):
    """Read the trace file named in argv[1] and print the summary table."""
    if len(argv) < 2:
        sys.exit('usage: {} trace.txt'.format(argv[0] if argv else 'script'))
    with open(argv[1], newline='') as fh:
        rows = list(csv.DictReader(fh, delimiter="\t"))
    for line in summary_lines(collect_stats(rows)):
        print(line)


if __name__ == '__main__':
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment