Last active
November 4, 2017 14:29
-
-
Save stantonk/977de66d2d9749a62623 to your computer and use it in GitHub Desktop.
Get a histogram, mean, median, stddev, and percentiles from a pipe on the command line with numpy
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
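Print the mean, median, standard deviation, percentiles, and a histogram of
newline-separated numbers read from stdin. Lines that are not numbers are
ignored.

Note: requires numpy. `sudo pip install numpy`

Options:
  -b, --bins         comma-separated histogram bin edges
                     (default: 1,5,10,20,40,80)
  -p, --percentiles  comma-separated percentiles to report
                     (default: 50,95,99)

Example:
$ echo -e "1\n2\n5\n10\n20\n" | get-stats -p 95,99 -b 1,3,8,21,55,144,377,987,2584,6765,17711,1000000
mean=7.6
median=5.0
std=6.94550214167
95th=18.0
99th=19.6
2 1.0 - 3.0
1 3.0 - 8.0
2 8.0 - 21.0
0 21.0 - 55.0
0 55.0 - 144.0
0 144.0 - 377.0
0 377.0 - 987.0
0 987.0 - 2584.0
0 2584.0 - 6765.0
0 6765.0 - 17711.0
0 17711.0 - 1000000.0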
""" | |
import argparse | |
import re | |
import sys | |
from numpy import mean | |
from numpy import median | |
from numpy import std | |
from numpy import percentile | |
from numpy import histogram | |
# Names of the summary statistics to print, in output order. Each name is
# resolved to the numpy function of the same name (imported above) when the
# statistics are printed, so every entry must match one of those imports.
stats = ('mean', 'median', 'std')
def csv_list(s):
    """Parse a comma-separated string of numbers into a list of floats.

    Intended as an argparse ``type=`` converter, e.g. ``'1,5,10'`` becomes
    ``[1.0, 5.0, 10.0]``. Whitespace around an item is tolerated because
    ``float()`` strips it.

    Args:
        s: the raw option value, e.g. ``'50,95,99'``.

    Returns:
        A list of floats, one per comma-separated item, in input order.

    Raises:
        argparse.ArgumentTypeError: if ``s`` is not a string or any item is
            not a valid number (including empty items such as ``'1,,2'``).
            The message names the offending value so argparse can show the
            user what was wrong.
    """
    try:
        return [float(item) for item in s.split(',')]
    except (AttributeError, TypeError, ValueError):
        raise argparse.ArgumentTypeError(
            'expected comma-separated numbers, got %r' % (s,))
def main():
    """Read numbers from stdin and print summary statistics and a histogram.

    Output, in order:
      * one ``name=value`` line per entry in ``stats``,
      * one ``<p>th=value`` line per requested percentile,
      * one ``count<TAB>low - high`` line per histogram bin.

    Command-line options:
      -b/--bins         comma-separated histogram bin edges
      -p/--percentiles  comma-separated percentiles to report

    Exits with an error message on stderr (status 1) when stdin contains no
    numeric lines, instead of handing numpy an empty list.

    Every ``print`` call takes a single pre-formatted string so the script
    produces the same output under Python 2 and Python 3.
    """
    parser = argparse.ArgumentParser(
        description='compute stats from newline separated stdin')
    # argparse runs string defaults through ``type``, so both options always
    # arrive as lists of floats whether or not the flag was given.
    parser.add_argument('-b', '--bins', type=csv_list,
                        default='1,5,10,20,40,80')
    parser.add_argument('-p', '--percentiles', type=csv_list,
                        default='50,95,99')
    args = parser.parse_args()

    vals = []
    for line in sys.stdin:
        try:
            # float() ignores surrounding whitespace, including the newline.
            vals.append(float(line))
        except ValueError:
            # Deliberate best effort: blank lines and non-numeric lines
            # (headers, stray text) are skipped rather than treated as fatal.
            continue

    if not vals:
        sys.exit('no numeric values read from stdin')

    # Explicit name -> function table instead of looking names up in the
    # module namespace; iteration order still follows ``stats``.
    reducers = {'mean': mean, 'median': median, 'std': std}
    for stat in stats:
        print('%s=%s' % (stat, reducers[stat](vals)))

    for pct in args.percentiles:
        # %g drops the trailing ".0" of whole-number percentiles (95.0 -> 95)
        # so the label reads "95th" rather than "95.0th".
        print('%gth=%s' % (pct, percentile(vals, pct)))

    hist, bin_edges = histogram(vals, bins=args.bins)
    # bin_edges has one more entry than hist; pair each count with its
    # lower and upper edge.
    for count, low, high in zip(hist, bin_edges[:-1], bin_edges[1:]):
        print('%s \t%s - %s' % (count, low, high))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment