Created
August 27, 2021 09:09
-
-
Save gzcf/b32c51d21b327adb9e19a23456de0025 to your computer and use it in GitHub Desktop.
Calculate data statistics in bash, including percentiles, min, max, average, and standard deviation. Suitable for inspecting the distribution of system latencies.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# Calculate data statistics, including percentiles, min, max, average, and standard deviation.
# Derived from https://gist.github.com/lewisd32/4be2605400acf0bb562d
#
# Usage & Output
# $ cat nums.txt | ./stats.sh
# total 100
# p50 49
# p90 89
# p95 94
# min 0
# max 99
# avg 49.5
# stddev 29.0115
#
# Input: stdin should contain integers, one per line.
#######################################
# Print a random alphanumeric string.
# Arguments:
#   $1 - number of characters to produce (default: 6)
# Outputs:
#   The random string followed by a newline, on stdout.
#######################################
random_string() {
  local length=${1:-6}
  local s
  # LC_ALL=C makes tr treat /dev/urandom as plain bytes; in a multibyte
  # locale some tr implementations reject the random data as invalid text.
  # head closes the pipe after $length bytes, which stops tr.
  s=$(LC_ALL=C tr -dc 'A-Za-z0-9' </dev/urandom | head -c "$length")
  printf '%s\n' "$s"
}
#######################################
# Read numbers from stdin (one per line) and print summary statistics:
# line count, p50/p90/p95, min, max, average and sample standard deviation.
#
# The body runs in a subshell (note the parentheses) so that its variables
# stay private and the EXIT trap removes the temp file on every exit path.
#
# Outputs:
#   "total N", then "pXX value" per percentile, then min/max/avg/stddev.
#   For empty input only "total 0" is printed.
# Returns:
#   0 on success, non-zero if the temp file cannot be created or sorting fails.
#######################################
print_stats() (
  percentiles=(50 90 95)

  # mktemp creates the file atomically with a unique name, avoiding the
  # race of picking a name in /tmp and creating the file afterwards.
  tmp=$(mktemp "${TMPDIR:-/tmp}/stats_XXXXXX") || {
    printf 'stats: failed to create a temporary file\n' >&2
    exit 1
  }
  trap 'rm -f -- "$tmp"' EXIT

  # Percentile lookup below is done by line number, so the data must be
  # stored in ascending numeric order.
  sort -n >"$tmp" || exit 1

  total=$(wc -l <"$tmp")
  total=$((total)) # normalise: some wc implementations pad the count with spaces
  printf 'total %s\n' "$total"

  # Nothing to compute for empty input (would otherwise ask sed for line 0
  # and divide by zero in awk).
  if ((total == 0)); then
    exit 0
  fi

  for percentile in "${percentiles[@]}"; do
    # (n + 99) / 100 with integers is effectively ceil(n/100) with floats
    pos=$(((total * percentile + 99) / 100))
    printf 'p%s ' "$percentile"
    sed -n "${pos}p" "$tmp"
  done

  awk '
    NR == 1 { min = $0; max = $0 }
    NR >= 2 {
      if ($0 > max) max = $0
      if ($0 < min) min = $0
    }
    {
      sum += $0
      a[NR] = $0
    }
    END {
      avg = sum / NR
      # Fixed index order keeps the floating-point sum deterministic.
      for (i = 1; i <= NR; i++) y += (a[i] - avg) ^ 2
      print "min", min
      print "max", max
      print "avg", avg
      # Sample standard deviation divides by NR-1; report 0 for a single
      # value instead of dividing by zero.
      print "stddev", (NR > 1 ? sqrt(y / (NR - 1)) : 0)
    }' "$tmp"
)

print_stats
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment