Created
March 5, 2023 20:44
-
-
Save skeeto/03e8d08254006f28897ea26b5562d0ce to your computer and use it in GitHub Desktop.
P-square summary
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Summarize a numeric sequence | |
// Ref: https://old.reddit.com/r/commandline/comments/11hw06b | |
// Ref: https://github.com/ahmedakef/summarize | |
// This is free and unencumbered software released into the public domain. | |
#include <float.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
typedef struct { | |
double sum, err; | |
} Kahan; | |
static Kahan sum(Kahan k, double x) | |
{ | |
double y = x - k.err; | |
double t = k.sum + y; | |
k.err = t - k.sum - y; | |
k.sum = t; | |
return k; | |
} | |
// Jain, Raj, Chlamtac. "The P^2 Algorithm for Dynamic Calculation of | |
// Quantiles and Histograms Without Storing Observations." (1985) | |
typedef struct { | |
double markers[5]; | |
double desired[5]; | |
double increment[5]; | |
double positions[5]; | |
} Psquare; | |
static Psquare psquare(double p) | |
{ | |
Psquare ps = {0}; | |
ps.desired[0] = 1; | |
ps.desired[1] = 1 + 2*p; | |
ps.desired[2] = 1 + 4*p; | |
ps.desired[3] = 3 + 2*p; | |
ps.desired[4] = 5; | |
ps.increment[0] = 0; | |
ps.increment[1] = p/2; | |
ps.increment[2] = p; | |
ps.increment[3] = (1 + p)/2; | |
ps.increment[4] = 1; | |
ps.positions[0] = 1; | |
ps.positions[1] = 2; | |
ps.positions[2] = 3; | |
ps.positions[3] = 4; | |
ps.positions[4] = 5; | |
return ps; | |
} | |
static void insert(Psquare *ps, double x, long long idx) | |
{ | |
if (idx < 5) { | |
int n = (int)idx; | |
for (int i = 0; i < n; i++) { | |
if (ps->markers[i] > x) { | |
double tmp = ps->markers[i]; | |
ps->markers[i] = x; | |
x = tmp; | |
} | |
} | |
ps->markers[n] = x; | |
} else { | |
int k; | |
if (x < ps->markers[0]) { | |
k = 0; | |
ps->markers[0] = x; | |
} else if (x > ps->markers[4]) { | |
k = 4; | |
ps->markers[4] = x; | |
} else { | |
for (k = 0; k < 3; k++) { | |
if (x < ps->markers[k+1]) { | |
break; | |
} | |
} | |
} | |
for (int i = k+1; i < 5; i++) { | |
ps->positions[i] += 1.0; | |
} | |
for (int i = 0; i < 5; i++) { | |
ps->desired[i] += ps->increment[i]; | |
} | |
for (int i = 1; i <= 3; i++) { | |
double d = ps->desired[i] - ps->positions[i]; | |
double ip = ps->positions[i-1]; | |
double ii = ps->positions[ i ]; | |
double in = ps->positions[i+1]; | |
if ((d>=1 && in-ii>1) || (d<=-1 && ip-ii<-1)) { | |
int di = d<0 ? -1 : +1; | |
double dd = di; | |
double qp = ps->markers[i-1]; | |
double q = ps->markers[ i ]; | |
double qn = ps->markers[i+1]; | |
double qq = q + dd/(in - ip) * ((ii-ip+dd)*(qn-q)/(in-ii) + | |
(in-ii-dd)*(q-qp)/(ii-ip)); | |
if (qp<qq && qq<qn) { | |
ps->markers[i] = qq; | |
} else { | |
double qd = ps->markers[i+di]; | |
double id = ps->positions[i+di]; | |
q += dd*(qd-q)/(id-ii); | |
ps->markers[i] = q; | |
} | |
ps->positions[i] += dd; | |
} | |
} | |
} | |
} | |
typedef struct { | |
long long count; | |
double min, max; | |
Kahan kahan; | |
Psquare p95; | |
Psquare p99; | |
} Accumulator; | |
static Accumulator accumulator(void) | |
{ | |
Accumulator acc = {0}; | |
acc.min = DBL_MAX; | |
acc.max = DBL_MIN; | |
acc.p95 = psquare(0.95); | |
acc.p99 = psquare(0.99); | |
return acc; | |
} | |
static void accumulate(Accumulator *acc, double x) | |
{ | |
acc->kahan = sum(acc->kahan, x); | |
acc->min = x<acc->min ? x : acc->min; | |
acc->max = x>acc->max ? x : acc->max; | |
insert(&acc->p95, x, acc->count); | |
insert(&acc->p99, x, acc->count); | |
acc->count++; | |
} | |
int main(void) | |
{ | |
Accumulator acc = accumulator(); | |
// TODO: better/smarter input handle | |
char line[256]; | |
while (fgets(line, sizeof(line), stdin)) { | |
double n = strtod(line, 0); | |
accumulate(&acc, n); | |
} | |
// TODO: Handle count==0 and count<5 special cases | |
double mean = acc.kahan.sum / (double)acc.count; | |
printf("%-12s%-12s%-12s%-12s%-12s%-12s\n", | |
"Count", "Mean", "Min", "Max", "P95", "P99"); | |
printf("%-12lld%-12.9g%-12.9g%-12.9g%-12.9g%-12.9g\n", | |
acc.count, mean, acc.min, acc.max, | |
acc.p95.markers[2], acc.p99.markers[2]); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment