Skip to content

Instantly share code, notes, and snippets.

@skeeto
Created March 5, 2023 20:44
Show Gist options
  • Save skeeto/03e8d08254006f28897ea26b5562d0ce to your computer and use it in GitHub Desktop.
Save skeeto/03e8d08254006f28897ea26b5562d0ce to your computer and use it in GitHub Desktop.
P-square summary
// Summarize a numeric sequence
// Ref: https://old.reddit.com/r/commandline/comments/11hw06b
// Ref: https://github.com/ahmedakef/summarize
// This is free and unencumbered software released into the public domain.
#include <float.h>
#include <stdio.h>
#include <stdlib.h>
typedef struct {
double sum, err;
} Kahan;
static Kahan sum(Kahan k, double x)
{
double y = x - k.err;
double t = k.sum + y;
k.err = t - k.sum - y;
k.sum = t;
return k;
}
// Jain, Raj, Chlamtac. "The P^2 Algorithm for Dynamic Calculation of
// Quantiles and Histograms Without Storing Observations." (1985)
typedef struct {
double markers[5];
double desired[5];
double increment[5];
double positions[5];
} Psquare;
static Psquare psquare(double p)
{
Psquare ps = {0};
ps.desired[0] = 1;
ps.desired[1] = 1 + 2*p;
ps.desired[2] = 1 + 4*p;
ps.desired[3] = 3 + 2*p;
ps.desired[4] = 5;
ps.increment[0] = 0;
ps.increment[1] = p/2;
ps.increment[2] = p;
ps.increment[3] = (1 + p)/2;
ps.increment[4] = 1;
ps.positions[0] = 1;
ps.positions[1] = 2;
ps.positions[2] = 3;
ps.positions[3] = 4;
ps.positions[4] = 5;
return ps;
}
static void insert(Psquare *ps, double x, long long idx)
{
if (idx < 5) {
int n = (int)idx;
for (int i = 0; i < n; i++) {
if (ps->markers[i] > x) {
double tmp = ps->markers[i];
ps->markers[i] = x;
x = tmp;
}
}
ps->markers[n] = x;
} else {
int k;
if (x < ps->markers[0]) {
k = 0;
ps->markers[0] = x;
} else if (x > ps->markers[4]) {
k = 4;
ps->markers[4] = x;
} else {
for (k = 0; k < 3; k++) {
if (x < ps->markers[k+1]) {
break;
}
}
}
for (int i = k+1; i < 5; i++) {
ps->positions[i] += 1.0;
}
for (int i = 0; i < 5; i++) {
ps->desired[i] += ps->increment[i];
}
for (int i = 1; i <= 3; i++) {
double d = ps->desired[i] - ps->positions[i];
double ip = ps->positions[i-1];
double ii = ps->positions[ i ];
double in = ps->positions[i+1];
if ((d>=1 && in-ii>1) || (d<=-1 && ip-ii<-1)) {
int di = d<0 ? -1 : +1;
double dd = di;
double qp = ps->markers[i-1];
double q = ps->markers[ i ];
double qn = ps->markers[i+1];
double qq = q + dd/(in - ip) * ((ii-ip+dd)*(qn-q)/(in-ii) +
(in-ii-dd)*(q-qp)/(ii-ip));
if (qp<qq && qq<qn) {
ps->markers[i] = qq;
} else {
double qd = ps->markers[i+di];
double id = ps->positions[i+di];
q += dd*(qd-q)/(id-ii);
ps->markers[i] = q;
}
ps->positions[i] += dd;
}
}
}
}
typedef struct {
long long count;
double min, max;
Kahan kahan;
Psquare p95;
Psquare p99;
} Accumulator;
static Accumulator accumulator(void)
{
Accumulator acc = {0};
acc.min = DBL_MAX;
acc.max = DBL_MIN;
acc.p95 = psquare(0.95);
acc.p99 = psquare(0.99);
return acc;
}
static void accumulate(Accumulator *acc, double x)
{
acc->kahan = sum(acc->kahan, x);
acc->min = x<acc->min ? x : acc->min;
acc->max = x>acc->max ? x : acc->max;
insert(&acc->p95, x, acc->count);
insert(&acc->p99, x, acc->count);
acc->count++;
}
int main(void)
{
Accumulator acc = accumulator();
// TODO: better/smarter input handle
char line[256];
while (fgets(line, sizeof(line), stdin)) {
double n = strtod(line, 0);
accumulate(&acc, n);
}
// TODO: Handle count==0 and count<5 special cases
double mean = acc.kahan.sum / (double)acc.count;
printf("%-12s%-12s%-12s%-12s%-12s%-12s\n",
"Count", "Mean", "Min", "Max", "P95", "P99");
printf("%-12lld%-12.9g%-12.9g%-12.9g%-12.9g%-12.9g\n",
acc.count, mean, acc.min, acc.max,
acc.p95.markers[2], acc.p99.markers[2]);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment