Skip to content

Instantly share code, notes, and snippets.

@markpapadakis
Created September 19, 2016 19:36
Show Gist options
  • Save markpapadakis/410dda4d8da5cd91942ca6147cdccca6 to your computer and use it in GitHub Desktop.
Save markpapadakis/410dda4d8da5cd91942ca6147cdccca6 to your computer and use it in GitHub Desktop.
#include "llqhistogram.h"
#include <alloca.h>
#ifndef HAVE_SWITCH
#include <cmath>
#include <string.h>
#include <algorithm>
#endif
double const LLQHistogram::power_of_ten[256] = {
1, 10, 100, 1000, 10000, 100000, 1e+06, 1e+07, 1e+08, 1e+09, 1e+10,
1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, 1e+18, 1e+19, 1e+20,
1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30,
1e+31, 1e+32, 1e+33, 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39, 1e+40,
1e+41, 1e+42, 1e+43, 1e+44, 1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50,
1e+51, 1e+52, 1e+53, 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60,
1e+61, 1e+62, 1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70,
1e+71, 1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80,
1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89, 1e+90,
1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98, 1e+99, 1e+100,
1e+101, 1e+102, 1e+103, 1e+104, 1e+105, 1e+106, 1e+107, 1e+108, 1e+109,
1e+110, 1e+111, 1e+112, 1e+113, 1e+114, 1e+115, 1e+116, 1e+117, 1e+118,
1e+119, 1e+120, 1e+121, 1e+122, 1e+123, 1e+124, 1e+125, 1e+126, 1e+127,
1e-128, 1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120,
1e-119, 1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111,
1e-110, 1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102,
1e-101, 1e-100, 1e-99, 1e-98, 1e-97, 1e-96,
1e-95, 1e-94, 1e-93, 1e-92, 1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86,
1e-85, 1e-84, 1e-83, 1e-82, 1e-81, 1e-80, 1e-79, 1e-78, 1e-77, 1e-76,
1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66,
1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56,
1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46,
1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39, 1e-38, 1e-37, 1e-36,
1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28, 1e-27, 1e-26,
1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17, 1e-16,
1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-09, 1e-08, 1e-07, 1e-06,
1e-05, 0.0001, 0.001, 0.01, 0.1};
LLQHistogram::bucket::bucket(int64_t value, int32_t scale)
{
if (!value)
{
val = 0;
exp = 0;
return;
}
int8_t sign;
if (value < 0)
{
value = 0 - value;
sign = -1;
}
else
sign = 1;
++scale;
if (value < 10)
{
value *= 10;
scale -= 1;
}
while (value > 100)
{
value /= 10;
++scale;
}
if (scale < -128)
{
val = 0;
exp = 0;
return;
}
else if (scale > 127)
{
value = int8_t(0xff);
exp = 0;
return;
}
val = sign * value;
exp = scale;
}
LLQHistogram::bucket::bucket(double d)
{
if (std::isnan(d) || std::isinf(d))
{
val = int8_t(0xff);
exp = 0;
return;
}
else if (d == 0)
val = 0;
else
{
const int8_t sign = (d < 0) ? -1 : 1;
d = fabs(d);
const auto big_exp = (int32_t)floor(log10(d));
exp = (int8_t)big_exp;
if (exp != big_exp)
{
// overflow
if (big_exp < 0)
{
// d is in [0 .. 1e-128). Return 0
val = 0;
exp = 0;
}
else
{
// d is >= 1e128. Return NaN
val = (int8_t)0xff;
exp = 0;
}
return;
}
const auto *const pidx = (uint8_t *)&exp;
d /= power_of_ten[*pidx];
d *= 10;
// avoid rounding problem at the bucket boundary
// e.g. d=0.11 results in hb.val = 10 (shoud be 11)
// by allowing a error margin (in the order or magintude
// of the exected rounding errors of the above transformations)
val = sign * (int)floor(d + 1e-13);
if (val == 100 || val == -100)
{
if (exp < 127)
{
val /= 10;
++exp;
}
else
{
// can't increase exponent. Return NaN
val = (int8_t)0xff;
exp = 0;
}
}
if (val == 0)
{
exp = 0;
}
else if (!((val >= 10 && val < 100) || (val <= -10 && val > -100)))
{
val = (int8_t)0xff;
exp = 0;
}
}
}
std::pair<bool, uint32_t> LLQHistogram::find(const bucket b) const noexcept
{
// This is a simple binary search returning the idx as pair.second in which
// the specified bucket belongs... returning true if it is there
// or false if the value would need to be inserted here (moving the
// rest of the buckets forward one.
if (used == 0)
return {false, 0};
int32_t l{0}, r{used - 1};
while (l < r)
{
const int32_t check = (r + l) / 2;
const auto rv = bvs[check].bucket.cmp(b);
if (rv == 0)
return {true, check};
else if (rv > 0)
l = check + 1;
else
r = check - 1;
}
if (bvs[l].bucket.cmp(b) < 0)
return {false, l};
else
return {false, l + 1};
}
LLQHistogram::count_t LLQHistogram::insert_impl(const bucket hb, count_t count)
{
const auto findRes = find(hb);
const auto idx = findRes.second;
if (!findRes.first)
{
if (used == allocd)
{
allocd += DefaultHistSize;
auto *const newBvs = (decltype(bvs))malloc((allocd) * sizeof(*bvs));
assert(newBvs);
if (idx > 0)
memcpy(newBvs, bvs, idx * sizeof(*bvs));
newBvs[idx].bucket = hb;
newBvs[idx].count = count;
if (idx < used)
memcpy(newBvs + idx + 1, bvs + idx, (used - idx) * sizeof(*bvs));
free(bvs);
bvs = newBvs;
}
else
{
memmove(bvs + idx + 1, bvs + idx, (used - idx) * sizeof(*bvs));
bvs[idx].bucket = hb;
bvs[idx].count = count;
}
++used;
}
else
{
// Just need to update the counters
auto newval = bvs[idx].count + count;
if (newval < bvs[idx].count)
{
// rolled
newval = ~(count_t)0;
}
count = newval - bvs[idx].count;
bvs[idx].count = newval;
}
return count;
}
LLQHistogram::count_t LLQHistogram::remove(const double val, count_t count) noexcept
{
const auto res = find(val);
if (res.first)
{
const auto idx = res.second;
auto newval = bvs[idx].count - count;
if (newval > bvs[idx].count)
newval = 0;
count = bvs[idx].count - newval;
bvs[idx].count = newval;
return count;
}
else
return 0;
}
double LLQHistogram::approx_sum() const noexcept
{
double sum{0};
for (uint32_t i{0}; i != used; ++i)
{
if (bvs[i].bucket.isnan())
continue;
const auto value = double(bvs[i].bucket);
const auto cardinality = double(bvs[i].count);
sum += value * cardinality;
}
return sum;
}
int8_t LLQHistogram::approx_quantile(const double *const q_in, const uint32_t nq, double *const q_out) const noexcept
{
if (!nq)
return 0;
double total_cnt{0};
// Sum up all samples from all the bins */
for (uint32_t i{0}; i != used; ++i)
{
if (bvs[i].bucket.isnan())
continue;
total_cnt += double(bvs[i].count);
}
// Run through the quantiles and make sure they are in order
for (uint32_t i{1}; i < nq; ++i)
{
if (q_in[i - 1] > q_in[i])
return -2;
}
// We use q_out as temporary space to hold the count-normailzed quantiles
for (uint32_t i{0}; i != nq; ++i)
{
if (q_in[i] < 0.0 || q_in[i] > 1.0)
return -3;
q_out[i] = total_cnt * q_in[i];
}
#define TRACK_VARS(idx) \
do \
{ \
bucket_width = bvs[idx].bucket.bin_width(); \
bucket_left = bvs[idx].bucket.left(); \
lower_cnt = upper_cnt; \
upper_cnt = lower_cnt + bvs[idx].count; \
} while (0)
// Find the least bin (first)
double lower_cnt{0}, upper_cnt{0}, bucket_left{0}, bucket_width{0};
uint32_t i_b;
for (i_b = 0; i_b != used; ++i_b)
{
if (bvs[i_b].bucket.isnan())
continue;
TRACK_VARS(i_b);
break;
}
// Next walk the bins and the quantiles together
for (uint32_t i{0}; i != nq; ++i)
{
// And within that, advance the bins as needed
while (i_b < used - 1 && upper_cnt < q_out[i])
{
++i_b;
TRACK_VARS(i_b);
}
if (lower_cnt == q_out[i])
q_out[i] = bucket_left;
else if (upper_cnt == q_out[i])
q_out[i] = bucket_left + bucket_width;
else
{
if (bucket_width == 0)
q_out[i] = bucket_left;
else
q_out[i] = bucket_left + (q_out[i] - lower_cnt) / (upper_cnt - lower_cnt) * bucket_width;
}
}
#undef TRACK_VARS
return 0;
}
double LLQHistogram::approx_mean() const noexcept
{
double divisor{0}, sum{0};
for (uint32_t i{0}; i != used; ++i)
{
if (bvs[i].bucket.isnan())
continue;
const auto midpoint = bvs[i].bucket.midpoint();
const auto cardinality = double(bvs[i].count);
divisor += cardinality;
sum += midpoint * cardinality;
}
return divisor ? sum / divisor : LLQHistogram::nan();
}
void LLQHistogram::accumulate(const LLQHistogram **const src, const uint32_t srcCnt)
{
auto list = (LLQHistogram **)alloca(sizeof(LLQHistogram *) * (srcCnt + 1));
uint16_t cnt{0};
uint32_t max{0};
assert(list);
for (uint32_t i{0}; i != srcCnt; ++i)
{
if (const auto n = src[i]->used)
{
list[cnt++] = (LLQHistogram *)src[i];
max = std::max<uint32_t>(max, n);
}
}
if (const auto n = used)
{
list[cnt++] = this;
max = std::max<uint32_t>(max, n);
}
if (0 == cnt)
return;
auto toAdvance = (uint16_t *)alloca(cnt * sizeof(uint16_t)), itv = (uint16_t *)alloca(cnt * sizeof(uint16_t));
auto newList = (decltype(bvs))malloc(sizeof(*bvs) * max);
uint16_t toAdvanceCnt;
uint32_t out{0};
assert(toAdvance);
assert(itv);
memset(itv, 0, cnt * sizeof(uint16_t));
do
{
auto lowest = list[0]->bvs[itv[0]].bucket;
toAdvanceCnt = 1;
toAdvance[0] = 0;
for (uint32_t i{1}; i < cnt; ++i)
{
const auto &b = list[i]->bvs[itv[i]].bucket;
const auto r = b.cmp(lowest);
if (r == 0)
toAdvance[toAdvanceCnt++] = i;
else if (r < 0)
{
lowest = b;
toAdvanceCnt = 1;
toAdvance[0] = i;
}
}
if (out == max)
{
max += DefaultHistSize;
newList = (decltype(newList))realloc(newList, max * sizeof(*bvs));
}
count_t oldValue{0};
newList[out].bucket = lowest;
while (toAdvanceCnt)
{
const auto idx = toAdvance[--toAdvanceCnt];
const auto ptr = &list[idx]->bvs[itv[idx]++];
auto newVal = oldValue + ptr->count;
if (newVal < oldValue)
newVal = ~count_t(0);
oldValue = newVal;
if (itv[idx] == list[idx]->used)
{
const auto span = (--cnt - idx);
memmove(list + idx, list + idx + 1, sizeof(LLQHistogram *) * span);
memmove(itv + idx, itv + idx + 1, sizeof(uint16_t) * span);
}
}
newList[out++].count = oldValue;
} while (cnt);
if (bvs)
free(bvs);
bvs = newList;
used = out;
allocd = max;
}
#ifdef HAVE_SWITCH
void LLQHistogram::serialize(IOBuffer *const out) const
{
out->SerializeVarUInt32(used);
for (const auto it : Switch::make_range(bvs, used))
{
out->Serialize(it->bucket.val);
out->Serialize(it->bucket.exp);
out->SerializeVarUInt32(it->count);
}
}
LLQHistogram::LLQHistogram(IOBuffer *const in)
{
used = in->UnserializeVarUInt32();
allocd = used;
if (allocd)
{
bvs = (decltype(bvs))malloc(sizeof(*bvs) * allocd);
for (uint32_t i{0}; i != used; ++i)
{
auto it = bvs + i;
it->bucket.val = in->Unserialize<uint8_t>();
it->bucket.exp = in->Unserialize<uint8_t>();
it->count = in->UnserializeVarUInt32();
}
}
}
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment