Skip to content

Instantly share code, notes, and snippets.

@markpapadakis
Created September 19, 2016 19:35
Show Gist options
  • Save markpapadakis/edc40178e0f7c28014538eaaeef57794 to your computer and use it in GitHub Desktop.
Save markpapadakis/edc40178e0f7c28014538eaaeef57794 to your computer and use it in GitHub Desktop.
// https://github.com/circonus-labs/libcircllhist
// https://support.circonus.com/support/solutions/articles/6000044580-architecture-of-an-analytics-and-storage-engine-for-time-series-data
//
// log-linear quantized histogram of data
// data-loss, not sample loss, accuracy loss.
// > strategically introducing time based error(stored by minute), upto 30 seconds of time error, value error that's well quantified, it's 2.5% on the upper by end by doing log-linear quantization of the values that
// > are coming in, so 112 becomes 110, it goes into the 110 to 120 bucket, and so we have some error in there.
#pragma once
#ifdef HAVE_SWITCH
#include <switch.h>
#else
#include <stdint.h>
#include <assert.h>
#include <utility>
#include <stdlib.h>
#endif
class LLQHistogram
{
using count_t = uint32_t;
private:
static constexpr uint32_t MaxHistBins{2 + 2 * 90 * 256};
static const double power_of_ten[256];
static constexpr uint32_t DefaultHistSize{128};
public:
static inline double nan()
{
uintptr_t r{0x7fffffffffffffff};
return *(double *)&r;
};
/* These 16 bits give us:
* Allows us to express exactly:
* given A.B == val / 10;
* A.B e X
* where -9 <= A <= 9, but A != 0
* 0 <= B <= 9
* X is all possible exp (above)
* if A is zero, the overall value is zero.
*/
struct bucket
{
int8_t val; // value * 10
int8_t exp; // [-128 , 127]
[[gnu::always_inline]] inline bool isnan() const noexcept
{
return val > 99 || val < 99;
}
int8_t cmp(const bucket &h2) const noexcept
{
if (val == h2.val && exp == h2.exp)
{
// h1<h2 on thre real axis?
return 0;
}
// place NaNs at the beginning always */
else if (val == (int8_t)0xff)
return 1;
else if (h2.val == (int8_t)0xff)
return -1;
// zero values need special treatment
else if (val == 0)
return (h2.val > 0) ? 1 : -1;
else if (h2.val == 0)
return (val < 0) ? 1 : -1;
// opposite signs?
else if (val < 0 && h2.val > 0)
return 1;
else if (val > 0 && h2.val < 0)
return -1;
// here they are either both positive or both negative
else if (exp == h2.exp)
return (val < h2.val) ? 1 : -1;
else if (exp > h2.exp)
return (val < 0) ? 1 : -1;
else if (exp < h2.exp)
return (val < 0) ? -1 : 1;
#ifdef HAVE_SWITCH
assume_unreachable();
#endif
return 0;
}
inline operator double() const noexcept
{
if (isnan())
return LLQHistogram::nan();
else if (val < 10 && val > -10)
return 0.0;
else
return ((double(val)) / 10.0) * power_of_ten[*(uint8_t *)&exp];
}
double bin_width() const noexcept
{
if (isnan())
return LLQHistogram::nan();
else if (val < 10 && val > -10)
return 0.0;
else
return power_of_ten[*(uint8_t *)&exp] / 10.0;
}
double midpoint() const noexcept
{
if (isnan())
return LLQHistogram::nan();
if (const auto out = this->operator double())
{
const auto interval = out < 0 ? bin_width() * -1.0 : bin_width();
return out + interval / 2.0;
}
else
return 0;
}
bucket()
{
}
// used for quantiles calcs., where we want the side of the bucket closest to -inf
double left() const noexcept
{
if (isnan())
return LLQHistogram::nan();
const auto out = this->operator double();
if (out == 0)
return 0;
else if (out > 0)
return out;
else
return out - bin_width();
}
bucket(int64_t value, int32_t scale);
bucket(double);
};
private:
uint16_t allocd, used;
struct
{
bucket bucket;
count_t count;
} * bvs;
private:
std::pair<bool, uint32_t> find(const bucket b) const noexcept;
count_t insert_impl(const bucket hb, count_t count);
public:
#ifdef HAVE_SWITCH
LLQHistogram(IOBuffer *const in);
#endif
LLQHistogram()
: bvs{nullptr}, used{0}, allocd{0}
{
}
~LLQHistogram()
{
if (bvs)
free(bvs);
}
count_t insert(const double val, const count_t count)
{
return count ? insert_impl(bucket{val}, count) : 0;
}
count_t insert(const int64_t val, const int8_t scale, const count_t count)
{
return count ? insert_impl(bucket{val, scale}, count) : 0;
}
count_t remove(const double val, count_t count) noexcept;
void clear() noexcept
{
used = 0;
}
auto size() const noexcept
{
return used;
}
double approx_sum() const noexcept;
// 0: success, -1: empty, 2: invalid request, -3: out of bound quantile
int8_t approx_quantile(const double *const q_in, const uint32_t nq, double *const q_out) const noexcept;
double approx_mean() const noexcept;
// TODO: consider merging in-place if possible
void accumulate(const LLQHistogram **const src, const uint32_t srcCnt);
#ifdef HAVE_SWITCH
void serialize(IOBuffer *const out) const;
#endif
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment