Created
September 19, 2016 19:35
-
-
Save markpapadakis/edc40178e0f7c28014538eaaeef57794 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// https://github.com/circonus-labs/libcircllhist | |
// https://support.circonus.com/support/solutions/articles/6000044580-architecture-of-an-analytics-and-storage-engine-for-time-series-data | |
// | |
// log-linear quantized histogram of data | |
// data-loss, not sample loss, accuracy loss. | |
// > strategically introducing time based error(stored by minute), upto 30 seconds of time error, value error that's well quantified, it's 2.5% on the upper by end by doing log-linear quantization of the values that | |
// > are coming in, so 112 becomes 110, it goes into the 110 to 120 bucket, and so we have some error in there. | |
#pragma once | |
#ifdef HAVE_SWITCH | |
#include <switch.h> | |
#else | |
#include <stdint.h> | |
#include <assert.h> | |
#include <utility> | |
#include <stdlib.h> | |
#endif | |
class LLQHistogram | |
{ | |
using count_t = uint32_t; | |
private: | |
static constexpr uint32_t MaxHistBins{2 + 2 * 90 * 256}; | |
static const double power_of_ten[256]; | |
static constexpr uint32_t DefaultHistSize{128}; | |
public: | |
static inline double nan() | |
{ | |
uintptr_t r{0x7fffffffffffffff}; | |
return *(double *)&r; | |
}; | |
/* These 16 bits give us: | |
* Allows us to express exactly: | |
* given A.B == val / 10; | |
* A.B e X | |
* where -9 <= A <= 9, but A != 0 | |
* 0 <= B <= 9 | |
* X is all possible exp (above) | |
* if A is zero, the overall value is zero. | |
*/ | |
struct bucket | |
{ | |
int8_t val; // value * 10 | |
int8_t exp; // [-128 , 127] | |
[[gnu::always_inline]] inline bool isnan() const noexcept | |
{ | |
return val > 99 || val < 99; | |
} | |
int8_t cmp(const bucket &h2) const noexcept | |
{ | |
if (val == h2.val && exp == h2.exp) | |
{ | |
// h1<h2 on thre real axis? | |
return 0; | |
} | |
// place NaNs at the beginning always */ | |
else if (val == (int8_t)0xff) | |
return 1; | |
else if (h2.val == (int8_t)0xff) | |
return -1; | |
// zero values need special treatment | |
else if (val == 0) | |
return (h2.val > 0) ? 1 : -1; | |
else if (h2.val == 0) | |
return (val < 0) ? 1 : -1; | |
// opposite signs? | |
else if (val < 0 && h2.val > 0) | |
return 1; | |
else if (val > 0 && h2.val < 0) | |
return -1; | |
// here they are either both positive or both negative | |
else if (exp == h2.exp) | |
return (val < h2.val) ? 1 : -1; | |
else if (exp > h2.exp) | |
return (val < 0) ? 1 : -1; | |
else if (exp < h2.exp) | |
return (val < 0) ? -1 : 1; | |
#ifdef HAVE_SWITCH | |
assume_unreachable(); | |
#endif | |
return 0; | |
} | |
inline operator double() const noexcept | |
{ | |
if (isnan()) | |
return LLQHistogram::nan(); | |
else if (val < 10 && val > -10) | |
return 0.0; | |
else | |
return ((double(val)) / 10.0) * power_of_ten[*(uint8_t *)&exp]; | |
} | |
double bin_width() const noexcept | |
{ | |
if (isnan()) | |
return LLQHistogram::nan(); | |
else if (val < 10 && val > -10) | |
return 0.0; | |
else | |
return power_of_ten[*(uint8_t *)&exp] / 10.0; | |
} | |
double midpoint() const noexcept | |
{ | |
if (isnan()) | |
return LLQHistogram::nan(); | |
if (const auto out = this->operator double()) | |
{ | |
const auto interval = out < 0 ? bin_width() * -1.0 : bin_width(); | |
return out + interval / 2.0; | |
} | |
else | |
return 0; | |
} | |
bucket() | |
{ | |
} | |
// used for quantiles calcs., where we want the side of the bucket closest to -inf | |
double left() const noexcept | |
{ | |
if (isnan()) | |
return LLQHistogram::nan(); | |
const auto out = this->operator double(); | |
if (out == 0) | |
return 0; | |
else if (out > 0) | |
return out; | |
else | |
return out - bin_width(); | |
} | |
bucket(int64_t value, int32_t scale); | |
bucket(double); | |
}; | |
private: | |
uint16_t allocd, used; | |
struct | |
{ | |
bucket bucket; | |
count_t count; | |
} * bvs; | |
private: | |
std::pair<bool, uint32_t> find(const bucket b) const noexcept; | |
count_t insert_impl(const bucket hb, count_t count); | |
public: | |
#ifdef HAVE_SWITCH | |
LLQHistogram(IOBuffer *const in); | |
#endif | |
LLQHistogram() | |
: bvs{nullptr}, used{0}, allocd{0} | |
{ | |
} | |
~LLQHistogram() | |
{ | |
if (bvs) | |
free(bvs); | |
} | |
count_t insert(const double val, const count_t count) | |
{ | |
return count ? insert_impl(bucket{val}, count) : 0; | |
} | |
count_t insert(const int64_t val, const int8_t scale, const count_t count) | |
{ | |
return count ? insert_impl(bucket{val, scale}, count) : 0; | |
} | |
count_t remove(const double val, count_t count) noexcept; | |
void clear() noexcept | |
{ | |
used = 0; | |
} | |
auto size() const noexcept | |
{ | |
return used; | |
} | |
double approx_sum() const noexcept; | |
// 0: success, -1: empty, 2: invalid request, -3: out of bound quantile | |
int8_t approx_quantile(const double *const q_in, const uint32_t nq, double *const q_out) const noexcept; | |
double approx_mean() const noexcept; | |
// TODO: consider merging in-place if possible | |
void accumulate(const LLQHistogram **const src, const uint32_t srcCnt); | |
#ifdef HAVE_SWITCH | |
void serialize(IOBuffer *const out) const; | |
#endif | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment