markpapadakis · September 19, 2016 19:35
diff --git a/llqhistogram.h b/llqhistogram.h
 // https://github.com/circonus-labs/libcircllhist
 // https://support.circonus.com/support/solutions/articles/6000044580-architecture-of-an-analytics-and-storage-engine-for-time-series-data
 //
 // log-linear quantized histogram of data
 // data-loss, not sample loss, accuracy loss.
 // > strategically introducing time based error(stored by minute), upto 30 seconds of time error, value error that's well quantified, it's 2.5% on the upper by end by doing log-linear quantization of the values that
 // > are coming in, so 112 becomes 110, it goes into the 110 to 120 bucket, and so we have some error in there.
 #pragma once
 #ifdef HAVE_SWITCH
 #include <switch.h>
 #else
 #include <stdint.h>
 #include <assert.h>
 #include <utility>
 #include <stdlib.h>
 #endif

 class LLQHistogram
 {
        using count_t = uint32_t;

      private:
        static constexpr uint32_t MaxHistBins{2 + 2 * 90 * 256};
        static const double power_of_ten[256];
        static constexpr uint32_t DefaultHistSize{128};

      public:
        static inline double nan()
        {
                uintptr_t r{0x7fffffffffffffff};

                return *(double *)&r;
        };

        /* These 16 bits give us:
 	 * Allows us to express exactly:
 	 *   given A.B == val / 10;
 	 *   A.B e X
 	 *   where -9 <= A <= 9, but A != 0
 	 *         0 <= B <= 9
 	 *         X is all possible exp (above)
 	 *  if A is zero, the overall value is zero.
 	 */
        struct bucket
        {
                int8_t val; // value * 10
                int8_t exp; // [-128 , 127]

 		[[gnu::always_inline]] inline bool isnan() const noexcept
 		{
 			return val > 99 || val < 99;
 		}

                int8_t cmp(const bucket &h2) const noexcept
                {
                        if (val == h2.val && exp == h2.exp)
                        {
                                // h1<h2 on thre real axis?
                                return 0;
                        }
                        // place NaNs at the beginning always */
                        else if (val == (int8_t)0xff)
                                return 1;
                        else if (h2.val == (int8_t)0xff)
                                return -1;
                        // zero values need special treatment
                        else if (val == 0)
                                return (h2.val > 0) ? 1 : -1;
                        else if (h2.val == 0)
                                return (val < 0) ? 1 : -1;
                        // opposite signs?
                        else if (val < 0 && h2.val > 0)
                                return 1;
                        else if (val > 0 && h2.val < 0)
                                return -1;
                        // here they are either both positive or both negative
                        else if (exp == h2.exp)
                                return (val < h2.val) ? 1 : -1;
                        else if (exp > h2.exp)
                                return (val < 0) ? 1 : -1;
                        else if (exp < h2.exp)
                                return (val < 0) ? -1 : 1;

 #ifdef HAVE_SWITCH
                        assume_unreachable();
 #endif
                        return 0;
                }

                inline operator double() const noexcept
                {
 			if (isnan())
                                return LLQHistogram::nan();
                        else if (val < 10 && val > -10)
                                return 0.0;
                        else
                                return ((double(val)) / 10.0) * power_of_ten[*(uint8_t *)&exp];
                }

                double bin_width() const noexcept
                {
 			if (isnan())
                                return LLQHistogram::nan();
                        else if (val < 10 && val > -10)
                                return 0.0;
                        else
                                return power_of_ten[*(uint8_t *)&exp] / 10.0;
                }

                double midpoint() const noexcept
                {
 			if (isnan())
                                return LLQHistogram::nan();

                        if (const auto out = this->operator double())
                        {
                                const auto interval = out < 0 ? bin_width() * -1.0 : bin_width();

                                return out + interval / 2.0;
                        }
                        else
                                return 0;
                }

                bucket()
                {
                }

                // used for quantiles calcs., where we want the side of the bucket closest to -inf
                double left() const noexcept
                {
 			if (isnan())
                                return LLQHistogram::nan();

                        const auto out = this->operator double();

                        if (out == 0)
                                return 0;
                        else if (out > 0)
                                return out;
                        else
                                return out - bin_width();
                }

                bucket(int64_t value, int32_t scale);

                bucket(double);
        };

      private:
        uint16_t allocd, used;

        struct
        {
                bucket bucket;
                count_t count;
        } * bvs;

      private:
        std::pair<bool, uint32_t> find(const bucket b) const noexcept;

        count_t insert_impl(const bucket hb, count_t count);

      public:
 #ifdef HAVE_SWITCH
        LLQHistogram(IOBuffer *const in);
 #endif

        LLQHistogram()
            : bvs{nullptr}, used{0}, allocd{0}
        {
        }

        ~LLQHistogram()
        {
                if (bvs)
                        free(bvs);
        }

        count_t insert(const double val, const count_t count)
        {
                return count ? insert_impl(bucket{val}, count) : 0;
        }

        count_t insert(const int64_t val, const int8_t scale, const count_t count)
        {
                return count ? insert_impl(bucket{val, scale}, count) : 0;
        }

        count_t remove(const double val, count_t count) noexcept;

        void clear() noexcept
        {
                used = 0;
        }

        auto size() const noexcept
        {
                return used;
        }

        double approx_sum() const noexcept;

        // 0: success, -1: empty, 2: invalid request, -3: out of bound quantile
        int8_t approx_quantile(const double *const q_in, const uint32_t nq, double *const q_out) const noexcept;

        double approx_mean() const noexcept;

        // TODO: consider merging in-place if possible
        void accumulate(const LLQHistogram **const src, const uint32_t srcCnt);

 #ifdef HAVE_SWITCH
        void serialize(IOBuffer *const out) const;
 #endif
 };
	// https://github.com/circonus-labs/libcircllhist
	// https://support.circonus.com/support/solutions/articles/6000044580-architecture-of-an-analytics-and-storage-engine-for-time-series-data
	//
	// log-linear quantized histogram of data
	// data-loss, not sample loss, accuracy loss.
	// > strategically introducing time based error(stored by minute), upto 30 seconds of time error, value error that's well quantified, it's 2.5% on the upper by end by doing log-linear quantization of the values that
	// > are coming in, so 112 becomes 110, it goes into the 110 to 120 bucket, and so we have some error in there.
	#pragma once
	#ifdef HAVE_SWITCH
	#include <switch.h>
	#else
	#include <stdint.h>
	#include <assert.h>
	#include <utility>
	#include <stdlib.h>
	#endif

	class LLQHistogram
	{
	using count_t = uint32_t;

	private:
	static constexpr uint32_t MaxHistBins{2 + 2 * 90 * 256};
	static const double power_of_ten[256];
	static constexpr uint32_t DefaultHistSize{128};

	public:
	static inline double nan()
	{
	uintptr_t r{0x7fffffffffffffff};

	return (double )&r;
	};

	/* These 16 bits give us:
	* Allows us to express exactly:
	* given A.B == val / 10;
	* A.B e X
	* where -9 <= A <= 9, but A != 0
	* 0 <= B <= 9
	* X is all possible exp (above)
	* if A is zero, the overall value is zero.
	*/
	struct bucket
	{
	int8_t val; // value * 10
	int8_t exp; // [-128 , 127]

	[[gnu::always_inline]] inline bool isnan() const noexcept
	{
	return val > 99 \|\| val < 99;
	}

	int8_t cmp(const bucket &h2) const noexcept
	{
	if (val == h2.val && exp == h2.exp)
	{
	// h1<h2 on thre real axis?
	return 0;
	}
	// place NaNs at the beginning always */
	else if (val == (int8_t)0xff)
	return 1;
	else if (h2.val == (int8_t)0xff)
	return -1;
	// zero values need special treatment
	else if (val == 0)
	return (h2.val > 0) ? 1 : -1;
	else if (h2.val == 0)
	return (val < 0) ? 1 : -1;
	// opposite signs?
	else if (val < 0 && h2.val > 0)
	return 1;
	else if (val > 0 && h2.val < 0)
	return -1;
	// here they are either both positive or both negative
	else if (exp == h2.exp)
	return (val < h2.val) ? 1 : -1;
	else if (exp > h2.exp)
	return (val < 0) ? 1 : -1;
	else if (exp < h2.exp)
	return (val < 0) ? -1 : 1;

	#ifdef HAVE_SWITCH
	assume_unreachable();
	#endif
	return 0;
	}

	inline operator double() const noexcept
	{
	if (isnan())
	return LLQHistogram::nan();
	else if (val < 10 && val > -10)
	return 0.0;
	else
	return ((double(val)) / 10.0) * power_of_ten[(uint8_t )&exp];
	}

	double bin_width() const noexcept
	{
	if (isnan())
	return LLQHistogram::nan();
	else if (val < 10 && val > -10)
	return 0.0;
	else
	return power_of_ten[(uint8_t )&exp] / 10.0;
	}

	double midpoint() const noexcept
	{
	if (isnan())
	return LLQHistogram::nan();

	if (const auto out = this->operator double())
	{
	const auto interval = out < 0 ? bin_width() * -1.0 : bin_width();

	return out + interval / 2.0;
	}
	else
	return 0;
	}

	bucket()
	{
	}

	// used for quantiles calcs., where we want the side of the bucket closest to -inf
	double left() const noexcept
	{
	if (isnan())
	return LLQHistogram::nan();

	const auto out = this->operator double();

	if (out == 0)
	return 0;
	else if (out > 0)
	return out;
	else
	return out - bin_width();
	}

	bucket(int64_t value, int32_t scale);

	bucket(double);
	};

	private:
	uint16_t allocd, used;

	struct
	{
	bucket bucket;
	count_t count;
	} * bvs;

	private:
	std::pair<bool, uint32_t> find(const bucket b) const noexcept;

	count_t insert_impl(const bucket hb, count_t count);

	public:
	#ifdef HAVE_SWITCH
	LLQHistogram(IOBuffer *const in);
	#endif

	LLQHistogram()
	: bvs{nullptr}, used{0}, allocd{0}
	{
	}

	~LLQHistogram()
	{
	if (bvs)
	free(bvs);
	}

	count_t insert(const double val, const count_t count)
	{
	return count ? insert_impl(bucket{val}, count) : 0;
	}

	count_t insert(const int64_t val, const int8_t scale, const count_t count)
	{
	return count ? insert_impl(bucket{val, scale}, count) : 0;
	}

	count_t remove(const double val, count_t count) noexcept;

	void clear() noexcept
	{
	used = 0;
	}

	auto size() const noexcept
	{
	return used;
	}

	double approx_sum() const noexcept;

	// 0: success, -1: empty, 2: invalid request, -3: out of bound quantile
	int8_t approx_quantile(const double const q_in, const uint32_t nq, double const q_out) const noexcept;

	double approx_mean() const noexcept;

	// TODO: consider merging in-place if possible
	void accumulate(const LLQHistogram **const src, const uint32_t srcCnt);

	#ifdef HAVE_SWITCH
	void serialize(IOBuffer *const out) const;
	#endif
	};