Skip to content

Instantly share code, notes, and snippets.

@nmmmnu
Last active September 30, 2022 09:57
Show Gist options
  • Save nmmmnu/e1a0ad3d2a737ffc52d460e4814925b1 to your computer and use it in GitHub Desktop.
Save nmmmnu/e1a0ad3d2a737ffc52d460e4814925b1 to your computer and use it in GitHub Desktop.
High performance HyperLogLog for HM4
#include "hyperloglog.h"
#include "murmur_hash_64a.h"
#include <cmath>
namespace{
uint32_t getLeadingbits(uint64_t const hash, uint8_t bits){
return uint32_t( hash >> (64 - bits));
}
uint8_t countLeadingZeros(uint64_t const hash, uint8_t bits){
uint64_t const mask = (1 << bits) - 1;
uint64_t const x = (hash << bits) | mask;
auto const result = __builtin_clzll(x);
// assert(result < 0xff);
return static_cast<uint8_t>(result);
}
uint32_t countZeros(const uint8_t *data, uint32_t size) {
uint32_t zeros = 0;
for(size_t i = 0; i < size; ++i)
if (data[i] == 0)
++zeros;
return zeros;
}
constexpr static double calcAlpha(uint32_t m){
switch (m) {
case 16: return 0.673;
case 32: return 0.697;
case 64: return 0.709;
default: return 0.7213 / (1.0 + 1.079 / m);
}
}
}
namespace hyperloglog::hyperloglog_implementation{
CalcAddResult calcAdd(const char *s, size_t size, uint8_t bits){
uint64_t const hash = murmur_hash64a(s, size);
uint32_t const index = getLeadingbits (hash, bits);
uint8_t const rank = countLeadingZeros(hash, bits) + 1;
return { index, rank };
}
double estimate(const uint8_t *data, uint32_t size){
double sum = 0.0;
for(size_t i = 0; i < size; ++i)
sum += 1.0 / (1 << data[i]);
uint32_t const m = size;
double estimate = calcAlpha(m) * m * m / sum;
// small range correction
if (estimate <= 2.5 * m){
uint32_t zeros = countZeros(data, size);
if (zeros == 0){
// if no empty registers, use the estimate we already have
return estimate;
}else{
// balls and bins correction
return m * std::log(static_cast<double>(m)/ zeros);
}
}
static constexpr double pow_2_32 = uint64_t(2) << 31;
// large range correction
if (estimate > pow_2_32 / 30){
return ( - pow_2_32 ) * log(1.0 - (estimate / pow_2_32));
}
return estimate;
}
} // namespace hyperloglog::hyperloglog_implementation
#ifndef HYPER_LOG_LOG_H_
#define HYPER_LOG_LOG_H_
#include <cstdint>
#include <cassert>
#include <cstring>
#include <cstdlib>
#include <string_view>
#include <array>
namespace hyperloglog{
namespace hyperloglog_implementation{
struct CalcAddResult{
uint32_t index;
uint8_t rank;
};
CalcAddResult calcAdd(const char *s, size_t size, uint8_t bits);
double estimate(const uint8_t *data, uint32_t size);
}
class HyperLogLogOperationsRaw;
template<uint8_t Bits>
class HyperLogLogOperations;
struct HyperLogLogRaw{
uint8_t bits;
uint32_t m = 1 << bits;
constexpr HyperLogLogRaw(uint8_t bits) : bits(bits){
assert(bits > 4 && bits < 30);
}
// --------------------------
void clear(uint8_t *M) const{
memset(M, 0, m);
}
void load(uint8_t *M, const void *src) const{
memcpy(M, src, m);
}
void store(const uint8_t *M, void *dest) const{
memcpy(dest, M, m);
}
// --------------------------
void add(uint8_t *M, const char *s, size_t size) const{
auto const &[index, rank] = hyperloglog_implementation::calcAdd(s, size, bits);
M[index] = std::max(M[index], rank);
}
auto add(uint8_t *M, std::string_view s) const{
return add(M, s.data(), s.size());
}
auto add(uint8_t *M, const void *s, size_t size) const{
return add(M, static_cast<const char *>(s), size);
}
// --------------------------
double estimate(const uint8_t *M) const{
return hyperloglog_implementation::estimate(M, m);
}
void merge(uint8_t *M, const uint8_t *OM) const{
for(size_t i = 0; i < m; ++i)
M[i] = std::max(M[i], OM[i]);
}
// --------------------------
constexpr HyperLogLogOperationsRaw getOperations() const;
};
template<uint8_t Bits>
struct HyperLogLogConfig{
constexpr static size_t bits = HyperLogLogRaw{Bits}.bits;
constexpr static uint32_t m = HyperLogLogRaw{Bits}.m;
static_assert(bits > 4 && bits < 30);
};
template<uint8_t Bits>
struct HyperLogLog{
constexpr static uint8_t bits = HyperLogLogRaw{Bits}.bits;
constexpr static uint32_t m = HyperLogLogRaw{Bits}.m;
static_assert(bits > 4 && bits < 30);
// --------------------------
std::array<uint8_t, m> M;
// --------------------------
constexpr HyperLogLog(){
clear();
}
constexpr HyperLogLog(std::nullptr_t){
// no clear.
}
constexpr HyperLogLog(const void *src){
load(src);
}
// --------------------------
void clear(){
return HyperLogLogRaw{Bits}.clear(std::data(M));
}
void load(const void *src){
return HyperLogLogRaw{Bits}.load(std::data(M));
}
void store(void *dest) const{
return HyperLogLogRaw{Bits}.load(std::data(M), dest);
}
// --------------------------
template<typename ...Args>
void add(Args &&...args){
return HyperLogLogRaw{Bits}.add(std::data(M), args...);
}
// --------------------------
double estimate() const{
return HyperLogLogRaw{Bits}.estimate(std::data(M));
}
void merge(HyperLogLog const &other){
return HyperLogLogRaw{Bits}.merge(std::data(M), std::data(other.M));
}
// --------------------------
operator uint8_t *(){
return M.data();
}
operator const uint8_t *() const{
return M.data();
}
// --------------------------
constexpr static HyperLogLogOperations<Bits> getOperations();
};
class HyperLogLogOperationsRaw{
HyperLogLogRaw hll;
public:
constexpr HyperLogLogOperationsRaw(uint8_t bits) : hll(bits){
assert(bits > 4 && bits < 30);
}
public:
constexpr static double merge(uint8_t *){
return 0;
}
double merge(uint8_t *, const uint8_t *A) const{
return hll.estimate(A);
}
double merge(uint8_t *_, const uint8_t *A, const uint8_t *B) const{
hll.load (_, A);
hll.merge(_, B);
return hll.estimate(_);
}
double merge(uint8_t *_, const uint8_t *A, const uint8_t *B, const uint8_t *C) const{
hll.load (_, A);
hll.merge(_, B);
hll.merge(_, C);
return hll.estimate(_);
}
double merge(uint8_t *_, const uint8_t *A, const uint8_t *B, const uint8_t *C, const uint8_t *D) const{
hll.load (_, A);
hll.merge(_, B);
hll.merge(_, C);
hll.merge(_, D);
return hll.estimate(_);
}
private:
template<typename ...Args>
auto e(uint8_t *_, Args &&...args) const{
return merge(_, args...);
}
public:
constexpr static double intersect(uint8_t *){
return 0;
}
double intersect(uint8_t *, const uint8_t *A) const{
return hll.estimate(A);
}
double intersect(uint8_t *_, const uint8_t *A, const uint8_t *B) const{
// ^ = intersect, u = union
// A ^ B =
// + (A) + (B)
// - (A u B)
// 3 calcs total
return
+ e(_, A) + e(_, B)
- e(_, A, B)
;
}
double intersect(uint8_t *_, const uint8_t *A, const uint8_t *B, const uint8_t *C) const{
// ^ = intersect, u = union
// A ^ B ^ C =
// + (A) + (B) + (C)
// - (A u B) - (B u C) - (C u A)
// + (A u B u C)
// 7 calcs total
return
+ e(_, A) + e(_, B) + e(_, C)
- e(_, A, B) - e(_, B, C) - e(_, C, A)
+ e(_, A, B, C)
;
}
double intersect(uint8_t *_, const uint8_t *A, const uint8_t *B, const uint8_t *C, const uint8_t *D) const{
// https://upwikibg.top/wiki/Inclusion%E2%80%93exclusion_principle
// https://math.stackexchange.com/questions/688019/what-is-the-inclusion-exclusion-principle-for-4-sets
// ^ = intersect, u = union
// A ^ B ^ C ^ D =
// + (A) + (B) + (C) + (D)
// - (A u B) - (A u C) - (A u D) - (B u C) - (B u D) - (C u D)
// + (A u B u C) + (A u B u D) + (A u C u D) + (B u C u D)
// - (A u B u C u D)
// 15 calcs total
return
+ e(_, A) + e(_, B) + e(_, C) + e(_, D)
- e(_, A, B) - e(_, A, C) - e(_, A, D) - e(_, B, C) - e(_, B, D) - e(_, C, D)
+ e(_, A, B, C) + e(_, A, B, D) + e(_, A, C, D) + e(_, B, C, D)
- e(_, A, B, C, D)
;
}
};
template<uint8_t Bits>
struct HyperLogLogOperations{
constexpr static uint8_t bits = HyperLogLogRaw{Bits}.bits;
constexpr static uint32_t m = HyperLogLogRaw{Bits}.m;
static_assert(bits > 4 && bits < 30);
using HLL = HyperLogLog<Bits>;
public:
constexpr static double merge(){
return 0;
}
static double merge(const uint8_t *a){
HyperLogLogRaw hll{bits};
return hll.estimate(a);
}
static double merge(const uint8_t *a, const uint8_t *b){
return merge__(a, b);
}
static double merge(const uint8_t *a, const uint8_t *b, const uint8_t *c){
return merge__(a, b, c);
}
static double merge(const uint8_t *a, const uint8_t *b, const uint8_t *c, const uint8_t *d){
return merge__(a, b, c, d);
}
public:
constexpr static double intersect(){
return 0;
}
static double intersect(const uint8_t *a){
HyperLogLogRaw hll{bits};
return hll.estimate(a);
}
static double intersect(const uint8_t *a, const uint8_t *b){
return intersect__(a, b);
}
static double intersect(const uint8_t *a, const uint8_t *b, const uint8_t *c){
return intersect__(a, b, c);
}
static double intersect(const uint8_t *a, const uint8_t *b, const uint8_t *c, const uint8_t *d){
return intersect__(a, b, c, d);
}
private:
template<typename ...Args>
static auto merge__(Args &&...args){
// static_assert(sizeof...(args) >= 0 && sizeof...(args) <= 4);
std::array<uint8_t, m> tmp;
HyperLogLogRaw hll{bits};
auto operations = hll.getOperations();
return operations.merge(tmp.data(), args...);
}
template<typename ...Args>
static auto intersect__(Args &&...args){
// static_assert(sizeof...(args) >= 0 && sizeof...(args) <= 4);
std::array<uint8_t, m> tmp;
HyperLogLogRaw hll{bits};
auto operations = hll.getOperations();
return operations.intersect(tmp.data(), args...);
}
};
constexpr auto HyperLogLogRaw::getOperations() const -> HyperLogLogOperationsRaw{
return HyperLogLogOperationsRaw{bits};
}
template<uint8_t Bits>
constexpr auto HyperLogLog<Bits>::getOperations() -> HyperLogLogOperations<Bits>{
return HyperLogLogOperations<Bits>{};
}
} // namespace hyperloglog
#endif
#include "hyperloglog.h"
#include <cstdio>
#include <cmath>
using namespace hyperloglog;
constexpr uint8_t bits = 10;
using MyHLL = HyperLogLog<bits>;
constexpr int val[] {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 20, 30, 40, 50, 60, 70, 80, 90,
100, 200, 300, 400, 500, 600, 700, 800, 900,
1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000,
10'000,
100'000,
1'000'000,
10'000'000,
100'000'000,
// 1'000'000'000
};
int main(int argc, char **argv) {
MyHLL A;
printf("size %u\n", A.m);
for(int x : val){
for(size_t i = 0; double(i) < x; ++i)
A.add(
&i,
sizeof(i)
);
// hll.print();
auto const est = A.estimate();
printf("%10d %14.4f %14.4f\n", x, est, 100 * (x - est) / x);
}
MyHLL users_with_email;
users_with_email.add("john");
users_with_email.add("ivan");
users_with_email.add("boris");
users_with_email.add("pete");
users_with_email.add("boko");
users_with_email.add("pete");
MyHLL users_with_skype;
users_with_skype.add("john");
users_with_skype.add("ivan");
users_with_skype.add("boris");
// users_with_skype.add("pete");
users_with_skype.add("clara");
users_with_skype.add("mimi");
MyHLL users_from_la;
users_from_la.add("john");
users_from_la.add("ivan");
// users_from_la.add("boris");
// users_from_la.add("pete");
users_from_la.add("vicky");
users_from_la.add("pepi");
MyHLL users_male;
users_male.add("john");
// users_male.add("ivan");
// users_male.add("boris");
// users_male.add("pete");
users_male.add("donald");
users_male.add("silvester");
auto mr = [](auto a){
// return round(a);
return a;
};
const char *mask = "%-40s %14.4f\n";
auto hll_op = MyHLL::getOperations();
printf(mask, "users: email & skype", mr(hll_op.intersect(users_with_email, users_with_skype )) );
printf(mask, "users: email & skype & la", mr(hll_op.intersect(users_with_email, users_with_skype, users_from_la )) );
printf(mask, "users: email & skype & la & male", mr(hll_op.intersect(users_with_email, users_with_skype, users_from_la, users_male )) );
}
#include "hyperloglog.h"
#include <cstdio>
#include <cmath>
using namespace hyperloglog;
constexpr uint8_t bits = 10;
constexpr auto MyHLLM = HyperLogLogConfig<bits>::m;
constexpr int val[] {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 20, 30, 40, 50, 60, 70, 80, 90,
100, 200, 300, 400, 500, 600, 700, 800, 900,
1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000,
10'000,
100'000,
1'000'000,
10'000'000,
100'000'000,
// 1'000'000'000
};
int main(int argc, char **argv) {
auto hll = HyperLogLogRaw{bits};
uint8_t A[MyHLLM] = {0};
printf("size %u\n", hll.m);
for(int x : val){
for(size_t i = 0; double(i) < x; ++i)
hll.add(
A,
&i,
sizeof(i)
);
// hll.print();
auto const est = hll.estimate(A);
printf("%10d %14.4f %14.4f\n", x, est, 100 * (x - est) / x);
}
uint8_t users_with_email[MyHLLM] = {0};
hll.add(users_with_email, "john");
hll.add(users_with_email, "ivan");
hll.add(users_with_email, "boris");
hll.add(users_with_email, "pete");
hll.add(users_with_email, "boko");
hll.add(users_with_email, "pete");
uint8_t users_with_skype[MyHLLM] = {0};
hll.add(users_with_skype, "john");
hll.add(users_with_skype, "ivan");
hll.add(users_with_skype, "boris");
// hll.add(users_with_skype, "pete");
hll.add(users_with_skype, "clara");
hll.add(users_with_skype, "mimi");
uint8_t users_from_la[MyHLLM] = {0};
hll.add(users_from_la, "john");
hll.add(users_from_la, "ivan");
// hll.add(users_from_la, "boris");
// hll.add(users_from_la, "pete");
hll.add(users_from_la, "vicky");
hll.add(users_from_la, "pepi");
uint8_t users_male[MyHLLM] = {0};
hll.add(users_male, "john");
// hll.add(users_male, "ivan");
// hll.add(users_male, "boris");
// hll.add(users_male, "pete");
hll.add(users_male, "donald");
hll.add(users_male, "silvester");
auto mr = [](auto a){
// return round(a);
return a;
};
const char *mask = "%-40s %14.4f\n";
auto hll_op = hll.getOperations();
uint8_t tmp[MyHLLM];
printf(mask, "users: email & skype", mr(hll_op.intersect(tmp, users_with_email, users_with_skype )) );
printf(mask, "users: email & skype & la", mr(hll_op.intersect(tmp, users_with_email, users_with_skype, users_from_la )) );
printf(mask, "users: email & skype & la & male", mr(hll_op.intersect(tmp, users_with_email, users_with_skype, users_from_la, users_male )) );
}
#include "murmur_hash_64a.h"
// Based on
// https://github.com/hhrhhr/MurmurHash-for-Lua
// https://github.com/hhrhhr/MurmurHash-for-Lua/blob/master/MurmurHash64A.c
uint64_t murmur_hash64a(const void *key, size_t size, uint64_t seed){
const uint64_t m = 0xc6a4a7935bd1e995LLU;
const int r = 47;
uint64_t h = seed ^ (size * m);
const uint64_t *data = (const uint64_t *)key;
const uint64_t *end = (size >> 3) + data;
while(data != end){
uint64_t k = *data++;
k *= m;
k ^= k >> r;
k *= m;
h ^= k;
h *= m;
}
const unsigned char * data2 = (const unsigned char *)data;
switch(size & 7){
case 7: h ^= uint64_t{data2[6]} << (6 * 8);
case 6: h ^= uint64_t{data2[5]} << (5 * 8);
case 5: h ^= uint64_t{data2[4]} << (4 * 8);
case 4: h ^= uint64_t{data2[3]} << (3 * 8);
case 3: h ^= uint64_t{data2[2]} << (2 * 8);
case 2: h ^= uint64_t{data2[1]} << (1 * 8);
case 1: h ^= uint64_t{data2[0]} << (0 * 8);
h *= m;
};
h ^= h >> r;
h *= m;
h ^= h >> r;
return h;
}
#ifndef MURMUR_HASH_64A_H_
#define MURMUR_HASH_64A_H_
#include <cstdint>
#include <string_view>
uint64_t murmur_hash64a(const void *key, size_t size, uint64_t seed = 0);
inline uint64_t murmur_hash64a(std::string_view s, uint64_t seed = 0){
return murmur_hash64a(s.data(), s.size(), seed);
}
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment