@xeioex · created January 8, 2026
// Load generator: floods the nginx endpoint below with requests tagged
// with synthetic visitor IDs, to exercise the HyperLogLog counter.
package main

import (
    "context"
    "flag"
    "fmt"
    "io"
    "math/rand"
    "net/http"
    "sync"
    "sync/atomic"
    "time"
)
// Stats collects benchmark counters; every field is safe for concurrent
// use by the worker goroutines.
type Stats struct {
    requests      atomic.Int64 // successful requests only
    errors        atomic.Int64
    totalLatency  atomic.Int64
    minLatency    atomic.Int64
    maxLatency    atomic.Int64
    bytesReceived atomic.Int64
    uniqueIDs     sync.Map // set of IDs actually sent
    idCount       atomic.Int64
}

// Config holds the command-line options.
type Config struct {
    targetURL     string
    numUniqueIDs  int
    workers       int
    duration      time.Duration
    requestsPerID int
    headerName    string
    distribution  string
    zipfS         float64
}
func main() {
    config := parseFlags()

    fmt.Printf("Benchmark Configuration:\n")
    fmt.Printf(" Target URL: %s\n", config.targetURL)
    fmt.Printf(" Unique Visitor IDs: %d\n", config.numUniqueIDs)
    fmt.Printf(" Workers: %d\n", config.workers)
    fmt.Printf(" Duration: %s\n", config.duration)
    fmt.Printf(" Header Name: %s\n", config.headerName)
    fmt.Printf(" Distribution: %s", config.distribution)
    if config.distribution == "zipf" {
        fmt.Printf(" (s=%.2f)", config.zipfS)
    }
    fmt.Printf("\n")
    if config.requestsPerID > 0 {
        fmt.Printf(" Requests per ID: %d\n", config.requestsPerID)
    } else {
        fmt.Printf(" Requests per ID: unlimited (random selection)\n")
    }
    fmt.Printf("\n")

    stats := &Stats{}
    stats.minLatency.Store(int64(time.Hour)) // sentinel: any real latency is smaller

    ctx, cancel := context.WithTimeout(context.Background(), config.duration)
    defer cancel()

    runBenchmark(ctx, config, stats)
    printResults(stats, config)
}
func parseFlags() *Config {
    config := &Config{}

    flag.StringVar(&config.targetURL, "url", "http://127.0.0.1:8080/",
        "Target URL to benchmark")
    flag.IntVar(&config.numUniqueIDs, "unique-ids", 1000,
        "Number of unique visitor IDs")
    flag.IntVar(&config.workers, "workers", 100,
        "Number of concurrent workers")
    flag.DurationVar(&config.duration, "duration", 10*time.Second,
        "Benchmark duration")
    flag.IntVar(&config.requestsPerID, "requests-per-id", 0,
        "Requests per ID (0 = unlimited with random selection)")
    flag.StringVar(&config.headerName, "header", "X-Visitor-ID",
        "HTTP header name for visitor ID")
    flag.StringVar(&config.distribution, "distribution", "uniform",
        "ID selection distribution: uniform, zipf")
    flag.Float64Var(&config.zipfS, "zipf-s", 1.5,
        "Zipf distribution parameter (higher = more skewed)")
    flag.Parse()

    return config
}
func generateVisitorIDs(numIDs int) []string {
    ids := make([]string, numIDs)
    for i := 0; i < numIDs; i++ {
        ids[i] = fmt.Sprintf("visitor-%d", i+1)
    }
    return ids
}
// IDSelector abstracts how the next visitor ID is chosen.
type IDSelector interface {
    SelectID() string
}

// UniformSelector picks IDs uniformly at random.
type UniformSelector struct {
    ids []string
    rng *rand.Rand
    mu  sync.Mutex
}

func NewUniformSelector(ids []string) *UniformSelector {
    return &UniformSelector{
        ids: ids,
        rng: rand.New(rand.NewSource(time.Now().UnixNano())),
    }
}

func (s *UniformSelector) SelectID() string {
    s.mu.Lock()
    defer s.mu.Unlock()
    return s.ids[s.rng.Intn(len(s.ids))]
}

// ZipfSelector skews selection toward low-index IDs; s > 1 controls the
// skew (higher s = a few "popular" IDs dominate).
type ZipfSelector struct {
    ids  []string
    zipf *rand.Zipf
    mu   sync.Mutex
}

func NewZipfSelector(ids []string, s float64) *ZipfSelector {
    rng := rand.New(rand.NewSource(time.Now().UnixNano()))
    return &ZipfSelector{
        ids:  ids,
        zipf: rand.NewZipf(rng, s, 1.0, uint64(len(ids)-1)),
    }
}

func (s *ZipfSelector) SelectID() string {
    s.mu.Lock()
    defer s.mu.Unlock()
    return s.ids[s.zipf.Uint64()]
}
func createHTTPClient() *http.Client {
    return &http.Client{
        Transport: &http.Transport{
            MaxIdleConns:        100,
            MaxIdleConnsPerHost: 100,
            IdleConnTimeout:     90 * time.Second,
        },
        Timeout: 10 * time.Second,
    }
}
func runBenchmark(ctx context.Context, config *Config, stats *Stats) {
    visitorIDs := generateVisitorIDs(config.numUniqueIDs)
    fmt.Printf("Generated %d unique visitor IDs\n", len(visitorIDs))

    var selector IDSelector
    if config.distribution == "zipf" {
        selector = NewZipfSelector(visitorIDs, config.zipfS)
        fmt.Printf("Using Zipf distribution (popular IDs will repeat more)\n")
    } else {
        selector = NewUniformSelector(visitorIDs)
        fmt.Printf("Using uniform distribution\n")
    }
    fmt.Printf("Starting benchmark...\n\n")

    var wg sync.WaitGroup
    idChannel := make(chan string, config.workers*2)

    for i := 0; i < config.workers; i++ {
        wg.Add(1)
        go func(workerID int) {
            defer wg.Done()
            worker(ctx, workerID, config, idChannel, stats)
        }(i)
    }

    // Feeder: either a fixed number of requests per ID, or an endless
    // stream drawn from the configured distribution.
    go func() {
        defer close(idChannel)
        if config.requestsPerID > 0 {
            for _, id := range visitorIDs {
                for j := 0; j < config.requestsPerID; j++ {
                    select {
                    case idChannel <- id:
                    case <-ctx.Done():
                        return
                    }
                }
            }
        } else {
            for {
                select {
                case idChannel <- selector.SelectID():
                case <-ctx.Done():
                    return
                }
            }
        }
    }()

    // Progress reporter: one line per second.
    go func() {
        ticker := time.NewTicker(1 * time.Second)
        defer ticker.Stop()
        lastRequests := int64(0)
        for {
            select {
            case <-ticker.C:
                current := stats.requests.Load()
                rps := current - lastRequests
                lastRequests = current
                uniqueCount := stats.idCount.Load()
                fmt.Printf(
                    "Progress: %d requests, %d RPS, %d unique IDs, %d errors\n",
                    current, rps, uniqueCount, stats.errors.Load())
            case <-ctx.Done():
                return
            }
        }
    }()

    wg.Wait()
}
func worker(ctx context.Context, id int, config *Config,
    idChannel <-chan string, stats *Stats) {

    client := createHTTPClient() // one client (and connection pool) per worker

    for {
        select {
        case visitorID, ok := <-idChannel:
            if !ok {
                return
            }
            // Track how many distinct IDs were actually sent.
            if _, exists := stats.uniqueIDs.LoadOrStore(visitorID, true); !exists {
                stats.idCount.Add(1)
            }
            makeRequest(client, config.targetURL, config.headerName,
                visitorID, stats)
        case <-ctx.Done():
            return
        }
    }
}
func makeRequest(client *http.Client, url, headerName, visitorID string,
    stats *Stats) {

    start := time.Now()

    req, err := http.NewRequest("GET", url, nil)
    if err != nil {
        stats.errors.Add(1)
        return
    }
    req.Header.Set(headerName, visitorID)

    resp, err := client.Do(req)
    if err != nil {
        stats.errors.Add(1)
        return
    }
    defer resp.Body.Close()

    bytes, err := io.Copy(io.Discard, resp.Body)
    if err != nil {
        stats.errors.Add(1)
        return
    }

    latency := time.Since(start)
    stats.requests.Add(1)
    stats.bytesReceived.Add(bytes)
    stats.totalLatency.Add(int64(latency))

    // Lock-free min/max updates via compare-and-swap loops.
    for {
        current := stats.minLatency.Load()
        if int64(latency) >= current {
            break
        }
        if stats.minLatency.CompareAndSwap(current, int64(latency)) {
            break
        }
    }
    for {
        current := stats.maxLatency.Load()
        if int64(latency) <= current {
            break
        }
        if stats.maxLatency.CompareAndSwap(current, int64(latency)) {
            break
        }
    }
}
func printResults(stats *Stats, config *Config) {
    requests := stats.requests.Load() // successful requests only
    errors := stats.errors.Load()
    totalLatency := stats.totalLatency.Load()
    minLatency := stats.minLatency.Load()
    maxLatency := stats.maxLatency.Load()
    bytes := stats.bytesReceived.Load()
    uniqueCount := stats.idCount.Load()

    fmt.Printf("\n=== Results ===\n")
    fmt.Printf("Duration: %s\n", config.duration)
    fmt.Printf("Requests: %d\n", requests)
    fmt.Printf("Errors: %d\n", errors)
    fmt.Printf("\nUnique Visitor IDs:\n")
    fmt.Printf(" Configured: %d\n", config.numUniqueIDs)
    fmt.Printf(" Actually Sent: %d\n", uniqueCount)

    if requests > 0 {
        avgLatency := time.Duration(totalLatency / requests)
        rps := float64(requests) / config.duration.Seconds()
        throughput := float64(bytes) / config.duration.Seconds() / 1024 / 1024

        // requests counts successes only (makeRequest bails out before
        // incrementing it on error), so the rate is successes / attempts.
        fmt.Printf("\nSuccess Rate: %.2f%%\n",
            float64(requests)/float64(requests+errors)*100)
        fmt.Printf("\nThroughput:\n")
        fmt.Printf(" Requests/sec: %.2f\n", rps)
        fmt.Printf(" Transfer/sec: %.2f MB\n", throughput)
        fmt.Printf("\nLatency:\n")
        fmt.Printf(" Min: %s\n", time.Duration(minLatency))
        fmt.Printf(" Avg: %s\n", avgLatency)
        fmt.Printf(" Max: %s\n", time.Duration(maxLatency))
        fmt.Printf("\nData:\n")
        fmt.Printf(" Total transferred: %.2f MB\n",
            float64(bytes)/1024/1024)

        if uniqueCount > 0 {
            fmt.Printf("\nID Distribution:\n")
            fmt.Printf(" Avg requests per ID: %.2f\n",
                float64(requests)/float64(uniqueCount))
        }
    }
}
# nginx configuration: every request is fed into the HLL via the
# $track_visitor log variable; /unique_count reports the estimate.
# gcc -fPIC -O2 -I/path/to/quickjs/ -shared -o murmur3.so murmur3.c
js_load_http_native_module /home/xeioex/workspace/nginx/nginx/conf/murmur3.so as murmur3.so;

error_log /dev/stdout info;
daemon off;
worker_processes auto;

events {
    worker_connections 16384;
}

http {
    js_engine qjs;
    js_import main from hll.js;
    js_shared_array_zone zone=hll:32k;
    js_set $track_visitor main.track_visitor;

    log_format hll '$remote_addr - $time_local "$request"$track_visitor';
    access_log /dev/stdout hll;

    server {
        listen 8080;

        location / {
            return 200 'Hello, HLL!';
        }

        location /unique_count {
            allow 127.0.0.1;
            deny all;
            js_content main.get_unique_count;
        }
    }
}
// hll.js: HyperLogLog over an nginx shared array zone, hashed with the
// native murmur3 QuickJS module built from murmur3.c below.
import { murmur3 } from 'murmur3.so';

class HyperLogLog {
    constructor(buffer, precision = 14) {
        this.registers = new Uint8Array(buffer);
        this.m = 1 << precision;
        this.precision = precision;

        if (this.registers.length < this.m) {
            throw new Error('Buffer too small');
        }

        // Bias-correction constant from the HyperLogLog paper.
        this.alpha = this.m === 16 ? 0.673 :
                     this.m === 32 ? 0.697 :
                     this.m === 64 ? 0.709 :
                     0.7213 / (1 + 1.079 / this.m);
    }

    add(item) {
        const hash = murmur3(item);
        const idx = hash & (this.m - 1);   // low p bits pick the register
        const w = hash >>> this.precision; // remaining bits are ranked

        // rho+1: position of the leftmost 1-bit in the remaining 32-p bits.
        const leadingZeros = w === 0 ? 32 - this.precision + 1 :
                                       Math.clz32(w) - this.precision + 1;

        // Lock-free max update, shared across nginx worker processes.
        while (true) {
            const old = Atomics.load(this.registers, idx);
            if (leadingZeros <= old) break;
            if (Atomics.compareExchange(this.registers, idx, old,
                                        leadingZeros) === old) {
                break;
            }
        }
    }

    count() {
        let sum = 0;
        let zeros = 0;

        for (let i = 0; i < this.m; i++) {
            const val = Atomics.load(this.registers, i);
            sum += Math.pow(2, -val);
            if (val === 0) zeros++;
        }

        let estimate = this.alpha * this.m * this.m / sum;

        if (estimate <= 2.5 * this.m) {
            // Small-range correction: fall back to linear counting.
            if (zeros > 0) {
                estimate = this.m * Math.log(this.m / zeros);
            }
        } else if (estimate > (2**32) / 30) {
            // Large-range correction for 32-bit hashes.
            estimate = -(2**32) * Math.log(1 - estimate / (2**32));
        }

        return Math.round(estimate);
    }
}

const hll = new HyperLogLog(ngx.sharedArray.hll.buffer);

function track_visitor(r) {
    hll.add(r.headersIn['X-Visitor-ID'] || r.remoteAddress);
    return "";
}

function get_unique_count(r) {
    const count = hll.count();

    r.headersOut['Content-Type'] = 'application/json';
    return r.return(200, JSON.stringify({
        unique_visitors: count,
        error_rate: '~2%'
    }));
}

export default { track_visitor, get_unique_count };
/*
 * MurmurHash3 native extension for QuickJS
 * Copyright (C) F5, Inc.
 */

#include <quickjs.h>

#define countof(x) (sizeof(x) / sizeof((x)[0]))

static inline uint32_t
murmur3_scramble(uint32_t k)
{
    k *= 0xcc9e2d51;
    k = (k << 15) | (k >> 17);
    k *= 0x1b873593;
    return k;
}

static uint32_t
murmur3_32(const uint8_t *key, size_t len, uint32_t seed)
{
    uint32_t h, k;
    size_t i;

    h = seed;
    k = 0;

    /* Read in groups of 4. */

    for (i = len >> 2; i; i--) {
        k = key[0];
        k |= key[1] << 8;
        k |= key[2] << 16;
        k |= key[3] << 24;
        key += 4;

        h ^= murmur3_scramble(k);
        h = (h << 13) | (h >> 19);
        h = h * 5 + 0xe6546b64;
    }

    /* Read the rest. */

    k = 0;

    for (i = len & 3; i; i--) {
        k <<= 8;
        k |= key[i - 1];
    }

    h ^= murmur3_scramble(k);

    /* Finalize. */

    h ^= len;
    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;

    return h;
}

static JSValue
js_murmur3(JSContext *ctx, JSValueConst this_val, int argc,
    JSValueConst *argv)
{
    size_t len;
    uint32_t seed, hash;
    const char *str;

    seed = 0;

    str = JS_ToCStringLen(ctx, &len, argv[0]);
    if (str == NULL) {
        return JS_EXCEPTION;
    }

    if (argc > 1 && !JS_IsUndefined(argv[1])) {
        if (JS_ToUint32(ctx, &seed, argv[1]) < 0) {
            JS_FreeCString(ctx, str);
            return JS_EXCEPTION;
        }
    }

    hash = murmur3_32((const uint8_t *) str, len, seed);

    JS_FreeCString(ctx, str);

    return JS_NewUint32(ctx, hash);
}

static const JSCFunctionListEntry js_murmur_funcs[] = {
    JS_CFUNC_DEF("murmur3", 2, js_murmur3),
};

static int
js_murmur_init(JSContext *ctx, JSModuleDef *m)
{
    return JS_SetModuleExportList(ctx, m, js_murmur_funcs,
                                  countof(js_murmur_funcs));
}

JSModuleDef *
js_init_module(JSContext *ctx, const char *module_name)
{
    int rc;
    JSModuleDef *m;

    m = JS_NewCModule(ctx, module_name, js_murmur_init);
    if (!m) {
        return NULL;
    }

    rc = JS_AddModuleExportList(ctx, m, js_murmur_funcs,
                                countof(js_murmur_funcs));
    if (rc < 0) {
        return NULL;
    }

    /*
    rc = JS_AddModuleExport(ctx, m, "default");
    if (rc < 0) {
        return NULL;
    }
    */

    return m;
}

HyperLogLog Unique Visitor Counter

Problem

Count unique visitors (IPs, user IDs) without storing all IDs.

Benefits:

  • Fixed memory (16KB for any # of visitors)
  • ~2% error rate
  • Lock-free
  • No storage of actual IPs

Use cases:

  • Unique visitor counting
  • Cardinality estimation
  • Distinct value counting

Algorithm Overview

HyperLogLog (HLL) is a probabilistic algorithm for estimating the cardinality (number of distinct elements) in a dataset. Instead of storing all unique values, it uses statistical properties of hash functions to estimate how many unique items have been observed.

Core Concept:

The fundamental insight behind HyperLogLog is based on the observation that when you hash random values uniformly, the probability of seeing certain bit patterns is predictable. Specifically:

  • The probability that a hash starts with at least 1 leading zero bit is 1/2
  • The probability of at least 2 leading zeros is 1/4
  • The probability of at least k leading zeros is 1/2^k

If you observe a hash with (for example) 10 leading zeros, you can estimate that you've likely seen around 2^10 = 1024 unique values. This is the core intuition: rare events indicate large cardinality.
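To make the intuition concrete, here is a minimal Go sketch (Go is also the language of the load generator above; none of this code is part of the gist) that turns a single observed hash into a naive cardinality guess:

package main

import (
    "fmt"
    "math/bits"
)

func main() {
    // Pretend this is a uniformly distributed 32-bit hash of some ID.
    var h uint32 = 0x00003a7f // a value with 18 leading zero bits

    rho := bits.LeadingZeros32(h)
    // Seeing rho leading zeros has probability 1/2^rho, so one such
    // observation suggests roughly 2^rho distinct inputs were hashed.
    fmt.Printf("rho=%d, naive single-observation estimate ~ %d\n", rho, 1<<rho)
}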

However, using just one register would be extremely inaccurate (high variance). HyperLogLog solves this by:

  1. Dividing observations into many buckets (registers)
  2. Tracking the maximum leading zeros in each bucket independently
  3. Combining these estimates using harmonic mean to reduce variance

This ensemble approach dramatically improves accuracy while maintaining constant memory usage.


How HyperLogLog Works

Step 1: Hash the Input

Each unique value (IP address, user ID, etc.) is hashed using a uniform hash function. This produces a bit string that appears random and uniformly distributed.

Example: hash("192.168.1.1") → 0b00101110001101... (a 64-bit hash in this general description; the gist's murmur3 produces 32 bits)

Step 2: Split Hash into Bucket Index and Value

The hash is divided into two parts:

  • First p bits: Determine which bucket (register) to update
    • With p=14, we get 2^14 = 16,384 buckets
  • Remaining bits: Used to count leading zeros

Example with p=14:

Hash: [14 bits for bucket][50 bits for counting]
      ^^^^^^^^^^^^^^      ^^^^^^^^^^^^^^^^^^
      bucket index        count leading zeros here
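Note that the gist's hll.js takes the low 14 bits of its 32-bit murmur3 hash for the bucket index and ranks the remaining 18 bits; which end of the hash you take does not matter as long as the two parts do not overlap. A minimal Go sketch of the split, mirroring hll.js's add() (the hash value is illustrative):

package main

import "fmt"

func main() {
    const p = 14
    const m = 1 << p // 16,384 registers

    var hash uint32 = 0xb1e62c05 // an example 32-bit murmur3 output

    idx := hash & (m - 1) // low p bits select the register
    w := hash >> p        // the remaining 32-p = 18 bits get ranked

    fmt.Printf("register=%d remainder=%#x\n", idx, w)
}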

Step 3: Count Leading Zeros (ρ function)

For the remaining bits, count how many leading zeros appear before the first 1-bit. This count is called ρ (rho).

Examples:

  • 1101010... → ρ = 0 (no leading zeros)
  • 0001101... → ρ = 3 (three leading zeros)
  • 0000001... → ρ = 6 (six leading zeros)

The value ρ+1 is stored in the register.
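hll.js computes this rank with Math.clz32 on the shifted value; the same computation in Go, as a sketch (the value of w is arbitrary):

package main

import (
    "fmt"
    "math/bits"
)

func main() {
    const p = 14

    w := uint32(0x5b3) // upper 32-p bits of some hash; its 18-bit field starts with 7 zeros

    var rank int
    if w == 0 {
        rank = 32 - p + 1 // every remaining bit is zero
    } else {
        // clz32 counts zeros across all 32 bits; subtract the p positions
        // the shift already cleared, then add 1.
        rank = bits.LeadingZeros32(w) - p + 1
    }
    fmt.Println("rho+1 =", rank) // prints 8 for this w
}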

Step 4: Track Maximum per Bucket

Each of the m buckets maintains a register that stores the maximum ρ value ever observed for that bucket.

Bucket[i] = max(Bucket[i], ρ + 1)

This is why canonical HLL needs only 6 bits per register: with a 64-bit hash and p=14, ρ+1 can never exceed 64 − 14 + 1 = 51, no matter how many unique values are added. (This gist spends a full byte per register so updates can go through Atomics; see "Why 16KB?" below.)
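A Go sketch of the update rule (single-threaded for clarity; hll.js performs the same max with an Atomics.compareExchange loop so that nginx worker processes can share the zone lock-free):

package main

import "fmt"

func main() {
    registers := make([]uint8, 1<<14)

    // Remember the maximum rank ever observed per register.
    update := func(idx int, rank uint8) {
        if rank > registers[idx] {
            registers[idx] = rank
        }
    }

    update(11269, 8)
    update(11269, 3) // lower rank: no effect
    fmt.Println(registers[11269]) // prints 8
}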

Step 5: Estimate Cardinality

To estimate the total number of unique elements, HyperLogLog uses the harmonic mean of 2^M[j] across all buckets:

E = α_m × m² / Σ_j 2^(-M[j])

Where:

  • m = number of buckets (16,384)
  • M[j] = value in bucket j
  • α_m = correction constant (~0.7213 for large m)

The harmonic mean is crucial because it's resistant to outliers (a few buckets with very high values don't skew the estimate).
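A Go sketch of the raw estimator (the α_m constant mirrors hll.js; the small- and large-range corrections that count() applies are omitted here):

package main

import (
    "fmt"
    "math"
)

// rawEstimate applies E = alpha_m * m^2 / sum_j 2^(-M[j]).
func rawEstimate(registers []uint8) float64 {
    m := float64(len(registers))
    alpha := 0.7213 / (1 + 1.079/m) // bias-correction constant for large m

    sum := 0.0
    for _, r := range registers {
        sum += math.Pow(2, -float64(r))
    }
    return alpha * m * m / sum
}

func main() {
    registers := make([]uint8, 1<<14)
    // With all registers zero, sum = m and E = alpha*m ~ 0.72*16384 ~ 11817:
    // far from the true count of 0, which is why HLL falls back to linear
    // counting ("small-range correction") whenever the estimate is small.
    fmt.Printf("raw estimate on an empty sketch: %.0f\n", rawEstimate(registers))
}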

Why This Works:

The average maximum leading zero count in a bucket is approximately log₂(n/m), where n is the true cardinality. By combining estimates from all buckets using harmonic mean, we get:

  • Individual buckets have high variance
  • Harmonic mean of many buckets reduces variance significantly
  • Result: ~2% standard error with 16KB memory

Key Insights

Memory vs Accuracy Trade-off

HyperLogLog's accuracy depends on the number of buckets (m):

  • Standard error = 1.04 / √m
  • With m = 16,384 (2^14): error ≈ 1.04 / 128 ≈ 0.81% (theoretical)
  • In practice: ~2% error accounting for edge cases
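The trade-off is easy to tabulate; a quick Go computation (assuming one byte per register, as this gist's Uint8Array layout uses):

package main

import (
    "fmt"
    "math"
)

func main() {
    for p := 10; p <= 16; p += 2 {
        m := 1 << p
        stderr := 1.04 / math.Sqrt(float64(m)) // theoretical standard error
        fmt.Printf("p=%2d  m=%6d  error=%.2f%%  registers=%2d KB\n",
            p, m, stderr*100, m/1024)
    }
}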

Why 16KB?

The implementation uses:

  • m = 16,384 buckets (2^14)
  • One byte per register (hll.js keeps registers in a Uint8Array so they can be updated with Atomics)
  • Total: 16,384 × 1 byte = 16 KB
  • A packed 6-bit-per-register encoding, as in the canonical algorithm, would need 98,304 bits ≈ 12 KB, at the cost of more complex atomic updates
  • The nginx shared zone is declared as 32k, leaving headroom for the zone's own bookkeeping

This configuration provides:

  • Excellent accuracy (~2% error)
  • Reasonable memory footprint
  • Ability to count up to billions of unique values

Space Complexity

Traditional exact counting requires O(n) space where n is the number of unique elements:

  • 1 million unique IPs: ~16 MB (storing 4-byte IPs in a hash set)
  • 1 billion unique IPs: ~16 GB

HyperLogLog requires O(m) space where m is the number of buckets:

  • Any cardinality: 16 KB (fixed)
  • 1 million, 1 billion, or 1 trillion unique values: still 16 KB

The space savings become dramatic as cardinality grows.

Why Harmonic Mean?

The harmonic mean is used instead of arithmetic mean because:

  • Harmonic mean is more resistant to outliers (buckets with unusually high values)
  • It provides a better bias correction for the estimator
  • Arithmetic mean would overestimate cardinality significantly

Consider if one bucket accidentally gets a very high value (say, 50 leading zeros):

  • Arithmetic mean: would be heavily influenced, causing overestimation
  • Harmonic mean: dampens the effect of this outlier, maintaining accuracy
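A small numeric demonstration in Go, with hypothetical register values (1,023 registers holding 5 and a single outlier holding 50):

package main

import (
    "fmt"
    "math"
)

func main() {
    regs := make([]float64, 1024)
    for i := range regs {
        regs[i] = 5
    }
    regs[0] = 50 // the outlier

    // Treat 2^value as each register's individual cardinality signal.
    sumEst, sumInv := 0.0, 0.0
    for _, r := range regs {
        est := math.Pow(2, r)
        sumEst += est     // arithmetic-mean numerator
        sumInv += 1 / est // harmonic-mean denominator
    }
    n := float64(len(regs))

    fmt.Printf("arithmetic mean: %.3g (dominated by the outlier)\n", sumEst/n)
    fmt.Printf("harmonic mean: %.1f (outlier barely matters)\n", n/sumInv)
}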

Probability and Statistics

The algorithm leverages the law of large numbers:

  • Each bucket provides a noisy estimate
  • With 16,384 independent estimates, the noise averages out
  • The combined estimate converges to the true cardinality
  • Standard error decreases proportionally to √m

This is why doubling the number of buckets (and memory) shrinks the standard error only by a factor of √2 ≈ 1.4 (about 30% less error), demonstrating diminishing returns.
