Skip to content

Instantly share code, notes, and snippets.

@muminoff
Created February 6, 2025 01:28
Show Gist options
  • Save muminoff/8db643031ec97685fbde8d991f8e9672 to your computer and use it in GitHub Desktop.
Save muminoff/8db643031ec97685fbde8d991f8e9672 to your computer and use it in GitHub Desktop.
HyperLogLog Rust Implementation
use std::hash::{Hash, Hasher};
use std::collections::hash_map::DefaultHasher;
struct HyperLogLog {
registers: Vec<u8>, // Vector to store the maximum number of leading zeros seen.
m: usize, // Number of registers (m = 2^b).
b: u8, // Number of bits used for indexing.
}
impl HyperLogLog {
fn new(b: u8) -> Self {
let m = 1 << b; // m = 2^b
HyperLogLog {
registers: vec![0; m],
m,
b,
}
}
// Add an element to the HyperLogLog structure.
fn add(&mut self, value: &str) {
let hash = Self::hash(value); // Hash the input value.
let index = (hash >> (32 - self.b)) as usize; // Use the first `b` bits as the index.
// Safely compute the mask to extract the remaining bits.
let mask = if self.b < 32 {
(1_u32 << (32 - self.b)).wrapping_sub(1)
} else {
0 // If b == 32, there are no remaining bits.
};
let w = hash & mask; // Mask to extract the remaining bits.
let p = w.leading_zeros() as u8 + 1; // Count leading zeros in the remaining bits.
// Update the register if this value has more leading zeros than previously seen.
self.registers[index] = self.registers[index].max(p);
}
// Estimate the number of unique elements.
fn estimate(&self) -> f64 {
let mut z = 0.0;
for &reg in &self.registers {
z += 1.0 / (1u64 << reg) as f64; // Use 64-bit shift to prevent overflow
}
let alpha = if self.m == 16 {
0.673
} else if self.m == 32 {
0.697
} else if self.m == 64 {
0.709
} else {
0.7213 / (1.0 + 1.079 / self.m as f64)
};
let estimate = alpha * self.m as f64 * self.m as f64 / z;
// Small range correction
if estimate <= 2.5 * self.m as f64 {
let zeros = self.registers.iter().filter(|&&x| x == 0).count();
if zeros != 0 {
return self.m as f64 * (self.m as f64 / zeros as f64).ln();
}
}
estimate
}
// Simple hash function (for demonstration purposes only).
fn hash(value: &str) -> u32 {
let mut hasher = DefaultHasher::new();
value.hash(&mut hasher); // Hash the string.
hasher.finish() as u32 // Return the hash as a 32-bit integer.
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment