Created
August 11, 2012 18:59
-
-
Save dgabriele/3326413 to your computer and use it in GitHub Desktop.
lock-free Bloom filter with quiescent consistency
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <atomic> | |
#include <vector> | |
#include <cstdio> | |
#include <cstdlib> | |
#include <cmath> | |
#include <stdint.h> | |
#include <limits.h> | |
class bloom_filter | |
{ | |
typedef std::atomic_uchar atomic_byte; | |
static inline unsigned H1(const unsigned x) { | |
return (1583458089U * x); | |
} | |
static inline unsigned H2(const unsigned x) { | |
return (784588716U * x); | |
} | |
inline unsigned HASH(unsigned h1, unsigned h2, unsigned i) { | |
return ((h1 + i * h2 + ((i*(i-1)) >> 1)) % m_m); | |
} | |
const unsigned m_cap; | |
const unsigned m_m; | |
const unsigned m_k; | |
const unsigned m_bpe; | |
const unsigned m_len; | |
atomic_byte* m_vec; | |
public: | |
explicit bloom_filter(unsigned cap, unsigned bpe) | |
: m_cap(1 << (unsigned)ceil(log(cap)/log(2))), | |
m_m (m_cap * bpe), | |
m_k ((unsigned)ceil((0.693f * bpe))), | |
m_bpe (bpe), | |
m_len ((unsigned)ceil((cap * bpe) / 8.f)), | |
m_vec (new atomic_byte[m_len]) | |
{ | |
} | |
~bloom_filter() | |
{ | |
delete[] m_vec; | |
} | |
bool contains(unsigned x) | |
{ | |
unsigned h1 = H1(x); | |
unsigned h2 = H2(x); | |
for (unsigned t = 0; t < m_k; ++t) | |
{ | |
unsigned addr = HASH(h1, h2, t+1); | |
unsigned index = addr >> 3; | |
unsigned offset = 1 << (addr & 7); | |
if ( ! (m_vec[index] & offset) ) | |
return (false); | |
} | |
return (true); | |
} | |
void insert(unsigned x) | |
{ | |
unsigned h1 = H1(x); | |
unsigned h2 = H2(x); | |
for (unsigned t = 0; t < m_k; ++t) | |
{ | |
unsigned addr = HASH(h1, h2, t+1); | |
unsigned index = addr >> 3; | |
unsigned char offset = 1 << (addr & 7); | |
unsigned char byte = m_vec[index]; | |
do { | |
if ( ! (byte & offset) ) { | |
if (!m_vec[index].compare_exchange_weak(byte, byte|offset)) { | |
byte = m_vec[index]; | |
continue; | |
} | |
} break; | |
} while (true); | |
} | |
} | |
}; | |
int main() | |
{ | |
unsigned fp = 0; // false positive count | |
unsigned cap = 1 << 15; // bloom filter capacity until fp is suboptimal | |
unsigned bpe = 6; // bits per element | |
bloom_filter bf(cap, bpe); | |
for (unsigned i=1; i<cap; ++i) | |
bf.insert(i); | |
// query the next `cap' elements, counting the | |
// number of positives. these are false positives. | |
for (unsigned i=cap; i<(cap << 1)-1; ++i) | |
fp += bf.contains(i); | |
std::cout << "Pr[false positive] = " << ((float)fp / cap) << std::endl; | |
return (0); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment