Skip to content

Instantly share code, notes, and snippets.

@warmist
Created June 29, 2012 17:17
Show Gist options
  • Save warmist/3019332 to your computer and use it in GitHub Desktop.
Save warmist/3019332 to your computer and use it in GitHub Desktop.
Bloom implementacija
#include <vector>
#include <iostream>
#include <fstream>
using namespace std;
vector<bool> bloom;
const unsigned int bloomsize=1024;
//-----------------------------------------------------------------------------
// MurmurHash2, by Austin Appleby
// Note - This code makes a few assumptions about how your machine behaves -
// 1. We can read a 4-byte value from any address without crashing
// 2. sizeof(int) == 4
// And it has a few limitations -
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
// machines.
struct StringHash
{
unsigned int v1,v2,v3;
StringHash(const string &a)
{
/*v1=sumhash(a,1337)%bloomsize;
v2=sumhash(a,13)%bloomsize;
v3=sumhash(a,109321)%bloomsize;*/
//*
v1=Hash1(a,1337)%bloomsize;
v2=Hash1(a,13)%bloomsize;
v3=Hash1(a,109321)%bloomsize;
//*/
}
unsigned int sumhash(const string &a,int seed)
{
unsigned int sum=seed;
for(size_t i=0;i<a.size();i++)
{
sum+= (seed^ a[i])*seed+ (! a[i]);
}
return sum;
}
unsigned int Hash1(const string& a,int seed)
{
string b=a;
if(b.size()%4!=0)
b.append(" ",4-b.size()%4);
return MurmurHash2(b.c_str(),b.size(),seed);
}
unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed )
{
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.
const unsigned int m = 0x5bd1e995;
const int r = 24;
// Initialize the hash to a 'random' value
unsigned int h = seed ^ len;
// Mix 4 bytes at a time into the hash
const unsigned char * data = (const unsigned char *)key;
while(len >= 4)
{
unsigned int k = *(unsigned int *)data;
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4;
len -= 4;
}
// Handle the last few bytes of the input array
switch(len)
{
case 3:
h ^= data[2] << 16;
case 2:
h ^= data[1] << 8;
case 1:
h ^= data[0];
h *= m;
};
// Do a few final mixes of the hash to ensure the last few
// bytes are well-incorporated.
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
bool Contains(vector<bool>& vec)
{
return vec[v1] && vec[v2] && vec[v3];
}
bool Put(vector<bool>&vec)
{
vec[v1]=true;
vec[v2]=true;
vec[v3]=true;
}
};
int main()
{
bloom.resize(bloomsize,false);
fstream input("bloom-words.txt",ios::in);
while(!input.eof())
{
string in;
input>>in;
StringHash H(in);
H.Put(bloom);
//cout<<in<<" :"<<H.v1<<" "<<H.v2<<" "<<H.v3<<endl;
}
fstream input2("bloom-test.txt");
while(!input2.eof())
{
string in;
input2>>in;
StringHash H(in);
cout<<in<<" is in:"<<H.Contains(bloom)<<endl;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment