Created
May 2, 2010 21:09
-
-
Save making/387455 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.security.MessageDigest; | |
import java.security.NoSuchAlgorithmException; | |
import java.util.BitSet; | |
import java.util.Random; | |
public class BloomFilter<E> { | |
private BitSet bf; | |
private int bitArraySize; | |
private int numHashFunc; | |
public BloomFilter(int bitArraySize, int numHashFunc) { | |
this.bitArraySize = bitArraySize; | |
this.numHashFunc = numHashFunc; | |
bf = new BitSet(bitArraySize); | |
} | |
public BloomFilter() { | |
this(100000000, 6); | |
} | |
public void add(E obj) { | |
int[] indexes = getHashIndexes(obj); | |
for (int index : indexes) { | |
bf.set(index); | |
} | |
} | |
public boolean contains(E obj) { | |
int[] indexes = getHashIndexes(obj); | |
for (int index : indexes) { | |
if (bf.get(index) == false) { | |
return false; | |
} | |
} | |
return true; | |
} | |
public void union(BloomFilter<E> other) { | |
bf.or(other.bf); | |
} | |
protected int[] getHashIndexes(E obj) { | |
int[] indexes = new int[numHashFunc]; | |
long seed = 0; | |
byte[] digest; | |
try { | |
MessageDigest md = MessageDigest.getInstance("MD5"); | |
md.update(obj.toString().getBytes()); | |
digest = md.digest(); | |
for (int i = 0; i < 6; i++) { | |
seed = seed | (((long) digest[i] & 0xFF)) << (8 * i); | |
} | |
} catch (NoSuchAlgorithmException e) { | |
} | |
Random gen = new Random(seed); | |
for (int i = 0; i < numHashFunc; i++) { | |
indexes[i] = gen.nextInt(bitArraySize); | |
} | |
return indexes; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment