Skip to content

Instantly share code, notes, and snippets.

@kharmabum
Created July 8, 2015 00:08
Show Gist options
  • Save kharmabum/72578acd7b6071541791 to your computer and use it in GitHub Desktop.
Save kharmabum/72578acd7b6071541791 to your computer and use it in GitHub Desktop.
/*
* Copyright 2013 bits of proof zrt.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bitsofproof.supernode.common;
import java.security.SecureRandom;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.bouncycastle.util.Arrays;
import com.bitsofproof.supernode.api.Address;
/**
* A bloom filter
*/
public class BloomFilter
{
public static enum UpdateMode
{
none, all, keys
}
private final byte[] filter;
private final long hashFunctions;
private final long tweak;
private final UpdateMode update;
private static final int MAX_FILTER_SIZE = 36000;
private static final int MAX_HASH_FUNCS = 50;
/**
* create a bloom filter of optimal size and hash functions for
*
* @param n
* - number of data stored
* @param falsePositiveProbability
* - target false positive probability less than this
* @param update
* - a flag if bloom filter should be updated with transactions it matches.
* @return a new empty bloom filter
*/
public static BloomFilter createOptimalFilter (int n, double falsePositiveProbability, UpdateMode update)
{
return createOptimalFilter (n, falsePositiveProbability, Math.abs (new SecureRandom ().nextInt ()), update);
}
/**
* create a bloom filter of optimal size and hash functions for
*
* @param n
* - number of data stored
* @param falsePositiveProbability
* - target false positive probability less than this
* @param tweak
* - a tweak to obfuscate what information the filter was built on
* @param update
* - a flag if bloom filter should be updated with transactions it matches.
* @return a new empty bloom filter
*/
public static BloomFilter createOptimalFilter (int n, double falsePositiveProbability, long tweak, UpdateMode update)
{
// http://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives
double ln2 = Math.log (2.0);
int mod = Math.max (1, (int) Math.min ((-n * Math.log (falsePositiveProbability) / (ln2 * ln2)) / 8.0, MAX_FILTER_SIZE));
long hashFunctions = Math.max (1, Math.min ((int) (mod * 8.0 / n * ln2), MAX_HASH_FUNCS));
return new BloomFilter (new byte[mod], hashFunctions, tweak, update);
}
/**
* Get false positive probability for given filter if fed with n input data.
*
* @param n
* @return
*/
public double getFalsePositiveProbability (int n)
{
return Math.pow (1 - Math.pow (Math.E, -1.0 * (hashFunctions * n) / (filter.length * 8)), hashFunctions);
}
/**
* Create a bloom filter
*
* @param data
* filter content
* @param hashFunctions
* - number of hash functions to use
* @param tweak
* - obfuscation parameter
* @param update
* - a flag if transactions identified by the filter should update the filter
*/
public BloomFilter (byte[] data, long hashFunctions, long tweak, UpdateMode update)
{
filter = Arrays.clone (data);
this.hashFunctions = Math.min (hashFunctions, MAX_HASH_FUNCS);
this.tweak = tweak;
this.update = update;
}
/**
* Serialization of a transaction output, suitable to be included into a bloom filter
*
* @param hash
* @param ix
* @return
*/
public static byte[] serializedOutpoint (String hash, long ix)
{
WireFormat.Writer writer = new WireFormat.Writer ();
writer.writeHash (new Hash (hash));
writer.writeUint32 (ix);
return writer.toByteArray ();
}
/**
* Serialize and add a transaction output to a filter
*
* @param hash
* - transaction hash
* @param ix
* - output order number
* @return
*/
public int addOutpoint (String hash, long ix)
{
byte[] point = serializedOutpoint (hash, ix);
add (point);
return Arrays.hashCode (point);
}
/**
* Check if the filter contains a transaction output with positive probability
*
* @param hash
* @param ix
* @return
*/
public boolean containsOutpoint (String hash, long ix)
{
return contains (serializedOutpoint (hash, ix));
}
/**
* Check if the filter contains an address with positive probability
*
* @param address
* @param addressFlag
* @return
* @throws ValidationException
*/
public boolean containsAddress (String address, int addressFlag) throws ValidationException
{
return contains (Address.fromSatoshiStyle (address, addressFlag));
}
/**
* explicitelly set a filter bit
*
* @param n
*/
private void setBit (int n)
{
filter[n >>> 3] |= 1 << (7 & n);
}
/**
* test if a bit is set in the filter mask
*
* @param n
* @return
*/
private boolean testBit (int n)
{
return (filter[n >>> 3] & 1 << (7 & n)) != 0;
}
/**
* add some data to the filter.
*
* @param data
* @return
*/
public int add (byte[] data)
{
for ( int i = 0; i < hashFunctions; ++i )
{
setBit ((int) ((murmurhash3bit (i, data, tweak) & 0xFFFFFFFFL) % (filter.length * 8)));
}
return Arrays.hashCode (data);
}
/**
* Ask if a data was added to the filter. The filter might give false positive answers, but will not confirm existence of data that was not added.
*
* @param data
* @return
*/
public boolean contains (byte[] data)
{
for ( int i = 0; i < hashFunctions; ++i )
{
if ( !testBit ((int) ((murmurhash3bit (i, data, tweak) & 0xFFFFFFFFL) % (filter.length * 8))) )
{
return false;
}
}
return true;
}
/**
* Precompute hashes used to set mask bits for a data.
*
* @param data
* @param tweak
* @return
*/
public static List<Integer> precomputeHashes (byte[] data, long tweak)
{
ArrayList<Integer> list = new ArrayList<Integer> (MAX_HASH_FUNCS);
for ( int i = 0; i < MAX_HASH_FUNCS; ++i )
{
list.add (murmurhash3bit (i, data, tweak));
}
return list;
}
/**
* check if the filter contains data with precomputed hashes.
*
* @param hashes
* @return
*/
public boolean contains (List<Integer> hashes)
{
Iterator<Integer> ni = hashes.iterator ();
for ( int i = 0; i < hashFunctions; ++i )
{
if ( !testBit ((int) ((ni.next () & 0xFFFFFFFFL) % (filter.length * 8))) )
{
return false;
}
}
return true;
}
private static int murmurhash3bit (int hashNum, byte[] data, long tweak)
{
return murmurhash3 (data, 0, data.length, (int) (hashNum * 0xFBA4C795L + tweak));
}
/**
* write filter to wire
*
* @param writer
*/
public void toWire (WireFormat.Writer writer)
{
writer.writeVarBytes (filter);
writer.writeUint32 (hashFunctions);
writer.writeUint32 (tweak);
writer.writeByte (update.ordinal ());
}
/**
* reconstruct filter from wire format
*
* @param reader
* @return
*/
public static BloomFilter fromWire (WireFormat.Reader reader)
{
byte[] data = reader.readVarBytes ();
long hashFunctions = reader.readUint32 ();
long tweak = reader.readUint32 ();
int update = reader.readByte ();
return new BloomFilter (data, hashFunctions, tweak, UpdateMode.values ()[update]);
}
/*
* This code is public domain.
*
* The MurmurHash3 algorithm was created by Austin Appleby and put into the public domain. See http://code.google.com/p/smhasher/
*
* This java port was authored by Yonik Seeley and was placed into the public domain per
* https://github.com/yonik/java_util/blob/master/src/util/hash/MurmurHash3.java.
*/
private static int murmurhash3 (byte[] data, int offset, int len, int seed)
{
int c1 = 0xcc9e2d51;
int c2 = 0x1b873593;
int h1 = seed;
int roundedEnd = offset + (len & 0xfffffffc); // round down to 4 byte block
for ( int i = offset; i < roundedEnd; i += 4 )
{
// little endian load order
int k1 = (data[i] & 0xff) | ((data[i + 1] & 0xff) << 8) | ((data[i + 2] & 0xff) << 16) | (data[i + 3] << 24);
k1 *= c1;
k1 = (k1 << 15) | (k1 >>> 17); // ROTL32(k1,15);
k1 *= c2;
h1 ^= k1;
h1 = (h1 << 13) | (h1 >>> 19); // ROTL32(h1,13);
h1 = h1 * 5 + 0xe6546b64;
}
// tail
int k1 = 0;
switch ( len & 0x03 )
{
case 3:
k1 = (data[roundedEnd + 2] & 0xff) << 16;
// fallthrough
case 2:
k1 |= (data[roundedEnd + 1] & 0xff) << 8;
// fallthrough
case 1:
k1 |= data[roundedEnd] & 0xff;
k1 *= c1;
k1 = (k1 << 15) | (k1 >>> 17); // ROTL32(k1,15);
k1 *= c2;
h1 ^= k1;
default:
}
// finalization
h1 ^= len;
// fmix(h1);
h1 ^= h1 >>> 16;
h1 *= 0x85ebca6b;
h1 ^= h1 >>> 13;
h1 *= 0xc2b2ae35;
h1 ^= h1 >>> 16;
return h1;
}
public byte[] getFilter ()
{
return filter.clone ();
}
/**
* get the number of hash functions used
*
* @return
*/
public long getHashFunctions ()
{
return hashFunctions;
}
/**
* get obfuscation parameter
*
* @return
*/
public long getTweak ()
{
return tweak;
}
/**
* get filter update mode
*
* @return
*/
public UpdateMode getUpdateMode ()
{
return update;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment