Last active
December 14, 2018 05:28
-
-
Save AmatsuZero/9ebaba1c3c6b4a56b74a38cf64c4fc3c to your computer and use it in GitHub Desktop.
布隆过滤器的实现
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel | |
const popcnt = (v) => { | |
v -= (v >> 1) & 0x55555555 | |
v = (v & 0x33333333) + ((v >> 2) & 0x33333333) | |
return ((v + (v >> 4) & 0xf0f0f0f) * 0x1010101) >> 24 | |
} | |
// Fowler/Noll/Vo hashing. | |
const fnv_1a = (v) => { | |
let a = 2166136261 | |
for (let i = 0, n = v.length; i < n; ++i) { | |
let c = v.charCodeAt(i), | |
d = c & 0xff00 | |
if (d) a = fnv_multiply(a ^ d >> 8) | |
a = fnv_multiply(a ^ c & 0xff) | |
} | |
return fnv_mix(a) | |
} | |
// One additional iteration of FNV, given a hash. | |
const fnv_1a_b =(a) => { | |
return fnv_mix(fnv_multiply(a)) | |
} | |
// a * 16777619 mod 2**32 | |
const fnv_multiply = (a) => { | |
return a + (a << 1) + (a << 4) + (a << 7) + (a << 8) + (a << 24) | |
} | |
// See https://web.archive.org/web/20131019013225/http://home.comcast.net/~bretm/hash/6.html | |
const fnv_mix = (a) => { | |
a += a << 13 | |
a ^= a >>> 7 | |
a += a << 3 | |
a ^= a >>> 17 | |
a += a << 5 | |
return a & 0xffffffff | |
} | |
class BloomFilter { | |
// Creates a new bloom filter. If *m* is an array-like object, with a length | |
// property, then the bloom filter is loaded with data from the array, where | |
// each element is a 32-bit integer. Otherwise, *m* should specify the | |
// number of bits. Note that *m* is rounded up to the nearest multiple of | |
// 32. *k* specifies the number of hashing functions. | |
constructor(m, k) { | |
let a | |
if (typeof m !== "number") { | |
a = m | |
m = a.length * 32 | |
} | |
let n = Math.ceil(m/32), i = -1 | |
this.m = m = n * 32 | |
this.k = k | |
if (typeof ArrayBuffer !== "undefined") { | |
const kBytes = 1 << Math.ceil(Math.log(Math.ceil(Math.log(m)/ Math.LN2 / 8)) / Math.LN2), | |
array = kBytes === 1 ? Uint8Array : kBytes === 2 ? Uint16Array : Uint32Array, | |
kBuffer = new ArrayBuffer(kBytes * k), | |
buckets = this.buckets = new Int32Array(n) | |
if (a) | |
while (++i < n) | |
buckets[i] = a[i] | |
this._locations = new array(kBuffer) | |
} else { | |
const buckets = this.buckets = [] | |
if (a) | |
while (++i < n) | |
buckets[i] = a[i] | |
else | |
while (++i < n) | |
buckets[i] = 0 | |
this._locations = [] | |
} | |
} | |
locations(v) { | |
let k = this.k, | |
m = this.m, | |
r = this._locations, | |
a = fnv_1a(v), | |
b = fnv_1a_b(a), | |
x = a % m | |
for (let i = 0; i < k; ++i) { | |
r[i] = x < 0 ? (x + m) : x | |
x = (x + b) % m | |
} | |
return r | |
} | |
test(v) { | |
const l = this.locations(v + ""), | |
k = this.k, | |
buckets = this.buckets | |
for (let i = 0; i < k; ++i) { | |
const b = l[i] | |
if ((buckets[Math.floor(b / 32)] & (1 << (b % 32))) === 0) | |
return false | |
} | |
return true | |
} | |
add(v) { | |
let l = this.locations(v + ""), | |
k = this.k, | |
buckets = this.buckets | |
for (let i = 0; i < k; ++i) | |
buckets[Math.floor(l[i] / 32)] |= 1 << (l[i] % 32) | |
} | |
get size() { | |
let buckets = this.buckets, | |
bits = 0 | |
for (let i = 0, n = buckets.length; i < n; ++i) bits += popcnt(buckets[i]) | |
return -this.m * Math.log(1 - bits / this.m) / this.k | |
} | |
} | |
module.exports = BloomFilter |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// BloomFilter.swift | |
// | |
// Created by Jiang,Zhenhua on 2018/12/14. | |
// Copyright © 2018 Daubert. All rights reserved. | |
// | |
import Foundation | |
struct BloomFilter<E: Hashable> { | |
private var data: [Bool] | |
private let seeds: [Int] | |
init(size: Int, hashCount: Int) { | |
data = Array(repeating: false, count: size) | |
seeds = (0..<hashCount) | |
.map { _ in Int.random(in: 0..<Int.max) } | |
} | |
init() { | |
self.init(size: 512, hashCount: 3) | |
} | |
private func hashes(for element: E) -> [Int] { | |
return seeds.map { seed in | |
var hasher = Hasher() | |
hasher.combine(element) | |
hasher.combine(seed) | |
return abs(hasher.finalize()) | |
} | |
} | |
mutating func insert(_ element: E) { | |
hashes(for: element) | |
.forEach { data[$0 % data.count] = true } | |
} | |
func contains(_ element: E) -> Bool { | |
return hashes(for: element) | |
.allSatisfy { data[$0 % data.count] } | |
} | |
var isEmpty: Bool { | |
return data.allSatisfy { !$0 } | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment