Skip to content

Instantly share code, notes, and snippets.

@AmatsuZero
Last active December 14, 2018 05:28
Show Gist options
  • Save AmatsuZero/9ebaba1c3c6b4a56b74a38cf64c4fc3c to your computer and use it in GitHub Desktop.
Save AmatsuZero/9ebaba1c3c6b4a56b74a38cf64c4fc3c to your computer and use it in GitHub Desktop.
布隆过滤器的实现
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
const popcnt = (v) => {
v -= (v >> 1) & 0x55555555
v = (v & 0x33333333) + ((v >> 2) & 0x33333333)
return ((v + (v >> 4) & 0xf0f0f0f) * 0x1010101) >> 24
}
// Fowler/Noll/Vo hashing.
const fnv_1a = (v) => {
let a = 2166136261
for (let i = 0, n = v.length; i < n; ++i) {
let c = v.charCodeAt(i),
d = c & 0xff00
if (d) a = fnv_multiply(a ^ d >> 8)
a = fnv_multiply(a ^ c & 0xff)
}
return fnv_mix(a)
}
// One additional iteration of FNV, given a hash.
const fnv_1a_b =(a) => {
return fnv_mix(fnv_multiply(a))
}
// a * 16777619 mod 2**32
const fnv_multiply = (a) => {
return a + (a << 1) + (a << 4) + (a << 7) + (a << 8) + (a << 24)
}
// See https://web.archive.org/web/20131019013225/http://home.comcast.net/~bretm/hash/6.html
const fnv_mix = (a) => {
a += a << 13
a ^= a >>> 7
a += a << 3
a ^= a >>> 17
a += a << 5
return a & 0xffffffff
}
class BloomFilter {
// Creates a new bloom filter. If *m* is an array-like object, with a length
// property, then the bloom filter is loaded with data from the array, where
// each element is a 32-bit integer. Otherwise, *m* should specify the
// number of bits. Note that *m* is rounded up to the nearest multiple of
// 32. *k* specifies the number of hashing functions.
constructor(m, k) {
let a
if (typeof m !== "number") {
a = m
m = a.length * 32
}
let n = Math.ceil(m/32), i = -1
this.m = m = n * 32
this.k = k
if (typeof ArrayBuffer !== "undefined") {
const kBytes = 1 << Math.ceil(Math.log(Math.ceil(Math.log(m)/ Math.LN2 / 8)) / Math.LN2),
array = kBytes === 1 ? Uint8Array : kBytes === 2 ? Uint16Array : Uint32Array,
kBuffer = new ArrayBuffer(kBytes * k),
buckets = this.buckets = new Int32Array(n)
if (a)
while (++i < n)
buckets[i] = a[i]
this._locations = new array(kBuffer)
} else {
const buckets = this.buckets = []
if (a)
while (++i < n)
buckets[i] = a[i]
else
while (++i < n)
buckets[i] = 0
this._locations = []
}
}
locations(v) {
let k = this.k,
m = this.m,
r = this._locations,
a = fnv_1a(v),
b = fnv_1a_b(a),
x = a % m
for (let i = 0; i < k; ++i) {
r[i] = x < 0 ? (x + m) : x
x = (x + b) % m
}
return r
}
test(v) {
const l = this.locations(v + ""),
k = this.k,
buckets = this.buckets
for (let i = 0; i < k; ++i) {
const b = l[i]
if ((buckets[Math.floor(b / 32)] & (1 << (b % 32))) === 0)
return false
}
return true
}
add(v) {
let l = this.locations(v + ""),
k = this.k,
buckets = this.buckets
for (let i = 0; i < k; ++i)
buckets[Math.floor(l[i] / 32)] |= 1 << (l[i] % 32)
}
get size() {
let buckets = this.buckets,
bits = 0
for (let i = 0, n = buckets.length; i < n; ++i) bits += popcnt(buckets[i])
return -this.m * Math.log(1 - bits / this.m) / this.k
}
}
module.exports = BloomFilter
//
// BloomFilter.swift
//
// Created by Jiang,Zhenhua on 2018/12/14.
// Copyright © 2018 Daubert. All rights reserved.
//
import Foundation
struct BloomFilter<E: Hashable> {
private var data: [Bool]
private let seeds: [Int]
init(size: Int, hashCount: Int) {
data = Array(repeating: false, count: size)
seeds = (0..<hashCount)
.map { _ in Int.random(in: 0..<Int.max) }
}
init() {
self.init(size: 512, hashCount: 3)
}
private func hashes(for element: E) -> [Int] {
return seeds.map { seed in
var hasher = Hasher()
hasher.combine(element)
hasher.combine(seed)
return abs(hasher.finalize())
}
}
mutating func insert(_ element: E) {
hashes(for: element)
.forEach { data[$0 % data.count] = true }
}
func contains(_ element: E) -> Bool {
return hashes(for: element)
.allSatisfy { data[$0 % data.count] }
}
var isEmpty: Bool {
return data.allSatisfy { !$0 }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment