Skip to content

Instantly share code, notes, and snippets.

@adoublef
Last active December 28, 2024 20:58
Show Gist options
  • Save adoublef/266de2f888702be96fdcdda67cfdaddc to your computer and use it in GitHub Desktop.
Save adoublef/266de2f888702be96fdcdda67cfdaddc to your computer and use it in GitHub Desktop.
Data Structures
// Copyright 2024 Kristopher Rahim Afful-Brown. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package probabilistic
type Hasher interface {
Hash(p []byte) uint64
}
type HashFunc func(p []byte) uint64
func (hf HashFunc) Hash(p []byte) uint64 {
return hf(p)
}
type Bloom struct {
m uint32 // m size of bitset
k uint32 // k number of sets
h Hasher
set []uint8
}
func (b *Bloom) Set(key string) {
h := b.h.Hash([]byte(key))
u := uint32(h & 0xffffffff)
l := uint32((h >> 32) & 0xffffffff)
for i := range b.k {
h := uint32((uint64(l+u*i) * uint64(b.m)) >> 32)
pos := (i*b.m + (h)) >> 3
j := h & 7
b.set[pos] |= (uint8(1) << j)
}
}
func (b *Bloom) Has(v string) bool {
h := b.h.Hash([]byte(v))
u := uint32(h & 0xffffffff)
l := uint32((h >> 32) & 0xffffffff)
for i := range b.k {
h := uint32((uint64(l+u*i) * uint64(b.m)) >> 32)
pos := (i*b.m + (h)) >> 3
j := h & 7
if (b.set[pos] & (uint8(1) << j)) == 0 {
return false
}
}
return true
}
// Copyright 2024 Kristopher Rahim Afful-Brown. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package consistenthash
type Hasher interface {
Hash(p []byte) uint64
}
type HashFunc func(p []byte) uint64
func (hf HashFunc) Hash(p []byte) uint64 {
return hf(p)
}
type Map struct {
k int // k number of sets
h Hasher
keys []int // sorted
set map[int]string
}
// Add adds some keys to the hash.
func (m *Map) Add(keys ...string) {
for _, key := range keys {
for i := 0; i < m.k; i++ {
h := int(m.h.Hash([]byte((itoa(i) + key))))
j := len(m.keys)
m.keys = append(m.keys, h)
for j > 0 && m.keys[j-1] > h {
m.keys[j] = m.keys[j-1]
j--
}
m.keys[j] = h
m.set[h] = key
}
}
}
// Get gets the closest item in the hash to the provided key.
func (m *Map) Get(key string) string {
if m.IsEmpty() {
return ""
}
h := int(m.h.Hash([]byte(key)))
idx, _ := search(m.keys, h)
if idx == len(m.keys) {
idx = 0
}
return m.set[m.keys[idx]]
}
// Returns true if there are no items available.
func (m *Map) IsEmpty() bool {
return len(m.keys) == 0
}
func New(replicas int, h Hasher) *Map {
if replicas < 1 {
replicas = 50
}
if h == nil {
panic("hasher cannot be nil")
}
return &Map{k: replicas, h: h, set: make(map[int]string)}
}
func itoa(n int) string {
var buf [20]byte // max uint64 length
i := len(buf)
// Process two digits at a time
for n >= 100 {
q := n / 100
r := n - q*100 // remainder
d1 := r / 10 // first digit
d2 := r - d1*10 // second digit
i -= 2
buf[i] = '0' + byte(d1)
buf[i+1] = '0' + byte(d2)
n = q
}
// Handle last 1-2 digits
if n >= 10 {
d1 := n / 10
d2 := n - d1*10
i -= 2
buf[i] = '0' + byte(d1)
buf[i+1] = '0' + byte(d2)
} else {
i--
buf[i] = '0' + byte(n)
}
return string(buf[i:])
}
func search(x []int, target int) (int, bool) {
// Inlining is faster than calling BinarySearchFunc with a lambda.
n := len(x)
// Define x[-1] < target and x[n] >= target.
// Invariant: x[i-1] < target, x[j] >= target.
i, j := 0, n
for i < j {
h := int(uint(i+j) >> 1) // avoid overflow when computing h
// i ≤ h < j
// (isNaN(x) && !isNaN(y)) || x < y
if (x[h] != x[h]) && (target != target) || x[h] < target {
i = h + 1 // preserves x[i-1] < target
} else {
j = h // preserves x[j] >= target
}
}
return i, i < n && (x[i] == target || (x[i] != x[i]) && (target != target))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment