Quick, dirty and slow HashSet based on https://www.youtube.com/watch?v=ncHmEUmJZf4
Created
July 7, 2018 21:42
-
-
Save fcofdez/4682fea311e3bb6342c14067774068f8 to your computer and use it in GitHub Desktop.
Dense hash set
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::hash::Hash; | |
use std::hash::BuildHasher; | |
use std::hash::Hasher; | |
use std::collections::hash_map::RandomState; | |
use std::mem; | |
use std::arch::x86_64::*; | |
use std::fmt::Debug; | |
const LOAD_FACTOR: f32 = 0.75; | |
const INITIAL_GROUPS: usize = 20; | |
#[derive(Clone)] | |
struct Group<T> { | |
metadata: [u8; 16], | |
slots: [T; 16], | |
} | |
impl<T> Default for Group<T> | |
where | |
T: Default + Copy, | |
{ | |
fn default() -> Group<T> { | |
Group { | |
metadata: [0; 16], | |
slots: [Default::default(); 16], | |
} | |
} | |
} | |
pub struct DenseHashSet<V, S = RandomState> { | |
groups: Vec<Group<V>>, | |
hash_builder: S, | |
} | |
fn capacity(desired: usize) -> usize { | |
let mut n = desired - 1; | |
n |= n >> 1; | |
n |= n >> 2; | |
n |= n >> 4; | |
n |= n >> 8; | |
n |= n >> 16; | |
n | |
} | |
fn H1(hash: u64) -> u64 { | |
hash >> 7 | |
} | |
fn H2(hash: u64) -> u64 { | |
hash & 0x7f | |
} | |
impl<K, S> Default for DenseHashSet<K, S> | |
where | |
K: Eq + Hash + Default + Clone + Copy + Debug, | |
S: BuildHasher + Default, | |
{ | |
fn default() -> DenseHashSet<K, S> { | |
DenseHashSet::with_hasher(Default::default()) | |
} | |
} | |
impl<V, S> DenseHashSet<V, S> | |
where | |
V: Hash + Eq + Clone + Default + Copy + Debug, | |
S: BuildHasher, | |
{ | |
fn with_hasher(s: S) -> DenseHashSet<V, S> { | |
DenseHashSet { | |
groups: vec![Default::default(); capacity(INITIAL_GROUPS)], | |
hash_builder: s, | |
} | |
} | |
fn insert(&mut self, elem: V) { | |
let hash = self.make_hash(&elem); | |
let h1: usize = H1(hash) as usize; | |
let group = h1 % self.groups.len(); | |
let h2 = H2(hash) as u8; | |
let x = &mut self.groups[group]; | |
mem::replace(&mut x.slots[h1 % 16], elem); | |
mem::replace(&mut x.metadata[h1 % 16], h2); | |
} | |
fn find(&self, elem: V) -> Option<V> { | |
let hash = self.make_hash(&elem); | |
let h1: usize = H1(hash) as usize; | |
let group = h1 % self.groups.len(); | |
let h2 = H2(hash) as i8; | |
let hh2 = H2(hash) as u8; | |
let x = &self.groups[group]; | |
let mut pos = h1 % 16; | |
let h = unsafe { _mm_set1_epi8(h2) }; | |
let mut h2_match = unsafe { _mm_movemask_epi8(_mm_cmpeq_epi8(h, unsafe { mem::transmute(x.metadata) })) }; | |
let mut idx = 0; | |
loop { | |
if h2_match == 0 { | |
break; | |
} | |
if h2_match & 1 == 1 && x.slots[idx] == elem { | |
return Some(x.slots[idx]); | |
} | |
h2_match = h2_match >> 1; | |
idx += 1; | |
} | |
return None; | |
} | |
pub fn make_hash<T: ?Sized>(&self, t: &T) -> u64 | |
where | |
T: Hash, | |
{ | |
let mut state = self.hash_builder.build_hasher(); | |
t.hash(&mut state); | |
state.finish() | |
} | |
} | |
#[cfg(test)] | |
mod tests { | |
use super::*; | |
#[test] | |
fn it_works() { | |
let mut x: DenseHashSet<i32> = Default::default(); | |
x.insert(12); | |
x.insert(256); | |
assert_eq!(x.find(12), Some(12)); | |
assert_eq!(x.find(256), Some(256)); | |
assert_eq!(x.find(11), None); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment