Last active
April 3, 2020 22:58
-
-
Save matklad/44ba1a5a6168bc0c26c995131c007907 to your computer and use it in GitHub Desktop.
Concurrent Interner for Rust
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::{ | |
cell::RefCell, | |
hash::{BuildHasher, Hash, Hasher}, | |
sync::Arc, | |
}; | |
// hashbrown = "0.7" | |
use hashbrown::{hash_map::DefaultHashBuilder, HashMap}; | |
// parking_lot = "0.10" | |
use parking_lot::{Mutex, RwLock}; | |
#[derive(PartialEq, Eq, Clone, Copy)] | |
pub struct Symbol(u32); | |
pub struct GlobalInterner<K: 'static> { | |
map_arena: Mutex<(HashMap<&'static K, Symbol>, Arena<K>)>, | |
vec: RwLock<Vec<&'static K>>, | |
} | |
pub struct LocalInterner<K: 'static> { | |
global: Arc<GlobalInterner<K>>, | |
map: HashMap<&'static K, Symbol>, | |
vec: RefCell<Vec<Option<&'static K>>>, | |
} | |
impl<K: 'static + Hash + Eq> GlobalInterner<K> { | |
pub fn with_capacity(cap: usize) -> Arc<GlobalInterner<K>> { | |
Arc::new(GlobalInterner { | |
map_arena: Mutex::new((Default::default(), Arena::with_capacity(cap))), | |
vec: Default::default(), | |
}) | |
} | |
pub fn fork(self: &Arc<GlobalInterner<K>>) -> LocalInterner<K> { | |
LocalInterner { | |
global: Arc::clone(self), | |
map: Default::default(), | |
vec: Default::default(), | |
} | |
} | |
pub fn intern(&self, key: K) -> Symbol { | |
self.intern_hashed(hash(&key), key).1 | |
} | |
fn intern_hashed(&self, hash: u64, key: K) -> (&K, Symbol) { | |
let mut map_arena = self.map_arena.lock(); | |
let (map, arena) = &mut *map_arena; | |
let (&mut interned, &mut sym) = map | |
.raw_entry_mut() | |
.from_key_hashed_nocheck(hash, &key) | |
.or_insert_with(|| { | |
let interned = arena.push(key); | |
let interned: &'static K = unsafe { &*interned }; | |
let mut vec = self.vec.write(); | |
let sym = Symbol(vec.len() as u32); | |
vec.push(interned); | |
(interned, sym) | |
}); | |
(interned, sym) | |
} | |
pub fn lookup(&self, symbol: Symbol) -> &K { | |
self.vec.read()[symbol.0 as usize] | |
} | |
} | |
impl<K: Hash + Eq + 'static> LocalInterner<K> { | |
pub fn intern(&mut self, key: K) -> Symbol { | |
let global = &*self.global; | |
let hash = hash(&key); | |
let (&mut interned, &mut sym) = self | |
.map | |
.raw_entry_mut() | |
.from_key_hashed_nocheck(hash, &key) | |
.or_insert_with(|| { | |
let (interned, sym) = global.intern_hashed(hash, key); | |
let interned: &'static K = unsafe { &*(interned as *const _) }; | |
(interned, sym) | |
}); | |
self.record(interned, sym); | |
sym | |
} | |
pub fn lookup(&self, sym: Symbol) -> &K { | |
match self.vec.borrow().get(sym.0 as usize) { | |
Some(Some(interned)) => return interned, | |
_ => (), | |
} | |
let interned = self.global.lookup(sym); | |
let interned: &'static K = unsafe { &*(interned as *const _) }; | |
self.record(interned, sym); | |
interned | |
} | |
fn record(&self, interned: &'static K, sym: Symbol) { | |
let mut vec = self.vec.borrow_mut(); | |
let len = vec.len().max(sym.0 as usize + 1); | |
vec.resize(len, None); | |
vec[sym.0 as usize] = Some(interned); | |
} | |
} | |
struct Arena<K> { | |
buf: Vec<K>, | |
full: Vec<Vec<K>>, | |
} | |
impl<K> Arena<K> { | |
fn with_capacity(cap: usize) -> Arena<K> { | |
let cap = cap.next_power_of_two(); | |
Arena { | |
buf: Vec::with_capacity(cap), | |
full: Vec::new(), | |
} | |
} | |
fn push(&mut self, value: K) -> *const K { | |
if self.buf.len() == self.buf.capacity() { | |
let new_buf = Vec::with_capacity(self.buf.capacity() * 2); | |
let old_buf = std::mem::replace(&mut self.buf, new_buf); | |
self.full.push(old_buf); | |
} | |
self.buf.push(value); | |
self.buf.last().unwrap() | |
} | |
} | |
fn hash<K: Hash>(key: &K) -> u64 { | |
let mut hasher = DefaultHashBuilder::new().build_hasher(); | |
key.hash(&mut hasher); | |
hasher.finish() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment