Skip to content

Instantly share code, notes, and snippets.

@AnthonyMikh
Last active January 13, 2026 18:37
Show Gist options
  • Select an option

  • Save AnthonyMikh/5f6dd08e0b1e88929f9cb4187e7f91eb to your computer and use it in GitHub Desktop.

Select an option

Save AnthonyMikh/5f6dd08e0b1e88929f9cb4187e7f91eb to your computer and use it in GitHub Desktop.
bumpalo-based thin string interner
use bumpalo::Bump;
use std::borrow::Borrow;
use std::cell::RefCell;
use std::collections::HashSet;
use std::fmt::{self, Debug, Display};
use std::hash::{self, Hash};
use std::marker::PhantomData;
use std::ptr::NonNull;
const USIZE_SIZE: usize = std::mem::size_of::<usize>();
const USIZE_ALIGN: usize = std::mem::align_of::<usize>();
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct InternedStr<'a> {
ptr: NonNull<u8>,
_lt: PhantomData<&'a ()>,
}
impl<'a> InternedStr<'a> {
unsafe fn from_ptr(ptr: NonNull<u8>) -> Self {
Self {
ptr,
_lt: PhantomData,
}
}
pub fn as_str(self) -> &'a str {
let len = unsafe { self.ptr.cast::<usize>().read() };
let ptr = unsafe { self.ptr.add(USIZE_SIZE) };
unsafe { str::from_utf8_unchecked(std::slice::from_raw_parts(ptr.as_ptr(), len)) }
}
pub fn ptr_eq(self, other: InternedStr<'_>) -> bool {
self.ptr == other.ptr
}
unsafe fn reborrow_unchecked<'b>(self) -> InternedStr<'b> {
InternedStr {
ptr: self.ptr,
_lt: PhantomData,
}
}
pub fn reborrow<'b>(self) -> Self
where
'a: 'b,
{
Self {
ptr: self.ptr,
_lt: PhantomData,
}
}
}
impl Borrow<str> for InternedStr<'_> {
fn borrow(&self) -> &str {
self.as_str()
}
}
impl Debug for InternedStr<'_> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
Debug::fmt(self.as_str(), f)
}
}
impl Display for InternedStr<'_> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
Display::fmt(self.as_str(), f)
}
}
impl Hash for InternedStr<'_> {
fn hash<H: hash::Hasher>(&self, state: &mut H) {
self.as_str().hash(state)
}
}
#[derive(Default)]
pub struct Interner<S = std::collections::hash_map::RandomState> {
strs: RefCell<HashSet<InternedStr<'static>, S>>,
mem: Bump<{ USIZE_ALIGN }>,
}
impl Interner {
pub fn new() -> Self {
Self::default()
}
pub fn alloc(&self, s: &str) -> InternedStr<'_> {
let mut strs = self.strs.borrow_mut();
if let Some(&interned) = strs.get(s) {
return interned;
}
let len = s.len();
let ptr = {
let alloc_len = len.checked_add(USIZE_SIZE).expect("length overflow");
let layout = std::alloc::Layout::array::<u8>(alloc_len)
.unwrap()
.align_to(USIZE_ALIGN)
.expect("invalid layout");
self.mem.alloc_layout(layout)
};
let s = unsafe {
ptr.cast::<usize>().write(len);
ptr.byte_add(USIZE_SIZE)
.as_ptr()
.copy_from_nonoverlapping(s.as_ptr(), len);
InternedStr::from_ptr(ptr)
};
strs.insert(unsafe { s.reborrow_unchecked() });
s
}
pub fn clear(&mut self) {
self.strs.get_mut().clear();
self.mem.reset();
}
}
#[test]
fn smoke() {
let mut i = Interner::new();
let h = i.alloc("hello");
let w = i.alloc("world");
let s = format!("{h}, {w}!");
assert_eq!(s, "hello, world!");
i.clear();
let a = i.alloc("hey");
let b = i.alloc("hey");
assert!(a.ptr_eq(b));
assert_eq!(a.as_str(), b.as_str());
i.clear();
let a = i.alloc("hey");
let b = i.alloc(a.as_str());
assert!(a.ptr_eq(b));
drop(i);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment