Last active
December 26, 2024 01:46
-
-
Save eddiebergman/c4ade7f79b15a438f744b4a19ad5595a to your computer and use it in GitHub Desktop.
SparseSet: O(1) insert/delete/lookup with contiguous memory for efficient GPU buffer transfer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* SparseSet is a data structure that most importantly keeps all its stored | |
* items in contiguous memory, while still allowing for O(1) insertions, | |
* deletions, and lookups. These are usually objects you'd like to keep | |
* a reference for later O(1) lookup and mutation. You may also think | |
* of it as a `HashMap<usize, T>` but contiguous memory holding Vec<T> | |
* | |
* The contiguous part is particularly useful to be able to map them | |
* into a GPU buffer, as the data transfer can only specify full blocks | |
* of memory. This is why this structure **must** own the data it stores. | |
* | |
* For comparison, doing so directly with just a Vec would mean | |
* other objects can not hold references or pointers to the items in the Vec, | |
* as they would be move around in memory once the Vec is resized. | |
* The SparseSet has one layer of indirection to avoid this issue. | |
* | |
* A HashMap would also not be suitable, as it would not keep the items in | |
* contiguous memory. | |
* | |
* The key downside is really that it's more memory innefficient due to | |
* storing two index arrays, one of which is the size of your `data` vector, | |
* the other one going up to the highest index you care to use. Also, if you | |
* do not plan to delete any data regularly, the extra indirection is needless. | |
* | |
* ``` | |
* let sparseset = SparseSet::new(); | |
* | |
* // This will cause the `sparse` vector to grow to a size of 2_000 | |
* // even though there is only one item. | |
* sparseset.insert(2_000, "hello"); | |
* ``` | |
* | |
* As a result, this is best used with incremental indices, especially if you | |
* can recycle the indices. | |
* If sticking with a low number of items but you update/remove/add frequently, | |
* you may call `SparseSet<T, u8>::with_capacity(255)` which has a fixed | |
* memory cost over just a straight `Vec<T>` of: | |
* | |
* * `sizeof(U)*2*255` where for `u8` its `510` bytes, `u16` is `1020` | |
* and `u32` is `2040`. | |
* | |
* This stabalizes the memory allocation, preventing data being moved. | |
* | |
* This implementation is inspired by the bevy implementation of a sparse set: | |
* * https://github.com/bevyengine/bevy/blob/f9c8f511fd50d5d2321464ef5889d33ae8eba72f/crates/bevy_ecs/src/storage/sparse_set.rs#L375 | |
* | |
* Main changes are to not use the `nonmax` dependancy and just use `0` as | |
* sentinal value directly to say that a given slot is empty. | |
* | |
* The `dirty` flag is not required, it's simply there to flag when the data | |
* may need to be updated in the GPU buffer. | |
*/ | |
// This stuff about `Unsigned` is to allow for the use of u8, u16, u32, usize | |
// as the elements of the index buffers. | |
// `Default` just makes accessing 0 for the type much easier | |
pub trait Unsigned: Copy + TryFrom<usize> + Default { | |
fn as_usize(self) -> usize; | |
} | |
// u8,u16,usize support the .into::<usize>(), however u32 does not, despite `1000u32 as usize` | |
// being valid. Hence, we just make new methods... | |
impl Unsigned for usize { | |
#[inline] | |
fn as_usize(self) -> usize { | |
self | |
} | |
} | |
impl Unsigned for u8 { | |
#[inline] | |
fn as_usize(self) -> usize { | |
self as usize | |
} | |
} | |
impl Unsigned for u16 { | |
#[inline] | |
fn as_usize(self) -> usize { | |
self as usize | |
} | |
} | |
impl Unsigned for u32 { | |
#[inline] | |
fn as_usize(self) -> usize { | |
self as usize | |
} | |
} | |
#[derive(Debug, Clone)] | |
pub struct SparseSet<T, I: Unsigned = usize> { | |
// The contiguous memory that holds all the items | |
data: Vec<T>, | |
// Given a sparse index, returns the index into `data` | |
sparse: Vec<I>, | |
// Inverse operation to go from `data` back to what | |
// index in `sparse` is pointing to it. | |
// This will always be modified in the same way as `data` | |
dense: Vec<I>, | |
/// Indicate whether the data has been modified. | |
/// See [SparseSet::is_dirty], [SparseSet::mark_clean] and [SparseSet::mark_dirty] | |
dirty: bool, | |
} | |
impl<T, I: Unsigned> SparseSet<T, I> { | |
pub fn new() -> Self { | |
SparseSet { | |
data: Vec::new(), | |
dense: Vec::new(), | |
sparse: Vec::new(), | |
dirty: false, | |
} | |
} | |
pub fn with_capacity(capacity: I) -> Self { | |
SparseSet { | |
data: Vec::with_capacity(capacity.as_usize()), | |
dense: Vec::with_capacity(capacity.as_usize()), | |
sparse: Vec::with_capacity(capacity.as_usize()), | |
dirty: false, | |
} | |
} | |
pub fn items(&self) -> &Vec<T> { | |
&self.data | |
} | |
pub fn items_mut(&mut self) -> &mut Vec<T> { | |
&mut self.data | |
} | |
pub fn len(&self) -> I { | |
unsafe { I::try_from(self.data.len()).unwrap_unchecked() } | |
} | |
pub fn is_empty(&self) -> bool { | |
self.data.is_empty() | |
} | |
pub fn contains(&self, index: I) -> bool { | |
!matches!( | |
self.sparse.get(index.as_usize()).map(|x| x.as_usize()), | |
Some(0) | None | |
) | |
} | |
pub fn get(&self, index: I) -> Option<&T> { | |
match self.sparse.get(index.as_usize()).map(|x| x.as_usize()) { | |
Some(0) | None => None, | |
Some(dense_index) => { | |
// -1 because the indices contained in the sparse set are 1+ | |
let real_index = dense_index - 1; | |
self.data.get(real_index) | |
} | |
} | |
} | |
pub fn get_mut(&mut self, index: I) -> Option<&mut T> { | |
match self.sparse.get(index.as_usize()).map(|x| x.as_usize()) { | |
Some(0) | None => None, | |
Some(dense_index) => { | |
// -1 because the indices contained in the sparse set are 1+ | |
let real_index = dense_index - 1; | |
self.data.get_mut(real_index) | |
} | |
} | |
} | |
pub fn insert(&mut self, index: I, value: T) -> &T { | |
let uindex = index.as_usize(); | |
match self.sparse.get(uindex).map(|x| x.as_usize()) { | |
Some(0) => { | |
// In bounds, but nothing in that slot | |
let dense_index = self.data.len(); | |
let sparse_index = dense_index + 1; | |
// Unsafe here as it's possible we fail to convert from usize above | |
// into `I` (e.g. u8). However we grow according to the `I` type so | |
// this should not happen. | |
// TODO: Test this with u8 up to boundary and see if it panics | |
self.sparse[uindex] = unsafe { I::try_from(sparse_index).unwrap_unchecked() }; | |
self.dense.push(index); | |
self.data.push(value); | |
&self.data[dense_index] | |
} | |
Some(dense_index) => { | |
// In bounds, something in the slot, we just update | |
let real_index = dense_index - 1; | |
self.data[real_index] = value; | |
&self.data[real_index] | |
} | |
None => { | |
// Out of bounds, we need to resize the sparse array | |
self.sparse.resize(uindex + 1, I::default()); | |
let dense_index = self.data.len(); | |
let sparse_index = dense_index + 1; | |
self.sparse[uindex] = unsafe { I::try_from(sparse_index).unwrap_unchecked() }; | |
self.data.push(value); | |
self.dense.push(index); | |
&self.data[dense_index] | |
} | |
} | |
} | |
pub fn remove(&mut self, index: I) -> Option<T> { | |
let uindex = index.as_usize(); | |
match self.sparse.get(uindex).map(|x| x.as_usize()) { | |
Some(0) | None => None, | |
// In bounds and not empty slot | |
Some(dense_index) => { | |
let item_index = dense_index - 1; | |
let last_item_index = self.data.len() - 1; | |
// We are removing the last element | |
if item_index == last_item_index { | |
self.sparse[uindex] = I::default(); | |
self.dense.pop(); | |
return self.data.pop(); | |
} | |
let value = self.data.swap_remove(item_index); | |
let last_index = self.dense.last().unwrap().as_usize(); | |
let sparse_index = self.dense.swap_remove(item_index); | |
self.sparse[last_index] = unsafe { I::try_from(item_index + 1).unwrap_unchecked() }; | |
self.sparse[uindex] = I::default(); | |
Some(value) | |
} | |
} | |
} | |
pub fn is_dirty(&self) -> bool { | |
self.dirty | |
} | |
pub fn mark_dirty(&mut self) { | |
self.dirty = true; | |
} | |
pub fn mark_clean(&mut self) { | |
self.dirty = false; | |
} | |
} | |
impl<T> Default for SparseSet<T> { | |
fn default() -> Self { | |
Self::new() | |
} | |
} | |
#[cfg(test)] | |
mod tests { | |
use super::*; | |
#[test] | |
fn test_sparse_set_insert() { | |
let mut sparse_set = SparseSet::new(); | |
sparse_set.insert(0_u8, "a"); | |
sparse_set.insert(1, "b"); | |
sparse_set.insert(2, "c"); | |
assert_eq!(sparse_set.len(), 3); | |
assert_eq!(sparse_set.get(0), Some(&"a")); | |
assert_eq!(sparse_set.get(1), Some(&"b")); | |
assert_eq!(sparse_set.get(2), Some(&"c")); | |
assert!(sparse_set.contains(0)); | |
assert!(sparse_set.contains(1)); | |
assert!(sparse_set.contains(2)); | |
assert_eq!(sparse_set.items(), &["a", "b", "c"]); | |
assert_eq!(sparse_set.get(3), None); | |
assert!(!sparse_set.contains(3)); | |
} | |
#[test] | |
fn test_sparse_remove() { | |
let mut sparse_set = SparseSet::new(); | |
sparse_set.insert(0_u8, "a"); | |
sparse_set.insert(1, "b"); | |
sparse_set.insert(2, "c"); | |
sparse_set.insert(4, "e"); | |
assert_eq!(sparse_set.remove(1), Some("b")); | |
assert_eq!(sparse_set.len(), 3); | |
assert_eq!(sparse_set.items(), &["a", "e", "c"]); | |
assert_eq!(sparse_set.remove(2), Some("c")); | |
assert_eq!(sparse_set.len(), 2); | |
assert_eq!(sparse_set.items(), &["a", "e"]); | |
assert_eq!(sparse_set.remove(0), Some("a")); | |
assert_eq!(sparse_set.len(), 1); | |
assert_eq!(sparse_set.items(), &["e"]); | |
assert_eq!(sparse_set.remove(4), Some("e")); | |
assert_eq!(sparse_set.len(), 0); | |
assert!(sparse_set.is_empty()); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment