eddiebergman · December 26, 2024 01:46
diff --git a/sparseset.rs b/sparseset.rs
 /*
 * SparseSet is a data structure that most importantly keeps all its stored
 * items in contiguous memory, while still allowing for O(1) insertions,
 * deletions, and lookups. These are usually objects you'd like to keep
 * a reference for later O(1) lookup and mutation. You may also think
 * of it as a `HashMap<usize, T>` but contiguous memory holding Vec<T>
 *
 * The contiguous part is particularly useful to be able to map them
 * into a GPU buffer, as the data transfer can only specify full blocks
 * of memory. This is why this structure **must** own the data it stores.
 * 
 * For comparison, doing so directly with just a Vec would mean
 * other objects can not hold references or pointers to the items in the Vec,
 * as they would be move around in memory once the Vec is resized.
 * The SparseSet has one layer of indirection to avoid this issue.
 *
 * A HashMap would also not be suitable, as it would not keep the items in
 * contiguous memory.
 *
 * The key downside is really that it's more memory innefficient due to
 * storing two index arrays, one of which is the size of your `data` vector,
 * the other one going up to the highest index you care to use. Also, if you
 * do not plan to delete any data regularly, the extra indirection is needless. 
 *
 * ```
 * let sparseset = SparseSet::new();
 *
 * // This will cause the `sparse` vector to grow to a size of 2_000
 * // even though there is only one item.
 * sparseset.insert(2_000, "hello");
 * ```
 *
 * As a result, this is best used with incremental indices, especially if you
 * can recycle the indices.

 * If sticking with a low number of items but you update/remove/add frequently,
 * you may call `SparseSet<T, u8>::with_capacity(255)` which has a fixed
 * memory cost over just a straight `Vec<T>` of:
 *
 * * `sizeof(U)*2*255` where for `u8` its `510` bytes, `u16` is `1020`
 *    and `u32` is `2040`.
 *
 * This stabalizes the memory allocation, preventing data being moved.
 *
 * This implementation is inspired by the bevy implementation of a sparse set:
 * * https://github.com/bevyengine/bevy/blob/f9c8f511fd50d5d2321464ef5889d33ae8eba72f/crates/bevy_ecs/src/storage/sparse_set.rs#L375
 *
 * Main changes are to not use the `nonmax` dependancy and just use `0` as
 * sentinal value directly to say that a given slot is empty.
 *
 * The `dirty` flag is not required, it's simply there to flag when the data
 * may need to be updated in the GPU buffer.
 */

 // This stuff about `Unsigned` is to allow for the use of u8, u16, u32, usize
 // as the elements of the index buffers.
 // `Default` just makes accessing 0 for the type much easier
 pub trait Unsigned: Copy + TryFrom<usize> + Default {
    fn as_usize(self) -> usize;
 }

 // u8,u16,usize support the .into::<usize>(), however u32 does not, despite `1000u32 as usize`
 // being valid. Hence, we just make new methods...
 impl Unsigned for usize {
    #[inline]
    fn as_usize(self) -> usize {
        self
    }
 }
 impl Unsigned for u8 {
    #[inline]
    fn as_usize(self) -> usize {
        self as usize
    }
 }
 impl Unsigned for u16 {
    #[inline]
    fn as_usize(self) -> usize {
        self as usize
    }
 }
 impl Unsigned for u32 {
    #[inline]
    fn as_usize(self) -> usize {
        self as usize
    }
 }

 #[derive(Debug, Clone)]
 pub struct SparseSet<T, I: Unsigned = usize> {
    // The contiguous memory that holds all the items
    data: Vec<T>,

    // Given a sparse index, returns the index into `data`
    sparse: Vec<I>,

    // Inverse operation to go from `data` back to what
    // index in `sparse` is pointing to it.
    // This will always be modified in the same way as `data`
    dense: Vec<I>,

    /// Indicate whether the data has been modified.
    /// See [SparseSet::is_dirty], [SparseSet::mark_clean] and [SparseSet::mark_dirty]
    dirty: bool,
 }

 impl<T, I: Unsigned> SparseSet<T, I> {
    pub fn new() -> Self {
        SparseSet {
            data: Vec::new(),
            dense: Vec::new(),
            sparse: Vec::new(),
            dirty: false,
        }
    }

    pub fn with_capacity(capacity: I) -> Self {
        SparseSet {
            data: Vec::with_capacity(capacity.as_usize()),
            dense: Vec::with_capacity(capacity.as_usize()),
            sparse: Vec::with_capacity(capacity.as_usize()),
            dirty: false,
        }
    }

    pub fn items(&self) -> &Vec<T> {
        &self.data
    }

    pub fn items_mut(&mut self) -> &mut Vec<T> {
        &mut self.data
    }

    pub fn len(&self) -> I {
        unsafe { I::try_from(self.data.len()).unwrap_unchecked() }
    }

    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    pub fn contains(&self, index: I) -> bool {
        !matches!(
            self.sparse.get(index.as_usize()).map(|x| x.as_usize()),
            Some(0) | None
        )
    }

    pub fn get(&self, index: I) -> Option<&T> {
        match self.sparse.get(index.as_usize()).map(|x| x.as_usize()) {
            Some(0) | None => None,
            Some(dense_index) => {
                // -1 because the indices contained in the sparse set are 1+
                let real_index = dense_index - 1;
                self.data.get(real_index)
            }
        }
    }

    pub fn get_mut(&mut self, index: I) -> Option<&mut T> {
        match self.sparse.get(index.as_usize()).map(|x| x.as_usize()) {
            Some(0) | None => None,
            Some(dense_index) => {
                // -1 because the indices contained in the sparse set are 1+
                let real_index = dense_index - 1;
                self.data.get_mut(real_index)
            }
        }
    }

    pub fn insert(&mut self, index: I, value: T) -> &T {
        let uindex = index.as_usize();
        match self.sparse.get(uindex).map(|x| x.as_usize()) {
            Some(0) => {
                // In bounds, but nothing in that slot
                let dense_index = self.data.len();
                let sparse_index = dense_index + 1;

                // Unsafe here as it's possible we fail to convert from usize above
                // into `I`  (e.g. u8). However we grow according to the `I` type so
                // this should not happen.
                // TODO: Test this with u8 up to boundary and see if it panics
                self.sparse[uindex] = unsafe { I::try_from(sparse_index).unwrap_unchecked() };

                self.dense.push(index);
                self.data.push(value);

                &self.data[dense_index]
            }
            Some(dense_index) => {
                // In bounds, something in the slot, we just update
                let real_index = dense_index - 1;
                self.data[real_index] = value;
                &self.data[real_index]
            }
            None => {
                // Out of bounds, we need to resize the sparse array
                self.sparse.resize(uindex + 1, I::default());
                let dense_index = self.data.len();
                let sparse_index = dense_index + 1;
                self.sparse[uindex] = unsafe { I::try_from(sparse_index).unwrap_unchecked() };

                self.data.push(value);
                self.dense.push(index);

                &self.data[dense_index]
            }
        }
    }

    pub fn remove(&mut self, index: I) -> Option<T> {
        let uindex = index.as_usize();
        match self.sparse.get(uindex).map(|x| x.as_usize()) {
            Some(0) | None => None,
            // In bounds and not empty slot
            Some(dense_index) => {
                let item_index = dense_index - 1;
                let last_item_index = self.data.len() - 1;

                // We are removing the last element
                if item_index == last_item_index {
                    self.sparse[uindex] = I::default();
                    self.dense.pop();
                    return self.data.pop();
                }

                let value = self.data.swap_remove(item_index);
                let last_index = self.dense.last().unwrap().as_usize();
                let sparse_index = self.dense.swap_remove(item_index);

                self.sparse[last_index] = unsafe { I::try_from(item_index + 1).unwrap_unchecked() };
                self.sparse[uindex] = I::default();

                Some(value)
            }
        }
    }

    pub fn is_dirty(&self) -> bool {
        self.dirty
    }

    pub fn mark_dirty(&mut self) {
        self.dirty = true;
    }

    pub fn mark_clean(&mut self) {
        self.dirty = false;
    }
 }

 impl<T> Default for SparseSet<T> {
    fn default() -> Self {
        Self::new()
    }
 }

 #[cfg(test)]
 mod tests {
    use super::*;

    #[test]
    fn test_sparse_set_insert() {
        let mut sparse_set = SparseSet::new();

        sparse_set.insert(0_u8, "a");
        sparse_set.insert(1, "b");
        sparse_set.insert(2, "c");
        assert_eq!(sparse_set.len(), 3);

        assert_eq!(sparse_set.get(0), Some(&"a"));
        assert_eq!(sparse_set.get(1), Some(&"b"));
        assert_eq!(sparse_set.get(2), Some(&"c"));

        assert!(sparse_set.contains(0));
        assert!(sparse_set.contains(1));
        assert!(sparse_set.contains(2));

        assert_eq!(sparse_set.items(), &["a", "b", "c"]);

        assert_eq!(sparse_set.get(3), None);
        assert!(!sparse_set.contains(3));
    }

    #[test]
    fn test_sparse_remove() {
        let mut sparse_set = SparseSet::new();

        sparse_set.insert(0_u8, "a");
        sparse_set.insert(1, "b");
        sparse_set.insert(2, "c");
        sparse_set.insert(4, "e");

        assert_eq!(sparse_set.remove(1), Some("b"));
        assert_eq!(sparse_set.len(), 3);
        assert_eq!(sparse_set.items(), &["a", "e", "c"]);

        assert_eq!(sparse_set.remove(2), Some("c"));
        assert_eq!(sparse_set.len(), 2);
        assert_eq!(sparse_set.items(), &["a", "e"]);

        assert_eq!(sparse_set.remove(0), Some("a"));
        assert_eq!(sparse_set.len(), 1);
        assert_eq!(sparse_set.items(), &["e"]);

        assert_eq!(sparse_set.remove(4), Some("e"));
        assert_eq!(sparse_set.len(), 0);
        assert!(sparse_set.is_empty());
    }
 }
	/*
	* SparseSet is a data structure that most importantly keeps all its stored
	* items in contiguous memory, while still allowing for O(1) insertions,
	* deletions, and lookups. These are usually objects you'd like to keep
	* a reference for later O(1) lookup and mutation. You may also think
	* of it as a `HashMap<usize, T>` but contiguous memory holding Vec<T>
	*
	* The contiguous part is particularly useful to be able to map them
	* into a GPU buffer, as the data transfer can only specify full blocks
	* of memory. This is why this structure must own the data it stores.
	*
	* For comparison, doing so directly with just a Vec would mean
	* other objects can not hold references or pointers to the items in the Vec,
	* as they would be move around in memory once the Vec is resized.
	* The SparseSet has one layer of indirection to avoid this issue.
	*
	* A HashMap would also not be suitable, as it would not keep the items in
	* contiguous memory.
	*
	* The key downside is really that it's more memory innefficient due to
	* storing two index arrays, one of which is the size of your `data` vector,
	* the other one going up to the highest index you care to use. Also, if you
	* do not plan to delete any data regularly, the extra indirection is needless.
	*
	* ```
	* let sparseset = SparseSet::new();
	*
	* // This will cause the `sparse` vector to grow to a size of 2_000
	* // even though there is only one item.
	* sparseset.insert(2_000, "hello");
	* ```
	*
	* As a result, this is best used with incremental indices, especially if you
	* can recycle the indices.

	* If sticking with a low number of items but you update/remove/add frequently,
	* you may call `SparseSet<T, u8>::with_capacity(255)` which has a fixed
	* memory cost over just a straight `Vec<T>` of:
	*
	* * `sizeof(U)2255` where for `u8` its `510` bytes, `u16` is `1020`
	* and `u32` is `2040`.
	*
	* This stabalizes the memory allocation, preventing data being moved.
	*
	* This implementation is inspired by the bevy implementation of a sparse set:
	* * https://github.com/bevyengine/bevy/blob/f9c8f511fd50d5d2321464ef5889d33ae8eba72f/crates/bevy_ecs/src/storage/sparse_set.rs#L375
	*
	* Main changes are to not use the `nonmax` dependancy and just use `0` as
	* sentinal value directly to say that a given slot is empty.
	*
	* The `dirty` flag is not required, it's simply there to flag when the data
	* may need to be updated in the GPU buffer.
	*/

	// This stuff about `Unsigned` is to allow for the use of u8, u16, u32, usize
	// as the elements of the index buffers.
	// `Default` just makes accessing 0 for the type much easier
	pub trait Unsigned: Copy + TryFrom<usize> + Default {
	fn as_usize(self) -> usize;
	}

	// u8,u16,usize support the .into::<usize>(), however u32 does not, despite `1000u32 as usize`
	// being valid. Hence, we just make new methods...
	impl Unsigned for usize {
	#[inline]
	fn as_usize(self) -> usize {
	self
	}
	}
	impl Unsigned for u8 {
	#[inline]
	fn as_usize(self) -> usize {
	self as usize
	}
	}
	impl Unsigned for u16 {
	#[inline]
	fn as_usize(self) -> usize {
	self as usize
	}
	}
	impl Unsigned for u32 {
	#[inline]
	fn as_usize(self) -> usize {
	self as usize
	}
	}

	#[derive(Debug, Clone)]
	pub struct SparseSet<T, I: Unsigned = usize> {
	// The contiguous memory that holds all the items
	data: Vec<T>,

	// Given a sparse index, returns the index into `data`
	sparse: Vec<I>,

	// Inverse operation to go from `data` back to what
	// index in `sparse` is pointing to it.
	// This will always be modified in the same way as `data`
	dense: Vec<I>,

	/// Indicate whether the data has been modified.
	/// See [SparseSet::is_dirty], [SparseSet::mark_clean] and [SparseSet::mark_dirty]
	dirty: bool,
	}

	impl<T, I: Unsigned> SparseSet<T, I> {
	pub fn new() -> Self {
	SparseSet {
	data: Vec::new(),
	dense: Vec::new(),
	sparse: Vec::new(),
	dirty: false,
	}
	}

	pub fn with_capacity(capacity: I) -> Self {
	SparseSet {
	data: Vec::with_capacity(capacity.as_usize()),
	dense: Vec::with_capacity(capacity.as_usize()),
	sparse: Vec::with_capacity(capacity.as_usize()),
	dirty: false,
	}
	}

	pub fn items(&self) -> &Vec<T> {
	&self.data
	}

	pub fn items_mut(&mut self) -> &mut Vec<T> {
	&mut self.data
	}

	pub fn len(&self) -> I {
	unsafe { I::try_from(self.data.len()).unwrap_unchecked() }
	}

	pub fn is_empty(&self) -> bool {
	self.data.is_empty()
	}

	pub fn contains(&self, index: I) -> bool {
	!matches!(
	self.sparse.get(index.as_usize()).map(\|x\| x.as_usize()),
	Some(0) \| None
	)
	}

	pub fn get(&self, index: I) -> Option<&T> {
	match self.sparse.get(index.as_usize()).map(\|x\| x.as_usize()) {
	Some(0) \| None => None,
	Some(dense_index) => {
	// -1 because the indices contained in the sparse set are 1+
	let real_index = dense_index - 1;
	self.data.get(real_index)
	}
	}
	}

	pub fn get_mut(&mut self, index: I) -> Option<&mut T> {
	match self.sparse.get(index.as_usize()).map(\|x\| x.as_usize()) {
	Some(0) \| None => None,
	Some(dense_index) => {
	// -1 because the indices contained in the sparse set are 1+
	let real_index = dense_index - 1;
	self.data.get_mut(real_index)
	}
	}
	}

	pub fn insert(&mut self, index: I, value: T) -> &T {
	let uindex = index.as_usize();
	match self.sparse.get(uindex).map(\|x\| x.as_usize()) {
	Some(0) => {
	// In bounds, but nothing in that slot
	let dense_index = self.data.len();
	let sparse_index = dense_index + 1;

	// Unsafe here as it's possible we fail to convert from usize above
	// into `I` (e.g. u8). However we grow according to the `I` type so
	// this should not happen.
	// TODO: Test this with u8 up to boundary and see if it panics
	self.sparse[uindex] = unsafe { I::try_from(sparse_index).unwrap_unchecked() };

	self.dense.push(index);
	self.data.push(value);

	&self.data[dense_index]
	}
	Some(dense_index) => {
	// In bounds, something in the slot, we just update
	let real_index = dense_index - 1;
	self.data[real_index] = value;
	&self.data[real_index]
	}
	None => {
	// Out of bounds, we need to resize the sparse array
	self.sparse.resize(uindex + 1, I::default());
	let dense_index = self.data.len();
	let sparse_index = dense_index + 1;
	self.sparse[uindex] = unsafe { I::try_from(sparse_index).unwrap_unchecked() };

	self.data.push(value);
	self.dense.push(index);

	&self.data[dense_index]
	}
	}
	}

	pub fn remove(&mut self, index: I) -> Option<T> {
	let uindex = index.as_usize();
	match self.sparse.get(uindex).map(\|x\| x.as_usize()) {
	Some(0) \| None => None,
	// In bounds and not empty slot
	Some(dense_index) => {
	let item_index = dense_index - 1;
	let last_item_index = self.data.len() - 1;

	// We are removing the last element
	if item_index == last_item_index {
	self.sparse[uindex] = I::default();
	self.dense.pop();
	return self.data.pop();
	}

	let value = self.data.swap_remove(item_index);
	let last_index = self.dense.last().unwrap().as_usize();
	let sparse_index = self.dense.swap_remove(item_index);

	self.sparse[last_index] = unsafe { I::try_from(item_index + 1).unwrap_unchecked() };
	self.sparse[uindex] = I::default();

	Some(value)
	}
	}
	}

	pub fn is_dirty(&self) -> bool {
	self.dirty
	}

	pub fn mark_dirty(&mut self) {
	self.dirty = true;
	}

	pub fn mark_clean(&mut self) {
	self.dirty = false;
	}
	}

	impl<T> Default for SparseSet<T> {
	fn default() -> Self {
	Self::new()
	}
	}

	#[cfg(test)]
	mod tests {
	use super::*;

	#[test]
	fn test_sparse_set_insert() {
	let mut sparse_set = SparseSet::new();

	sparse_set.insert(0_u8, "a");
	sparse_set.insert(1, "b");
	sparse_set.insert(2, "c");
	assert_eq!(sparse_set.len(), 3);

	assert_eq!(sparse_set.get(0), Some(&"a"));
	assert_eq!(sparse_set.get(1), Some(&"b"));
	assert_eq!(sparse_set.get(2), Some(&"c"));

	assert!(sparse_set.contains(0));
	assert!(sparse_set.contains(1));
	assert!(sparse_set.contains(2));

	assert_eq!(sparse_set.items(), &["a", "b", "c"]);

	assert_eq!(sparse_set.get(3), None);
	assert!(!sparse_set.contains(3));
	}

	#[test]
	fn test_sparse_remove() {
	let mut sparse_set = SparseSet::new();

	sparse_set.insert(0_u8, "a");
	sparse_set.insert(1, "b");
	sparse_set.insert(2, "c");
	sparse_set.insert(4, "e");

	assert_eq!(sparse_set.remove(1), Some("b"));
	assert_eq!(sparse_set.len(), 3);
	assert_eq!(sparse_set.items(), &["a", "e", "c"]);

	assert_eq!(sparse_set.remove(2), Some("c"));
	assert_eq!(sparse_set.len(), 2);
	assert_eq!(sparse_set.items(), &["a", "e"]);

	assert_eq!(sparse_set.remove(0), Some("a"));
	assert_eq!(sparse_set.len(), 1);
	assert_eq!(sparse_set.items(), &["e"]);

	assert_eq!(sparse_set.remove(4), Some("e"));
	assert_eq!(sparse_set.len(), 0);
	assert!(sparse_set.is_empty());
	}
	}