Created
March 17, 2021 20:16
-
-
Save nyorain/c6f6c7fc3047f9429618fd516be5f65f to your computer and use it in GitHub Desktop.
PageVector
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// VIL: Vulkan Live Introspection & Debugging Layer. | |
// Copyright (C) 2020-2021 Jan Kelling | |
// SPDX-License-Identifier: GPL-3.0-only | |
#pragma once | |
#include <algorithm> // std::min
#include <cassert> // assert
#include <cmath> // std::floor
#include <cstdlib> // size_t
#include <cstring> // std::memcpy
#include <memory> // std::allocator_traits, std::destroy
#include <new> // placement new
#include <stdexcept> // std::invalid_argument
#include <type_traits> // std::aligned_storage_t
#include <utility> // std::forward/std::move
namespace vil { | |
// Returns ceil(num / denom), efficiently, only using integer division.
// Requires denom > 0. Written as `1 + (num - 1) / denom` instead of the
// naive `(num + denom - 1) / denom` so it cannot wrap around for large num.
inline constexpr std::size_t ceilDivide(std::size_t num, std::size_t denom) {
	return num == 0 ? 0 : 1 + (num - 1) / denom;
}
// Returns log2(x). Throws an exception if x isn't a power of two (which
// means a compile-time error if evaluated as constexpr).
inline constexpr unsigned exactLog2(std::size_t x) {
	if(x == 0) {
		throw std::invalid_argument("log2(0) not defined");
	}
	// Strip trailing zero bits while counting them. For a power of two,
	// exactly a single 1-bit must remain afterwards.
	auto log = 0u;
	while((x & 1) == 0) {
		x >>= 1;
		++log;
	}
	if(x != 1) {
		throw std::invalid_argument("Given number is not power of two");
	}
	return log;
}
// Mixture of std::deque and std::vector. Mainly needed here since most
// std::deque implementations are shitty. This one is optimized for large
// allocations and it does not deal with the ugly bits.
// NOTE: could optimize for smaller allocations by not allocating a whole
// block at a time (would remove some of the theoretical advantages gained
// by the deque-like design tho)
// TODO: likely not exception-safe (when allocator or constructor throws).
template<typename T, typename Allocator = std::allocator<T>>
class PageVector {
public:
	static constexpr auto blockSize = 16 * 1024; // page granularity, in bytes
	static constexpr auto elemsPerBlock = blockSize / sizeof(T); // floor

	struct Block;
	struct MutIterator;
	struct ConstIterator;

	using AllocTraits = std::allocator_traits<Allocator>;
	using BlockAllocTraits = typename std::allocator_traits<Allocator>::template rebind_traits<Block>;
	using BlockPtr = typename BlockAllocTraits::pointer;

	using value_type = T;
	using allocator_type = Allocator;
	using size_type = std::size_t;
	using difference_type = std::ptrdiff_t;
	using pointer = typename AllocTraits::pointer;
	using const_pointer = typename AllocTraits::const_pointer;
	using reference = value_type&;
	using const_reference = const value_type&;
	using iterator = MutIterator;
	using const_iterator = ConstIterator;

	// One page of storage. `values` always points to an allocation of
	// exactly elemsPerBlock elements (allocated eagerly in doReserve).
	struct Block {
		pointer values; // [elemsPerBlock]
	};

	// Common iterator logic for MutIterator/ConstIterator.
	// NOTE: iterators are invalidated whenever the block table is
	// reallocated (i.e. whenever capacity grows), like std::vector.
	struct Iterator {
		Block* block_ {};
		unsigned elemID_ {};

		Iterator& operator++() {
			++elemID_;
			if(elemID_ == elemsPerBlock) {
				++block_;
				elemID_ = 0u;
			}
			return *this;
		}
		// Post-increment: returns the pre-increment value *by value*.
		// (Returning Iterator& here would hand out a dangling reference
		// to the local copy.)
		Iterator operator++(int) {
			auto ret = *this;
			++(*this);
			return ret;
		}
		friend bool operator==(const Iterator& a, const Iterator& b) {
			return a.block_ == b.block_ && a.elemID_ == b.elemID_;
		}
		friend bool operator!=(const Iterator& a, const Iterator& b) {
			return a.block_ != b.block_ || a.elemID_ != b.elemID_;
		}
		// Advances by `diff` (may be negative) using floored *integer*
		// division. Float-based floor would lose precision for large
		// offsets, and plain '%' yields negative remainders for
		// negative values.
		friend Iterator operator+(Iterator a, std::ptrdiff_t diff) {
			auto elem = std::ptrdiff_t(a.elemID_) + diff;
			auto blockOff = elem / std::ptrdiff_t(elemsPerBlock);
			auto rem = elem % std::ptrdiff_t(elemsPerBlock);
			if(rem < 0) { // turn truncated into floored division
				--blockOff;
				rem += std::ptrdiff_t(elemsPerBlock);
			}
			a.block_ += blockOff;
			a.elemID_ = unsigned(rem);
			return a;
		}
		friend Iterator operator-(Iterator a, std::ptrdiff_t diff) {
			return a + (-diff);
		}
		reference get() const {
			assert(elemID_ < elemsPerBlock);
			return block_->values[elemID_];
		}
	};
	struct MutIterator : Iterator {
		pointer operator->() const { return &Iterator::get(); }
		reference operator*() const { return Iterator::get(); }
	};
	struct ConstIterator : Iterator {
		ConstIterator(Block* block, unsigned elem) : Iterator{block, elem} {}
		ConstIterator(MutIterator it) : Iterator(it) {}
		const_pointer operator->() const { return &Iterator::get(); }
		const_reference operator*() const { return Iterator::get(); }
	};

public:
	PageVector(Allocator&& alloc = {}) : alloc_(std::move(alloc)) {
		static_assert(blockSize > sizeof(T));
		static_assert(elemsPerBlock > 0);
		static_assert(sizeof(Block) <= blockSize);
	}
	PageVector(PageVector&& rhs) noexcept : alloc_(rhs.alloc_) { swap(*this, rhs); }
	PageVector& operator=(PageVector&& rhs) noexcept {
		PageVector tmp(std::move(rhs));
		swap(*this, tmp);
		return *this;
	}

	// TODO: could be implemented, deep-copy
	PageVector(const PageVector&) = delete;
	PageVector& operator=(const PageVector&) = delete;

	~PageVector() {
		release();
	}

	// Ensures storage for at least newCapacity elements exists.
	// Does not change size(). Invalidates iterators when it grows.
	void reserve(std::size_t newCapacity) {
		if(newCapacity > capacity()) {
			doReserve(newCapacity);
		}
	}

	// Grows (value-initializing new elements, like std::vector) or
	// shrinks (destroying excess elements) to exactly newSize elements.
	void resize(std::size_t newSize) {
		if(newSize > size_) {
			resizeGrow(newSize);
		} else if(newSize < size_) {
			resizeShrink(newSize);
		}
	}

	// Constructs a new element in place at the end and returns it.
	template<typename... Args>
	reference emplace_back(Args&&... args) {
		auto [blockID, elemID] = offsets(size_);
		if(blockID >= numBlocks_) {
			doReserve(size_ + 1);
		}
		auto& block = blockat(blockID);
		auto* elem = new(&block.values[elemID]) T(std::forward<Args>(args)...);
		++size_; // only count the element once construction succeeded
		return *elem;
	}

	// Removes the last element and returns it by value.
	value_type pop_back() {
		assert(!empty());
		auto& elem = get(size_ - 1);
		auto ret = std::move(elem);
		std::destroy_at(&elem);
		--size_;
		return ret;
	}

	// TODO: shrink_to_fit could be implemented analogously to doReserve:
	// move the first ceilDivide(size_, elemsPerBlock) Block entries into
	// a smaller table and deallocate the `values` arrays of the dropped
	// blocks.

	// Removes all elements and releases all memory.
	// Basically like clear + shrink_to_fit.
	void release() {
		for(auto i = 0u; i < numBlocks_; ++i) {
			auto& block = blockat(i);
			// Destroy only the constructed elements of this block...
			if(i * elemsPerBlock < size_) {
				auto count = std::min(elemsPerBlock, size_ - i * elemsPerBlock);
				std::destroy(block.values, block.values + count);
			}
			// ...but free the storage of *every* block: doReserve
			// allocates `values` for all blocks up front, so blocks past
			// size_ own memory too (they previously leaked here).
			alloc_.deallocate(block.values, elemsPerBlock);
		}
		// Passing a null pointer to an allocator's deallocate is UB.
		if(blocks_) {
			blockAlloc().deallocate(blocks_, numBlocks_);
		}
		blocks_ = nullptr;
		numBlocks_ = 0u;
		size_ = 0u;
	}

	std::size_t size() const { return size_; }
	bool empty() const { return size_ == 0; } // predicate; was std::size_t
	std::size_t capacity() const { return numBlocks_ * elemsPerBlock; }
	void clear() { resizeShrink(0); }

	reference push_back(const T& value) { return emplace_back(value); }
	reference push_back(T&& value) { return emplace_back(std::move(value)); }
	reference operator[](std::size_t pos) { return get(pos); }
	const_reference operator[](std::size_t pos) const { return get(pos); }
	reference front() { return get(0); }
	const_reference front() const { return get(0); }
	reference back() { return get(size_ - 1); }
	const_reference back() const { return get(size_ - 1); }
	MutIterator begin() { return {blocks_, 0}; }
	ConstIterator begin() const { return {blocks_, 0}; }
	MutIterator end() { return endIter(); }
	ConstIterator end() const { return endIter(); }

	friend void swap(PageVector& a, PageVector& b) noexcept {
		using std::swap;
		swap(a.size_, b.size_);
		swap(a.alloc_, b.alloc_);
		swap(a.blocks_, b.blocks_);
		swap(a.numBlocks_, b.numBlocks_);
	}

private:
	struct Offsets {
		unsigned blockID;
		unsigned elemID;
	};

	// Splits a flat element position into (block id, element-in-block).
	Offsets offsets(std::size_t pos) const {
		// Plain '/' and '%' compile to a single division; the previous
		// std::ldiv detour additionally truncated pos through `long`.
		return {unsigned(pos / elemsPerBlock), unsigned(pos % elemsPerBlock)};
	}

	// Grows the block table so at least newCapacity elements fit and
	// eagerly allocates the `values` storage of every new block.
	void doReserve(std::size_t newCapacity) {
		assert(newCapacity > 0);
		// == ceilDivide(newCapacity, elemsPerBlock), overflow-safe form.
		auto newNumBlocks = 1 + (newCapacity - 1) / elemsPerBlock;
		assert(newNumBlocks > numBlocks_);
		auto newBlocksMem = blockAlloc().allocate(newNumBlocks);
		// Construct the Blocks one by one: placement array-new may add
		// an unspecified overhead before the first element.
		Block* newBlocks = &*newBlocksMem;
		for(std::size_t i = 0u; i < newNumBlocks; ++i) {
			new(newBlocks + i) Block{};
		}
		if(blocks_) { // memcpy from null is UB, even for zero bytes
			std::memcpy(newBlocks, &*blocks_, numBlocks_ * sizeof(Block));
		}
		// allocate storage for the newly added blocks
		for(auto i = numBlocks_; i < newNumBlocks; ++i) {
			newBlocks[i].values = alloc_.allocate(elemsPerBlock);
		}
		if(blocks_) {
			blockAlloc().deallocate(blocks_, numBlocks_);
		}
		blocks_ = newBlocksMem;
		numBlocks_ = newNumBlocks;
	}

	// Allocator rebound to Block, used for the block table itself.
	auto blockAlloc() {
		using R = typename AllocTraits::template rebind_alloc<Block>;
		return R(alloc_);
	}

	Block& blockat(unsigned id) {
		assert(id < numBlocks_);
		return blocks_[id];
	}
	const Block& blockat(unsigned id) const {
		assert(id < numBlocks_);
		return blocks_[id];
	}

	reference get(std::size_t pos) {
		auto [blockID, elemID] = offsets(pos);
		return blockat(blockID).values[elemID];
	}
	// const overload; required by the const accessors above.
	const_reference get(std::size_t pos) const {
		auto [blockID, elemID] = offsets(pos);
		return blockat(blockID).values[elemID];
	}

	MutIterator endIter() const {
		auto [block, elem] = offsets(size_);
		return {blocks_ + block, elem};
	}

	// Value-initializes elements in [size_, newSize), like std::vector.
	void resizeGrow(std::size_t newSize) {
		assert(newSize >= size_);
		reserve(newSize); // make sure we have enough storage
		auto [blockID, elemID] = offsets(size_);
		assert(elemID < elemsPerBlock);
		// Construct element by element instead of a placement array-new
		// (which has unspecified overhead bytes); also keeps size_
		// consistent if a constructor throws.
		while(size_ < newSize) {
			new(&blockat(blockID).values[elemID]) T();
			++size_;
			if(++elemID == elemsPerBlock) {
				++blockID;
				elemID = 0u;
			}
		}
	}

	// Destroys the elements in range [newSize, size_).
	void resizeShrink(std::size_t newSize) {
		assert(newSize <= size_);
		auto [blockID, elemID] = offsets(newSize);
		while(blockID < numBlocks_ && blockID * elemsPerBlock < size_) {
			auto& block = blockat(blockID);
			auto end = std::min(elemsPerBlock, size_ - blockID * elemsPerBlock);
			std::destroy(block.values + elemID, block.values + end);
			++blockID;
			elemID = 0;
		}
		size_ = newSize;
	}

private:
	Allocator alloc_;
	std::size_t size_ {};
	std::size_t numBlocks_ {};
	BlockPtr blocks_ {};
};
} // namespace vil |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment