Last active
December 19, 2025 01:39
-
-
Save jweinst1/318689b3bf03f39f82e90ebaa2ca8118 to your computer and use it in GitHub Desktop.
automatic bit sorting in C++
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <array> | |
| #include <cstdint> | |
| #include <cstddef> | |
| #include <cmath> | |
| #include <cstdio> | |
| #include <climits> | |
| #include <vector> | |
| #include <cassert> | |
| #include <random> | |
| #include <chrono> | |
| #include <iostream> | |
| #include <limits> | |
| /** | |
| * Handles bitset lesser or greater than where | |
| * 0 is the least significant bit, | |
| * 0101 -> 0 and 2 are in the set. | |
| * */ | |
| bool getNextGreaterThan(size_t input, size_t target, size_t* out) { | |
| const size_t shifted = input >> (target + 1); | |
| if (!shifted) return false; | |
| *out = __builtin_ctzll(shifted) + target + 1; | |
| return true; | |
| } | |
| bool getNextGreaterThanOrEqual(size_t input, size_t target, size_t* out) { | |
| const size_t shifted = input >> (target); | |
| if (!shifted) return false; | |
| *out = __builtin_ctzll(shifted) + target; | |
| return true; | |
| } | |
| // Special function for getting all of a lowest block | |
| bool getAllFromLowest(size_t input, size_t* out) { | |
| const size_t clamped = input & ((size_t)-1); | |
| if (!clamped) return false; | |
| *out = 63 - __builtin_clzll(clamped); | |
| return true; | |
| } | |
| bool getAllFromGreatest(size_t input, size_t* out) { | |
| if (!input) return false; | |
| *out = __builtin_ctzll(input); | |
| return true; | |
| } | |
| bool getNextLowerThan(size_t input, size_t target, size_t* out) { | |
| const size_t clamped = input & ((size_t{1} << (target)) - 1); | |
| if (!clamped) return false; | |
| *out = 63 - __builtin_clzll(clamped); | |
| return true; | |
| } | |
| bool getNextLowerThanOrEqual(size_t input, size_t target, size_t* out) { | |
| const size_t clamped = input & ((size_t{1} << (target + 1)) - 1); | |
| if (!clamped) return false; | |
| *out = 63 - __builtin_clzll(clamped); | |
| return true; | |
| } | |
| struct ConstexprBitset64 { | |
| uint64_t block = 0; | |
| constexpr void set(size_t idx) { | |
| block |= uint64_t{1} << idx; | |
| } | |
| constexpr void clear(size_t idx) { | |
| block &= ~(uint64_t{1} << idx); | |
| } | |
| constexpr bool test(size_t idx) const { | |
| return (block >> idx) & 1u; | |
| } | |
| constexpr bool getAllFromLowest(size_t* out) const { | |
| return ::getAllFromLowest(block, out); | |
| } | |
| constexpr bool getAllFromGreatest(size_t* out) const { | |
| return ::getAllFromGreatest(block, out); | |
| } | |
| constexpr bool getNextLowerThan(size_t value, size_t* out) const { | |
| return ::getNextLowerThan(block, value, out); | |
| } | |
| constexpr bool getNextLowerThanOrEqual(size_t value, size_t* out) const { | |
| return ::getNextLowerThanOrEqual(block, value, out); | |
| } | |
| constexpr bool getNextGreaterThan(size_t value, size_t* out) const { | |
| return ::getNextGreaterThan(block, value, out); | |
| } | |
| constexpr bool getNextGreaterThanOrEqual(size_t value, size_t* out) const { | |
| return ::getNextGreaterThanOrEqual(block, value, out); | |
| } | |
| // mean approximate functions | |
| constexpr size_t getMeanApprox64() const { | |
| // Uses 64 bit approx | |
| return __builtin_popcountll(block) >> 1; | |
| } | |
| }; | |
| template<size_t maxIntegerBits> | |
| struct SortTable { | |
| static constexpr size_t blockCount = maxIntegerBits / 64; | |
| ConstexprBitset64 cells[blockCount] = {0}; | |
| constexpr size_t mean() const { | |
| size_t total = 0; | |
| for (int i = 0; i < blockCount; ++i) | |
| { | |
| total += cells[i].getMeanApprox64(); | |
| } | |
| return total; | |
| } | |
| constexpr void set(size_t idx) { | |
| const size_t block = idx >> 6; | |
| const size_t offset = idx & 63; | |
| cells[block].set(offset); | |
| } | |
| constexpr bool getNextLowerThan(size_t value, size_t* out) const { | |
| const size_t block = value >> 6; | |
| const size_t offset = value & 63; | |
| size_t i = block; | |
| size_t cur_out = 0; | |
| bool res = cells[i].getNextLowerThan(offset, &cur_out); | |
| while (!res && i > 0) { | |
| --i; | |
| res = cells[i].getAllFromLowest(&cur_out); | |
| } | |
| if (res) { | |
| *out = cur_out + (i << 6); | |
| return true; | |
| } | |
| return false; | |
| } | |
| constexpr bool getNextGreaterThan(size_t value, size_t* out) const { | |
| const size_t block = value >> 6; | |
| const size_t offset = value & 63; | |
| size_t i = block; | |
| size_t cur_out = 0; | |
| bool res = cells[i].getNextGreaterThan(offset, &cur_out); | |
| while (!res && i < blockCount) { | |
| ++i; | |
| res = cells[i].getAllFromGreatest(&cur_out); | |
| } | |
| if (res) { | |
| *out = cur_out + (i << 6); | |
| return true; | |
| } | |
| return false; | |
| } | |
| }; | |
| // todo block based filter that keeps bit index if any in a 64 bit field is set or not. | |
| // todo dual level filter for 32bit + sizes | |
| enum class LeafType { | |
| Value, | |
| Table | |
| }; | |
| union LeafValue { | |
| void* ptr; | |
| size_t value; | |
| }; | |
| struct SortTableNode { | |
| LeafType type = LeafType::Value; | |
| LeafValue obj; | |
| }; | |
| struct SortTableLevel32 { | |
| std::vector<SortTable<65535>> memAlloc; | |
| SortTableNode levelTwo[65535]; | |
| SortTable<65535> myBits; | |
| // insert | |
| // insert after value. | |
| // get lower greater | |
| }; | |
| static void printLowerThan(size_t set, size_t target) { | |
| size_t out = 0; | |
| if(getNextLowerThan(set, target, &out)) { | |
| std::printf("%zu\n", out); | |
| } | |
| } | |
| static void printGreaterThan(size_t set, size_t target) { | |
| size_t out = 0; | |
| if(getNextGreaterThan(set, target, &out)) { | |
| std::printf("%zu\n", out); | |
| } | |
| } | |
| static void printGreaterThanOrEqual(size_t set, size_t target) { | |
| size_t out = 0; | |
| if(getNextGreaterThanOrEqual(set, target, &out)) { | |
| std::printf("%zu\n", out); | |
| } | |
| } | |
| static constexpr size_t testSampleSize = 1024 * 1024 * 10; | |
| int main(int argc, char const *argv[]) | |
| { | |
| std::random_device rd; | |
| std::mt19937 gen(rd()); | |
| std::uniform_int_distribution<uint16_t> distrib( | |
| std::numeric_limits<uint16_t>::min(), | |
| std::numeric_limits<uint16_t>::max() | |
| ); | |
| std::vector<uint16_t> randData; | |
| size_t curRand = 0; | |
| for (int i = 0; i < testSampleSize; ++i) | |
| { | |
| randData.push_back(static_cast<uint16_t>(distrib(gen))); | |
| } | |
| printGreaterThan(0b00100101, 0); | |
| printGreaterThan(0b00100101, 3); | |
| printGreaterThanOrEqual(0b00100100, 6); | |
| printGreaterThanOrEqual(0b00100100, 5); | |
| printLowerThan((size_t{1} << 63) | (uint64_t{1} << 58), 63); | |
| SortTable<65535> tab; | |
| for (int i = 0; i < (testSampleSize / 30); ++i) | |
| { | |
| tab.set(randData[i]); | |
| } | |
| size_t total = 0; | |
| auto start = std::chrono::high_resolution_clock::now(); | |
| for (int i = 0; i < testSampleSize; ++i) | |
| { | |
| tab.getNextGreaterThan(randData[i], &total); | |
| } | |
| auto end = std::chrono::high_resolution_clock::now(); | |
| // 1.6us | |
| std::cout << total << " " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() << "us\n"; | |
| return 0; | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <array> | |
| #include <cstdint> | |
| #include <cstddef> | |
| #include <cmath> | |
| #include <cstdio> | |
| #include <climits> | |
| #include <vector> | |
| #include <cassert> | |
| #include <random> | |
| #include <chrono> | |
| #include <iostream> | |
| #include <limits> | |
| #include <cassert> | |
| #include <memory> | |
| /** | |
| * Handles bitset lesser or greater than where | |
| * 0 is the least significant bit, | |
| * 0101 -> 0 and 2 are in the set. | |
| * */ | |
| bool getNextGreaterThan(size_t input, size_t target, size_t* out) { | |
| const size_t shifted = input >> (target + 1); | |
| if (!shifted) return false; | |
| *out = __builtin_ctzll(shifted) + target + 1; | |
| return true; | |
| } | |
| bool getNextGreaterThanOrEqual(size_t input, size_t target, size_t* out) { | |
| const size_t shifted = input >> (target); | |
| if (!shifted) return false; | |
| *out = __builtin_ctzll(shifted) + target; | |
| return true; | |
| } | |
| // Special function for getting all of a lowest block | |
| bool getAllFromLowest(size_t input, size_t* out) { | |
| const size_t clamped = input & ((size_t)-1); | |
| if (!clamped) return false; | |
| *out = 63 - __builtin_clzll(clamped); | |
| return true; | |
| } | |
| bool getAllFromGreatest(size_t input, size_t* out) { | |
| if (!input) return false; | |
| *out = __builtin_ctzll(input); | |
| return true; | |
| } | |
| bool getNextLowerThan(size_t input, size_t target, size_t* out) { | |
| const size_t clamped = input & ((size_t{1} << (target)) - 1); | |
| if (!clamped) return false; | |
| *out = 63 - __builtin_clzll(clamped); | |
| return true; | |
| } | |
| bool getNextLowerThanOrEqual(size_t input, size_t target, size_t* out) { | |
| const size_t clamped = input & ((size_t{1} << (target + 1)) - 1); | |
| if (!clamped) return false; | |
| *out = 63 - __builtin_clzll(clamped); | |
| return true; | |
| } | |
| struct ConstexprBitset64 { | |
| uint64_t block = 0; | |
| constexpr void set(size_t idx) { | |
| block |= uint64_t{1} << idx; | |
| } | |
| constexpr void clear(size_t idx) { | |
| block &= ~(uint64_t{1} << idx); | |
| } | |
| constexpr bool test(size_t idx) const { | |
| return (block >> idx) & 1u; | |
| } | |
| constexpr bool getAllFromLowest(size_t* out) const { | |
| return ::getAllFromLowest(block, out); | |
| } | |
| constexpr bool getAllFromGreatest(size_t* out) const { | |
| return ::getAllFromGreatest(block, out); | |
| } | |
| constexpr bool getNextLowerThan(size_t value, size_t* out) const { | |
| return ::getNextLowerThan(block, value, out); | |
| } | |
| constexpr bool getNextLowerThanOrEqual(size_t value, size_t* out) const { | |
| return ::getNextLowerThanOrEqual(block, value, out); | |
| } | |
| constexpr bool getNextGreaterThan(size_t value, size_t* out) const { | |
| return ::getNextGreaterThan(block, value, out); | |
| } | |
| constexpr bool getNextGreaterThanOrEqual(size_t value, size_t* out) const { | |
| return ::getNextGreaterThanOrEqual(block, value, out); | |
| } | |
| // mean approximate functions | |
| constexpr size_t getMeanApprox64() const { | |
| // Uses 64 bit approx | |
| return __builtin_popcountll(block) >> 1; | |
| } | |
| }; | |
| template<size_t maxIntegerBits> | |
| struct SortTable { | |
| static constexpr size_t blockCount = maxIntegerBits / 64; | |
| ConstexprBitset64 cells[blockCount] = {0}; | |
| constexpr size_t mean() const { | |
| size_t total = 0; | |
| for (int i = 0; i < blockCount; ++i) | |
| { | |
| total += cells[i].getMeanApprox64(); | |
| } | |
| return total; | |
| } | |
| constexpr void set(size_t idx) { | |
| const size_t block = idx >> 6; | |
| const size_t offset = idx & 63; | |
| cells[block].set(offset); | |
| } | |
| constexpr bool getNextLowerThan(size_t value, size_t* out) const { | |
| const size_t block = value >> 6; | |
| const size_t offset = value & 63; | |
| size_t i = block; | |
| size_t cur_out = 0; | |
| bool res = cells[i].getNextLowerThan(offset, &cur_out); | |
| while (!res && i > 0) { | |
| --i; | |
| res = cells[i].getAllFromLowest(&cur_out); | |
| } | |
| if (res) { | |
| *out = cur_out + (i << 6); | |
| return true; | |
| } | |
| return false; | |
| } | |
| constexpr bool getNextGreaterThan(size_t value, size_t* out) const { | |
| const size_t block = value >> 6; | |
| const size_t offset = value & 63; | |
| size_t i = block; | |
| size_t cur_out = 0; | |
| bool res = cells[i].getNextGreaterThan(offset, &cur_out); | |
| while (!res && i < blockCount) { | |
| ++i; | |
| res = cells[i].getAllFromGreatest(&cur_out); | |
| } | |
| if (res) { | |
| *out = cur_out + (i << 6); | |
| return true; | |
| } | |
| return false; | |
| } | |
| }; | |
| // todo block based filter that keeps bit index if any in a 64 bit field is set or not. | |
| // todo dual level filter for 32bit + sizes | |
| enum class LeafType { | |
| Empty, | |
| Value, | |
| Table | |
| }; | |
| union LeafValue { | |
| void* ptr; | |
| size_t value; | |
| }; | |
| struct SortTableNode { | |
| LeafType type = LeafType::Empty; | |
| LeafValue obj = {}; | |
| }; | |
| struct SortTableLevel32 { | |
| std::allocator<SortTable<65537>> memAlloc; | |
| SortTableNode levelTwo[65537]; | |
| SortTable<65537> myBits; | |
| void insert(uint32_t number) { | |
| const uint16_t first = number >> 16; | |
| const uint16_t second = number & 0xffff; | |
| SortTableNode& spot = levelTwo[first]; | |
| if (spot.type == LeafType::Empty) { | |
| spot.type = LeafType::Value; | |
| spot.obj.value = number; | |
| myBits.set(first); | |
| return; | |
| } else if (spot.type == LeafType::Value) { | |
| if (spot.obj.value == number) { | |
| return; | |
| } | |
| uint16_t oldSecond = spot.obj.value & 0xffff; | |
| spot.type = LeafType::Table; | |
| SortTable<65537>* myPtr = memAlloc.allocate(1); | |
| new (myPtr) SortTable<65537>(); | |
| myPtr->set(second); | |
| myPtr->set(oldSecond); | |
| spot.obj.ptr = (void*)myPtr; | |
| return; | |
| } else { | |
| assert(spot.type == LeafType::Table); | |
| SortTable<65537>* myPtr = (SortTable<65537>*)spot.obj.ptr; | |
| myPtr->set(second); | |
| } | |
| } | |
| uint32_t getGreater(uint32_t number) { | |
| const uint16_t first = number >> 16; | |
| const uint16_t second = number & 0xffff; | |
| size_t output = 0; | |
| puts("first greater than"); | |
| if(!myBits.getNextGreaterThan(first, &output)) { | |
| // todo failures. | |
| return (uint32_t)-1; | |
| } | |
| const SortTableNode& spot = levelTwo[output]; | |
| if (spot.type == LeafType::Value) { | |
| return spot.obj.value; | |
| } else { | |
| assert(spot.type != LeafType::Empty); | |
| SortTable<65537>* myPtr = (SortTable<65537>*)spot.obj.ptr; | |
| size_t innerOut = 0; | |
| puts("second greater than"); | |
| if (!myPtr->getNextGreaterThan(second, &innerOut)) { | |
| return (uint32_t)-1; | |
| } | |
| return (output << 16) | innerOut; | |
| } | |
| } | |
| // insert | |
| // insert after value. | |
| // get lower greater | |
| }; | |
| static void printLowerThan(size_t set, size_t target) { | |
| size_t out = 0; | |
| if(getNextLowerThan(set, target, &out)) { | |
| std::printf("%zu\n", out); | |
| } | |
| } | |
| static void printGreaterThan(size_t set, size_t target) { | |
| size_t out = 0; | |
| if(getNextGreaterThan(set, target, &out)) { | |
| std::printf("%zu\n", out); | |
| } | |
| } | |
| static void printGreaterThanOrEqual(size_t set, size_t target) { | |
| size_t out = 0; | |
| if(getNextGreaterThanOrEqual(set, target, &out)) { | |
| std::printf("%zu\n", out); | |
| } | |
| } | |
| static constexpr size_t testSampleSize = 1024 * 1024 * 10; | |
| static void test32bitTwolevel() { | |
| std::random_device rd; | |
| std::mt19937 gen(rd()); | |
| std::uniform_int_distribution<uint32_t> distrib( | |
| std::numeric_limits<uint32_t>::min(), | |
| std::numeric_limits<uint32_t>::max() | |
| ); | |
| std::vector<uint32_t> randData; | |
| size_t curRand = 0; | |
| for (int i = 0; i < testSampleSize; ++i) | |
| { | |
| randData.push_back(static_cast<uint32_t>(distrib(gen))); | |
| } | |
| SortTableLevel32 myTable; | |
| for (int i = 0; i < (testSampleSize / 30); ++i) { | |
| myTable.insert(randData[i]); | |
| } | |
| size_t total = 0; | |
| auto start = std::chrono::high_resolution_clock::now(); | |
| for (int i = 0; i < testSampleSize; ++i) | |
| { | |
| total = myTable.getGreater(randData[i]); | |
| } | |
| auto end = std::chrono::high_resolution_clock::now(); | |
| std::cout << total << " " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() << "us\n"; | |
| } | |
| int main(int argc, char const *argv[]) | |
| { | |
| static_assert(SortTable<65537>::blockCount == 1024); | |
| std::random_device rd; | |
| std::mt19937 gen(rd()); | |
| std::uniform_int_distribution<uint16_t> distrib( | |
| std::numeric_limits<uint16_t>::min(), | |
| std::numeric_limits<uint16_t>::max() | |
| ); | |
| std::vector<uint16_t> randData; | |
| size_t curRand = 0; | |
| for (int i = 0; i < testSampleSize; ++i) | |
| { | |
| randData.push_back(static_cast<uint16_t>(distrib(gen))); | |
| } | |
| printGreaterThan(0b00100101, 0); | |
| printGreaterThan(0b00100101, 3); | |
| printGreaterThanOrEqual(0b00100100, 6); | |
| printGreaterThanOrEqual(0b00100100, 5); | |
| printLowerThan((size_t{1} << 63) | (uint64_t{1} << 58), 63); | |
| SortTable<65537> tab; | |
| for (int i = 0; i < (testSampleSize / 30); ++i) | |
| { | |
| tab.set(randData[i]); | |
| } | |
| size_t total = 0; | |
| auto start = std::chrono::high_resolution_clock::now(); | |
| for (int i = 0; i < testSampleSize; ++i) | |
| { | |
| tab.getNextGreaterThan(randData[i], &total); | |
| } | |
| auto end = std::chrono::high_resolution_clock::now(); | |
| // 1.6us | |
| std::cout << total << " " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() << "us\n"; | |
| test32bitTwolevel(); | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment