Last active
December 25, 2025 00:33
-
-
Save jweinst1/13db595af6846970fb16159aa093ceca to your computer and use it in GitHub Desktop.
and pop count of specific bit segments for vectors with 8-bit dimensions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <array> | |
| #include <cstdint> | |
| #include <cstddef> | |
| #include <cmath> | |
| #include <cstdio> | |
| #include <climits> | |
| #include <vector> | |
| #include <cassert> | |
| #include <random> | |
| #include <chrono> | |
| #include <iostream> | |
| #include <limits> | |
| #include <cassert> | |
| #include <memory> | |
| #include <cmath> | |
| #include <bitset> | |
// Number of random 64-bit samples generated for the experiment.
static constexpr size_t testSampleSize = 1024 * 1024;

// Hamming-distance cutoff. Declared here but not referenced by the code visible in this file.
static constexpr size_t hammingThreshold = 23;

// Replicates `lane` into each of the eight byte lanes of a 64-bit word.
// Multiplying by 0x0101010101010101 places one copy of the byte at every
// multiple-of-8 shift; the copies never overlap, so no carries occur.
static constexpr uint64_t broadcastByte(uint8_t lane) {
    return static_cast<uint64_t>(lane) * 0x0101010101010101ULL;
}

// Masks that select a single bit position (7, 6, 5 or 4) in every byte lane
// of a 64-bit word. They are typed uint64_t rather than size_t because they
// are 64 bits wide by construction; size_t is narrower on 32-bit targets and
// would silently drop the upper four lanes.
static constexpr uint64_t bit7CountMark = broadcastByte(0x80);
static constexpr uint64_t bit6CountMark = broadcastByte(0x40);
static constexpr uint64_t bit5CountMark = broadcastByte(0x20);
static constexpr uint64_t bit4CountMark = broadcastByte(0x10);
// Returns the Hamming distance between `group` and `num`: the number of bit
// positions (0 through 64) at which the two 64-bit words differ.
static size_t hamming(uint64_t group, uint64_t num) {
    // XOR leaves a 1 exactly where the inputs disagree; count those ones.
    const std::bitset<64> differing(group ^ num);
    return differing.count();
}
| static size_t bit7Magnitude(uint64_t num) { | |
| return __builtin_popcountll(num & bit7CountMark); | |
| } | |
| static size_t bit6Magnitude(uint64_t num) { | |
| return __builtin_popcountll(num & bit6CountMark); | |
| } | |
| static size_t bit5Magnitude(uint64_t num) { | |
| return __builtin_popcountll(num & bit5CountMark); | |
| } | |
| static size_t bit4Magnitude(uint64_t num) { | |
| return __builtin_popcountll(num & bit4CountMark); | |
| } | |
// Computes the population standard deviation of each byte lane across `data`.
//
// Every 64-bit element is treated as eight unsigned bytes: lane 0 is the
// least-significant byte and lane 7 the most-significant. Uses a two-pass
// scheme (mean first, then squared deviations) and divides by N, not N-1.
//
// Returns one standard deviation per lane; all zeros when `data` is empty.
std::array<double, 8>
stddev_from_uint64_bytes(const std::vector<uint64_t>& data) {
    constexpr size_t kLanes = 8;

    std::array<double, kLanes> spread{};
    if (data.empty()) {
        return spread;
    }

    // Extracts the byte stored in lane `lane` of `word`.
    const auto laneOf = [](uint64_t word, size_t lane) -> uint8_t {
        return static_cast<uint8_t>(word >> (lane * 8));
    };
    const double samples = static_cast<double>(data.size());

    // Pass 1: arithmetic mean of every lane.
    std::array<double, kLanes> average{};
    for (size_t idx = 0; idx < data.size(); ++idx) {
        for (size_t lane = 0; lane < kLanes; ++lane) {
            average[lane] += laneOf(data[idx], lane);
        }
    }
    for (double& avg : average) {
        avg /= samples;
    }

    // Pass 2: sum of squared deviations from the lane mean.
    for (size_t idx = 0; idx < data.size(); ++idx) {
        for (size_t lane = 0; lane < kLanes; ++lane) {
            const double delta = laneOf(data[idx], lane) - average[lane];
            spread[lane] += delta * delta;
        }
    }

    // Turn each accumulated sum into a population standard deviation.
    for (double& s : spread) {
        s = std::sqrt(s / samples);
    }
    return spread;
}
| int main(int argc, char const *argv[]) | |
| { | |
| std::random_device rd; | |
| std::mt19937 gen(rd()); | |
| std::uniform_int_distribution<uint8_t> distrib( | |
| std::numeric_limits<uint8_t>::min(), | |
| std::numeric_limits<uint8_t>::max() | |
| ); | |
| std::vector<uint64_t> randData; | |
| for (int i = 0; i < testSampleSize; ++i) | |
| { | |
| uint8_t parts[8] = {static_cast<uint8_t>(distrib(gen)), | |
| static_cast<uint8_t>(distrib(gen)), | |
| static_cast<uint8_t>(distrib(gen)), | |
| static_cast<uint8_t>(distrib(gen)), | |
| static_cast<uint8_t>(distrib(gen)), | |
| static_cast<uint8_t>(distrib(gen)), | |
| static_cast<uint8_t>(distrib(gen)), | |
| static_cast<uint8_t>(distrib(gen)) | |
| }; | |
| // random with respect to 8 bit bnoundaries | |
| uint64_t result = uint64_t{parts[0]} | | |
| (uint64_t{parts[1]} << 8) | | |
| (uint64_t{parts[2]} << 16) | | |
| (uint64_t{parts[3]} << 24) | | |
| (uint64_t{parts[4]} << 32) | | |
| (uint64_t{parts[5]} << 40) | | |
| (uint64_t{parts[6]} << 48) | | |
| (uint64_t{parts[7]} << 56); | |
| randData.push_back(result); | |
| } | |
| std::vector<uint64_t> levels[9][9][9][9]; | |
| //auto start = std::chrono::high_resolution_clock::now(); | |
| for (const auto& num: randData) | |
| { | |
| size_t firstLevel = bit7Magnitude(num); | |
| size_t secondLevel = bit6Magnitude(num); | |
| size_t thirdLevel = bit5Magnitude(num); | |
| size_t fourLevel = bit4Magnitude(num); | |
| //printf("Got first %zu second %zu\n", firstLevel, secondLevel); | |
| levels[firstLevel][secondLevel][thirdLevel][fourLevel].push_back(num); | |
| } | |
| //auto end = std::chrono::high_resolution_clock::now(); | |
| for (int i = 0; i < 9; ++i) | |
| { | |
| for (int j = 0; j < 9; ++j) | |
| { | |
| for (int k = 0; k < 9; ++k) | |
| { | |
| for (int a = 0; a < 9; ++a) | |
| { | |
| const auto stddev = stddev_from_uint64_bytes(levels[i][j][k][a]); | |
| std::printf("Std dev of %d %d %d %d with size %zu [", i, j, k, a, levels[i][j][k][a].size()); | |
| std::printf(" %f ", stddev[0]); | |
| std::printf(" %f ", stddev[1]); | |
| std::printf(" %f ", stddev[2]); | |
| std::printf(" %f ", stddev[3]); | |
| std::printf(" %f ", stddev[4]); | |
| std::printf(" %f ", stddev[5]); | |
| std::printf(" %f ", stddev[6]); | |
| std::printf(" %f]\n", stddev[7]); | |
| } | |
| } | |
| } | |
| } | |
| //std::cout << " " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() << "us\n"; | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment