Skip to content

Instantly share code, notes, and snippets.

@jweinst1
Last active December 25, 2025 00:33
Show Gist options
  • Select an option

  • Save jweinst1/13db595af6846970fb16159aa093ceca to your computer and use it in GitHub Desktop.

Select an option

Save jweinst1/13db595af6846970fb16159aa093ceca to your computer and use it in GitHub Desktop.
and pop count of specific segments of bits for 8 bit dimension vectors
#include <array>
#include <cstdint>
#include <cstddef>
#include <cmath>
#include <cstdio>
#include <climits>
#include <vector>
#include <cassert>
#include <random>
#include <chrono>
#include <iostream>
#include <limits>
#include <cassert>
#include <memory>
#include <cmath>
#include <bitset>
// Number of random 64-bit words generated by main().
static constexpr size_t testSampleSize = 1024 * 1024;
// Hamming-distance threshold; not referenced by the code visible in this file.
static constexpr size_t hammingThreshold = 23;

// Per-byte bit masks. Each 64-bit word is treated as eight 8-bit lanes, and
// each mask selects one particular bit (7, 6, 5 or 4) in every lane.
//
// These are declared as uint64_t rather than size_t: the masks are 64 bits
// wide by construction, and size_t is only 32 bits on some targets, which
// would silently drop the upper four lanes.
static constexpr uint64_t bit7CountMark = 0x8080808080808080ULL; // 128 in every byte
static constexpr uint64_t bit6CountMark = 0x4040404040404040ULL; //  64 in every byte
static constexpr uint64_t bit5CountMark = 0x2020202020202020ULL; //  32 in every byte
static constexpr uint64_t bit4CountMark = 0x1010101010101010ULL; //  16 in every byte
/// Returns the Hamming distance between two 64-bit words, i.e. the number of
/// bit positions in which `group` and `num` differ.
static size_t hamming(uint64_t group, uint64_t num) {
    const uint64_t differingBits = group ^ num;
    return static_cast<size_t>(__builtin_popcountll(differingBits));
}
/// Counts how many of the eight bytes in `num` have bit 7 (value 128) set.
/// The result is in the range [0, 8].
static size_t bit7Magnitude(uint64_t num) {
    const auto selected = num & bit7CountMark;
    return static_cast<size_t>(__builtin_popcountll(selected));
}
/// Counts how many of the eight bytes in `num` have bit 6 (value 64) set.
/// The result is in the range [0, 8].
static size_t bit6Magnitude(uint64_t num) {
    const auto selected = num & bit6CountMark;
    return static_cast<size_t>(__builtin_popcountll(selected));
}
/// Counts how many of the eight bytes in `num` have bit 5 (value 32) set.
/// The result is in the range [0, 8].
static size_t bit5Magnitude(uint64_t num) {
    const auto selected = num & bit5CountMark;
    return static_cast<size_t>(__builtin_popcountll(selected));
}
/// Counts how many of the eight bytes in `num` have bit 4 (value 16) set.
/// The result is in the range [0, 8].
static size_t bit4Magnitude(uint64_t num) {
    const auto selected = num & bit4CountMark;
    return static_cast<size_t>(__builtin_popcountll(selected));
}
/// Computes the population standard deviation of each byte lane across a set
/// of 64-bit words.
///
/// Every word is viewed as eight unsigned bytes; lane 0 is the least
/// significant byte and lane 7 the most significant one.
///
/// @param data  Words to analyse.
/// @return      One standard deviation per lane (divisor is the element
///              count, not count - 1). All zeros when `data` is empty.
std::array<double, 8>
stddev_from_uint64_bytes(const std::vector<uint64_t>& data) {
    constexpr size_t kLanes = 8;

    std::array<double, kLanes> spread{};
    if (data.empty()) {
        return spread; // nothing to measure: report zero for every lane
    }

    // Extracts the byte at the given lane (0 = least significant).
    const auto laneOf = [](uint64_t word, size_t lane) -> uint8_t {
        return static_cast<uint8_t>(word >> (lane * 8));
    };

    const size_t total = data.size();

    // Pass 1: per-lane average.
    std::array<double, kLanes> average{};
    for (const uint64_t word : data) {
        for (size_t lane = 0; lane < kLanes; ++lane) {
            average[lane] += laneOf(word, lane);
        }
    }
    for (double& lanesAverage : average) {
        lanesAverage /= total;
    }

    // Pass 2: per-lane sum of squared deviations from the average.
    for (const uint64_t word : data) {
        for (size_t lane = 0; lane < kLanes; ++lane) {
            const double delta = laneOf(word, lane) - average[lane];
            spread[lane] += delta * delta;
        }
    }

    // Turn the accumulated squares into population standard deviations.
    for (double& laneSpread : spread) {
        laneSpread = std::sqrt(laneSpread / total);
    }
    return spread;
}
/// Generates testSampleSize random 64-bit words, buckets them by how many of
/// their eight bytes have bit 7, bit 6, bit 5 and bit 4 set, and prints the
/// per-byte standard deviation and size of every bucket.
///
/// @param argc  Unused.
/// @param argv  Unused.
/// @return      Always 0.
int main(int argc, char const *argv[])
{
    (void)argc;
    (void)argv;

    std::random_device rd;
    std::mt19937 gen(rd());
    // std::uniform_int_distribution is only specified for short, int, long,
    // long long and their unsigned counterparts. Instantiating it with
    // uint8_t is undefined behaviour (and rejected outright by some standard
    // libraries), so draw an unsigned int in [0, 255] and narrow it.
    std::uniform_int_distribution<unsigned int> distrib(
        std::numeric_limits<uint8_t>::min(),
        std::numeric_limits<uint8_t>::max()
    );

    std::vector<uint64_t> randData;
    randData.reserve(testSampleSize); // final size is known up front
    for (size_t i = 0; i < testSampleSize; ++i)
    {
        // Random with respect to 8-bit boundaries: each byte is drawn
        // independently, lowest byte first.
        uint64_t result = 0;
        for (unsigned shift = 0; shift < 64; shift += 8)
        {
            const uint8_t part = static_cast<uint8_t>(distrib(gen));
            result |= uint64_t{part} << shift;
        }
        randData.push_back(result);
    }

    // Each magnitude is a count over eight bytes, so it lies in [0, 8] and
    // needs nine slots per dimension.
    constexpr int kSlots = 9;
    std::vector<uint64_t> levels[kSlots][kSlots][kSlots][kSlots];
    for (const uint64_t num : randData)
    {
        const size_t firstLevel = bit7Magnitude(num);
        const size_t secondLevel = bit6Magnitude(num);
        const size_t thirdLevel = bit5Magnitude(num);
        const size_t fourLevel = bit4Magnitude(num);
        levels[firstLevel][secondLevel][thirdLevel][fourLevel].push_back(num);
    }

    for (int i = 0; i < kSlots; ++i)
    {
        for (int j = 0; j < kSlots; ++j)
        {
            for (int k = 0; k < kSlots; ++k)
            {
                for (int a = 0; a < kSlots; ++a)
                {
                    const auto& bucket = levels[i][j][k][a];
                    const auto stddev = stddev_from_uint64_bytes(bucket);
                    std::printf("Std dev of %d %d %d %d with size %zu [", i, j, k, a, bucket.size());
                    // All lanes but the last share one format; the last
                    // closes the bracket and ends the line.
                    for (size_t lane = 0; lane + 1 < stddev.size(); ++lane)
                    {
                        std::printf(" %f ", stddev[lane]);
                    }
                    std::printf(" %f]\n", stddev.back());
                }
            }
        }
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment