Skip to content

Instantly share code, notes, and snippets.

@jweinst1
Created August 21, 2025 05:21
Show Gist options
  • Save jweinst1/3476cdd85465f16a05beb26e9e1f0f3c to your computer and use it in GitHub Desktop.
Save jweinst1/3476cdd85465f16a05beb26e9e1f0f3c to your computer and use it in GitHub Desktop.
quantize floats to small values
#include <vector>
#include <cstdint>
#include <iostream>
#include <cmath>
#include <numeric>
#include <bitset>
/// @brief Quantize a float vector into a bit code of at most 16 bits.
///
/// The vector is cut into `numBits` contiguous groups whose sizes differ by at
/// most one element (the first `vec.size() % numBits` groups receive the extra
/// element). Bit `i` of the result is set when the mean of group `i` is
/// strictly greater than the threshold. When the vector has fewer elements
/// than `numBits`, the trailing groups are empty and their bits stay 0.
///
/// Sums are accumulated in double precision so that large-magnitude elements
/// do not swallow smaller ones before the mean is compared to the threshold.
///
/// @param vec             Input values.
/// @param numBits         Number of groups / output bits; must be in [1, 16].
/// @param useGlobalMean   When true (and no custom threshold is given), the
///                        threshold is the mean of the whole vector.
/// @param customThreshold Explicit threshold; NaN (the default) means
///                        "not specified". Takes priority over useGlobalMean.
/// @return The bit code, or 0 when `vec` is empty or `numBits` is out of range.
uint16_t quantizeVectorToBits(const std::vector<float>& vec,
                              int numBits,
                              bool useGlobalMean = false,
                              float customThreshold = NAN)
{
    if (vec.empty() || numBits < 1 || numBits > 16) return 0;

    using Diff = std::vector<float>::difference_type;
    const std::size_t count = vec.size();
    // numBits is validated above, so the conversion to unsigned is safe.
    const std::size_t bitCount = static_cast<std::size_t>(numBits);

    // Determine threshold: custom value > global mean > 0.0 default.
    double threshold = 0.0;
    if (!std::isnan(customThreshold)) {
        threshold = customThreshold;
    } else if (useGlobalMean) {
        threshold = std::accumulate(vec.begin(), vec.end(), 0.0) / static_cast<double>(count);
    }

    // Divide dimensions into groups as evenly as possible.
    const std::size_t baseGroupSize = count / bitCount;
    const std::size_t remainder = count % bitCount;

    uint16_t result = 0;
    auto groupBegin = vec.begin();
    for (std::size_t bit = 0; bit < bitCount; ++bit) {
        const std::size_t groupSize = baseGroupSize + (bit < remainder ? 1 : 0);
        if (groupSize == 0) continue;  // more bits than elements: leave the bit cleared

        const auto groupEnd = groupBegin + static_cast<Diff>(groupSize);
        // The 0.0 (double) initial value makes std::accumulate sum in double.
        const double mean = std::accumulate(groupBegin, groupEnd, 0.0) / static_cast<double>(groupSize);
        groupBegin = groupEnd;

        if (mean > threshold) {
            result = static_cast<uint16_t>(result | (1u << bit));
        }
    }
    return result;
}
/// @brief Quantize a float vector into a 10-bit code.
///
/// The vector is cut into 10 contiguous groups whose sizes differ by at most
/// one element (the first `vec.size() % 10` groups receive the extra element).
/// Bit `i` of the result is set when the mean of group `i` is strictly greater
/// than `threshold`. With fewer than 10 elements the trailing groups are empty
/// and their bits stay 0.
///
/// Group sums are accumulated in double precision so that large-magnitude
/// elements do not swallow smaller ones before the comparison.
///
/// @param vec       Input values.
/// @param threshold Value each group mean is compared against (default 0.0).
/// @return The 10-bit code in the low bits of the result; 0 for an empty vector.
uint16_t quantizeVectorTo10Bits(const std::vector<float>& vec, float threshold = 0.0f) {
    using Diff = std::vector<float>::difference_type;
    constexpr std::size_t kBitCount = 10;

    const std::size_t count = vec.size();
    if (count == 0) return 0;

    // Divide dimensions into 10 groups as evenly as possible.
    const std::size_t baseGroupSize = count / kBitCount;
    const std::size_t remainder = count % kBitCount;
    const double limit = threshold;

    uint16_t result = 0;
    auto groupBegin = vec.begin();
    for (std::size_t bit = 0; bit < kBitCount; ++bit) {
        // Some groups get +1 element while remainder is left.
        const std::size_t groupSize = baseGroupSize + (bit < remainder ? 1 : 0);
        if (groupSize == 0) continue;  // fewer than 10 elements: leave the bit cleared

        const auto groupEnd = groupBegin + static_cast<Diff>(groupSize);
        // The 0.0 (double) initial value makes std::accumulate sum in double.
        const double mean = std::accumulate(groupBegin, groupEnd, 0.0) / static_cast<double>(groupSize);
        groupBegin = groupEnd;

        // Compare to threshold -> set bit.
        if (mean > limit) {
            result = static_cast<uint16_t>(result | (1u << bit));
        }
    }
    return result;
}
// Example usage: quantizes a sine sweep with the fixed 10-bit routine, then a
// repeating integer pattern with the configurable routine under three
// threshold modes, printing each resulting code.
int main() {
    // 50 samples of sin(0.2 * k).
    std::vector<float> samples(50);
    for (int k = 0; k < 50; ++k) {
        samples[k] = std::sin(k * 0.2f);
    }
    const uint16_t sineCode = quantizeVectorTo10Bits(samples, 0.0f);
    std::cout << "Quantized 10-bit code: " << std::bitset<10>(sineCode) << "\n";
    std::cout << "As integer: " << sineCode << "\n";

    // 42 samples cycling through -3..3.
    std::vector<float> pattern(42);
    for (int k = 0; k < 42; ++k) {
        pattern[k] = (k % 7) - 3;
    }

    // Shared formatter for the three configurable-quantizer results.
    const auto report = [](uint16_t code) {
        std::cout << "10 bits -> " << std::bitset<16>(code) << " (int=" << code << ")\n";
    };

    std::cout << "Custom threshold = 0.5\n";
    const uint16_t customCode = quantizeVectorToBits(pattern, 10, false, 0.5f);
    report(customCode);

    std::cout << "\nGlobal mean threshold\n";
    const uint16_t globalMeanCode = quantizeVectorToBits(pattern, 10, true);
    report(globalMeanCode);

    std::cout << "\nFixed threshold = 0.0\n";
    const uint16_t fixedCode = quantizeVectorToBits(pattern, 10);
    report(fixedCode);

    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment