Skip to content

Instantly share code, notes, and snippets.

@rygorous
Created December 24, 2024 15:07
Show Gist options
  • Save rygorous/056cb0219e6e65d50457d4b60a33a225 to your computer and use it in GitHub Desktop.
Save rygorous/056cb0219e6e65d50457d4b60a33a225 to your computer and use it in GitHub Desktop.
Direct UNORM/SNORM conversion test program
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
// Returns the raw 32-bit pattern stored in a float.
// The bytes are copied with memcpy so no pointer cast between unrelated types is needed.
static uint32_t float_to_bits(float x)
{
    uint32_t bits = 0;
    memcpy(&bits, &x, sizeof x);
    return bits;
}
// Reinterprets a 32-bit pattern as a float (the inverse of float_to_bits).
// The bytes are copied with memcpy so no pointer cast between unrelated types is needed.
static float bits_to_float(uint32_t x)
{
    float value = 0.0f;
    memcpy(&value, &x, sizeof x);
    return value;
}
// Reference UNORM8 decode: converts x to float and divides by 255, so 255 maps to 1.0.
// No range check is applied to x.
static float decode_ref_unorm8(int x)
{
    const float numerator = (float)x;
    return numerator / 255.0f;
}
// Reference UNORM16 decode: converts x to float and divides by 65535, so 65535 maps to 1.0.
// No range check is applied to x.
static float decode_ref_unorm16(int x)
{
    const float numerator = (float)x;
    return numerator / 65535.0f;
}
// Reference SNORM8 decode: values below -127 are raised to -127, then the result is
// divided by 127, so both -128 and -127 map to -1.0 and 127 maps to 1.0.
// No upper clamp is applied to x.
static float decode_ref_snorm8(int x)
{
    const int clamped = (x < -127) ? -127 : x;
    return (float)clamped / 127.0f;
}
// Reference SNORM16 decode: values below -32767 are raised to -32767, then the result is
// divided by 32767, so both -32768 and -32767 map to -1.0 and 32767 maps to 1.0.
// No upper clamp is applied to x.
static float decode_ref_snorm16(int x)
{
    const int clamped = (x < -32767) ? -32767 : x;
    return (float)clamped / 32767.0f;
}
// Result of normalizing an integer for direct assembly into float bits.
// Produced by normalize() / normalize_signed() and consumed by the decode_direct_* functions.
struct NormalizedPair
{
// Input shifted left until bit 15 (0x8000) is set, with that bit then subtracted off:
// the 15 bits that followed the leading 1.
uint32_t mantissa;
// Starts at 126 and is decremented once per normalization shift; normalize_signed()
// additionally ORs in 256 for negative inputs. The decoders shift this field left by 23
// to form the upper bits of the float.
uint32_t exp_sign;
};
// Shifts x left until bit 15 (0x8000) is set, counting the shifts down from 126, then
// removes that leading bit from the mantissa.
//
// Precondition (met by every caller in this file): x is non-zero and fits in 16 bits.
// For x == 0 the loop can never reach 0x8000; for x >= 0x10000 no shift happens and the
// subtraction does not isolate the bits below the leading 1.
static NormalizedPair normalize(uint32_t x)
{
    uint32_t shifted = x;
    uint32_t exponent = 126;

    for (; shifted < 0x8000; shifted <<= 1)
        --exponent;

    NormalizedPair result;
    result.mantissa = shifted - 0x8000; // drop the leading 1
    result.exp_sign = exponent;
    return result;
}
// Signed variant of normalize(): normalizes |x| and records the sign of x by setting
// bit 8 (value 256) of exp_sign when x is negative. That bit sits directly above the
// 8-bit exponent, so shifting exp_sign left by 23 moves it to bit 31 of the float.
// Same precondition as normalize(): |x| must be non-zero and fit in 16 bits.
static NormalizedPair normalize_signed(int x)
{
    const int magnitude = abs(x);
    NormalizedPair result = normalize(magnitude);
    if (x < 0)
        result.exp_sign |= 256;
    return result;
}
static float decode_direct_unorm16(int x)
{
// Handle easy special cases
if (x <= 0)
return 0.0f;
else if (x >= 0xffff)
return 1.0f;
// In [1,0xfffe], which means at least one set bit, and at least one clear bit.
// Normalize it, then drop implicit 1 bit
NormalizedPair norm = normalize(x);
// Puzzle together final float
return bits_to_float(
(norm.exp_sign << 23) +
(norm.mantissa << 8) +
0x80 +
(norm.mantissa >> 8) +
((norm.mantissa >> 7) & 1)
);
}
// Decodes an 8-bit UNORM value by widening it to 16 bits and reusing the UNORM16 decoder.
// Replicating the byte into both halves computes x * 257, and since 255 * 257 == 65535
// the ratio x/255 equals (x*257)/65535.
//
// Inputs at or below 0 return 0.0 and inputs at or above 0xff return 1.0. The range is
// checked before shifting so that a negative or very large x is never left-shifted
// (left-shifting a negative int is undefined before C++20, and large values can overflow).
static float decode_direct_unorm8(int x)
{
    if (x <= 0)
        return 0.0f;
    if (x >= 0xff)
        return 1.0f;

    // x is in [1,0xfe] here, so the widened value is in [0x0101,0xfefe].
    return decode_direct_unorm16((x << 8) | x);
}
// Decodes an 8-bit SNORM value to float by assembling the float's bit pattern directly
// instead of dividing by 127. Inputs at or below -127 return -1.0, inputs at or above
// 127 return 1.0, and 0 returns 0.0.
static float decode_direct_snorm8(int x)
{
    // Handle easy special cases
    if (x <= -127)
        return -1.0f;
    else if (x >= 127)
        return 1.0f;
    else if (x == 0)
        return 0.0f;

    // Normalize. x is in [-126,126] and non-zero; scaling by 2^9 moves the 7-bit
    // magnitude into the 16-bit range normalize_signed() expects. A multiply is used
    // rather than `x << 9` because left-shifting a negative int is undefined before C++20.
    NormalizedPair norm = normalize_signed(x * 512);
    // The low 9 bits of the normalized mantissa are the zeros introduced by the scaling.
    uint32_t mant = norm.mantissa >> 9;

    // mant is 6 bits, we need 23 bits of mantissa
    // in order:
    //   [22:17]    16    [15:10]    9    [8:3]     2    [1:0]
    // { mant[5:0], 1'b1, mant[5:0], 1'b1, mant[5:0], 1'b1, mant[5:4] } + rounding

    // Puzzle together final float
    return bits_to_float(
        (norm.exp_sign << 23) +
        (mant << 17) +
        0x10000 +
        (mant << 10) +
        0x200 +
        (mant << 3) +
        4 +
        (mant >> 4) +
        ((mant >> 3) & 1) // rounding increment taken from mant[3]
    );
}
// Decodes a 16-bit SNORM value to float by assembling the float's bit pattern directly
// instead of dividing by 32767. Inputs at or below -32767 return -1.0, inputs at or
// above 32767 return 1.0, and 0 returns 0.0.
static float decode_direct_snorm16(int x)
{
    // Handle easy special cases
    if (x <= -32767)
        return -1.0f;
    else if (x >= 32767)
        return 1.0f;
    else if (x == 0)
        return 0.0f;

    // Normalize. x is in [-32766,32766] and non-zero; doubling moves the 15-bit
    // magnitude into the 16-bit range normalize_signed() expects. A multiply is used
    // rather than `x << 1` because left-shifting a negative int is undefined before C++20.
    NormalizedPair norm = normalize_signed(x * 2);
    // The low bit of the normalized mantissa is the zero introduced by the doubling.
    uint32_t mant = norm.mantissa >> 1;

    // mant is 14 bits, we need 23 bits of mantissa
    // in order:
    //   [22:9]      8     [7:0]
    // { mant[13:0], 1'b1, mant[13:6] } + rounding

    // Puzzle together final float
    return bits_to_float(
        (norm.exp_sign << 23) +
        (mant << 9) +
        0x100 +
        (mant >> 6) +
        ((mant >> 5) & 1) // rounding increment taken from mant[5]
    );
}
// Exhaustively compares each direct decoder against its division-based reference over
// the full input range of the format. Prints the first mismatch and returns 1, or
// returns 0 once all four formats have passed.
int main()
{
    printf("unorm8:\n");
    for (int x = 0; x <= 0xff; ++x)
    {
        float ref = decode_ref_unorm8(x);
        float tst = decode_direct_unorm8(x);
        if (ref != tst)
        {
            printf(" mismatch! x=%3d ref=%.8g tst=%.8g diff=%.8g\n", x, ref, tst, ref - tst);
            return 1;
        }
    }
    printf(" all ok!\n");

    printf("unorm16:\n");
    for (int x = 0; x <= 0xffff; ++x)
    {
        float ref = decode_ref_unorm16(x);
        float tst = decode_direct_unorm16(x);
        if (ref != tst)
        {
            printf(" mismatch! x=%3d ref=%.8g tst=%.8g diff=%.8g\n", x, ref, tst, ref - tst);
            return 1;
        }
    }
    printf(" all ok!\n");

    printf("snorm8:\n");
    for (int x = -0x80; x <= 0x7f; ++x)
    {
        float ref = decode_ref_snorm8(x);
        float tst = decode_direct_snorm8(x);
        if (ref != tst)
        {
            // %x expects unsigned int; uint32_t is not guaranteed to be that type,
            // so convert explicitly to keep the format and arguments in agreement.
            printf(" mismatch! x=%3d ref=0x%08x tst=0x%08x\n", x,
                   (unsigned)float_to_bits(ref), (unsigned)float_to_bits(tst));
            return 1;
        }
    }
    printf(" all ok!\n");

    printf("snorm16:\n");
    for (int x = -0x8000; x <= 0x7fff; ++x)
    {
        float ref = decode_ref_snorm16(x);
        float tst = decode_direct_snorm16(x);
        if (ref != tst)
        {
            // Same explicit conversion as above for the %x arguments.
            printf(" mismatch! x=%3d ref=0x%08x tst=0x%08x\n", x,
                   (unsigned)float_to_bits(ref), (unsigned)float_to_bits(tst));
            return 1;
        }
    }
    printf(" all ok!\n");

    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment