rygorous · December 24, 2024 15:07
diff --git a/main.cpp b/main.cpp
 #include <stdio.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>

 // Returns the raw 32-bit storage of a float as an unsigned integer.
 // The bytes are copied with memcpy rather than read through a casted pointer.
 static uint32_t float_to_bits(float x)
 {
 	uint32_t raw;
 	memcpy(&raw, &x, sizeof x);
 	return raw;
 }

 // Builds a float whose storage is the given 32-bit pattern.
 // The bytes are copied with memcpy rather than read through a casted pointer.
 static float bits_to_float(uint32_t x)
 {
 	float value;
 	memcpy(&value, &x, sizeof x);
 	return value;
 }

 // Reference decode for 8-bit unsigned normalized values: x / 255 in float.
 static float decode_ref_unorm8(int x)
 {
 	const float numerator = (float)x;
 	return numerator / 255.0f;
 }

 // Reference decode for 16-bit unsigned normalized values: x / 65535 in float.
 static float decode_ref_unorm16(int x)
 {
 	const float numerator = (float)x;
 	return numerator / 65535.0f;
 }

 // Reference decode for 8-bit signed normalized values: x / 127 in float,
 // with anything at or below -127 treated as -127 (so -128 decodes to -1).
 static float decode_ref_snorm8(int x)
 {
 	const int clamped = (x <= -127) ? -127 : x;
 	return (float)clamped / 127.0f;
 }

 // Reference decode for 16-bit signed normalized values: x / 32767 in float,
 // with anything at or below -32767 treated as -32767 (so -32768 decodes to -1).
 static float decode_ref_snorm16(int x)
 {
 	const int clamped = (x <= -32767) ? -32767 : x;
 	return (float)clamped / 32767.0f;
 }

 // Result of normalize() / normalize_signed(): the pieces the decode_direct_*
 // functions assemble into a float bit pattern.
 struct NormalizedPair
 {
 	// Bits left over after normalize() shifts the input up to bit 15 and
 	// subtracts that leading bit (0x8000).
 	uint32_t mantissa;
 	// Exponent value that the decoders shift left by 23. normalize() starts it
 	// at 126 and decrements once per shift; normalize_signed() additionally ORs
 	// in 256 for negative inputs, which lands in bit 31 after the shift.
 	uint32_t exp_sign;
 };

 // Normalizes x within a 16-bit window: shifts it left until bit 15 is set,
 // then removes that bit (the float's implicit leading one).
 //
 // Returns the remaining bits in `mantissa` and the matching exponent field in
 // `exp_sign`, which starts at 126 (the biased exponent of [0.5, 1)) and drops
 // by one for every shift performed.
 //
 // x == 0 has no set bit to shift into place, so the loop could never finish;
 // that case returns an all-zero pair instead. Values of 0x10000 and above are
 // not shifted down, so callers are expected to pass x below 0x10000.
 static NormalizedPair normalize(uint32_t x)
 {
 	NormalizedPair pair;
 	pair.mantissa = 0;
 	pair.exp_sign = 0;

 	// Guard: without a set bit the shift loop below would spin forever
 	if (x == 0)
 		return pair;

 	pair.mantissa = x;
 	pair.exp_sign = 126;
 	while (pair.mantissa < 0x8000)
 	{
 		pair.mantissa <<= 1;
 		pair.exp_sign--;
 	}

 	// Drop the leading one now sitting in bit 15
 	pair.mantissa -= 0x8000;
 	return pair;
 }

 // Signed wrapper around normalize(): normalizes |x| and records the sign of x
 // as bit 8 of exp_sign (value 256), directly above the 8 exponent bits.
 static NormalizedPair normalize_signed(int x)
 {
 	const uint32_t sign_flag = (x < 0) ? 256 : 0;

 	NormalizedPair result = normalize(abs(x));
 	result.exp_sign |= sign_flag;
 	return result;
 }

 static float decode_direct_unorm16(int x)
 {
 	// Handle easy special cases
 	if (x <= 0)
 		return 0.0f;
 	else if (x >= 0xffff)
 		return 1.0f;

 	// In [1,0xfffe], which means at least one set bit, and at least one clear bit.
 	// Normalize it, then drop implicit 1 bit
 	NormalizedPair norm = normalize(x);

 	// Puzzle together final float
 	return bits_to_float(
 		(norm.exp_sign << 23) +
 		(norm.mantissa << 8) +
 		0x80 +
 		(norm.mantissa >> 8) +
 		((norm.mantissa >> 7) & 1)
 	);
 }

 // Decodes an 8-bit unsigned normalized value by replicating the byte into
 // both halves of a 16-bit value and reusing the 16-bit decoder
 // (for x in [0,255], (x << 8) | x == x * 257, and 255 * 257 == 65535).
 static float decode_direct_unorm8(int x)
 {
 	const int widened = (x << 8) | x;
 	return decode_direct_unorm16(widened);
 }

 // Decodes an 8-bit signed normalized value to float by assembling the bit
 // pattern directly instead of dividing. main() compares the result against
 // decode_ref_snorm8 for every input in [-128, 127].
 static float decode_direct_snorm8(int x)
 {
 	// Handle easy special cases (-128 is treated like -127)
 	if (x <= -127)
 		return -1.0f;
 	else if (x >= 127)
 		return 1.0f;
 	else if (x == 0)
 		return 0.0f;

 	// Normalize. The 7-bit magnitude is scaled by 2^9 so it occupies the top of
 	// the 16-bit window normalize() works on. This is a multiply rather than
 	// `x << 9` because x can be negative here and left-shifting a negative
 	// signed int is undefined behavior; |x| <= 126, so the product cannot overflow.
 	NormalizedPair norm = normalize_signed(x * 512);
 	uint32_t mant = norm.mantissa >> 9;

 	// mant is 6 bits, we need 23 bits of mantissa
 	// in order:
 	//      [22:17]    16    [15:10]     9     [8:3]      2     [1:0]
 	//   { mant[5:0], 1'b1, mant[5:0], 1'b1, mant[5:0], 1'b1, mant[5:4] } + rounding

 	// Puzzle together final float
 	return bits_to_float(
 		(norm.exp_sign << 23) +
 		(mant << 17) +
 		0x10000 +
 		(mant << 10) +
 		0x200 +
 		(mant << 3) +
 		4 +
 		(mant >> 4) +
 		((mant >> 3) & 1)
 	);
 }

 // Decodes a 16-bit signed normalized value to float by assembling the bit
 // pattern directly instead of dividing. main() compares the result against
 // decode_ref_snorm16 for every input in [-32768, 32767].
 static float decode_direct_snorm16(int x)
 {
 	// Handle easy special cases (-32768 is treated like -32767)
 	if (x <= -32767)
 		return -1.0f;
 	else if (x >= 32767)
 		return 1.0f;
 	else if (x == 0)
 		return 0.0f;

 	// Normalize. The 15-bit magnitude is doubled so it occupies the top of the
 	// 16-bit window normalize() works on. This is a multiply rather than
 	// `x << 1` because x can be negative here and left-shifting a negative
 	// signed int is undefined behavior; |x| <= 32766, so the product cannot overflow.
 	NormalizedPair norm = normalize_signed(x * 2);
 	uint32_t mant = norm.mantissa >> 1;

 	// mant is 14 bits, we need 23 bits of mantissa
 	// in order:
 	//      [22:9]       8      [7:0]
 	//   { mant[13:0], 1'b1, mant[13:6] } + rounding

 	// Puzzle together final float
 	return bits_to_float(
 		(norm.exp_sign << 23) +
 		(mant << 9) +
 		0x100 +
 		(mant >> 6) +
 		((mant >> 5) & 1)
 	);
 }

 // Exhaustively compares each decode_direct_* function with its decode_ref_*
 // counterpart over the full input range of the format. Prints the first
 // mismatch and exits with status 1, or returns 0 when all four formats agree.
 int main()
 {
 	// 8-bit unsigned
 	printf("unorm8:\n");
 	for (int x = 0; x < 0x100; x++)
 	{
 		const float ref = decode_ref_unorm8(x);
 		const float tst = decode_direct_unorm8(x);
 		if (ref == tst)
 			continue;

 		printf("  mismatch! x=%3d ref=%.8g tst=%.8g diff=%.8g\n", x, ref, tst, ref - tst);
 		return 1;
 	}
 	printf("  all ok!\n");

 	// 16-bit unsigned
 	printf("unorm16:\n");
 	for (int x = 0; x < 0x10000; x++)
 	{
 		const float ref = decode_ref_unorm16(x);
 		const float tst = decode_direct_unorm16(x);
 		if (ref == tst)
 			continue;

 		printf("  mismatch! x=%3d ref=%.8g tst=%.8g diff=%.8g\n", x, ref, tst, ref - tst);
 		return 1;
 	}
 	printf("  all ok!\n");

 	// 8-bit signed; mismatches are reported as raw bit patterns
 	printf("snorm8:\n");
 	for (int x = -0x80; x < 0x80; x++)
 	{
 		const float ref = decode_ref_snorm8(x);
 		const float tst = decode_direct_snorm8(x);
 		if (ref == tst)
 			continue;

 		printf("  mismatch! x=%3d ref=0x%08x tst=0x%08x\n", x, float_to_bits(ref), float_to_bits(tst));
 		return 1;
 	}
 	printf("  all ok!\n");

 	// 16-bit signed; mismatches are reported as raw bit patterns
 	printf("snorm16:\n");
 	for (int x = -0x8000; x < 0x8000; x++)
 	{
 		const float ref = decode_ref_snorm16(x);
 		const float tst = decode_direct_snorm16(x);
 		if (ref == tst)
 			continue;

 		printf("  mismatch! x=%3d ref=0x%08x tst=0x%08x\n", x, float_to_bits(ref), float_to_bits(tst));
 		return 1;
 	}
 	printf("  all ok!\n");

 	return 0;
 }
	#include <stdio.h>
	#include <stdint.h>
	#include <stdlib.h>
	#include <string.h>

	// Returns the raw 32-bit storage of a float as an unsigned integer.
	// The bytes are copied with memcpy rather than read through a casted pointer.
	static uint32_t float_to_bits(float x)
	{
		uint32_t raw;
		memcpy(&raw, &x, sizeof x);
		return raw;
	}

	// Builds a float whose storage is the given 32-bit pattern.
	// The bytes are copied with memcpy rather than read through a casted pointer.
	static float bits_to_float(uint32_t x)
	{
		float value;
		memcpy(&value, &x, sizeof x);
		return value;
	}

	// Reference decode for 8-bit unsigned normalized values: x / 255 in float.
	static float decode_ref_unorm8(int x)
	{
		const float numerator = (float)x;
		return numerator / 255.0f;
	}

	// Reference decode for 16-bit unsigned normalized values: x / 65535 in float.
	static float decode_ref_unorm16(int x)
	{
		const float numerator = (float)x;
		return numerator / 65535.0f;
	}

	// Reference decode for 8-bit signed normalized values: x / 127 in float,
	// with anything at or below -127 treated as -127 (so -128 decodes to -1).
	static float decode_ref_snorm8(int x)
	{
		const int clamped = (x <= -127) ? -127 : x;
		return (float)clamped / 127.0f;
	}

	// Reference decode for 16-bit signed normalized values: x / 32767 in float,
	// with anything at or below -32767 treated as -32767 (so -32768 decodes to -1).
	static float decode_ref_snorm16(int x)
	{
		const int clamped = (x <= -32767) ? -32767 : x;
		return (float)clamped / 32767.0f;
	}

	// Result of normalize() / normalize_signed(): the pieces the decode_direct_*
	// functions assemble into a float bit pattern.
	struct NormalizedPair
	{
	// Bits left over after normalize() shifts the input up to bit 15 and
	// subtracts that leading bit (0x8000).
	uint32_t mantissa;
	// Exponent value that the decoders shift left by 23. normalize() starts it
	// at 126 and decrements once per shift; normalize_signed() additionally ORs
	// in 256 for negative inputs, which lands in bit 31 after the shift.
	uint32_t exp_sign;
	};

	// Normalizes x within a 16-bit window: shifts it left until bit 15 is set,
	// then removes that bit (the float's implicit leading one).
	//
	// Returns the remaining bits in `mantissa` and the matching exponent field in
	// `exp_sign`, which starts at 126 (the biased exponent of [0.5, 1)) and drops
	// by one for every shift performed.
	//
	// x == 0 has no set bit to shift into place, so the loop could never finish;
	// that case returns an all-zero pair instead. Values of 0x10000 and above are
	// not shifted down, so callers are expected to pass x below 0x10000.
	static NormalizedPair normalize(uint32_t x)
	{
		NormalizedPair pair;
		pair.mantissa = 0;
		pair.exp_sign = 0;

		// Guard: without a set bit the shift loop below would spin forever
		if (x == 0)
			return pair;

		pair.mantissa = x;
		pair.exp_sign = 126;
		while (pair.mantissa < 0x8000)
		{
			pair.mantissa <<= 1;
			pair.exp_sign--;
		}

		// Drop the leading one now sitting in bit 15
		pair.mantissa -= 0x8000;
		return pair;
	}

	// Signed wrapper around normalize(): normalizes |x| and records the sign of x
	// as bit 8 of exp_sign (value 256), directly above the 8 exponent bits.
	static NormalizedPair normalize_signed(int x)
	{
		NormalizedPair pair = normalize(abs(x));
		// Plain `|=`: the backslash-escaped form in the pasted text does not compile
		pair.exp_sign |= (x < 0) ? 256 : 0;
		return pair;
	}

	static float decode_direct_unorm16(int x)
	{
	// Handle easy special cases
	if (x <= 0)
	return 0.0f;
	else if (x >= 0xffff)
	return 1.0f;

	// In [1,0xfffe], which means at least one set bit, and at least one clear bit.
	// Normalize it, then drop implicit 1 bit
	NormalizedPair norm = normalize(x);

	// Puzzle together final float
	return bits_to_float(
	(norm.exp_sign << 23) +
	(norm.mantissa << 8) +
	0x80 +
	(norm.mantissa >> 8) +
	((norm.mantissa >> 7) & 1)
	);
	}

	// Decodes an 8-bit unsigned normalized value by replicating the byte into
	// both halves of a 16-bit value and reusing the 16-bit decoder
	// (for x in [0,255], (x << 8) | x == x * 257, and 255 * 257 == 65535).
	static float decode_direct_unorm8(int x)
	{
		// Plain `|`: the backslash-escaped form in the pasted text does not compile
		return decode_direct_unorm16((x << 8) | x);
	}

	// Decodes an 8-bit signed normalized value to float by assembling the bit
	// pattern directly instead of dividing. main() compares the result against
	// decode_ref_snorm8 for every input in [-128, 127].
	static float decode_direct_snorm8(int x)
	{
		// Handle easy special cases (-128 is treated like -127)
		if (x <= -127)
			return -1.0f;
		else if (x >= 127)
			return 1.0f;
		else if (x == 0)
			return 0.0f;

		// Normalize. The 7-bit magnitude is scaled by 2^9 so it occupies the top of
		// the 16-bit window normalize() works on. This is a multiply rather than
		// `x << 9` because x can be negative here and left-shifting a negative
		// signed int is undefined behavior; |x| <= 126, so the product cannot overflow.
		NormalizedPair norm = normalize_signed(x * 512);
		uint32_t mant = norm.mantissa >> 9;

		// mant is 6 bits, we need 23 bits of mantissa
		// in order:
		//      [22:17]    16    [15:10]     9     [8:3]      2     [1:0]
		//   { mant[5:0], 1'b1, mant[5:0], 1'b1, mant[5:0], 1'b1, mant[5:4] } + rounding

		// Puzzle together final float
		return bits_to_float(
			(norm.exp_sign << 23) +
			(mant << 17) +
			0x10000 +
			(mant << 10) +
			0x200 +
			(mant << 3) +
			4 +
			(mant >> 4) +
			((mant >> 3) & 1)
		);
	}

	// Decodes a 16-bit signed normalized value to float by assembling the bit
	// pattern directly instead of dividing. main() compares the result against
	// decode_ref_snorm16 for every input in [-32768, 32767].
	static float decode_direct_snorm16(int x)
	{
		// Handle easy special cases (-32768 is treated like -32767)
		if (x <= -32767)
			return -1.0f;
		else if (x >= 32767)
			return 1.0f;
		else if (x == 0)
			return 0.0f;

		// Normalize. The 15-bit magnitude is doubled so it occupies the top of the
		// 16-bit window normalize() works on. This is a multiply rather than
		// `x << 1` because x can be negative here and left-shifting a negative
		// signed int is undefined behavior; |x| <= 32766, so the product cannot overflow.
		NormalizedPair norm = normalize_signed(x * 2);
		uint32_t mant = norm.mantissa >> 1;

		// mant is 14 bits, we need 23 bits of mantissa
		// in order:
		//      [22:9]       8      [7:0]
		//   { mant[13:0], 1'b1, mant[13:6] } + rounding

		// Puzzle together final float
		return bits_to_float(
			(norm.exp_sign << 23) +
			(mant << 9) +
			0x100 +
			(mant >> 6) +
			((mant >> 5) & 1)
		);
	}

	// Exhaustively compares each decode_direct_* function with its decode_ref_*
	// counterpart over the full input range of the format. Prints the first
	// mismatch and exits with status 1, or returns 0 when all four formats agree.
	int main()
	{
		// 8-bit unsigned
		printf("unorm8:\n");
		for (int x = 0; x < 0x100; x++)
		{
			const float ref = decode_ref_unorm8(x);
			const float tst = decode_direct_unorm8(x);
			if (ref == tst)
				continue;

			printf(" mismatch! x=%3d ref=%.8g tst=%.8g diff=%.8g\n", x, ref, tst, ref - tst);
			return 1;
		}
		printf(" all ok!\n");

		// 16-bit unsigned
		printf("unorm16:\n");
		for (int x = 0; x < 0x10000; x++)
		{
			const float ref = decode_ref_unorm16(x);
			const float tst = decode_direct_unorm16(x);
			if (ref == tst)
				continue;

			printf(" mismatch! x=%3d ref=%.8g tst=%.8g diff=%.8g\n", x, ref, tst, ref - tst);
			return 1;
		}
		printf(" all ok!\n");

		// 8-bit signed; mismatches are reported as raw bit patterns
		printf("snorm8:\n");
		for (int x = -0x80; x < 0x80; x++)
		{
			const float ref = decode_ref_snorm8(x);
			const float tst = decode_direct_snorm8(x);
			if (ref == tst)
				continue;

			printf(" mismatch! x=%3d ref=0x%08x tst=0x%08x\n", x, float_to_bits(ref), float_to_bits(tst));
			return 1;
		}
		printf(" all ok!\n");

		// 16-bit signed; mismatches are reported as raw bit patterns
		printf("snorm16:\n");
		for (int x = -0x8000; x < 0x8000; x++)
		{
			const float ref = decode_ref_snorm16(x);
			const float tst = decode_direct_snorm16(x);
			if (ref == tst)
				continue;

			printf(" mismatch! x=%3d ref=0x%08x tst=0x%08x\n", x, float_to_bits(ref), float_to_bits(tst));
			return 1;
		}
		printf(" all ok!\n");

		return 0;
	}