milhidaka · March 13, 2019 04:05
diff --git a/convert_float32.c b/convert_float32.c
 #include <stdio.h>
 #include <stdint.h>
 #include <assert.h>

 #define DATA_SIZE 2052

 /*
  * Convert one IEEE 754 half-precision (binary16) bit pattern to a float.
  *
  * float16_value: raw 16-bit pattern (1 sign bit, 5 exponent bits,
  *                10 fraction bits, MSB -> LSB).
  * Returns the float whose bit pattern is the equivalent binary32 encoding
  * (1 sign bit, 8 exponent bits, 23 fraction bits).
  *
  * Zero, subnormals, normal numbers, infinities and NaNs are all handled;
  * the NaN payload bits are carried over in the top of the float32 fraction.
  * Assumes `float` is a 32-bit IEEE 754 binary32 type on the target.
  */
 float decode(uint16_t float16_value)
 {
   // for normal exponent(1 to 0x1e): value=2**(exponent-15)*(1.fraction)
   // for denormalized exponent(0): value=2**-14*(0.fraction)
   uint32_t sign = (uint32_t)float16_value >> 15;
   uint32_t exponent = ((uint32_t)float16_value >> 10) & 0x1Fu;
   uint32_t fraction = (uint32_t)float16_value & 0x3FFu;
   uint32_t float32_value;

   if (exponent == 0)
   {
     if (fraction == 0)
     {
       // signed zero: only the sign bit survives
       float32_value = (sign << 31);
     }
     else
     {
       // A float16 subnormal is an ordinary (normalized) value in float32.
       //   2 ** -14 * 0.0101
       //   => 2 ** -16 * 1.0100
       // Start from the biased float32 exponent for 2**-14 and shift the
       // fraction left until its implicit leading 1 reaches bit 10.
       exponent = 127 - 14;
       while ((fraction & (1u << 10)) == 0)
       {
         exponent--;
         fraction <<= 1;
       }
       // drop the now-implicit leading 1
       fraction &= 0x3FFu;
       float32_value = (sign << 31) | (exponent << 23) | (fraction << 13);
     }
   }
   else if (exponent == 0x1F)
   {
     /* Inf or NaN: all-ones exponent in both formats */
     float32_value = (sign << 31) | (UINT32_C(0xFF) << 23) | (fraction << 13);
   }
   else
   {
     /* ordinary number: re-bias the exponent from 15 to 127 */
     float32_value = (sign << 31) | ((exponent + (127 - 15)) << 23) | (fraction << 13);
   }

   /*
    * Reinterpret the bits through a union. Dereferencing a (float *) cast of
    * a uint32_t object would violate the effective-type (strict aliasing)
    * rules and is undefined behaviour.
    */
   union
   {
     uint32_t bits;
     float value;
   } pun;
   pun.bits = float32_value;
   return pun.value;
 }

 /*
  * Read DATA_SIZE half-precision values from "float16.bin", convert each one
  * with decode(), and write the resulting floats to "float32_decoded.bin".
  *
  * Returns 0 on success and 1 if any file operation fails. Failures are
  * reported on stderr with explicit checks rather than assert(), so the
  * checks are still performed when the program is built with NDEBUG.
  */
 int main(void)
 {
   uint16_t float16_data[DATA_SIZE];
   float float32_data[DATA_SIZE];

   FILE *fr = fopen("float16.bin", "rb");
   if (fr == NULL)
   {
     perror("float16.bin");
     return 1;
   }

   size_t loaded = fread(float16_data, sizeof(uint16_t), DATA_SIZE, fr);
   fclose(fr);
   if (loaded != DATA_SIZE)
   {
     fprintf(stderr, "float16.bin: expected %d values, read %zu\n", DATA_SIZE, loaded);
     return 1;
   }

   for (int i = 0; i < DATA_SIZE; i++)
   {
     float32_data[i] = decode(float16_data[i]);
   }

   FILE *fw = fopen("float32_decoded.bin", "wb");
   if (fw == NULL)
   {
     perror("float32_decoded.bin");
     return 1;
   }

   size_t saved = fwrite(float32_data, sizeof(float), DATA_SIZE, fw);
   if (saved != DATA_SIZE)
   {
     fprintf(stderr, "float32_decoded.bin: expected to write %d values, wrote %zu\n", DATA_SIZE, saved);
     fclose(fw);
     return 1;
   }

   // fclose flushes buffered output, so a failure here means lost data
   if (fclose(fw) != 0)
   {
     perror("float32_decoded.bin");
     return 1;
   }

   return 0;
 }
diff --git a/numpy_make_data.py b/numpy_make_data.py
 import numpy as np

 # Seed the global generator so both output files are reproducible.
 np.random.seed(1)

 # Two batches of 1024 normally distributed samples rounded to float16,
 # drawn in this order: scale 1e-2 first, then scale 1e2.
 batches = [
     np.random.normal(scale=spread, size=(1024,)).astype(np.float16)
     for spread in (1e-2, 1e2)
 ]

 # Hand-picked special values: +0, -0, +inf, -inf.
 specials = np.array([0.0, -0.0, np.inf, -np.inf], dtype=np.float16)

 # 1024 + 1024 + 4 = 2052 values in total.
 half_values = np.concatenate(batches + [specials])

 # Raw float16 values.
 half_values.tofile("float16.bin")

 # The same values widened to float32 by numpy
 # (presumably the reference to compare a decoder's output against).
 half_values.astype(np.float32).tofile("float32.bin")
	#include <stdio.h>
	#include <stdint.h>
	#include <assert.h>

	#define DATA_SIZE 2052

	/*
	 * Convert one IEEE 754 half-precision (binary16) bit pattern to a float.
	 *
	 * float16_value: raw 16-bit pattern (1 sign bit, 5 exponent bits,
	 *                10 fraction bits, MSB -> LSB).
	 * Returns the float whose bit pattern is the equivalent binary32 encoding
	 * (1 sign bit, 8 exponent bits, 23 fraction bits).
	 *
	 * Zero, subnormals, normal numbers, infinities and NaNs are all handled;
	 * the NaN payload bits are carried over in the top of the float32 fraction.
	 * Assumes `float` is a 32-bit IEEE 754 binary32 type on the target.
	 */
	float decode(uint16_t float16_value)
	{
	  // for normal exponent(1 to 0x1e): value=2**(exponent-15)*(1.fraction)
	  // for denormalized exponent(0): value=2**-14*(0.fraction)
	  uint32_t sign = (uint32_t)float16_value >> 15;
	  uint32_t exponent = ((uint32_t)float16_value >> 10) & 0x1Fu;
	  uint32_t fraction = (uint32_t)float16_value & 0x3FFu;
	  uint32_t float32_value;

	  if (exponent == 0)
	  {
	    if (fraction == 0)
	    {
	      // signed zero: only the sign bit survives
	      float32_value = (sign << 31);
	    }
	    else
	    {
	      // A float16 subnormal is an ordinary (normalized) value in float32.
	      //   2 ** -14 * 0.0101
	      //   => 2 ** -16 * 1.0100
	      // Start from the biased float32 exponent for 2**-14 and shift the
	      // fraction left until its implicit leading 1 reaches bit 10.
	      exponent = 127 - 14;
	      while ((fraction & (1u << 10)) == 0)
	      {
	        exponent--;
	        fraction <<= 1;
	      }
	      // drop the now-implicit leading 1
	      fraction &= 0x3FFu;
	      float32_value = (sign << 31) | (exponent << 23) | (fraction << 13);
	    }
	  }
	  else if (exponent == 0x1F)
	  {
	    /* Inf or NaN: all-ones exponent in both formats */
	    float32_value = (sign << 31) | (UINT32_C(0xFF) << 23) | (fraction << 13);
	  }
	  else
	  {
	    /* ordinary number: re-bias the exponent from 15 to 127 */
	    float32_value = (sign << 31) | ((exponent + (127 - 15)) << 23) | (fraction << 13);
	  }

	  /*
	   * Reinterpret the bits through a union. Dereferencing a (float *) cast of
	   * a uint32_t object would violate the effective-type (strict aliasing)
	   * rules and is undefined behaviour.
	   */
	  union
	  {
	    uint32_t bits;
	    float value;
	  } pun;
	  pun.bits = float32_value;
	  return pun.value;
	}

	/*
	 * Read DATA_SIZE half-precision values from "float16.bin", convert each one
	 * with decode(), and write the resulting floats to "float32_decoded.bin".
	 *
	 * Returns 0 on success and 1 if any file operation fails. Failures are
	 * reported on stderr with explicit checks rather than assert(), so the
	 * checks are still performed when the program is built with NDEBUG.
	 */
	int main(void)
	{
	  uint16_t float16_data[DATA_SIZE];
	  float float32_data[DATA_SIZE];

	  FILE *fr = fopen("float16.bin", "rb");
	  if (fr == NULL)
	  {
	    perror("float16.bin");
	    return 1;
	  }

	  size_t loaded = fread(float16_data, sizeof(uint16_t), DATA_SIZE, fr);
	  fclose(fr);
	  if (loaded != DATA_SIZE)
	  {
	    fprintf(stderr, "float16.bin: expected %d values, read %zu\n", DATA_SIZE, loaded);
	    return 1;
	  }

	  for (int i = 0; i < DATA_SIZE; i++)
	  {
	    float32_data[i] = decode(float16_data[i]);
	  }

	  FILE *fw = fopen("float32_decoded.bin", "wb");
	  if (fw == NULL)
	  {
	    perror("float32_decoded.bin");
	    return 1;
	  }

	  size_t saved = fwrite(float32_data, sizeof(float), DATA_SIZE, fw);
	  if (saved != DATA_SIZE)
	  {
	    fprintf(stderr, "float32_decoded.bin: expected to write %d values, wrote %zu\n", DATA_SIZE, saved);
	    fclose(fw);
	    return 1;
	  }

	  // fclose flushes buffered output, so a failure here means lost data
	  if (fclose(fw) != 0)
	  {
	    perror("float32_decoded.bin");
	    return 1;
	  }

	  return 0;
	}
	import numpy as np

	# Seed the global generator so both output files are reproducible.
	np.random.seed(1)

	# Two batches of 1024 normally distributed samples rounded to float16,
	# drawn in this order: scale 1e-2 first, then scale 1e2.
	batches = [
	    np.random.normal(scale=spread, size=(1024,)).astype(np.float16)
	    for spread in (1e-2, 1e2)
	]

	# Hand-picked special values: +0, -0, +inf, -inf.
	specials = np.array([0.0, -0.0, np.inf, -np.inf], dtype=np.float16)

	# 1024 + 1024 + 4 = 2052 values in total.
	half_values = np.concatenate(batches + [specials])

	# Raw float16 values.
	half_values.tofile("float16.bin")

	# The same values widened to float32 by numpy
	# (presumably the reference to compare a decoder's output against).
	half_values.astype(np.float32).tofile("float32.bin")