Created
March 13, 2019 04:05
-
-
Save milhidaka/95863906fe828198f47991c813dbe233 to your computer and use it in GitHub Desktop.
float16 -> float32 conversion in C
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdint.h> | |
#include <assert.h> | |
#define DATA_SIZE 2052 | |
/*
 * Convert an IEEE 754 half-precision (binary16) bit pattern to a float.
 *
 * float16_value: raw 16-bit encoding, MSB -> LSB:
 *                1 sign bit, 5 exponent bits, 10 fraction bits.
 * Returns the float whose bit pattern (1 sign bit, 8 exponent bits,
 * 23 fraction bits) encodes the same value. Zeros keep their sign,
 * subnormal halves become normal floats, and Inf/NaN keep their sign and
 * fraction bits (shifted into the top of the wider fraction field).
 *
 * Assumes `float` is a 32-bit IEEE 754 binary32 with the same byte order as
 * uint32_t.
 */
float decode(uint16_t float16_value)
{
    const uint32_t sign_bit = (uint32_t)(float16_value >> 15) << 31;
    uint32_t exponent = (float16_value >> 10) & 0x1Fu;
    uint32_t fraction = float16_value & 0x3FFu;

    /*
     * Reinterpret the assembled bits through a union: dereferencing a
     * float* that points at a uint32_t object violates the aliasing rules.
     */
    union
    {
        uint32_t bits;
        float value;
    } pun;

    if (exponent == 0)
    {
        if (fraction == 0)
        {
            /* signed zero */
            pun.bits = sign_bit;
        }
        else
        {
            /*
             * Subnormal half: value = 2**-14 * (0.fraction).
             * It is representable as a normal float, so shift the fraction
             * left until the implicit leading 1 reaches bit 10, lowering
             * the exponent once per shift:
             *   2**-14 * 0.0101  =>  2**-16 * 1.0100
             */
            exponent = 127 - 14;
            while ((fraction & (1u << 10)) == 0)
            {
                exponent--;
                fraction <<= 1;
            }
            /* drop the now-implicit leading 1 */
            fraction &= 0x3FFu;
            pun.bits = sign_bit | (exponent << 23) | (fraction << 13);
        }
    }
    else if (exponent == 0x1F)
    {
        /* Inf (fraction == 0) or NaN (fraction != 0) */
        pun.bits = sign_bit | (0xFFu << 23) | (fraction << 13);
    }
    else
    {
        /* normal number: rebias the exponent from 15 to 127 */
        pun.bits = sign_bit | ((exponent + (127 - 15)) << 23) | (fraction << 13);
    }
    return pun.value;
}
int main(void) | |
{ | |
uint16_t float16_data[DATA_SIZE]; | |
float float32_data[DATA_SIZE]; | |
FILE* fr = fopen("float16.bin", "rb"); | |
assert(fr != NULL); | |
size_t loaded = fread(float16_data, sizeof(uint16_t), DATA_SIZE, fr); | |
assert(loaded == DATA_SIZE); | |
fclose(fr); | |
for (int i = 0; i < DATA_SIZE; i++) | |
{ | |
float32_data[i] = decode(float16_data[i]); | |
} | |
FILE* fw = fopen("float32_decoded.bin", "wb"); | |
assert(fw != NULL); | |
size_t saved = fwrite(float32_data, sizeof(float), DATA_SIZE, fw); | |
assert(saved == DATA_SIZE); | |
fclose(fw); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Generate float16 test vectors ("float16.bin") together with numpy's own
# float32 conversion of the same values ("float32.bin").
import numpy as np

# Number of random samples drawn for each standard deviation.
SAMPLES_PER_SCALE = 1024

# Fixed seed so the generated files are reproducible.
np.random.seed(1)

# Draw the small-magnitude batch first, then the large-magnitude batch, each
# rounded to float16.
random_parts = [
    np.random.normal(scale=scale, size=SAMPLES_PER_SCALE).astype(np.float16)
    for scale in (1e-2, 1e2)
]

# Special values: +0, -0, +inf, -inf.
special_values = np.array([0.0, -0.0, np.inf, -np.inf], dtype=np.float16)

# 1024 + 1024 + 4 = 2052 values in total.
data = np.concatenate(random_parts + [special_values])
data.tofile("float16.bin")
data.astype(np.float32).tofile("float32.bin")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment