Skip to content

Instantly share code, notes, and snippets.

@petamoriken
Created April 11, 2017 06:57
Show Gist options
  • Save petamoriken/26d8d3517a93d032d7674eaf6c720ea1 to your computer and use it in GitHub Desktop.
Save petamoriken/26d8d3517a93d032d7674eaf6c720ea1 to your computer and use it in GitHub Desktop.
Half-precision float rounding function
// 4-byte scratch buffer viewed both as a float32 and as a uint32, so that a
// float32 value can be written through one view and its raw bits read back
// through the other.
const buffer = new ArrayBuffer(4);
const floatView = new Float32Array(buffer);
const uint32View = new Uint32Array(buffer);

// Lookup tables indexed by the 9-bit "sign + biased exponent" field of a float32
// (index | 0x100 is the negative variant of the same exponent):
//  - baseTable:  sign and exponent bits of the resulting half float
//  - shiftTable: right shift applied to the 23-bit float32 mantissa
const baseTable = new Uint32Array(512);
const shiftTable = new Uint32Array(512);

for (let i = 0; i < 256; ++i) {
  const e = i - 127;

  if (e < -27) {
    // very small number (0, -0)
    baseTable[i | 0x000] = 0x0000;
    baseTable[i | 0x100] = 0x8000;
    shiftTable[i | 0x000] = 24;
    shiftTable[i | 0x100] = 24;
  } else if (e < -14) {
    // small number (denorm)
    baseTable[i | 0x000] = 0x0400 >> (-e - 14);
    baseTable[i | 0x100] = (0x0400 >> (-e - 14)) | 0x8000;
    shiftTable[i | 0x000] = -e - 1;
    shiftTable[i | 0x100] = -e - 1;
  } else if (e <= 15) {
    // normal number
    baseTable[i | 0x000] = (e + 15) << 10;
    baseTable[i | 0x100] = ((e + 15) << 10) | 0x8000;
    shiftTable[i | 0x000] = 13;
    shiftTable[i | 0x100] = 13;
  } else if (e < 128) {
    // large number (Infinity, -Infinity)
    baseTable[i | 0x000] = 0x7c00;
    baseTable[i | 0x100] = 0xfc00;
    shiftTable[i | 0x000] = 24;
    shiftTable[i | 0x100] = 24;
  } else {
    // stay (NaN, Infinity, -Infinity)
    baseTable[i | 0x000] = 0x7c00;
    baseTable[i | 0x100] = 0xfc00;
    shiftTable[i | 0x000] = 13;
    shiftTable[i | 0x100] = 13;
  }
}

// 2^52: adding and then subtracting it rounds a non-negative double smaller
// than 2^52 to an integer using the default rounding mode (ties to even).
const INVERSE_OF_EPSILON = 1 / Number.EPSILON;
// 2^-10, the spacing of half floats in [1, 2).
const FLOAT16_EPSILON = 0.0009765625;
// 2^-14, the smallest normal half float.
const FLOAT16_MIN_NORMAL = 6.103515625e-5;
// 2^-24, the smallest subnormal half float (product of two powers of two, exact).
const FLOAT16_MIN_SUBNORMAL = FLOAT16_EPSILON * FLOAT16_MIN_NORMAL;
// Largest finite half float.
const FLOAT16_MAX_VALUE = 65504;
// 2^42 + 1: multiplier that splits off the top 11 significant bits of a double.
const FLOAT16_SPLIT_FACTOR = 1 + FLOAT16_EPSILON * INVERSE_OF_EPSILON;

/**
 * Internal helper: rounds a number to the nearest value representable as a
 * half float (ties to even), working directly on the double so that no
 * intermediate float32 rounding can disturb the result.
 * @param {number} num
 * @returns {number} the rounded value; magnitudes that round above 65504
 *   become +/-Infinity, NaN and +/-0 are returned unchanged
 */
function roundToFloat16(num) {
  const value = Number(num);
  if (!Number.isFinite(value) || value === 0) {
    return value;
  }

  const sign = value > 0 ? 1 : -1;
  const magnitude = Math.abs(value);

  // Subnormal range: results are integer multiples of 2^-24.
  if (magnitude < FLOAT16_MIN_NORMAL) {
    const steps = (magnitude / FLOAT16_MIN_SUBNORMAL + INVERSE_OF_EPSILON) - INVERSE_OF_EPSILON;
    return sign * steps * FLOAT16_MIN_SUBNORMAL;
  }

  // Normal range: keep only the 11 most significant bits of the significand.
  const scaled = FLOAT16_SPLIT_FACTOR * magnitude;
  const rounded = scaled - (scaled - magnitude);

  // `scaled` overflows to Infinity for huge inputs, which makes `rounded` NaN.
  if (rounded > FLOAT16_MAX_VALUE || Number.isNaN(rounded)) {
    return sign * Infinity;
  }
  return sign * rounded;
}

/**
 * round a number to a half float number bits.
 *
 * The number is first rounded to the nearest half float value (ties to even);
 * the table lookup then only re-encodes that value and drops no set bits.
 * @param {number} num
 * @returns {number} the 16-bit half float bit pattern (0..0xffff)
 * @see {@link ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf}
 */
function roundToFloat16Bits(num) {
  floatView[0] = roundToFloat16(num);
  const f = uint32View[0];
  // 9-bit table index: sign bit followed by the 8-bit biased exponent.
  const e = (f >> 23) & 0x1ff;
  return baseTable[e] + ((f & 0x007fffff) >> shiftTable[e]);
}
// Lookup tables used to expand half float bits into float32 bits.
// Typed arrays start zero-filled, so entries that must be 0 are left untouched.
const mantissaTable = new Uint32Array(2048);
const exponentTable = new Uint32Array(64);
const offsetTable = new Uint32Array(64);

// mantissa, entries 1..1023: the 10-bit value is zero padded to 23 bits and
// normalized, i.e. shifted left until its top set bit reaches bit 23; every
// shift lowers the float32 exponent by one.
for (let bits = 1; bits < 1024; ++bits) {
  const padded = bits << 13;
  // Bit 23 corresponds to 8 leading zeros in a 32-bit word.
  const shifts = Math.clz32(padded) - 8;
  const fraction = (padded << shifts) & 0x007fffff; // leading 1 bit cleared
  const exponent = 0x38800000 - shifts * 0x00800000; // biased, minus one per shift
  mantissaTable[bits] = fraction | exponent;
}

// mantissa, entries 1024..2047: mantissa is only zero padded, on top of a fixed exponent.
for (let bits = 0; bits < 1024; ++bits) {
  mantissaTable[1024 + bits] = 0x38000000 + (bits << 13);
}

// exponent: entries 1..30 and their sign-bit counterparts 33..62
for (let exp = 1; exp < 31; ++exp) {
  const shifted = exp << 23;
  exponentTable[exp] = shifted;
  exponentTable[exp + 32] = 0x80000000 + shifted;
}
exponentTable[31] = 0x47800000;
exponentTable[32] = 0x80000000;
exponentTable[63] = 0xc7800000;

// offset: 1024 everywhere except entries 0 and 32, which select the first
// half of mantissaTable.
offsetTable.fill(1024);
offsetTable[0] = 0;
offsetTable[32] = 0;
/**
 * Decode half float bits into a number.
 *
 * The upper bits of `h` (sign and exponent, `h >> 10`) select an entry of
 * exponentTable and offsetTable; the lower 10 bits plus that offset select an
 * entry of mantissaTable. The sum of both entries is written as float32 bits
 * and read back as a float.
 * @param {number} h - half float number bits
 * @returns {number} the value read back from the shared float32 view
 * @see {@link ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf}
 */
function convertNumber(h) {
  const exponentIndex = h >> 10;
  const mantissaIndex = offsetTable[exponentIndex] + (h & 0x3ff);
  const float32Bits = mantissaTable[mantissaIndex] + exponentTable[exponentIndex];
  uint32View[0] = float32Bits;
  return floatView[0];
}
/**
 * Rounds a number to half precision: the value is encoded as half float bits
 * by roundToFloat16Bits and decoded back into a number by convertNumber.
 * @param {number} num - coerced with Number() before use
 * @returns {number} the decoded half precision value
 */
function hfround(num) {
  const value = Number(num);

  // Shortcut: NaN, +/-Infinity and +/-0 are returned as they are, without
  // going through the conversion tables.
  if (value === 0 || !Number.isFinite(value)) {
    return value;
  }

  return convertNumber(roundToFloat16Bits(value));
}
@petamoriken
Copy link
Author

Math.fround のように倍精度浮動小数点数を半精度浮動小数点数に丸める函数。
Babel Repl

@petamoriken
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment