Last active
July 14, 2019 17:26
-
-
Save native-m/1890be95db503e0ae382417aa187245f to your computer and use it in GitHub Desktop.
Floating-point addition & multiplication behind the scenes. Rounding is not included!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Overlays one float with an integer view (i) and a bit-field view (m, e, s).
 *
 * The bit-fields are declared in the order m (23 bits), e (8 bits), s (1 bit)
 * and all use plain `int`.
 * NOTE(review): where plain-int bit-fields are signed, e reads back as
 * -128..127 and s as 0 or -1 (never 1) -- confirm every reader expects that.
 * NOTE(review): bit-field placement is implementation-defined; presumably
 * m/e/s line up with the binary32 mantissa/exponent/sign on the intended
 * target -- verify for other compilers.
 */
union IEEEFloat
{
    struct
    {
        int m : 23, e : 8, s : 1;
    };
    float f;
    int i;
};
/*
 * Count the leading zero bits of a 32-bit value.
 *
 * Returns 32 for x == 0, otherwise the number of zero bits above the highest
 * set bit (0 when bit 31 is set, 31 for x == 1).
 */
int clz(uint32_t x)
{
    static const char debruijn32[32] = {
        0, 31, 9, 30, 3, 8, 13, 29, 2, 5, 7, 21, 12, 24, 28, 19,
        1, 10, 4, 14, 6, 22, 25, 20, 11, 15, 23, 26, 16, 27, 17, 18
    };

    /* Without this guard zero would hit table slot 0, the same slot used
       when bit 31 is set, and be reported as having no leading zeros. */
    if (x == 0)
    {
        return 32;
    }

    /* Copy the highest set bit into every lower position. */
    x |= x >> 1;
    x |= x >> 2;
    x |= x >> 4;
    x |= x >> 8;
    x |= x >> 16;

    /* x is now 2^(k+1) - 1; adding one yields a single set bit, or wraps to
       0 when bit 31 was set. */
    x++;

    /* The multiplication leaves a distinct 5-bit index in the top bits for
       each power of two. */
    return debruijn32[(uint32_t)(x * 0x076be629u) >> 27];
}
/*
 * Count the set bits in a 64-bit value.
 *
 * The parameter is uint64_t (rather than the compiler-specific
 * `unsigned __int64`) so the function builds on any compiler and matches the
 * type used by clz64.
 */
int popcnt64(uint64_t w)
{
    /* Each 2-bit field becomes the number of bits that were set in it. */
    w -= (w >> 1) & 0x5555555555555555ULL;
    /* Sum neighbouring 2-bit counts into 4-bit fields. */
    w = (w & 0x3333333333333333ULL) + ((w >> 2) & 0x3333333333333333ULL);
    /* Sum neighbouring 4-bit counts into one count per byte. */
    w = (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
    /* The multiply accumulates all eight byte counts into the top byte. */
    return (int)((w * 0x0101010101010101ULL) >> 56);
}
int clz64(uint64_t x) | |
{ | |
int c[] = { 1, 2, 4, 8, 16, 32 }; | |
for (int i : c) | |
x |= (x >> i); | |
return 64 - popcnt64(x); | |
} | |
/* Return the raw binary32 bit pattern of a float. */
static uint32_t addf_to_bits(float value)
{
    union { float f; uint32_t u; } pun;
    pun.f = value;
    return pun.u;
}

/* Build a float from a raw binary32 bit pattern. */
static float addf_from_bits(uint32_t bits)
{
    union { float f; uint32_t u; } pun;
    pun.u = bits;
    return pun.f;
}

/*
 * Add two floats using integer arithmetic only.
 *
 * The result is the exact sum truncated toward zero to 24 significant bits
 * (no rounding is performed). Zeros and subnormals are handled; a NaN operand
 * is returned as is; infinities of opposite sign give NaN; a finite sum that
 * is too large for the format gives infinity. x + (-x) yields +0.
 */
float addf(float x, float y)
{
    const uint32_t sign_mask = 0x80000000u;
    const uint32_t inf_bits = 0x7F800000u;
    const uint32_t frac_mask = 0x007FFFFFu;
    const uint32_t hidden_bit = 0x00800000u;
    /* Significands are kept with 32 extra low bits, so the hidden bit of a
       normalised value sits at bit 55. */
    const uint64_t one = (uint64_t)hidden_bit << 32;

    uint32_t big_bits = addf_to_bits(x);
    uint32_t small_bits = addf_to_bits(y);
    uint32_t big_mag = big_bits & ~sign_mask;
    uint32_t small_mag = small_bits & ~sign_mask;

    /* NaN and infinity. */
    if (big_mag > inf_bits)
    {
        return x;
    }
    if (small_mag > inf_bits)
    {
        return y;
    }
    if (big_mag == inf_bits)
    {
        if (small_mag == inf_bits && big_bits != small_bits)
        {
            return addf_from_bits(0x7FC00000u); /* inf + -inf */
        }
        return x;
    }
    if (small_mag == inf_bits)
    {
        return y;
    }

    /* For finite values the magnitude order equals the order of the bit
       patterns; make "big" the operand with the larger magnitude. */
    if (big_mag < small_mag)
    {
        uint32_t tmp = big_bits;
        big_bits = small_bits;
        small_bits = tmp;
        tmp = big_mag;
        big_mag = small_mag;
        small_mag = tmp;
    }

    const int subtract = ((big_bits ^ small_bits) & sign_mask) != 0;
    if (subtract && big_mag == small_mag)
    {
        return 0.0f; /* exact cancellation */
    }

    /* The larger magnitude decides the sign of the result. */
    const uint32_t sign = big_bits & sign_mask;

    int32_t exp = (int32_t)(big_mag >> 23);
    int32_t small_exp = (int32_t)(small_mag >> 23);
    uint64_t big = big_mag & frac_mask;
    uint64_t small = small_mag & frac_mask;

    /* Exponent field 0 (zero/subnormal) has no hidden bit and shares the
       scale of exponent field 1. */
    if (exp == 0)
    {
        exp = 1;
    }
    else
    {
        big |= hidden_bit;
    }
    if (small_exp == 0)
    {
        small_exp = 1;
    }
    else
    {
        small |= hidden_bit;
    }
    big <<= 32;
    small <<= 32;

    /* Align the smaller operand to the larger exponent, remembering whether
       any nonzero bits fell off the low end. */
    const int32_t shift = exp - small_exp;
    uint64_t aligned = 0;
    uint64_t lost = 0;
    if (shift < 64)
    {
        aligned = small >> shift;
        lost = ((aligned << shift) != small);
    }
    else
    {
        lost = (small != 0);
    }

    /* When subtracting, round the subtrahend up if bits were lost so that the
       difference is never larger than the exact one (truncation toward 0). */
    uint64_t sum = subtract ? big - aligned - lost : big + aligned;
    if (sum == 0)
    {
        return addf_from_bits(sign); /* both operands were zero */
    }

    /* Renormalise: a carry needs one right shift, cancellation needs left
       shifts, stopping at the subnormal scale (exp == 1). */
    if (sum >= (one << 1))
    {
        sum >>= 1;
        exp++;
    }
    while (sum < one && exp > 1)
    {
        sum <<= 1;
        exp--;
    }

    if (exp >= 255)
    {
        return addf_from_bits(sign | inf_bits);
    }
    if (sum < one)
    {
        exp = 0; /* subnormal result: no hidden bit */
    }
    return addf_from_bits(sign | ((uint32_t)exp << 23)
                          | ((uint32_t)(sum >> 32) & frac_mask));
}
/* Return the raw binary32 bit pattern of a float. */
static uint32_t mulf_to_bits(float value)
{
    union { float f; uint32_t u; } pun;
    pun.f = value;
    return pun.u;
}

/* Build a float from a raw binary32 bit pattern. */
static float mulf_from_bits(uint32_t bits)
{
    union { float f; uint32_t u; } pun;
    pun.u = bits;
    return pun.f;
}

/*
 * Split a nonzero finite magnitude into a 24-bit significand with bit 23 set
 * and the matching biased exponent, stored in *exp (<= 0 for subnormals).
 */
static uint32_t mulf_unpack(uint32_t mag, int32_t *exp)
{
    uint32_t sig = mag & 0x007FFFFFu;
    int32_t e = (int32_t)(mag >> 23);

    if (e == 0)
    {
        /* Subnormal: shift the leading one up to the hidden-bit position. */
        e = 1;
        while ((sig & 0x00800000u) == 0)
        {
            sig <<= 1;
            e--;
        }
    }
    else
    {
        sig |= 0x00800000u;
    }
    *exp = e;
    return sig;
}

/*
 * Multiply two floats using integer arithmetic only.
 *
 * The result is the exact product truncated toward zero to 24 significant
 * bits (no rounding is performed). The sign is the XOR of the operand signs.
 * Zeros and subnormals are handled; a NaN operand is returned as is;
 * infinity times zero gives NaN; a product too large for the format gives
 * infinity and one too small gives a signed zero.
 */
float mulf(float x, float y)
{
    const uint32_t sign_mask = 0x80000000u;
    const uint32_t inf_bits = 0x7F800000u;
    const uint32_t frac_mask = 0x007FFFFFu;

    const uint32_t x_bits = mulf_to_bits(x);
    const uint32_t y_bits = mulf_to_bits(y);
    const uint32_t x_mag = x_bits & ~sign_mask;
    const uint32_t y_mag = y_bits & ~sign_mask;
    const uint32_t sign = (x_bits ^ y_bits) & sign_mask;

    /* NaN, infinity and zero. */
    if (x_mag > inf_bits)
    {
        return x;
    }
    if (y_mag > inf_bits)
    {
        return y;
    }
    if (x_mag == inf_bits || y_mag == inf_bits)
    {
        if (x_mag == 0 || y_mag == 0)
        {
            return mulf_from_bits(0x7FC00000u); /* inf * 0 */
        }
        return mulf_from_bits(sign | inf_bits);
    }
    if (x_mag == 0 || y_mag == 0)
    {
        return mulf_from_bits(sign);
    }

    int32_t x_exp;
    int32_t y_exp;
    const uint64_t x_sig = mulf_unpack(x_mag, &x_exp);
    const uint64_t y_sig = mulf_unpack(y_mag, &y_exp);

    /* Two significands in [2^23, 2^24) give a product in [2^46, 2^48). */
    const uint64_t product = x_sig * y_sig;
    int32_t exp = x_exp + y_exp - 127; /* both inputs carry the bias once */
    int shift = 23;
    if (product & ((uint64_t)1 << 47))
    {
        /* Product of the significands is >= 2: renormalise by one bit. */
        shift = 24;
        exp++;
    }

    if (exp >= 255)
    {
        return mulf_from_bits(sign | inf_bits);
    }
    if (exp <= 0)
    {
        /* Subnormal result: shift further right, exponent field stays 0. */
        const int32_t extra = 1 - exp;
        if (extra > 24)
        {
            return mulf_from_bits(sign);
        }
        return mulf_from_bits(sign | (uint32_t)(product >> (shift + extra)));
    }
    return mulf_from_bits(sign | ((uint32_t)exp << 23)
                          | ((uint32_t)(product >> shift) & frac_mask));
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment