Last active
June 15, 2022 21:16
-
-
Save dalibor-drgon/12ddcf3c60f2a3397ff6a657633826a2 to your computer and use it in GitHub Desktop.
Floating point hacking
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
Compile and run with: gcc float.c -lm && ./a.out | |
Output: | |
Float 13.000000 deconstructed as [+ (1.10100000000000000000000) * 2^3] | |
Float -3.250000 deconstructed as [- (1.10100000000000000000000) * 2^1] | |
Float 0.375000 deconstructed as [+ (1.10000000000000000000000) * 2^-2] | |
Multiplication: [+ (1.00000000000000000000000) * 2^1] * [+ (1.10000000000000000000000) * 2^-2] = [+ (1.10000000000000000000000) * 2^-1] | |
AKA 2.000000 * 0.375000 = 0.750000 | |
Multiplication: [+ (1.10100000000000000000000) * 2^3] * [- (1.10100000000000000000000) * 2^1] = [- (1.01010010000000000000000) * 2^5] | |
AKA 13.000000 * -3.250000 = -42.250000 [got instead -42.250000] | |
SQRT estimate of 1.200000 is 1.095445, after 4 iterations 1.095445 vs 1.095445 from math lib | |
SQRT estimate of 2.000000 is 1.414214, after 4 iterations 1.414214 vs 1.414214 from math lib | |
SQRT estimate of 128.123444 is 11.319161, after 4 iterations 11.319162 vs 11.319162 from math lib | |
*/ | |
#include <stdint.h> // intX_t, uintX_t | |
#include <stdio.h> | |
#include <stdbool.h> | |
#include <math.h> | |
typedef struct { | |
int32_t sign; | |
int32_t exp; | |
uint32_t mantissa; | |
} float_raw; | |
bool is_similar(float x, float y) { | |
return abs(x-y) <= (abs(x) + abs(y)) * __DBL_EPSILON__ * 50; | |
} | |
float construct(float_raw raw) { | |
uint32_t exp = raw.exp + 127; | |
//printf("- %#x %#x %#x\n", raw.sign, exp, raw.mantissa); | |
uint32_t val = (raw.sign << 31) | (exp << 23) | (raw.mantissa); | |
return *((float*) &val); | |
} | |
float_raw deconstruct(float val) { | |
uint32_t raw = *((uint32_t*) &val); | |
float_raw ret = { | |
.sign = raw >> 31, | |
.exp = ((raw >> 23) & 0xff) - 127, | |
.mantissa = raw & 0x7fffff | |
}; | |
return ret; | |
} | |
void float_raw_print(float_raw raw) { | |
char mantissa[24] = {0}; | |
for(int i = 0; i < 23; i++) { | |
mantissa[i] = ((raw.mantissa >> (22-i)) & 1) ? '1' : '0'; | |
} | |
printf("[%c (1.%s) * 2^%d]", | |
(raw.sign ? '-' : '+'), mantissa, raw.exp); | |
} | |
void show_deconstruction(float val) { | |
float_raw dec = deconstruct(val); | |
printf("Float %f deconstructed as ", val); | |
float_raw_print(dec); | |
if(val != construct(dec)) { | |
printf(" [ERROR]"); | |
} | |
printf("\n"); | |
} | |
float_raw multiply(float_raw x, float_raw y) { | |
int inc = 0; | |
uint64_t mant = (uint64_t) (0x800000 | x.mantissa) * (0x800000 | y.mantissa); | |
mant >>= 23; | |
// Ak x_mantisa * y_mantisa >= 2, increment exponent by one | |
if(mant & 0x1000000) { | |
mant >>= 1; | |
inc += 1; | |
} | |
float_raw ret = { | |
.sign = (x.sign != y.sign) ? 1 : 0, | |
.exp = x.exp + y.exp + inc, | |
.mantissa = mant & 0x7fffff | |
}; | |
return ret; | |
} | |
void show_multiplication(float x, float y) { | |
float_raw rx = deconstruct(x); | |
float_raw ry = deconstruct(y); | |
printf("Multiplication: "); | |
float_raw_print(rx); | |
printf(" * "); | |
float_raw_print(ry); | |
printf(" = "); | |
float_raw res = multiply(rx, ry); | |
float_raw_print(res); | |
printf("\n\tAKA %f * %f = %f", x, y, construct(res)); | |
if(!is_similar(x*y, construct(res))) | |
printf(" [got instead %f]", x*y); | |
printf("\n"); | |
} | |
#define table_order 9 | |
#define table_size 512 | |
float recip_const1[table_size+1]; | |
float recip_const2[table_size+1]; | |
void setup_table() { | |
float f = 1.0f; | |
for(unsigned i = 0; i < table_size+1; i++) { | |
recip_const1[i] = sqrtf(f); | |
recip_const2[i] = sqrtf(2.0f*f); | |
f += 1.0f/table_size; | |
} | |
} | |
float sqrt_estimate(float x) { | |
if(x < 0) return NAN; | |
if(recip_const1[0] == 0) | |
setup_table(); | |
float_raw rx = deconstruct(x); | |
float est1, est2; | |
int exp = 0; | |
if((rx.exp & 1) == 0) { // even exponent | |
exp = rx.exp / 2; | |
est1 = recip_const1[rx.mantissa >> (23 - table_order)]; | |
est2 = recip_const1[(rx.mantissa >> (23 - table_order)) + 1]; | |
} else { // odd exponent | |
exp = rx.exp / 2; | |
est1 = recip_const2[rx.mantissa >> (23 - table_order)]; | |
est2 = recip_const2[(rx.mantissa >> (23 - table_order)) + 1]; | |
} | |
float diff = (float) (rx.mantissa & ((1 << (23 - table_order))-1)) / (1 << (23 - table_order)); | |
float est = est1 + (est2 - est1) * diff; | |
return est * powf(2.0f, exp); | |
} | |
float sqrt_iterate(float sqrt, float num) { | |
return (sqrt + num/sqrt) / 2; | |
} | |
float sqrt_precise(float x) { | |
float est = sqrt_estimate(x); | |
for(int i = 0; i < 4; i++) | |
est = sqrt_iterate(est, x); | |
return est; | |
} | |
float show_sqrt(float x) { | |
printf("SQRT estimate of %f is %f, after 4 iterations %f vs %f from math lib\n", | |
x, sqrt_estimate(x), sqrt_precise(x), sqrtf(x)); | |
} | |
int main() { | |
// Floats | |
show_deconstruction(13.0f); | |
show_deconstruction(-3.25f); | |
show_deconstruction(0.375); | |
// Multiplication | |
show_multiplication(2.0f, 0.375f); | |
show_multiplication(13.0f, -3.25f); | |
show_sqrt(1.2); | |
show_sqrt(2.0); | |
show_sqrt(128.12345); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment