Skip to content

Instantly share code, notes, and snippets.

@dalibor-drgon
Last active June 15, 2022 21:16
Show Gist options
  • Save dalibor-drgon/12ddcf3c60f2a3397ff6a657633826a2 to your computer and use it in GitHub Desktop.
Save dalibor-drgon/12ddcf3c60f2a3397ff6a657633826a2 to your computer and use it in GitHub Desktop.
Floating point hacking
/**
Compile and run with: gcc float.c -lm && ./a.out
Output:
Float 13.000000 deconstructed as [+ (1.10100000000000000000000) * 2^3]
Float -3.250000 deconstructed as [- (1.10100000000000000000000) * 2^1]
Float 0.375000 deconstructed as [+ (1.10000000000000000000000) * 2^-2]
Multiplication: [+ (1.00000000000000000000000) * 2^1] * [+ (1.10000000000000000000000) * 2^-2] = [+ (1.10000000000000000000000) * 2^-1]
AKA 2.000000 * 0.375000 = 0.750000
Multiplication: [+ (1.10100000000000000000000) * 2^3] * [- (1.10100000000000000000000) * 2^1] = [- (1.01010010000000000000000) * 2^5]
AKA 13.000000 * -3.250000 = -42.250000 [got instead -42.250000]
SQRT estimate of 1.200000 is 1.095445, after 4 iterations 1.095445 vs 1.095445 from math lib
SQRT estimate of 2.000000 is 1.414214, after 4 iterations 1.414214 vs 1.414214 from math lib
SQRT estimate of 128.123444 is 11.319161, after 4 iterations 11.319162 vs 11.319162 from math lib
*/
#include <stdint.h> // intX_t, uintX_t
#include <stdio.h>
#include <stdbool.h>
#include <math.h>
typedef struct {
int32_t sign;
int32_t exp;
uint32_t mantissa;
} float_raw;
bool is_similar(float x, float y) {
return abs(x-y) <= (abs(x) + abs(y)) * __DBL_EPSILON__ * 50;
}
float construct(float_raw raw) {
uint32_t exp = raw.exp + 127;
//printf("- %#x %#x %#x\n", raw.sign, exp, raw.mantissa);
uint32_t val = (raw.sign << 31) | (exp << 23) | (raw.mantissa);
return *((float*) &val);
}
float_raw deconstruct(float val) {
uint32_t raw = *((uint32_t*) &val);
float_raw ret = {
.sign = raw >> 31,
.exp = ((raw >> 23) & 0xff) - 127,
.mantissa = raw & 0x7fffff
};
return ret;
}
void float_raw_print(float_raw raw) {
char mantissa[24] = {0};
for(int i = 0; i < 23; i++) {
mantissa[i] = ((raw.mantissa >> (22-i)) & 1) ? '1' : '0';
}
printf("[%c (1.%s) * 2^%d]",
(raw.sign ? '-' : '+'), mantissa, raw.exp);
}
void show_deconstruction(float val) {
float_raw dec = deconstruct(val);
printf("Float %f deconstructed as ", val);
float_raw_print(dec);
if(val != construct(dec)) {
printf(" [ERROR]");
}
printf("\n");
}
float_raw multiply(float_raw x, float_raw y) {
int inc = 0;
uint64_t mant = (uint64_t) (0x800000 | x.mantissa) * (0x800000 | y.mantissa);
mant >>= 23;
// Ak x_mantisa * y_mantisa >= 2, increment exponent by one
if(mant & 0x1000000) {
mant >>= 1;
inc += 1;
}
float_raw ret = {
.sign = (x.sign != y.sign) ? 1 : 0,
.exp = x.exp + y.exp + inc,
.mantissa = mant & 0x7fffff
};
return ret;
}
void show_multiplication(float x, float y) {
float_raw rx = deconstruct(x);
float_raw ry = deconstruct(y);
printf("Multiplication: ");
float_raw_print(rx);
printf(" * ");
float_raw_print(ry);
printf(" = ");
float_raw res = multiply(rx, ry);
float_raw_print(res);
printf("\n\tAKA %f * %f = %f", x, y, construct(res));
if(!is_similar(x*y, construct(res)))
printf(" [got instead %f]", x*y);
printf("\n");
}
#define table_order 9
#define table_size 512
float recip_const1[table_size+1];
float recip_const2[table_size+1];
void setup_table() {
float f = 1.0f;
for(unsigned i = 0; i < table_size+1; i++) {
recip_const1[i] = sqrtf(f);
recip_const2[i] = sqrtf(2.0f*f);
f += 1.0f/table_size;
}
}
float sqrt_estimate(float x) {
if(x < 0) return NAN;
if(recip_const1[0] == 0)
setup_table();
float_raw rx = deconstruct(x);
float est1, est2;
int exp = 0;
if((rx.exp & 1) == 0) { // even exponent
exp = rx.exp / 2;
est1 = recip_const1[rx.mantissa >> (23 - table_order)];
est2 = recip_const1[(rx.mantissa >> (23 - table_order)) + 1];
} else { // odd exponent
exp = rx.exp / 2;
est1 = recip_const2[rx.mantissa >> (23 - table_order)];
est2 = recip_const2[(rx.mantissa >> (23 - table_order)) + 1];
}
float diff = (float) (rx.mantissa & ((1 << (23 - table_order))-1)) / (1 << (23 - table_order));
float est = est1 + (est2 - est1) * diff;
return est * powf(2.0f, exp);
}
float sqrt_iterate(float sqrt, float num) {
return (sqrt + num/sqrt) / 2;
}
float sqrt_precise(float x) {
float est = sqrt_estimate(x);
for(int i = 0; i < 4; i++)
est = sqrt_iterate(est, x);
return est;
}
float show_sqrt(float x) {
printf("SQRT estimate of %f is %f, after 4 iterations %f vs %f from math lib\n",
x, sqrt_estimate(x), sqrt_precise(x), sqrtf(x));
}
int main() {
// Floats
show_deconstruction(13.0f);
show_deconstruction(-3.25f);
show_deconstruction(0.375);
// Multiplication
show_multiplication(2.0f, 0.375f);
show_multiplication(13.0f, -3.25f);
show_sqrt(1.2);
show_sqrt(2.0);
show_sqrt(128.12345);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment