dalibor-drgon · June 15, 2022 21:16
diff --git a/float.c b/float.c
 /** 
 Compile and run with: gcc float.c -lm && ./a.out
 Output:
 Float 13.000000 deconstructed as [+ (1.10100000000000000000000) * 2^3]
 Float -3.250000 deconstructed as [- (1.10100000000000000000000) * 2^1]
 Float 0.375000 deconstructed as [+ (1.10000000000000000000000) * 2^-2]
 Multiplication: [+ (1.00000000000000000000000) * 2^1] * [+ (1.10000000000000000000000) * 2^-2] = [+ (1.10000000000000000000000) * 2^-1]
        AKA 2.000000 * 0.375000 = 0.750000
 Multiplication: [+ (1.10100000000000000000000) * 2^3] * [- (1.10100000000000000000000) * 2^1] = [- (1.01010010000000000000000) * 2^5]
        AKA 13.000000 * -3.250000 = -42.250000 [got instead -42.250000]
 SQRT estimate of 1.200000 is 1.095445, after 4 iterations 1.095445 vs 1.095445 from math lib
 SQRT estimate of 2.000000 is 1.414214, after 4 iterations 1.414214 vs 1.414214 from math lib
 SQRT estimate of 128.123444 is 11.319161, after 4 iterations 11.319162 vs 11.319162 from math lib
 */

 #include <stdint.h> // intX_t, uintX_t
 #include <stdio.h>
 #include <stdbool.h>
 #include <math.h>

 typedef struct {
    int32_t sign;
    int32_t exp;
    uint32_t mantissa;
 } float_raw;

 bool is_similar(float x, float y) {
    return abs(x-y) <= (abs(x) + abs(y)) * __DBL_EPSILON__ * 50;
 }

 float construct(float_raw raw) {
    uint32_t exp = raw.exp + 127;
    //printf("- %#x %#x %#x\n", raw.sign, exp, raw.mantissa);
    uint32_t val = (raw.sign << 31) | (exp << 23) | (raw.mantissa);
    return *((float*) &val);
 }

 float_raw deconstruct(float val) {
    uint32_t raw = *((uint32_t*) &val);
    float_raw ret = {
        .sign = raw >> 31,
        .exp = ((raw >> 23) & 0xff) - 127,
        .mantissa = raw & 0x7fffff
    };
    return ret;
 }

 void float_raw_print(float_raw raw) {
    char mantissa[24] = {0};
    for(int i = 0; i < 23; i++) {
        mantissa[i] = ((raw.mantissa >> (22-i)) & 1) ? '1' : '0';
    }
    printf("[%c (1.%s) * 2^%d]", 
            (raw.sign ? '-' : '+'), mantissa, raw.exp);
 }

 void show_deconstruction(float val) {
    float_raw dec = deconstruct(val);
    printf("Float %f deconstructed as ", val);
    float_raw_print(dec);
    if(val != construct(dec)) {
        printf(" [ERROR]");
    }
    printf("\n");
 }

 float_raw multiply(float_raw x, float_raw y) {
    int inc = 0;
    uint64_t mant = (uint64_t) (0x800000 | x.mantissa) * (0x800000 | y.mantissa);
    mant >>= 23;
    // Ak x_mantisa * y_mantisa >= 2, increment exponent by one
    if(mant & 0x1000000) {
        mant >>= 1;
        inc += 1;
    }
    float_raw ret = {
        .sign = (x.sign != y.sign) ? 1 : 0,
        .exp = x.exp + y.exp + inc,
        .mantissa = mant & 0x7fffff
    };
    return ret;
 }

 void show_multiplication(float x, float y) {
    float_raw rx = deconstruct(x);
    float_raw ry = deconstruct(y);
    printf("Multiplication: ");
    float_raw_print(rx);
    printf(" * ");
    float_raw_print(ry);
    printf(" = ");
    float_raw res = multiply(rx, ry);
    float_raw_print(res);
    printf("\n\tAKA %f * %f = %f", x, y, construct(res));
    if(!is_similar(x*y, construct(res)))
        printf(" [got instead %f]", x*y);
    printf("\n");
 }


 #define table_order 9
 #define table_size 512
 float recip_const1[table_size+1];
 float recip_const2[table_size+1];

 void setup_table() {
    float f = 1.0f;
    for(unsigned i = 0; i < table_size+1; i++) {
        recip_const1[i] = sqrtf(f);
        recip_const2[i] = sqrtf(2.0f*f);
        f += 1.0f/table_size;
    }
 }

 float sqrt_estimate(float x) {
    if(x < 0) return NAN;
    if(recip_const1[0] == 0)
        setup_table();
    float_raw rx = deconstruct(x);
    float est1, est2;
    int exp = 0;
    if((rx.exp & 1) == 0) { // even exponent
        exp = rx.exp / 2;
        est1 = recip_const1[rx.mantissa >> (23 - table_order)];
        est2 = recip_const1[(rx.mantissa >> (23 - table_order)) + 1];
    } else { // odd exponent
        exp = rx.exp / 2;
        est1 = recip_const2[rx.mantissa >> (23 - table_order)];
        est2 = recip_const2[(rx.mantissa >> (23 - table_order)) + 1];
    }
    float diff = (float) (rx.mantissa & ((1 << (23 - table_order))-1)) / (1 << (23 - table_order)); 
    float est = est1 + (est2 - est1) * diff;
    return est * powf(2.0f, exp);
 }

 float sqrt_iterate(float sqrt, float num) {
    return (sqrt + num/sqrt) / 2;
 }

 float sqrt_precise(float x) {
    float est = sqrt_estimate(x);
    for(int i = 0; i < 4; i++)
        est = sqrt_iterate(est, x);
    return est;
 }

 float show_sqrt(float x) {
    printf("SQRT estimate of %f is %f, after 4 iterations %f vs %f from math lib\n",
        x, sqrt_estimate(x), sqrt_precise(x), sqrtf(x));
 }

 int main() {

    // Floats
    show_deconstruction(13.0f);
    show_deconstruction(-3.25f);
    show_deconstruction(0.375);
    
    // Multiplication
    show_multiplication(2.0f, 0.375f);
    show_multiplication(13.0f, -3.25f);
    
    show_sqrt(1.2);
    show_sqrt(2.0);
    show_sqrt(128.12345);

    return 0;
 }
	/**
	Compile and run with: gcc float.c -lm && ./a.out
	Output:
	Float 13.000000 deconstructed as [+ (1.10100000000000000000000) * 2^3]
	Float -3.250000 deconstructed as [- (1.10100000000000000000000) * 2^1]
	Float 0.375000 deconstructed as [+ (1.10000000000000000000000) * 2^-2]
	Multiplication: [+ (1.00000000000000000000000) * 2^1] * [+ (1.10000000000000000000000) * 2^-2] = [+ (1.10000000000000000000000) * 2^-1]
	AKA 2.000000 * 0.375000 = 0.750000
	Multiplication: [+ (1.10100000000000000000000) * 2^3] * [- (1.10100000000000000000000) * 2^1] = [- (1.01010010000000000000000) * 2^5]
	AKA 13.000000 * -3.250000 = -42.250000 [got instead -42.250000]
	SQRT estimate of 1.200000 is 1.095445, after 4 iterations 1.095445 vs 1.095445 from math lib
	SQRT estimate of 2.000000 is 1.414214, after 4 iterations 1.414214 vs 1.414214 from math lib
	SQRT estimate of 128.123444 is 11.319161, after 4 iterations 11.319162 vs 11.319162 from math lib
	*/

	#include <stdint.h> // intX_t, uintX_t
	#include <stdio.h>
	#include <stdbool.h>
	#include <math.h>

	typedef struct {
	int32_t sign;
	int32_t exp;
	uint32_t mantissa;
	} float_raw;

	bool is_similar(float x, float y) {
	return abs(x-y) <= (abs(x) + abs(y)) * __DBL_EPSILON__ * 50;
	}

	float construct(float_raw raw) {
	uint32_t exp = raw.exp + 127;
	//printf("- %#x %#x %#x\n", raw.sign, exp, raw.mantissa);
	uint32_t val = (raw.sign << 31) \| (exp << 23) \| (raw.mantissa);
	return ((float) &val);
	}

	float_raw deconstruct(float val) {
	uint32_t raw = ((uint32_t) &val);
	float_raw ret = {
	.sign = raw >> 31,
	.exp = ((raw >> 23) & 0xff) - 127,
	.mantissa = raw & 0x7fffff
	};
	return ret;
	}

	void float_raw_print(float_raw raw) {
	char mantissa[24] = {0};
	for(int i = 0; i < 23; i++) {
	mantissa[i] = ((raw.mantissa >> (22-i)) & 1) ? '1' : '0';
	}
	printf("[%c (1.%s) * 2^%d]",
	(raw.sign ? '-' : '+'), mantissa, raw.exp);
	}

	void show_deconstruction(float val) {
	float_raw dec = deconstruct(val);
	printf("Float %f deconstructed as ", val);
	float_raw_print(dec);
	if(val != construct(dec)) {
	printf(" [ERROR]");
	}
	printf("\n");
	}

	float_raw multiply(float_raw x, float_raw y) {
	int inc = 0;
	uint64_t mant = (uint64_t) (0x800000 \| x.mantissa) * (0x800000 \| y.mantissa);
	mant >>= 23;
	// Ak x_mantisa * y_mantisa >= 2, increment exponent by one
	if(mant & 0x1000000) {
	mant >>= 1;
	inc += 1;
	}
	float_raw ret = {
	.sign = (x.sign != y.sign) ? 1 : 0,
	.exp = x.exp + y.exp + inc,
	.mantissa = mant & 0x7fffff
	};
	return ret;
	}

	void show_multiplication(float x, float y) {
	float_raw rx = deconstruct(x);
	float_raw ry = deconstruct(y);
	printf("Multiplication: ");
	float_raw_print(rx);
	printf(" * ");
	float_raw_print(ry);
	printf(" = ");
	float_raw res = multiply(rx, ry);
	float_raw_print(res);
	printf("\n\tAKA %f * %f = %f", x, y, construct(res));
	if(!is_similar(x*y, construct(res)))
	printf(" [got instead %f]", x*y);
	printf("\n");
	}


	#define table_order 9
	#define table_size 512
	float recip_const1[table_size+1];
	float recip_const2[table_size+1];

	void setup_table() {
	float f = 1.0f;
	for(unsigned i = 0; i < table_size+1; i++) {
	recip_const1[i] = sqrtf(f);
	recip_const2[i] = sqrtf(2.0f*f);
	f += 1.0f/table_size;
	}
	}

	float sqrt_estimate(float x) {
	if(x < 0) return NAN;
	if(recip_const1[0] == 0)
	setup_table();
	float_raw rx = deconstruct(x);
	float est1, est2;
	int exp = 0;
	if((rx.exp & 1) == 0) { // even exponent
	exp = rx.exp / 2;
	est1 = recip_const1[rx.mantissa >> (23 - table_order)];
	est2 = recip_const1[(rx.mantissa >> (23 - table_order)) + 1];
	} else { // odd exponent
	exp = rx.exp / 2;
	est1 = recip_const2[rx.mantissa >> (23 - table_order)];
	est2 = recip_const2[(rx.mantissa >> (23 - table_order)) + 1];
	}
	float diff = (float) (rx.mantissa & ((1 << (23 - table_order))-1)) / (1 << (23 - table_order));
	float est = est1 + (est2 - est1) * diff;
	return est * powf(2.0f, exp);
	}

	float sqrt_iterate(float sqrt, float num) {
	return (sqrt + num/sqrt) / 2;
	}

	float sqrt_precise(float x) {
	float est = sqrt_estimate(x);
	for(int i = 0; i < 4; i++)
	est = sqrt_iterate(est, x);
	return est;
	}

	float show_sqrt(float x) {
	printf("SQRT estimate of %f is %f, after 4 iterations %f vs %f from math lib\n",
	x, sqrt_estimate(x), sqrt_precise(x), sqrtf(x));
	}

	int main() {

	// Floats
	show_deconstruction(13.0f);
	show_deconstruction(-3.25f);
	show_deconstruction(0.375);

	// Multiplication
	show_multiplication(2.0f, 0.375f);
	show_multiplication(13.0f, -3.25f);

	show_sqrt(1.2);
	show_sqrt(2.0);
	show_sqrt(128.12345);

	return 0;
	}