native-m · July 14, 2019 17:26
diff --git a/f32add.cpp b/f32add.cpp
 union IEEEFloat
 {
 	struct
 	{
 		int m : 23;
 		int e : 8;
 		int s : 1;
 	};

 	int i;
 	float f;
 };

 /*
  * Count the leading zero bits of a 32-bit value.
  *
  * x is first "smeared" so that every bit below its highest set bit is also
  * set, then incremented, which leaves a single power of two (or 0 after
  * wrap-around when bit 31 was set). Multiplying that by the de Bruijn
  * constant moves a unique 5-bit pattern into the top bits, and that pattern
  * indexes the lookup table.
  *
  * Returns a value in 0..32; clz(0) is 32.
  */
 int clz(uint32_t x)
 {
 	static const char debruijn32[32] = {
 		0, 31, 9, 30, 3, 8, 13, 29, 2, 5, 7, 21, 12, 24, 28, 19,
 		1, 10, 4, 14, 6, 22, 25, 20, 11, 15, 23, 26, 16, 27, 17, 18
 	};

 	/*
 	 * Zero smears to 0 and increments to 1, which lands in table slot 0 -
 	 * the same slot used when bit 31 is set - so it has to be answered
 	 * before the lookup.
 	 */
 	if (x == 0)
 		return 32;

 	x |= x >> 1;
 	x |= x >> 2;
 	x |= x >> 4;
 	x |= x >> 8;
 	x |= x >> 16;
 	x++;	/* unsigned wrap-around to 0 is intended when bit 31 was set */

 	return debruijn32[(x * 0x076be629u) >> 27];
 }

 /*
  * Count the set bits of a 64-bit word.
  *
  * Uses the parallel bit-count technique: adjacent bit groups are summed in
  * place, doubling the group width each step, and a final multiply adds the
  * eight byte counts together in the top byte.
  *
  * The parameter is spelled uint64_t instead of the MSVC-only
  * `unsigned __int64` so the function builds with any compiler.
  */
 int popcnt64(uint64_t w)
 {
 	/* each 2-bit group now holds the number of bits set in it */
 	w -= (w >> 1) & 0x5555555555555555ULL;
 	/* each 4-bit group holds its bit count */
 	w = (w & 0x3333333333333333ULL) + ((w >> 2) & 0x3333333333333333ULL);
 	/* each byte holds its bit count (at most 8) */
 	w = (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
 	/* the multiply sums all bytes into the most significant byte */
 	return (int)((w * 0x0101010101010101ULL) >> 56);
 }

 /*
  * Count the leading zero bits of a 64-bit value.
  *
  * Every bit below the highest set bit is turned on, so the number of set
  * bits equals the number of significant bits; the leading zeros are what
  * is left of the 64.
  */
 int clz64(uint64_t x)
 {
 	/* shifts of 1, 2, 4, 8, 16 and 32 spread the top bit all the way down */
 	for (int shift = 1; shift <= 32; shift *= 2)
 		x |= x >> shift;

 	return 64 - popcnt64(x);
 }

 /*
  * Software addition of two single-precision floats (x + y) using integer
  * arithmetic only.
  *
  * Works on the raw unsigned bit patterns (1 sign bit, 8 exponent bits,
  * 23 fraction bits) rather than on signed bit-fields, so exponent
  * comparisons and shift counts are never negative.
  *
  * Handles NaN (returns a quiet NaN), infinities (inf + -inf gives NaN),
  * zeros, subnormals, overflow to infinity, and rounds to nearest with ties
  * to even. An exact cancellation returns +0.
  */
 float addf(float x, float y)
 {
 	const uint32_t sign_mask = 0x80000000u;
 	const uint32_t inf_bits = 0x7f800000u;		/* exponent all ones, fraction zero */
 	const uint32_t hidden_bit = 0x00800000u;	/* implicit leading 1 of a normal number */
 	union { float f; uint32_t u; } pa, pb, out;

 	pa.f = x;
 	pb.f = y;

 	uint32_t hi = pa.u;	/* operand with the larger magnitude, once swapped below */
 	uint32_t lo = pb.u;
 	uint32_t hi_mag = hi & ~sign_mask;
 	uint32_t lo_mag = lo & ~sign_mask;

 	/* NaN in, quiet NaN out. */
 	if (hi_mag > inf_bits || lo_mag > inf_bits)
 	{
 		out.u = ((hi_mag > inf_bits) ? hi : lo) | 0x00400000u;
 		return out.f;
 	}

 	/* Infinities: opposite-signed infinities cancel to NaN, otherwise infinity wins. */
 	if (hi_mag == inf_bits || lo_mag == inf_bits)
 	{
 		if (hi_mag == lo_mag && hi != lo)
 			out.u = 0x7fc00000u;
 		else
 			out.u = (hi_mag == inf_bits) ? hi : lo;
 		return out.f;
 	}

 	/* Order the operands by magnitude so the alignment shift is never negative. */
 	if (hi_mag < lo_mag)
 	{
 		uint32_t tmp = hi;
 		hi = lo;
 		lo = tmp;
 		tmp = hi_mag;
 		hi_mag = lo_mag;
 		lo_mag = tmp;
 	}

 	uint32_t sign = hi & sign_mask;	/* the larger operand decides the sign */
 	int subtract = ((hi ^ lo) & sign_mask) != 0;

 	int exp_hi = (int)(hi_mag >> 23);
 	int exp_lo = (int)(lo_mag >> 23);
 	uint32_t sig_hi = hi_mag & 0x007fffffu;
 	uint32_t sig_lo = lo_mag & 0x007fffffu;

 	/* Normal numbers get their implicit 1; subnormals and zero use exponent 1 without it. */
 	if (exp_hi != 0)
 		sig_hi |= hidden_bit;
 	else
 		exp_hi = 1;
 	if (exp_lo != 0)
 		sig_lo |= hidden_bit;
 	else
 		exp_lo = 1;

 	/* Make room for three extra low bits: guard, round and sticky. */
 	sig_hi <<= 3;
 	sig_lo <<= 3;

 	/* Align the smaller operand; bits shifted out are OR-ed into the sticky bit. */
 	int shift = exp_hi - exp_lo;
 	if (shift >= 27)
 		sig_lo = (sig_lo != 0) ? 1u : 0u;
 	else if (shift > 0)
 		sig_lo = (sig_lo >> shift) | ((sig_lo & ((1u << shift) - 1u)) ? 1u : 0u);

 	uint32_t sum;
 	if (subtract)
 	{
 		sum = sig_hi - sig_lo;
 		if (sum == 0)
 			return 0.0f;	/* exact cancellation */

 		/* Renormalise after cancellation, stopping at the subnormal range. */
 		while (sum < (hidden_bit << 3) && exp_hi > 1)
 		{
 			sum <<= 1;
 			exp_hi--;
 		}
 	}
 	else
 	{
 		sum = sig_hi + sig_lo;
 		if (sum >= (hidden_bit << 4))
 		{
 			/* carry out of the top bit: shift down, keeping the lost bit sticky */
 			sum = (sum >> 1) | (sum & 1u);
 			exp_hi++;
 		}
 	}

 	/* Round to nearest, ties to even, on the three extra bits. */
 	uint32_t grs = sum & 7u;
 	uint32_t sig = sum >> 3;
 	if (grs > 4u || (grs == 4u && (sig & 1u)))
 		sig++;

 	/*
 	 * A normal result still contains its hidden bit, so adding it to
 	 * (exp - 1) << 23 produces the right exponent field. The same sum is
 	 * correct for subnormals (exp == 1, no hidden bit) and for a rounding
 	 * carry into the next exponent.
 	 */
 	uint32_t bits = ((uint32_t)(exp_hi - 1) << 23) + sig;
 	if (bits >= inf_bits)
 		bits = inf_bits;	/* overflow rounds to infinity */

 	out.u = sign | bits;
 	return out.f;
 }

 /*
  * Software multiplication of two single-precision floats (x * y) using
  * integer arithmetic only.
  *
  * Works on the raw unsigned bit patterns (1 sign bit, 8 exponent bits,
  * 23 fraction bits). The result sign is the XOR of the operand signs, the
  * exponents are added with one bias removed, and the 24-bit significands
  * are multiplied into a 48-bit product that is rounded to nearest with ties
  * to even.
  *
  * Handles NaN (returns a quiet NaN), infinities (inf * 0 gives NaN), zeros,
  * subnormal operands and results, and overflow to infinity.
  */
 float mulf(float x, float y)
 {
 	const uint32_t sign_mask = 0x80000000u;
 	const uint32_t inf_bits = 0x7f800000u;		/* exponent all ones, fraction zero */
 	const uint32_t hidden_bit = 0x00800000u;	/* implicit leading 1 of a normal number */
 	union { float f; uint32_t u; } pa, pb, out;

 	pa.f = x;
 	pb.f = y;

 	uint32_t sign = (pa.u ^ pb.u) & sign_mask;
 	uint32_t mag_a = pa.u & ~sign_mask;
 	uint32_t mag_b = pb.u & ~sign_mask;

 	/* NaN in, quiet NaN out. */
 	if (mag_a > inf_bits || mag_b > inf_bits)
 	{
 		out.u = ((mag_a > inf_bits) ? pa.u : pb.u) | 0x00400000u;
 		return out.f;
 	}

 	/* Infinity times zero is invalid; infinity times anything else is infinity. */
 	if (mag_a == inf_bits || mag_b == inf_bits)
 	{
 		out.u = (mag_a == 0 || mag_b == 0) ? 0x7fc00000u : (sign | inf_bits);
 		return out.f;
 	}

 	/* A zero operand gives a zero carrying the product sign. */
 	if (mag_a == 0 || mag_b == 0)
 	{
 		out.u = sign;
 		return out.f;
 	}

 	int exp_a = (int)(mag_a >> 23);
 	int exp_b = (int)(mag_b >> 23);
 	uint32_t sig_a = mag_a & 0x007fffffu;
 	uint32_t sig_b = mag_b & 0x007fffffu;

 	/*
 	 * Give both operands a 24-bit significand with the top bit set: normal
 	 * numbers get their implicit 1, subnormals are shifted up while their
 	 * exponent is lowered to compensate.
 	 */
 	if (exp_a != 0)
 		sig_a |= hidden_bit;
 	else
 		for (exp_a = 1; !(sig_a & hidden_bit); exp_a--)
 			sig_a <<= 1;
 	if (exp_b != 0)
 		sig_b |= hidden_bit;
 	else
 		for (exp_b = 1; !(sig_b & hidden_bit); exp_b--)
 			sig_b <<= 1;

 	int exp_out = exp_a + exp_b - 127;	/* both inputs are biased; drop one bias */
 	uint64_t prod = (uint64_t)sig_a * sig_b;	/* lies in [2^46, 2^48) */

 	/* Put the leading 1 at bit 47; a product of 2.0 or more raises the exponent. */
 	if (prod & (1ULL << 47))
 		exp_out++;
 	else
 		prod <<= 1;

 	if (exp_out >= 255)
 	{
 		out.u = sign | inf_bits;	/* overflow */
 		return out.f;
 	}

 	/* Dropping 24 bits leaves a 24-bit significand; subnormal results drop more. */
 	int shift = 24;
 	if (exp_out < 1)
 	{
 		shift += 1 - exp_out;
 		if (shift > 49)
 			shift = 49;	/* everything is already below half of the smallest subnormal */
 		exp_out = 1;
 	}

 	/* Round to nearest, ties to even, on the discarded bits. */
 	uint64_t sig = prod >> shift;
 	uint64_t rest = prod & ((1ULL << shift) - 1);
 	uint64_t half = 1ULL << (shift - 1);
 	if (rest > half || (rest == half && (sig & 1)))
 		sig++;

 	/*
 	 * A normal result still contains its hidden bit, so adding it to
 	 * (exp - 1) << 23 produces the right exponent field; the same sum is
 	 * correct for subnormals and for a rounding carry.
 	 */
 	uint32_t bits = ((uint32_t)(exp_out - 1) << 23) + (uint32_t)sig;
 	if (bits >= inf_bits)
 		bits = inf_bits;

 	out.u = sign | bits;
 	return out.f;
 }
	union IEEEFloat
	{
	struct
	{
	int m : 23;
	int e : 8;
	int s : 1;
	};

	int i;
	float f;
	};

	/*
	 * Count the leading zero bits of a 32-bit value.
	 *
	 * x is first "smeared" so that every bit below its highest set bit is also
	 * set, then incremented, which leaves a single power of two (or 0 after
	 * wrap-around when bit 31 was set). Multiplying that by the de Bruijn
	 * constant moves a unique 5-bit pattern into the top bits, and that pattern
	 * indexes the lookup table.
	 *
	 * Returns a value in 0..32; clz(0) is 32.
	 */
	int clz(uint32_t x)
	{
		static const char debruijn32[32] = {
			0, 31, 9, 30, 3, 8, 13, 29, 2, 5, 7, 21, 12, 24, 28, 19,
			1, 10, 4, 14, 6, 22, 25, 20, 11, 15, 23, 26, 16, 27, 17, 18
		};

		/*
		 * Zero smears to 0 and increments to 1, which lands in table slot 0 -
		 * the same slot used when bit 31 is set - so it has to be answered
		 * before the lookup.
		 */
		if (x == 0)
			return 32;

		x |= x >> 1;
		x |= x >> 2;
		x |= x >> 4;
		x |= x >> 8;
		x |= x >> 16;
		x++;	/* unsigned wrap-around to 0 is intended when bit 31 was set */

		return debruijn32[(x * 0x076be629u) >> 27];
	}

	/*
	 * Count the set bits of a 64-bit word.
	 *
	 * Uses the parallel bit-count technique: adjacent bit groups are summed in
	 * place, doubling the group width each step, and a final multiply adds the
	 * eight byte counts together in the top byte.
	 *
	 * The parameter is spelled uint64_t instead of the MSVC-only
	 * `unsigned __int64` so the function builds with any compiler.
	 */
	int popcnt64(uint64_t w)
	{
		/* each 2-bit group now holds the number of bits set in it */
		w -= (w >> 1) & 0x5555555555555555ULL;
		/* each 4-bit group holds its bit count */
		w = (w & 0x3333333333333333ULL) + ((w >> 2) & 0x3333333333333333ULL);
		/* each byte holds its bit count (at most 8) */
		w = (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
		/* the multiply sums all bytes into the most significant byte */
		return (int)((w * 0x0101010101010101ULL) >> 56);
	}

	/*
	 * Count the leading zero bits of a 64-bit value.
	 *
	 * Every bit below the highest set bit is turned on, so the number of set
	 * bits equals the number of significant bits; the leading zeros are what
	 * is left of the 64.
	 */
	int clz64(uint64_t x)
	{
		/* shifts of 1, 2, 4, 8, 16 and 32 spread the top bit all the way down */
		for (int shift = 1; shift <= 32; shift *= 2)
			x |= x >> shift;

		return 64 - popcnt64(x);
	}

	/*
	 * Software addition of two single-precision floats (x + y) using integer
	 * arithmetic only.
	 *
	 * Works on the raw unsigned bit patterns (1 sign bit, 8 exponent bits,
	 * 23 fraction bits) rather than on signed bit-fields, so exponent
	 * comparisons and shift counts are never negative.
	 *
	 * Handles NaN (returns a quiet NaN), infinities (inf + -inf gives NaN),
	 * zeros, subnormals, overflow to infinity, and rounds to nearest with ties
	 * to even. An exact cancellation returns +0.
	 */
	float addf(float x, float y)
	{
		const uint32_t sign_mask = 0x80000000u;
		const uint32_t inf_bits = 0x7f800000u;		/* exponent all ones, fraction zero */
		const uint32_t hidden_bit = 0x00800000u;	/* implicit leading 1 of a normal number */
		union { float f; uint32_t u; } pa, pb, out;

		pa.f = x;
		pb.f = y;

		uint32_t hi = pa.u;	/* operand with the larger magnitude, once swapped below */
		uint32_t lo = pb.u;
		uint32_t hi_mag = hi & ~sign_mask;
		uint32_t lo_mag = lo & ~sign_mask;

		/* NaN in, quiet NaN out. */
		if (hi_mag > inf_bits || lo_mag > inf_bits)
		{
			out.u = ((hi_mag > inf_bits) ? hi : lo) | 0x00400000u;
			return out.f;
		}

		/* Infinities: opposite-signed infinities cancel to NaN, otherwise infinity wins. */
		if (hi_mag == inf_bits || lo_mag == inf_bits)
		{
			if (hi_mag == lo_mag && hi != lo)
				out.u = 0x7fc00000u;
			else
				out.u = (hi_mag == inf_bits) ? hi : lo;
			return out.f;
		}

		/* Order the operands by magnitude so the alignment shift is never negative. */
		if (hi_mag < lo_mag)
		{
			uint32_t tmp = hi;
			hi = lo;
			lo = tmp;
			tmp = hi_mag;
			hi_mag = lo_mag;
			lo_mag = tmp;
		}

		uint32_t sign = hi & sign_mask;	/* the larger operand decides the sign */
		int subtract = ((hi ^ lo) & sign_mask) != 0;

		int exp_hi = (int)(hi_mag >> 23);
		int exp_lo = (int)(lo_mag >> 23);
		uint32_t sig_hi = hi_mag & 0x007fffffu;
		uint32_t sig_lo = lo_mag & 0x007fffffu;

		/* Normal numbers get their implicit 1; subnormals and zero use exponent 1 without it. */
		if (exp_hi != 0)
			sig_hi |= hidden_bit;
		else
			exp_hi = 1;
		if (exp_lo != 0)
			sig_lo |= hidden_bit;
		else
			exp_lo = 1;

		/* Make room for three extra low bits: guard, round and sticky. */
		sig_hi <<= 3;
		sig_lo <<= 3;

		/* Align the smaller operand; bits shifted out are OR-ed into the sticky bit. */
		int shift = exp_hi - exp_lo;
		if (shift >= 27)
			sig_lo = (sig_lo != 0) ? 1u : 0u;
		else if (shift > 0)
			sig_lo = (sig_lo >> shift) | ((sig_lo & ((1u << shift) - 1u)) ? 1u : 0u);

		uint32_t sum;
		if (subtract)
		{
			sum = sig_hi - sig_lo;
			if (sum == 0)
				return 0.0f;	/* exact cancellation */

			/* Renormalise after cancellation, stopping at the subnormal range. */
			while (sum < (hidden_bit << 3) && exp_hi > 1)
			{
				sum <<= 1;
				exp_hi--;
			}
		}
		else
		{
			sum = sig_hi + sig_lo;
			if (sum >= (hidden_bit << 4))
			{
				/* carry out of the top bit: shift down, keeping the lost bit sticky */
				sum = (sum >> 1) | (sum & 1u);
				exp_hi++;
			}
		}

		/* Round to nearest, ties to even, on the three extra bits. */
		uint32_t grs = sum & 7u;
		uint32_t sig = sum >> 3;
		if (grs > 4u || (grs == 4u && (sig & 1u)))
			sig++;

		/*
		 * A normal result still contains its hidden bit, so adding it to
		 * (exp - 1) << 23 produces the right exponent field. The same sum is
		 * correct for subnormals (exp == 1, no hidden bit) and for a rounding
		 * carry into the next exponent.
		 */
		uint32_t bits = ((uint32_t)(exp_hi - 1) << 23) + sig;
		if (bits >= inf_bits)
			bits = inf_bits;	/* overflow rounds to infinity */

		out.u = sign | bits;
		return out.f;
	}

	/*
	 * Software multiplication of two single-precision floats (x * y) using
	 * integer arithmetic only.
	 *
	 * Works on the raw unsigned bit patterns (1 sign bit, 8 exponent bits,
	 * 23 fraction bits). The result sign is the XOR of the operand signs, the
	 * exponents are added with one bias removed, and the 24-bit significands
	 * are multiplied into a 48-bit product that is rounded to nearest with ties
	 * to even.
	 *
	 * Handles NaN (returns a quiet NaN), infinities (inf * 0 gives NaN), zeros,
	 * subnormal operands and results, and overflow to infinity.
	 */
	float mulf(float x, float y)
	{
		const uint32_t sign_mask = 0x80000000u;
		const uint32_t inf_bits = 0x7f800000u;		/* exponent all ones, fraction zero */
		const uint32_t hidden_bit = 0x00800000u;	/* implicit leading 1 of a normal number */
		union { float f; uint32_t u; } pa, pb, out;

		pa.f = x;
		pb.f = y;

		uint32_t sign = (pa.u ^ pb.u) & sign_mask;
		uint32_t mag_a = pa.u & ~sign_mask;
		uint32_t mag_b = pb.u & ~sign_mask;

		/* NaN in, quiet NaN out. */
		if (mag_a > inf_bits || mag_b > inf_bits)
		{
			out.u = ((mag_a > inf_bits) ? pa.u : pb.u) | 0x00400000u;
			return out.f;
		}

		/* Infinity times zero is invalid; infinity times anything else is infinity. */
		if (mag_a == inf_bits || mag_b == inf_bits)
		{
			out.u = (mag_a == 0 || mag_b == 0) ? 0x7fc00000u : (sign | inf_bits);
			return out.f;
		}

		/* A zero operand gives a zero carrying the product sign. */
		if (mag_a == 0 || mag_b == 0)
		{
			out.u = sign;
			return out.f;
		}

		int exp_a = (int)(mag_a >> 23);
		int exp_b = (int)(mag_b >> 23);
		uint32_t sig_a = mag_a & 0x007fffffu;
		uint32_t sig_b = mag_b & 0x007fffffu;

		/*
		 * Give both operands a 24-bit significand with the top bit set: normal
		 * numbers get their implicit 1, subnormals are shifted up while their
		 * exponent is lowered to compensate.
		 */
		if (exp_a != 0)
			sig_a |= hidden_bit;
		else
			for (exp_a = 1; !(sig_a & hidden_bit); exp_a--)
				sig_a <<= 1;
		if (exp_b != 0)
			sig_b |= hidden_bit;
		else
			for (exp_b = 1; !(sig_b & hidden_bit); exp_b--)
				sig_b <<= 1;

		int exp_out = exp_a + exp_b - 127;	/* both inputs are biased; drop one bias */
		uint64_t prod = (uint64_t)sig_a * sig_b;	/* lies in [2^46, 2^48) */

		/* Put the leading 1 at bit 47; a product of 2.0 or more raises the exponent. */
		if (prod & (1ULL << 47))
			exp_out++;
		else
			prod <<= 1;

		if (exp_out >= 255)
		{
			out.u = sign | inf_bits;	/* overflow */
			return out.f;
		}

		/* Dropping 24 bits leaves a 24-bit significand; subnormal results drop more. */
		int shift = 24;
		if (exp_out < 1)
		{
			shift += 1 - exp_out;
			if (shift > 49)
				shift = 49;	/* everything is already below half of the smallest subnormal */
			exp_out = 1;
		}

		/* Round to nearest, ties to even, on the discarded bits. */
		uint64_t sig = prod >> shift;
		uint64_t rest = prod & ((1ULL << shift) - 1);
		uint64_t half = 1ULL << (shift - 1);
		if (rest > half || (rest == half && (sig & 1)))
			sig++;

		/*
		 * A normal result still contains its hidden bit, so adding it to
		 * (exp - 1) << 23 produces the right exponent field; the same sum is
		 * correct for subnormals and for a rounding carry.
		 */
		uint32_t bits = ((uint32_t)(exp_out - 1) << 23) + (uint32_t)sig;
		if (bits >= inf_bits)
			bits = inf_bits;

		out.u = sign | bits;
		return out.f;
	}