apivovarov · February 21, 2025 22:20
diff --git a/add_uint32_using_float32.cc b/add_uint32_using_float32.cc
 #include <iostream>
 #include <cstdint>
 #include <limits>
 #include <vector>
 #include <utility>

 // Returns a + b reduced modulo 2^32, with every addition carried out in
 // float32 rather than in integer arithmetic.
 //
 // Each operand is cut into two 16-bit halves. A half is at most 0xFFFF, so
 // no intermediate sum exceeds 2 * 0xFFFF + 1 = 131071, which (assuming
 // IEEE-754 binary32) is far below the 2^24 limit for exact integers.
 uint32_t add_uint32_using_float32(uint32_t a, uint32_t b) {
     constexpr uint32_t kHalfMask = 0xFFFFu;
     constexpr unsigned kHalfBits = 16;

     // Low halves: add as floats, then convert the exact result back.
     const float lo_lhs = static_cast<float>(a & kHalfMask);
     const float lo_rhs = static_cast<float>(b & kHalfMask);
     const uint32_t lo_total = static_cast<uint32_t>(lo_lhs + lo_rhs);

     // Bit 16 of the low total is the carry into the high halves.
     const uint32_t carry = (lo_total >> kHalfBits) & 1u;

     // High halves plus the carry, again summed in float32.
     const float hi_lhs = static_cast<float>(a >> kHalfBits);
     const float hi_rhs = static_cast<float>(b >> kHalfBits);
     const uint32_t hi_total =
         static_cast<uint32_t>(hi_lhs + hi_rhs + static_cast<float>(carry));

     // Shifting the high total left drops its own carry-out (bit 16), which
     // gives the wrap-around of unsigned 32-bit addition.
     return (hi_total << kHalfBits) | (lo_total & kHalfMask);
 }


 // Test driver: feeds a fixed table of operand pairs through
 // add_uint32_using_float32, prints each sum, and prints an error line
 // whenever the result differs from native uint32_t addition.
 // Always returns 0, whether or not a mismatch was printed.
 int main() {
     const std::vector<std::pair<uint32_t, uint32_t>> cases = {
         {1000000000, 1000000001},
         {1500000000, 500000000},
         {500000000, 500000000},
         {2147483647, 1},
         {2147483648, 1},
         {4294967295, 1},   // sum wraps past 2^32
         {4294967295, 2},   // sum wraps past 2^32
         {2000000000, 147483647},
         {2000000000, 147483648},
     };

     for (const auto& operands : cases) {
         const uint32_t lhs = operands.first;
         const uint32_t rhs = operands.second;

         const uint32_t actual = add_uint32_using_float32(lhs, rhs);
         const uint32_t expected = lhs + rhs;  // reference: wrapping integer add

         std::cout << lhs << " + " << rhs << " = " << actual << std::endl;
         if (actual == expected) {
             continue;
         }
         std::cout << "Error: res / exp: " << actual << " / " << expected << std::endl;
     }

     return 0;
 }
	#include <iostream>
	#include <cstdint>
	#include <limits>
	#include <vector>
	#include <utility>

	// Adds two uint32_t values using only float32 additions and returns the
	// sum modulo 2^32 (same result as native unsigned addition).
	//
	// @param a  first operand
	// @param b  second operand
	// @return   (a + b) mod 2^32
	//
	// The operands are split into 16-bit halves so that every float sum is at
	// most 2 * 0xFFFF + 1 = 131071 and is therefore exact (assuming IEEE-754
	// binary32, which represents all integers below 2^24 exactly).
	uint32_t add_uint32_using_float32(uint32_t a, uint32_t b) {
	    // Split the 32-bit numbers into two 16-bit halves (high and low)
	    const uint32_t mask16 = 0xFFFF;       // 16-bit mask
	    const uint32_t a_low = a & mask16;    // Lower 16 bits of a
	    const uint32_t a_high = a >> 16;      // Upper 16 bits of a
	    const uint32_t b_low = b & mask16;    // Lower 16 bits of b
	    const uint32_t b_high = b >> 16;      // Upper 16 bits of b

	    // Convert the 16-bit parts to float32 for addition
	    const float a_low_f = static_cast<float>(a_low);
	    const float a_high_f = static_cast<float>(a_high);
	    const float b_low_f = static_cast<float>(b_low);
	    const float b_high_f = static_cast<float>(b_high);

	    // Add the lower 16 bits using float32
	    const float sum_low_f = a_low_f + b_low_f;
	    const uint32_t sum_low = static_cast<uint32_t>(sum_low_f);

	    // Extract the carry from the lower 16-bit addition.
	    // The carry is the 17th bit (bit 16) of sum_low.
	    const uint32_t carry_low = (sum_low >> 16) & 1;

	    // Add the upper 16 bits, including the carry from the lower addition
	    const float sum_high_f = a_high_f + b_high_f + static_cast<float>(carry_low);
	    const uint32_t sum_high = static_cast<uint32_t>(sum_high_f);

	    // Combine the results: high part shifted left by 16 bits, ORed with the
	    // low part. The shift discards any carry out of bit 31, which gives the
	    // modulo-2^32 wrap-around. (A stray backslash before the '|' operator,
	    // left over from markdown escaping, previously broke compilation.)
	    const uint32_t result = (sum_high << 16) | (sum_low & mask16);

	    return result;
	}


	// Test driver: runs add_uint32_using_float32 on a fixed list of operand
	// pairs, prints every result, and prints an error line for any result that
	// disagrees with native uint32_t addition. The exit status is always 0.
	int main() {
	    const std::vector<std::pair<uint32_t, uint32_t>> inputs = {
	        {1000000000, 1000000001},
	        {1500000000, 500000000},
	        {500000000, 500000000},
	        {2147483647, 1},
	        {2147483648, 1},
	        {4294967295, 1},
	        {4294967295, 2},
	        {2000000000, 147483647},
	        {2000000000, 147483648},
	    };

	    for (std::size_t i = 0; i < inputs.size(); ++i) {
	        const uint32_t x = inputs[i].first;
	        const uint32_t y = inputs[i].second;

	        const uint32_t got = add_uint32_using_float32(x, y);
	        const uint32_t want = x + y;  // wrapping unsigned add is the reference

	        std::cout << x << " + " << y << " = " << got << std::endl;
	        if (got != want) {
	            std::cout << "Error: res / exp: " << got << " / " << want << std::endl;
	        }
	    }

	    return 0;
	}