apivovarov · February 20, 2025 02:06
diff --git a/int32_add_using_float.cc b/int32_add_using_float.cc
 #include <iostream>
 #include <cstdint>
 #include <limits>
 #include <vector>
 #include <utility>

 // Grok 3 early
 /// Adds two 32-bit integers with two's-complement wrap-around, performing the
 /// actual additions in single-precision float on 16-bit halves.
 ///
 /// @param a first addend
 /// @param b second addend
 /// @return the low 32 bits of a + b, reinterpreted as int32_t
 int32_t int32_add_using_float(int32_t a, int32_t b) {
    constexpr uint32_t kMask16 = 0xFFFFu;  // selects the low 16 bits
    constexpr uint32_t kShift = 16;

    // Work on the unsigned bit patterns so every shift and mask is well defined.
    const uint32_t ua = static_cast<uint32_t>(a);
    const uint32_t ub = static_cast<uint32_t>(b);

    // Each half is in [0, 65535], far below 2^24, so it is exact in float.
    const float a_low_f = static_cast<float>(ua & kMask16);
    const float a_high_f = static_cast<float>(ua >> kShift);
    const float b_low_f = static_cast<float>(ub & kMask16);
    const float b_high_f = static_cast<float>(ub >> kShift);

    // Low halves: the sum is at most 131070, still exact in float.
    const float sum_low_f = a_low_f + b_low_f;
    const uint32_t sum_low = static_cast<uint32_t>(sum_low_f);
    const uint32_t carry = sum_low >> kShift;  // 0 or 1

    // High halves plus carry: at most 131071, non-negative, exact in float.
    const float sum_high_f = a_high_f + b_high_f + static_cast<float>(carry);

    // Drop the bit that overflows past bit 31 *before* shifting; shifting an
    // unmasked signed value here would be signed-shift overflow.
    const uint32_t sum_high = static_cast<uint32_t>(sum_high_f) & kMask16;

    const uint32_t result = (sum_high << kShift) | (sum_low & kMask16);

    // Reinterpret the bit pattern as signed (modular conversion).
    return static_cast<int32_t>(result);
 }

 // Claude (modified)
 /// Adds two 32-bit integers with two's-complement wrap-around, performing the
 /// actual additions in float32 on 16-bit halves. The high halves are kept
 /// sign-extended, so the float high sum may be negative.
 ///
 /// @param a first addend
 /// @param b second addend
 /// @return the low 32 bits of a + b, reinterpreted as int32_t
 int32_t add_int32_using_float32(int32_t a, int32_t b) {
    constexpr uint32_t SHIFT = 16;
    constexpr uint32_t MASK = 0xFFFF;

    // Split a and b into a signed high half and an unsigned low half.
    // Right-shifting a negative value is an arithmetic shift on mainstream
    // compilers (guaranteed since C++20). The result is masked to 16 bits
    // below, so only the low 16 bits of each high half matter.
    const int32_t a_high = a >> SHIFT;  // Sign-extended
    const uint32_t a_low = static_cast<uint32_t>(a) & MASK;
    const int32_t b_high = b >> SHIFT;  // Sign-extended
    const uint32_t b_low = static_cast<uint32_t>(b) & MASK;

    // Add low parts: at most 131070, exactly representable in float32.
    const float low_sum = static_cast<float>(a_low) + static_cast<float>(b_low);

    // Handle carry from low parts (convert the float once).
    const uint32_t low_bits = static_cast<uint32_t>(low_sum);
    const uint32_t carry = low_bits >> SHIFT;
    const uint32_t low_result = low_bits & MASK;

    // Add high parts: the sum lies in [-65536, 65535], exact in float32.
    const float high_sum = static_cast<float>(a_high) + static_cast<float>(b_high) + static_cast<float>(carry);

    // Converting a negative float straight to an unsigned type is undefined
    // behaviour (and saturates to 0 on some targets). Go through int32_t,
    // which can hold the value, then wrap to unsigned with a well-defined
    // modular conversion.
    const int32_t high_signed = static_cast<int32_t>(high_sum);
    const uint32_t high_result = static_cast<uint32_t>(high_signed) & MASK;

    // Combine results
    const uint32_t result = (high_result << SHIFT) | low_result;

    return static_cast<int32_t>(result);
 }

 // Runs the float-based adder over a fixed set of operand pairs, including
 // pairs whose true sum does not fit in 32 bits, printing every result and an
 // "Error" line whenever it differs from the wrapped reference sum.
 int main() {
  const std::vector<std::pair<int, int>> aa = {
   {-1000000000, -1000000001},
   {-1000000000, 1000000001},
   {1000000000, -1000000001},
   {1000000000, 1000000001},
   {1500000000, 500000000},
   {-500000000, 500000000},
   {-2147483648, 1},
   {-2147483648, -1},
   {2147483647, 1},
   {2147483647, -1},
   {2000000000, 147483647},
   {2000000000, 147483648},
   {-2000000000, -147483648},
   {-2000000000, -147483649},
   {2000000000, -147483648},
  };

  for (const auto& a : aa) {
   // Grok 3
   // int32_t c = int32_add_using_float(a.first, a.second);
   // Claude
   const int32_t c = add_int32_using_float32(a.first, a.second);

   // Reference value with two's-complement wrap-around. A plain
   // `a.first + a.second` overflows signed int for several pairs above,
   // which is undefined behaviour; unsigned addition wraps by definition.
   const int32_t ec = static_cast<int32_t>(
       static_cast<uint32_t>(a.first) + static_cast<uint32_t>(a.second));

   std::cout << a.first << " + " << a.second << " = " << c << '\n';
   if (c != ec) {
    std::cout << "Error: res / exp: " << c << " / " << ec << '\n';
   }
  }

  return 0;
 }
	#include <iostream>
	#include <cstdint>
	#include <limits>
	#include <vector>
	#include <utility>

	// Grok 3 early
	/// Adds two 32-bit integers with two's-complement wrap-around, performing the
	/// actual additions in single-precision float on 16-bit halves.
	///
	/// @param a first addend
	/// @param b second addend
	/// @return the low 32 bits of a + b, reinterpreted as int32_t
	int32_t int32_add_using_float(int32_t a, int32_t b) {
	    constexpr uint32_t kMask16 = 0xFFFFu;  // selects the low 16 bits
	    constexpr uint32_t kShift = 16;

	    // Work on the unsigned bit patterns so every shift and mask is well defined.
	    const uint32_t ua = static_cast<uint32_t>(a);
	    const uint32_t ub = static_cast<uint32_t>(b);

	    // Each half is in [0, 65535], far below 2^24, so it is exact in float.
	    const float a_low_f = static_cast<float>(ua & kMask16);
	    const float a_high_f = static_cast<float>(ua >> kShift);
	    const float b_low_f = static_cast<float>(ub & kMask16);
	    const float b_high_f = static_cast<float>(ub >> kShift);

	    // Low halves: the sum is at most 131070, still exact in float.
	    const float sum_low_f = a_low_f + b_low_f;
	    const uint32_t sum_low = static_cast<uint32_t>(sum_low_f);
	    const uint32_t carry = sum_low >> kShift;  // 0 or 1

	    // High halves plus carry: at most 131071, non-negative, exact in float.
	    const float sum_high_f = a_high_f + b_high_f + static_cast<float>(carry);

	    // Drop the bit that overflows past bit 31 *before* shifting; shifting an
	    // unmasked signed value here would be signed-shift overflow.
	    const uint32_t sum_high = static_cast<uint32_t>(sum_high_f) & kMask16;

	    // Plain bitwise OR (the escaped `\|` form is not valid C++).
	    const uint32_t result = (sum_high << kShift) | (sum_low & kMask16);

	    // Reinterpret the bit pattern as signed (modular conversion).
	    return static_cast<int32_t>(result);
	}

	// Claude (modified)
	/// Adds two 32-bit integers with two's-complement wrap-around, performing the
	/// actual additions in float32 on 16-bit halves. The high halves are kept
	/// sign-extended, so the float high sum may be negative.
	///
	/// @param a first addend
	/// @param b second addend
	/// @return the low 32 bits of a + b, reinterpreted as int32_t
	int32_t add_int32_using_float32(int32_t a, int32_t b) {
	    constexpr uint32_t SHIFT = 16;
	    constexpr uint32_t MASK = 0xFFFF;

	    // Split a and b into a signed high half and an unsigned low half.
	    // Right-shifting a negative value is an arithmetic shift on mainstream
	    // compilers (guaranteed since C++20). The result is masked to 16 bits
	    // below, so only the low 16 bits of each high half matter.
	    const int32_t a_high = a >> SHIFT;  // Sign-extended
	    const uint32_t a_low = static_cast<uint32_t>(a) & MASK;
	    const int32_t b_high = b >> SHIFT;  // Sign-extended
	    const uint32_t b_low = static_cast<uint32_t>(b) & MASK;

	    // Add low parts: at most 131070, exactly representable in float32.
	    const float low_sum = static_cast<float>(a_low) + static_cast<float>(b_low);

	    // Handle carry from low parts (convert the float once).
	    const uint32_t low_bits = static_cast<uint32_t>(low_sum);
	    const uint32_t carry = low_bits >> SHIFT;
	    const uint32_t low_result = low_bits & MASK;

	    // Add high parts: the sum lies in [-65536, 65535], exact in float32.
	    const float high_sum = static_cast<float>(a_high) + static_cast<float>(b_high) + static_cast<float>(carry);

	    // Converting a negative float straight to an unsigned type is undefined
	    // behaviour (and saturates to 0 on some targets). Go through int32_t,
	    // which can hold the value, then wrap to unsigned with a well-defined
	    // modular conversion.
	    const int32_t high_signed = static_cast<int32_t>(high_sum);
	    const uint32_t high_result = static_cast<uint32_t>(high_signed) & MASK;

	    // Combine results with a plain bitwise OR (the escaped `\|` form is not
	    // valid C++).
	    const uint32_t result = (high_result << SHIFT) | low_result;

	    return static_cast<int32_t>(result);
	}

	// Runs the float-based adder over a fixed set of operand pairs, including
	// pairs whose true sum does not fit in 32 bits, printing every result and an
	// "Error" line whenever it differs from the wrapped reference sum.
	int main() {
	    const std::vector<std::pair<int, int>> aa = {
	        {-1000000000, -1000000001},
	        {-1000000000, 1000000001},
	        {1000000000, -1000000001},
	        {1000000000, 1000000001},
	        {1500000000, 500000000},
	        {-500000000, 500000000},
	        {-2147483648, 1},
	        {-2147483648, -1},
	        {2147483647, 1},
	        {2147483647, -1},
	        {2000000000, 147483647},
	        {2000000000, 147483648},
	        {-2000000000, -147483648},
	        {-2000000000, -147483649},
	        {2000000000, -147483648},
	    };

	    for (const auto& a : aa) {
	        // Grok 3
	        // int32_t c = int32_add_using_float(a.first, a.second);
	        // Claude
	        const int32_t c = add_int32_using_float32(a.first, a.second);

	        // Reference value with two's-complement wrap-around. A plain
	        // `a.first + a.second` overflows signed int for several pairs above,
	        // which is undefined behaviour; unsigned addition wraps by definition.
	        const int32_t ec = static_cast<int32_t>(
	            static_cast<uint32_t>(a.first) + static_cast<uint32_t>(a.second));

	        std::cout << a.first << " + " << a.second << " = " << c << '\n';
	        if (c != ec) {
	            std::cout << "Error: res / exp: " << c << " / " << ec << '\n';
	        }
	    }

	    return 0;
	}