Skip to content

Instantly share code, notes, and snippets.

@apivovarov
Created February 20, 2025 02:06
Show Gist options
  • Save apivovarov/0af889bc5a4cd8a7dddd51c734751f48 to your computer and use it in GitHub Desktop.
Save apivovarov/0af889bc5a4cd8a7dddd51c734751f48 to your computer and use it in GitHub Desktop.
#include <iostream>
#include <cstdint>
#include <limits>
#include <vector>
#include <utility>
// Grok 3 early
/// Adds two 32-bit integers using float additions on their 16-bit halves.
///
/// Each operand is split into an unsigned low half and an unsigned high half.
/// Every half (and every partial sum, at most 131071) is far below 2^24, so it
/// is exactly representable in a 32-bit float and the float additions are
/// exact. The result wraps modulo 2^32 like two's-complement addition.
///
/// @param a first addend
/// @param b second addend
/// @return a + b reduced modulo 2^32 and reinterpreted as int32_t
int32_t int32_add_using_float(int32_t a, int32_t b) {
  constexpr uint32_t kHalfBits = 16;
  constexpr uint32_t kHalfMask = 0xFFFFu;

  // Do all bit manipulation on unsigned values: right-shifting a negative
  // signed value and left-shifting a signed value past bit 31 are not
  // portable (the latter is undefined behaviour before C++20).
  const uint32_t ua = static_cast<uint32_t>(a);
  const uint32_t ub = static_cast<uint32_t>(b);

  // Split both operands into halves and convert each half to float.
  const float a_low_f = static_cast<float>(ua & kHalfMask);
  const float a_high_f = static_cast<float>(ua >> kHalfBits);
  const float b_low_f = static_cast<float>(ub & kHalfMask);
  const float b_high_f = static_cast<float>(ub >> kHalfBits);

  // Low halves: the sum is in [0, 131070]; bit 16 is the carry.
  const float sum_low_f = a_low_f + b_low_f;
  const uint32_t sum_low_raw = static_cast<uint32_t>(sum_low_f);
  const uint32_t carry = sum_low_raw >> kHalfBits;
  const uint32_t sum_low = sum_low_raw & kHalfMask;

  // High halves plus carry: the sum is in [0, 131071]. Anything above bit 15
  // is the carry out of the 32-bit result and must be dropped before the
  // shift so the combined value stays within 32 bits.
  const float sum_high_f = a_high_f + b_high_f + static_cast<float>(carry);
  const uint32_t sum_high = static_cast<uint32_t>(sum_high_f) & kHalfMask;

  // Recombine the halves and reinterpret the bit pattern as signed.
  return static_cast<int32_t>((sum_high << kHalfBits) | sum_low);
}
// Claude (modified)
/// Adds two 32-bit integers using float32 additions on their 16-bit halves.
///
/// The low halves are taken as unsigned 16-bit values and the high halves as
/// sign-extended 16-bit values. All partial sums have magnitude at most
/// 131070, well below 2^24, so they are exactly representable in a float and
/// the float additions are exact. The result wraps modulo 2^32.
///
/// An equivalent formulation takes the high halves unsigned as well
/// ((uint32_t)x >> 16) & 0xFFFF; this version keeps them sign-extended.
///
/// @param a first addend
/// @param b second addend
/// @return a + b reduced modulo 2^32 and reinterpreted as int32_t
int32_t add_int32_using_float32(int32_t a, int32_t b) {
  constexpr uint32_t SHIFT = 16;
  constexpr uint32_t MASK = 0xFFFF;

  // Split a and b into high and low parts.
  // NOTE: the signed right shift is expected to sign-extend; that is
  // guaranteed from C++20 and is what mainstream compilers do before that.
  const int32_t a_high = a >> SHIFT;
  const uint32_t a_low = static_cast<uint32_t>(a) & MASK;
  const int32_t b_high = b >> SHIFT;
  const uint32_t b_low = static_cast<uint32_t>(b) & MASK;

  // Add low parts: the sum is in [0, 131070], never negative, so the
  // conversion to unsigned is well defined.
  const float low_sum = static_cast<float>(a_low) + static_cast<float>(b_low);
  const uint32_t low_bits = static_cast<uint32_t>(low_sum);

  // Bit 16 of the low sum is the carry into the high half.
  const uint32_t carry = low_bits >> SHIFT;
  const uint32_t low_result = low_bits & MASK;

  // Add high parts plus carry: the sum is in [-65536, 65535].
  const float high_sum = static_cast<float>(a_high) +
                         static_cast<float>(b_high) +
                         static_cast<float>(carry);

  // high_sum can be negative, and converting a negative float directly to an
  // unsigned type is undefined behaviour (some targets saturate to 0).
  // Convert to a signed integer first, then reinterpret as unsigned so the
  // mask extracts the low 16 bits of the two's-complement value.
  const uint32_t high_result =
      static_cast<uint32_t>(static_cast<int32_t>(high_sum)) & MASK;

  // Combine results.
  const uint32_t result = (high_result << SHIFT) | low_result;
  return static_cast<int32_t>(result);
}
// Runs the float-based adder over a table of operand pairs, including ones
// whose true sum lies outside the int32 range, and prints each result. When a
// result differs from the wrapped 32-bit reference sum, an error line is
// printed as well.
int main() {
  const std::vector<std::pair<int, int>> cases = {
      {-1000000000, -1000000001},
      {-1000000000, 1000000001},
      {1000000000, -1000000001},
      {1000000000, 1000000001},
      {1500000000, 500000000},
      {-500000000, 500000000},
      {-2147483648, 1},
      {-2147483648, -1},
      {2147483647, 1},
      {2147483647, -1},
      {2000000000, 147483647},
      {2000000000, 147483648},
      {-2000000000, -147483648},
      {-2000000000, -147483649},
      {2000000000, -147483648},
  };
  for (const auto& tc : cases) {
    // Grok 3
    // int32_t c = int32_add_using_float(tc.first, tc.second);
    // Claude
    const int32_t c = add_int32_using_float32(tc.first, tc.second);
    // Reference value: several cases overflow on purpose, and signed overflow
    // is undefined behaviour, so compute the wrapped sum in unsigned
    // arithmetic and reinterpret it as signed.
    const int32_t ec = static_cast<int32_t>(static_cast<uint32_t>(tc.first) +
                                            static_cast<uint32_t>(tc.second));
    std::cout << tc.first << " + " << tc.second << " = " << c << '\n';
    if (c != ec) {
      std::cout << "Error: res / exp: " << c << " / " << ec << '\n';
    }
  }
  return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment