Created
December 17, 2022 09:33
-
-
Save mratsim/c5970c80591751cf3513e7d6db6c287b to your computer and use it in GitHub Desktop.
Cuda 11.5 or lower muladd-carry bug
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://forums.developer.nvidia.com/t/wrong-result-returned-by-madc-hi-u64-ptx-instruction-for-specific-operands/196094 | |
# https://github.com/tickinbuaa/CudaTest | |
#include <cuda_runtime.h> | |
#include <memory> | |
__device__ | |
inline void mac_with_carry(uint64_t &lo, uint64_t &hi, const uint64_t &a, const uint64_t &b, const uint64_t &c) { | |
if (blockIdx.x == 0 && threadIdx.x == 0) { | |
printf("GPU calculation input: a = %lx b = %lx c = %lx\n", a, b, c); | |
} | |
asm("mad.lo.cc.u64 %0, %2, %3, %4;\n\t" | |
"madc.hi.u64 %1, %2, %3, 0;\n\t" | |
:"=l"(lo), "=l"(hi): "l"(a), "l"(b), "l"(c)); | |
if (blockIdx.x == 0 && threadIdx.x == 0) { | |
printf("GPU calculation result: hi = %lx low = %lx\n", hi, lo); | |
} | |
} | |
__global__ | |
void test(uint64_t *out, uint32_t num){ | |
unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x; | |
if (tid >= num) { | |
return; | |
} | |
uint64_t a = 0x42737a020c0d6393UL; | |
uint64_t b = 0xffffffff00000001UL; | |
uint64_t c = 0xc999e990f3f29c6dUL; | |
mac_with_carry(out[tid << 1], out[(tid << 1) + 1], a, b, c); | |
} | |
int main() { | |
uint64_t *d_out; | |
uint32_t num = 1; | |
cudaMalloc(&d_out, num * 2 * sizeof(uint64_t)); | |
const uint32_t BLOCK_SIZE = 256; | |
uint32_t block_num = (num + BLOCK_SIZE - 1) / BLOCK_SIZE; | |
test<<<block_num, BLOCK_SIZE>>>(d_out, num); | |
cudaDeviceSynchronize(); | |
unsigned __int128 a = 0x42737a020c0d6393UL; | |
unsigned __int128 b = 0xffffffff00000001UL; | |
unsigned __int128 c = 0xc999e990f3f29c6dUL; | |
unsigned __int128 result = a * b + c; | |
printf("Cpu result: hi:%lx low:%lx\n", (uint64_t)((result >> 64) & 0xffffffffffffffffUL), (uint64_t)(result & 0xffffffffffffffffUL)); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment