Created
October 7, 2014 22:07
-
-
Save allanmac/bca063e25a0f4ef75004 to your computer and use it in GitHub Desktop.
Try to generate XMAD instructions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -*- compile-command: "nvcc -m 32 -arch sm_50 -Xptxas=-v,-abi=no -cubin xmad.cu"; -*-
//
// Kernel declaration helpers.
//

// Qualifiers placed in front of every kernel definition: C linkage plus the
// CUDA __global__ execution space.
#define KERNEL_QUALIFIERS extern "C" __global__

// Shorthand for the __restrict__ pointer qualifier.
#define RESTRICT          __restrict__

//
// Fixed-width style aliases.
//

typedef unsigned int u32;
// Per-thread 16-bit multiply / 32-bit accumulate issued through inline PTX.
//
// For the calling thread's index i = threadIdx.x the kernel computes
//
//   vd[i] = (int)va[i].x * (int)vb[i].x + vc[i]
//
// using a single `mad.wide.s16` instruction. Only the .x component of each
// short2 operand participates; the .y components are loaded but unused.
//
// Parameters:
//   va, vb - packed 16-bit multiplicand arrays (read-only)
//   vc     - 32-bit addend array (read-only)
//   vd     - 32-bit result array (written)
//
// Indexing uses threadIdx.x only (no blockIdx term) and there is no bounds
// check, so every array must hold at least blockDim.x elements, and every
// block of a multi-block launch touches the same elements.
KERNEL_QUALIFIERS
void vmad_kernel(const short2* const RESTRICT va,
                 const short2* const RESTRICT vb,
                 const int*    const RESTRICT vc,
                 int*          const          vd)
{
  const short2 a = va[threadIdx.x];
  const short2 b = vb[threadIdx.x];
  const int    c = vc[threadIdx.x];

  int d;

  // Earlier attempt using the video-instruction form with half-word selectors,
  // kept for reference:
  // asm volatile("vmad.s32.s32.s32 %0, %1.h0, %2.h0, %3;" : "=r"(d) : "r"(a), "r"(b), "r"(c));

  // mad.wide.s16: 16-bit x 16-bit -> 32-bit product, plus 32-bit addend.
  // "h" binds a 16-bit register operand, "r" a 32-bit one. `volatile` keeps
  // the compiler from dropping or moving the statement.
  asm volatile("mad.wide.s16 %0, %1, %2, %3;" : "=r"(d) : "h"(a.x), "h"(b.x), "r"(c));

  vd[threadIdx.x] = d;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment