// Source: GitHub gist GitHubEmploy/025915f7f6756b8019c2e6136017b389
// (created February 13, 2025 01:06).
// The GitHub page navigation text that preceded the code has been reduced to
// this comment so that the file is valid PTX.
.version 6.4
.target sm_75
.address_size 64
//-----------------------------------------------------------------------
// Kernel function: element-wise vector addition, C[i] = A[i] + B[i].
//
// Parameters:
//   param0 (.u64)  pointer to input vector A  (32-bit integer elements)
//   param1 (.u64)  pointer to input vector B  (32-bit integer elements)
//   param2 (.u64)  pointer to output vector C (32-bit integer elements)
//   param3 (.u32)  number of elements n
//
// Each thread handles the single element whose index is
//   i = %ctaid.x * %ntid.x + %tid.x
// and does nothing when i >= n. Only the x dimension of the launch
// configuration is used; for a single-block launch i equals %tid.x.
//-----------------------------------------------------------------------
.visible .entry vectorAdd(
    .param .u64 param0,     // pointer to vector A
    .param .u64 param1,     // pointer to vector B
    .param .u64 param2,     // pointer to output vector C
    .param .u32 param3      // number of elements
)
{
    // Register declarations. User register names must not collide with the
    // predefined special registers (%tid, %ntid, %ctaid, ...).
    .reg .pred  %p;
    .reg .u32   %n, %tid_x, %ntid_x, %ctaid_x;
    .reg .s32   %a, %b, %sum;
    .reg .u64   %A_ptr, %B_ptr, %C_ptr;
    .reg .u64   %n64, %tid64, %gid, %offset;
    .reg .u64   %A_addr, %B_addr, %C_addr;

    // Load kernel parameters.
    ld.param.u64    %A_ptr, [param0];
    ld.param.u64    %B_ptr, [param1];
    ld.param.u64    %C_ptr, [param2];
    ld.param.u32    %n, [param3];

    // The pointers are accessed with ld.global / st.global below, so convert
    // them from generic addresses to global state-space addresses first.
    cvta.to.global.u64  %A_ptr, %A_ptr;
    cvta.to.global.u64  %B_ptr, %B_ptr;
    cvta.to.global.u64  %C_ptr, %C_ptr;

    // Calculate the global thread index in 64 bits so that
    // ctaid.x * ntid.x cannot wrap around for very large grids.
    mov.u32         %tid_x, %tid.x;
    mov.u32         %ntid_x, %ntid.x;
    mov.u32         %ctaid_x, %ctaid.x;
    mul.wide.u32    %gid, %ctaid_x, %ntid_x;    // gid = ctaid.x * ntid.x (u64)
    cvt.u64.u32     %tid64, %tid_x;
    add.u64         %gid, %gid, %tid64;         // gid += tid.x

    // Check if within bounds: threads with gid >= n have no element.
    cvt.u64.u32     %n64, %n;
    setp.ge.u64     %p, %gid, %n64;
    @%p bra         END;

    // PTX has no scaled-index addressing mode, so form the element
    // addresses explicitly: addr = base + gid * 4 (4-byte elements).
    shl.b64         %offset, %gid, 2;
    add.u64         %A_addr, %A_ptr, %offset;
    add.u64         %B_addr, %B_ptr, %offset;
    add.u64         %C_addr, %C_ptr, %offset;

    // Perform vector addition.
    ld.global.s32   %a, [%A_addr];
    ld.global.s32   %b, [%B_addr];
    add.s32         %sum, %a, %b;
    st.global.s32   [%C_addr], %sum;

END:
    ret;
}
// (GitHub page footer, not part of the PTX source: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment")