-
-
Save tcantenot/3ca21aefd9178c31d03d22660f04fec3 to your computer and use it in GitHub Desktop.
Test to see if the bit hack "Conditionally set or clear bits without branching" maps to a single Maxwell LOP3.LUT opcode
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -*- compile-command: "nvcc -m 32 -arch sm_50 -Xptxas=-v,-abi=no -cubin lop3.cu"; -*-

// Qualifiers applied to every kernel in this file: `extern "C"` gives the
// kernel C linkage and `__global__` declares it as a CUDA kernel.
// The replacement list must contain nothing but these qualifiers, since it is
// pasted verbatim in front of each kernel's return type.
#define KERNEL_QUALIFIERS extern "C" __global__
//
// Bit hack: "Conditionally set or clear bits without branching"
// http://graphics.stanford.edu/~seander/bithacks.html#ConditionalSetOrClearBitsWithoutBranching
//
// This bit hack *should* map to a single LOP3.LUT opcode:
//
// unsigned int
// set_or_clear(int flag, unsigned int mask, unsigned int word)
// {
// int neg = -flag; // flag is 0 or 1
//
// return word ^ ((neg ^ word) & mask);
// }
//
// ... and it does!
//
// If flag is 0 or 1:
//
// | I2I.S32.S32 R1, -R1;
// | LOP3.LUT R0, R2, R0, R1, 0xb8;
//
//
// If flag is 0 or -1:
//
// | LOP3.LUT R0, R1, R0, R2, 0xb8;
//
//
//
// Conditionally set or clear the masked bits of a word, with the condition
// supplied as a 0/1 flag.
//
// Each thread handles the element at index threadIdx.x: it reads flags[i],
// masks[i] and words[i] and stores the result in out[i]. Bits of the word
// selected by the mask are set when the flag is 1 and cleared when the flag
// is 0; bits outside the mask are copied through unchanged.
//
//   flags : per-thread condition, assumed to be 0 or 1
//   masks : per-thread mask of the bits to set or clear
//   words : per-thread input word
//   out   : per-thread result
//
// Only threadIdx.x is used as the index and no bounds are checked, so the
// caller must size all four arrays to cover every thread index launched.
//
KERNEL_QUALIFIERS
void
lop3Test(const int* const flags, // assumes flags are 0 or 1
         const unsigned int* const masks,
         const unsigned int* const words,
         unsigned int* const out)
{
  const unsigned int tid = threadIdx.x;

  // Negating a 0/1 flag turns it into an all-zeros / all-ones bit pattern.
  const int neg = -flags[tid];
  const unsigned int mask = masks[tid];
  const unsigned int word = words[tid];

  // Branch-free bitwise select: where mask is 1 the result bit comes from
  // neg, elsewhere it comes from word, i.e. (mask & neg) | (~mask & word).
  out[tid] = word ^ ((neg ^ word) & mask);
}
// | |
// | |
// | |
//
// Conditionally set or clear the masked bits of a word, with the condition
// already supplied as a full-width bit pattern.
//
// Each thread handles the element at index threadIdx.x: it reads flags[i],
// masks[i] and words[i] and stores the result in out[i]. Because the flag is
// used directly (no negation step), it is expected to be either all zeros
// (clear the masked bits) or all ones (set the masked bits); bits outside the
// mask are copied through unchanged.
//
//   flags : per-thread condition, assumed to be 0x0 or -1 (0xFFFFFFFF)
//   masks : per-thread mask of the bits to set or clear
//   words : per-thread input word
//   out   : per-thread result
//
// Only threadIdx.x is used as the index and no bounds are checked, so the
// caller must size all four arrays to cover every thread index launched.
//
KERNEL_QUALIFIERS
void
lop3Test2(const unsigned int* const flags, // assumes flags are 0x0 or -1 (all bits set)
          const unsigned int* const masks,
          const unsigned int* const words,
          unsigned int* const out)
{
  const unsigned int tid = threadIdx.x;

  const unsigned int flag = flags[tid];
  const unsigned int mask = masks[tid];
  const unsigned int word = words[tid];

  // Branch-free bitwise select: where mask is 1 the result bit comes from
  // flag, elsewhere it comes from word, i.e. (mask & flag) | (~mask & word).
  out[tid] = word ^ ((flag ^ word) & mask);
}
// | |
// | |
// |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment