Created
November 12, 2015 22:43
-
-
Save dwilliamson/54e837b922e189d9a3c6 to your computer and use it in GitHub Desktop.
floor(double) for HLSL, SM5
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Truncates a double toward zero by clearing the mantissa bits that encode its
// fractional part. The value is aliased as two 32-bit words and masked by hand.
//
//   v        any double
//   returns  v with its fractional part removed. The sign of v is preserved, so
//            inputs with |v| < 1 yield a zero carrying the sign of v. Values
//            that have no fractional bits (|v| >= 2^52, infinities, NaNs) come
//            back with their bits untouched.
double MaskOutFraction(double v)
{
    // Alias double as 2 32-bit integers: d0 is the low word, d1 the high word
    uint d0, d1;
    asuint(v, d0, d1);

    // bit range in the double      bit range within d1
    //    0 ... 51  mantissa           0 ... 19
    //   52 ... 62  exponent          20 ... 30
    //   63 ... 63  sign              31

    // Already a pure fraction (|v| < 1)? The truncated result is zero; keep only
    // the sign bit so that negative inputs give -0.0 instead of +0.0.
    int exponent = int((d1 >> 20) & 0x7FFu) - 1023;
    if (exponent < 0)
        return asdouble(0u, d1 & 0x80000000u);

    // Number of low mantissa bits that lie below the binary point.
    // Clamping to 0 covers inputs too large to hold any fraction (exponent >= 52,
    // which includes Inf/NaN): both masks below come out empty and v is returned
    // unchanged.
    int mask_bits = max(52 - exponent, 0);

    // Low 31 bits of the mantissa mask for the low word
    uint lo_shift_bits = uint(min(mask_bits, 31));
    uint lo_mask = (1u << lo_shift_bits) - 1u;

    // Can't do (1<<32)-1 with a 32-bit integer, so OR in the top bit if need be
    lo_mask |= (mask_bits > 31) ? 0x80000000u : 0u;

    // Up to 20 bits of the mantissa mask for the high word
    uint hi_shift_bits = uint(max(mask_bits - 32, 0));
    uint hi_mask = (1u << hi_shift_bits) - 1u;

    // Clear the fractional bits and recombine as a double
    d0 &= ~lo_mask;
    d1 &= ~hi_mask;
    return asdouble(d0, d1);
}
// HLSL (SM5) has no floor(double), so it is implemented in software here.
// Returns the largest integral value that is not greater than v.
double Floor(double v)
{
    // Drop the fraction first; this rounds toward zero.
    double truncated = MaskOutFraction(v);

    // Rounding toward zero moves a negative non-integer up, so step back down by one.
    if (v - truncated < 0)
        return truncated - 1;

    return truncated;
}
// Component-wise Floor for a 2-component double vector.
double2 Floor(double2 v)
{
    return double2(Floor(v.x), Floor(v.y));
}
// Component-wise Floor for a 3-component double vector.
double3 Floor(double3 v)
{
    return double3(Floor(v.x), Floor(v.y), Floor(v.z));
}
// Component-wise Floor for a 4-component double vector.
double4 Floor(double4 v)
{
    return double4(Floor(v.x), Floor(v.y), Floor(v.z), Floor(v.w));
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment