Last active
September 20, 2021 23:51
-
-
Save Artem-B/ec4290809650f5092d61d6dafa6b0131 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace { | |
// Tag type keyed on an integer. Only declared here; it is used purely as a
// template argument to pick a __tex_fetch_v4 specialization.
template <int>
struct __Tag;
# 54 "__clang_cuda_texture_intrinsics.h" 3 | |
template <class> | |
struct __FT; | |
template <> | |
struct __FT<float> { | |
using __bt = float; | |
using __ft = float4; | |
}; | |
template <> | |
struct __FT<char> { | |
using __bt = char; | |
using __ft = int4; | |
}; | |
template <> | |
struct __FT<signed char> { | |
using __bt = signed char; | |
using __ft = int4; | |
}; | |
template <> | |
struct __FT<unsigned char> { | |
using __bt = unsigned char; | |
using __ft = uint4; | |
}; | |
template <> | |
struct __FT<short> { | |
using __bt = short; | |
using __ft = int4; | |
}; | |
template <> | |
struct __FT<ushort> { | |
using __bt = ushort; | |
using __ft = uint4; | |
}; | |
template <> | |
struct __FT<int> { | |
using __bt = int; | |
using __ft = int4; | |
}; | |
template <> | |
struct __FT<uint> { | |
using __bt = uint; | |
using __ft = uint4; | |
}; | |
template <class __T> | |
struct __FT { | |
using __bt = decltype(__T::x); | |
using __ft = typename __FT<__bt>::__ft; | |
}; | |
template <class __op> | |
struct __tex_fetch_v4; | |
template <> | |
struct __tex_fetch_v4<__Tag<-1>>; | |
# 192 "__clang_cuda_texture_intrinsics.h" 3 | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1D_v2")>> { | |
template <class T> | |
__attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x); | |
template <> | |
__attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, | |
float __x) { | |
int4 __r; | |
asm("tex.1d.v4" | |
".s32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5}];" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "f"(__x)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, | |
float __x) { | |
uint4 __r; | |
asm("tex.1d.v4" | |
".u32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5}];" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "f"(__x)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, | |
float __x) { | |
float4 __r; | |
asm("tex.1d.v4" | |
".f32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5}];" | |
: "=" | |
"f"(__r.x), | |
"=" | |
"f"(__r.y), | |
"=" | |
"f"(__r.z), | |
"=" | |
"f"(__r.w) | |
: "l"(__obj), "f"(__x)); | |
return __r; | |
} | |
}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1D_rmnf_v2")>> { | |
template <class T> | |
__attribute__((device)) static float4 __run(cudaTextureObject_t __obj, | |
float __x); | |
template <> | |
__attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, | |
float __x) { | |
float4 __r; | |
asm("tex.1d.v4" | |
".s32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5}];" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "f"(__x)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, | |
float __x) { | |
float4 __r; | |
asm("tex.1d.v4" | |
".u32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5}];" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "f"(__x)); | |
return __r; | |
} | |
}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1Dfetch_v2")>> { | |
template <class T> | |
__attribute__((device)) static T __run(cudaTextureObject_t __obj, int __x); | |
template <> | |
__attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, int __x) { | |
int4 __r; | |
asm("tex.1d.v4" | |
".s32." | |
"s32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5}];" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "r"(__x)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, | |
int __x) { | |
uint4 __r; | |
asm("tex.1d.v4" | |
".u32." | |
"s32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5}];" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "r"(__x)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, | |
int __x) { | |
float4 __r; | |
asm("tex.1d.v4" | |
".f32." | |
"s32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5}];" | |
: "=" | |
"f"(__r.x), | |
"=" | |
"f"(__r.y), | |
"=" | |
"f"(__r.z), | |
"=" | |
"f"(__r.w) | |
: "l"(__obj), "r"(__x)); | |
return __r; | |
} | |
}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1Dfetch_rmnf_v2")>> { | |
template <class T> | |
__attribute__((device)) static float4 __run(cudaTextureObject_t __obj, | |
int __x); | |
template <> | |
__attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, | |
int __x) { | |
float4 __r; | |
asm("tex.1d.v4" | |
".s32." | |
"s32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5}];" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "r"(__x)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, | |
int __x) { | |
float4 __r; | |
asm("tex.1d.v4" | |
".u32." | |
"s32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5}];" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "r"(__x)); | |
return __r; | |
} | |
}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex1D")>> | |
: __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1D_v2")>> {}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex1Dfetch")>> | |
: __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1Dfetch_v2")>> {}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DGrad_v2")>> { | |
template <class T> | |
__attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, | |
float __dPdx, float __dPdy); | |
template <> | |
__attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, | |
float __dPdx, float __dPdy) { | |
int4 __r; | |
asm("tex.grad.1d.v4" | |
".s32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5}], {%6}, {%7};" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "f"(__x), "f"(__dPdx), "f"(__dPdy)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, | |
float __x, float __dPdx, | |
float __dPdy) { | |
uint4 __r; | |
asm("tex.grad.1d.v4" | |
".u32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5}], {%6}, {%7};" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "f"(__x), "f"(__dPdx), "f"(__dPdy)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, | |
float __x, float __dPdx, | |
float __dPdy) { | |
float4 __r; | |
asm("tex.grad.1d.v4" | |
".f32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5}], {%6}, {%7};" | |
: "=" | |
"f"(__r.x), | |
"=" | |
"f"(__r.y), | |
"=" | |
"f"(__r.z), | |
"=" | |
"f"(__r.w) | |
: "l"(__obj), "f"(__x), "f"(__dPdx), "f"(__dPdy)); | |
return __r; | |
} | |
}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DGrad_rmnf_v2")>> { | |
template <class T> | |
__attribute__((device)) static float4 __run(cudaTextureObject_t __obj, | |
float __x, float __dPdx, | |
float __dPdy); | |
template <> | |
__attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, | |
float __x, float __dPdx, | |
float __dPdy) { | |
float4 __r; | |
asm("tex.grad.1d.v4" | |
".s32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5}], {%6}, {%7};" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "f"(__x), "f"(__dPdx), "f"(__dPdy)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, | |
float __x, float __dPdx, | |
float __dPdy) { | |
float4 __r; | |
asm("tex.grad.1d.v4" | |
".u32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5}], {%6}, {%7};" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "f"(__x), "f"(__dPdx), "f"(__dPdy)); | |
return __r; | |
} | |
}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex1DGrad")>> | |
: __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DGrad_v2")>> {}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLayered_v2")>> { | |
template <class T> | |
__attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, | |
int __layer); | |
template <> | |
__attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, | |
int __layer) { | |
int4 __r; | |
asm("tex.a1d.v4" | |
".s32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}];" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "r"(__layer), "f"(__x)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, | |
float __x, int __layer) { | |
uint4 __r; | |
asm("tex.a1d.v4" | |
".u32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}];" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "r"(__layer), "f"(__x)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, | |
float __x, int __layer) { | |
float4 __r; | |
asm("tex.a1d.v4" | |
".f32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}];" | |
: "=" | |
"f"(__r.x), | |
"=" | |
"f"(__r.y), | |
"=" | |
"f"(__r.z), | |
"=" | |
"f"(__r.w) | |
: "l"(__obj), "r"(__layer), "f"(__x)); | |
return __r; | |
} | |
}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLayered_rmnf_v2")>> { | |
template <class T> | |
__attribute__((device)) static float4 __run(cudaTextureObject_t __obj, | |
float __x, int __layer); | |
template <> | |
__attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, | |
float __x, int __layer) { | |
float4 __r; | |
asm("tex.a1d.v4" | |
".s32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}];" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "r"(__layer), "f"(__x)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, | |
float __x, int __layer) { | |
float4 __r; | |
asm("tex.a1d.v4" | |
".u32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}];" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "r"(__layer), "f"(__x)); | |
return __r; | |
} | |
}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex1DLayered")>> | |
: __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLayered_v2")>> {}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLayeredGrad_v2")>> { | |
template <class T> | |
__attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, | |
int __layer, float __dPdx, | |
float __dPdy); | |
template <> | |
__attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, | |
int __layer, float __dPdx, | |
float __dPdy) { | |
int4 __r; | |
asm("tex.grad.a1d.v4" | |
".s32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}], {%7}, {%8};" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "r"(__layer), "f"(__x), "f"(__dPdx), "f"(__dPdy)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, | |
float __x, int __layer, | |
float __dPdx, float __dPdy) { | |
uint4 __r; | |
asm("tex.grad.a1d.v4" | |
".u32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}], {%7}, {%8};" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "r"(__layer), "f"(__x), "f"(__dPdx), "f"(__dPdy)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, | |
float __x, int __layer, | |
float __dPdx, float __dPdy) { | |
float4 __r; | |
asm("tex.grad.a1d.v4" | |
".f32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}], {%7}, {%8};" | |
: "=" | |
"f"(__r.x), | |
"=" | |
"f"(__r.y), | |
"=" | |
"f"(__r.z), | |
"=" | |
"f"(__r.w) | |
: "l"(__obj), "r"(__layer), "f"(__x), "f"(__dPdx), "f"(__dPdy)); | |
return __r; | |
} | |
}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLayeredGrad_rmnf_v2")>> { | |
template <class T> | |
__attribute__((device)) static float4 __run(cudaTextureObject_t __obj, | |
float __x, int __layer, | |
float __dPdx, float __dPdy); | |
template <> | |
__attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, | |
float __x, int __layer, | |
float __dPdx, float __dPdy) { | |
float4 __r; | |
asm("tex.grad.a1d.v4" | |
".s32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}], {%7}, {%8};" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "r"(__layer), "f"(__x), "f"(__dPdx), "f"(__dPdy)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, | |
float __x, int __layer, | |
float __dPdx, float __dPdy) { | |
float4 __r; | |
asm("tex.grad.a1d.v4" | |
".u32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}], {%7}, {%8};" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "r"(__layer), "f"(__x), "f"(__dPdx), "f"(__dPdy)); | |
return __r; | |
} | |
}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex1DLayeredGrad")>> | |
: __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLayeredGrad_v2")>> {}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLayeredLod_v2")>> { | |
template <class T> | |
__attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, | |
int __layer, float __level); | |
template <> | |
__attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, | |
int __layer, float __level) { | |
int4 __r; | |
asm("tex.level.a1d.v4" | |
".s32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}], %7;" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "r"(__layer), "f"(__x), "f"(__level)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, | |
float __x, int __layer, | |
float __level) { | |
uint4 __r; | |
asm("tex.level.a1d.v4" | |
".u32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}], %7;" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "r"(__layer), "f"(__x), "f"(__level)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, | |
float __x, int __layer, | |
float __level) { | |
float4 __r; | |
asm("tex.level.a1d.v4" | |
".f32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}], %7;" | |
: "=" | |
"f"(__r.x), | |
"=" | |
"f"(__r.y), | |
"=" | |
"f"(__r.z), | |
"=" | |
"f"(__r.w) | |
: "l"(__obj), "r"(__layer), "f"(__x), "f"(__level)); | |
return __r; | |
} | |
}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLayeredLod_rmnf_v2")>> { | |
template <class T> | |
__attribute__((device)) static float4 __run(cudaTextureObject_t __obj, | |
float __x, int __layer, | |
float __level); | |
template <> | |
__attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, | |
float __x, int __layer, | |
float __level) { | |
float4 __r; | |
asm("tex.level.a1d.v4" | |
".s32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}], %7;" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "r"(__layer), "f"(__x), "f"(__level)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, | |
float __x, int __layer, | |
float __level) { | |
float4 __r; | |
asm("tex.level.a1d.v4" | |
".u32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}], %7;" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "r"(__layer), "f"(__x), "f"(__level)); | |
return __r; | |
} | |
}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex1DLayeredLod")>> | |
: __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLayeredLod_v2")>> {}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLod_v2")>> { | |
template <class T> | |
__attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, | |
float __level); | |
template <> | |
__attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, | |
float __level) { | |
int4 __r; | |
asm("tex.level.1d.v4" | |
".s32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5}], %6;" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "f"(__x), "f"(__level)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, | |
float __x, float __level) { | |
uint4 __r; | |
asm("tex.level.1d.v4" | |
".u32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5}], %6;" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "f"(__x), "f"(__level)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, | |
float __x, float __level) { | |
float4 __r; | |
asm("tex.level.1d.v4" | |
".f32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5}], %6;" | |
: "=" | |
"f"(__r.x), | |
"=" | |
"f"(__r.y), | |
"=" | |
"f"(__r.z), | |
"=" | |
"f"(__r.w) | |
: "l"(__obj), "f"(__x), "f"(__level)); | |
return __r; | |
} | |
}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLod_rmnf_v2")>> { | |
template <class T> | |
__attribute__((device)) static float4 __run(cudaTextureObject_t __obj, | |
float __x, float __level); | |
template <> | |
__attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, | |
float __x, float __level) { | |
float4 __r; | |
asm("tex.level.1d.v4" | |
".s32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5}], %6;" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "f"(__x), "f"(__level)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, | |
float __x, float __level) { | |
float4 __r; | |
asm("tex.level.1d.v4" | |
".u32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5}], %6;" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "f"(__x), "f"(__level)); | |
return __r; | |
} | |
}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex1DLod")>> | |
: __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLod_v2")>> {}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2D_v2")>> { | |
template <class T> | |
__attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, | |
float __y); | |
template <> | |
__attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, | |
float __y) { | |
int4 __r; | |
asm("tex.2d.v4" | |
".s32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}];" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "f"(__x), "f"(__y)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, | |
float __x, float __y) { | |
uint4 __r; | |
asm("tex.2d.v4" | |
".u32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}];" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "f"(__x), "f"(__y)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, | |
float __x, float __y) { | |
float4 __r; | |
asm("tex.2d.v4" | |
".f32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}];" | |
: "=" | |
"f"(__r.x), | |
"=" | |
"f"(__r.y), | |
"=" | |
"f"(__r.z), | |
"=" | |
"f"(__r.w) | |
: "l"(__obj), "f"(__x), "f"(__y)); | |
return __r; | |
} | |
}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2D_rmnf_v2")>> { | |
template <class T> | |
__attribute__((device)) static float4 __run(cudaTextureObject_t __obj, | |
float __x, float __y); | |
template <> | |
__attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, | |
float __x, float __y) { | |
float4 __r; | |
asm("tex.2d.v4" | |
".s32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}];" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "f"(__x), "f"(__y)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, | |
float __x, float __y) { | |
float4 __r; | |
asm("tex.2d.v4" | |
".u32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}];" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "f"(__x), "f"(__y)); | |
return __r; | |
} | |
}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2D")>> | |
: __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2D_v2")>> {}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2D_sparse")>> { | |
template <class T> | |
__attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, | |
float __y, unsigned char* __ir); | |
template <> | |
__attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, | |
float __y, unsigned char* __ir) { | |
int4 __r; | |
asm("{.reg .pred %%p0;\n\t" | |
"tex.2d.v4" | |
".s32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t" | |
" selp.u16 %4, 1, 0, %%p0; }" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w), | |
"=h"(*__ir) | |
: "l"(__obj), "f"(__x), "f"(__y)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, | |
float __x, float __y, | |
unsigned char* __ir) { | |
uint4 __r; | |
asm("{.reg .pred %%p0;\n\t" | |
"tex.2d.v4" | |
".u32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t" | |
" selp.u16 %4, 1, 0, %%p0; }" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w), | |
"=h"(*__ir) | |
: "l"(__obj), "f"(__x), "f"(__y)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, | |
float __x, float __y, | |
unsigned char* __ir) { | |
float4 __r; | |
asm("{.reg .pred %%p0;\n\t" | |
"tex.2d.v4" | |
".f32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t" | |
" selp.u16 %4, 1, 0, %%p0; }" | |
: "=" | |
"f"(__r.x), | |
"=" | |
"f"(__r.y), | |
"=" | |
"f"(__r.z), | |
"=" | |
"f"(__r.w), | |
"=h"(*__ir) | |
: "l"(__obj), "f"(__x), "f"(__y)); | |
return __r; | |
} | |
}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DGrad_v2")>> { | |
template <class T> | |
__attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, | |
float __y, const float2* __dPdx, | |
const float2* __dPdy); | |
template <> | |
__attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, | |
float __y, const float2* __dPdx, | |
const float2* __dPdy) { | |
int4 __r; | |
asm("tex.grad.2d.v4" | |
".s32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}], {%7, %8}, {%9, %10};" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), | |
"f"(__dPdy->x), "f"(__dPdy->y)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, | |
float __x, float __y, | |
const float2* __dPdx, | |
const float2* __dPdy) { | |
uint4 __r; | |
asm("tex.grad.2d.v4" | |
".u32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}], {%7, %8}, {%9, %10};" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), | |
"f"(__dPdy->x), "f"(__dPdy->y)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, | |
float __x, float __y, | |
const float2* __dPdx, | |
const float2* __dPdy) { | |
float4 __r; | |
asm("tex.grad.2d.v4" | |
".f32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}], {%7, %8}, {%9, %10};" | |
: "=" | |
"f"(__r.x), | |
"=" | |
"f"(__r.y), | |
"=" | |
"f"(__r.z), | |
"=" | |
"f"(__r.w) | |
: "l"(__obj), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), | |
"f"(__dPdy->x), "f"(__dPdy->y)); | |
return __r; | |
} | |
}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DGrad_rmnf_v2")>> { | |
template <class T> | |
__attribute__((device)) static float4 __run(cudaTextureObject_t __obj, | |
float __x, float __y, | |
const float2* __dPdx, | |
const float2* __dPdy); | |
template <> | |
__attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, | |
float __x, float __y, | |
const float2* __dPdx, | |
const float2* __dPdy) { | |
float4 __r; | |
asm("tex.grad.2d.v4" | |
".s32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}], {%7, %8}, {%9, %10};" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), | |
"f"(__dPdy->x), "f"(__dPdy->y)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, | |
float __x, float __y, | |
const float2* __dPdx, | |
const float2* __dPdy) { | |
float4 __r; | |
asm("tex.grad.2d.v4" | |
".u32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6}], {%7, %8}, {%9, %10};" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), | |
"f"(__dPdy->x), "f"(__dPdy->y)); | |
return __r; | |
} | |
}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2DGrad_v2")>> | |
: __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DGrad_v2")>> {}; | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2DGrad_sparse")>> { | |
template <class T> | |
__attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, | |
float __y, const float2* __dPdx, | |
const float2* __dPdy, | |
unsigned char* __ir); | |
template <> | |
__attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, | |
float __y, const float2* __dPdx, | |
const float2* __dPdy, | |
unsigned char* __ir) { | |
int4 __r; | |
asm("{.reg .pred %%p0;\n\t" | |
"tex.grad.2d.v4" | |
".s32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], {%8, %9}, {%10, %11};\n\t" | |
"selp.u16 %4, 1, 0, %%p0; }" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w), | |
"=h"(*__ir) | |
: "l"(__obj), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), | |
"f"(__dPdy->x), "f"(__dPdy->y)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, | |
float __x, float __y, | |
const float2* __dPdx, | |
const float2* __dPdy, | |
unsigned char* __ir) { | |
uint4 __r; | |
asm("{.reg .pred %%p0;\n\t" | |
"tex.grad.2d.v4" | |
".u32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], {%8, %9}, {%10, %11};\n\t" | |
"selp.u16 %4, 1, 0, %%p0; }" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w), | |
"=h"(*__ir) | |
: "l"(__obj), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), | |
"f"(__dPdy->x), "f"(__dPdy->y)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, | |
float __x, float __y, | |
const float2* __dPdx, | |
const float2* __dPdy, | |
unsigned char* __ir) { | |
float4 __r; | |
asm("{.reg .pred %%p0;\n\t" | |
"tex.grad.2d.v4" | |
".f32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], {%8, %9}, {%10, %11};\n\t" | |
"selp.u16 %4, 1, 0, %%p0; }" | |
: "=" | |
"f"(__r.x), | |
"=" | |
"f"(__r.y), | |
"=" | |
"f"(__r.z), | |
"=" | |
"f"(__r.w), | |
"=h"(*__ir) | |
: "l"(__obj), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), | |
"f"(__dPdy->x), "f"(__dPdy->y)); | |
return __r; | |
} | |
}; | |
# 259 "__clang_cuda_texture_intrinsics.h" 3 | |
template <> | |
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DLayered_v2")>> { | |
template <class T> | |
__attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, | |
float __y, int __layer); | |
template <> | |
__attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, | |
float __y, int __layer) { | |
int4 __r; | |
asm("tex.a2d.v4" | |
".s32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "r"(__layer), "f"(__x), "f"(__y)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, | |
float __x, float __y, | |
int __layer) { | |
uint4 __r; | |
asm("tex.a2d.v4" | |
".u32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];" | |
: "=" | |
"r"(__r.x), | |
"=" | |
"r"(__r.y), | |
"=" | |
"r"(__r.z), | |
"=" | |
"r"(__r.w) | |
: "l"(__obj), "r"(__layer), "f"(__x), "f"(__y)); | |
return __r; | |
} | |
template <> | |
__attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, | |
float __x, float __y, | |
int __layer) { | |
float4 __r; | |
asm("tex.a2d.v4" | |
".f32." | |
"f32" | |
"\t" | |
"{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];" | |
: "=" | |
"f"(__r.x), | |
"=" | |
"f"(__r.y), | |
"=" | |
"f"(__r.z), | |
"=" | |
"f"(__r.w) | |
: "l"(__obj), "r"(__layer), "f"(__x), "f"(__y)); | |
return __r; | |
} | |
}; | |
// Fetch for the "__tex2DLayered_rmnf_v2" texture op. Every __run specialization
// returns float4; the specialization argument (int4 / uint4) only selects the
// PTX destination type (.s32 / .u32) of the `tex.a2d.v4` instruction.
//
// Operand map shared by both bodies:
//   %0-%3  four result components (__r.x .. __r.w)
//   %4     __obj    ("l")
//   %5     __layer  ("r")
//   %6,%7  __x, __y ("f"); %7 is repeated to fill the fourth coordinate slot
//
// NOTE(review): the results are bound to float4 fields through the integer
// register constraint "r" -- confirm this is the intended way to receive them.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DLayered_rmnf_v2")>> {
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              int __layer);

  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             int __layer) {
    float4 __r;
    asm("tex.a2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y));
    return __r;
  }

  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              int __layer) {
    float4 __r;
    asm("tex.a2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y));
    return __r;
  }
};
// The "__itex2DLayered" op has no code of its own: it inherits every __run
// overload from the "__tex2DLayered_v2" specialization.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2DLayered")>>
    : __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DLayered_v2")>> {};
// Fetch for the "__itex2DLayered_sparse" op: a `tex.a2d.v4` fetch that also
// reports the predicate produced by the instruction.
//
// Each asm body opens its own { } scope, declares a scratch predicate %p0
// (spelled %%p0 in the template), lets the tex instruction set it through the
// `|%%p0` destination, and converts it with `selp.u16` into 1 or 0, which is
// stored to *__ir. Presumably the predicate tells whether the texel data was
// resident -- confirm against the PTX ISA.
//
// Operand map shared by all three bodies:
//   %0-%3  four result components (__r.x .. __r.w)
//   %4     *__ir    (16-bit "h" output)
//   %5     __obj    ("l")
//   %6     __layer  ("r")
//   %7,%8  __x, __y ("f"); %8 is repeated to fill the fourth coordinate slot
// __ir is written unconditionally, so it must point to writable storage.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2DLayered_sparse")>> {
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, int __layer,
                                         unsigned char* __ir);

  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x,
                                           float __y, int __layer,
                                           unsigned char* __ir) {
    int4 __r;
    asm("{.reg .pred %%p0;\n\t"
        "tex.a2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w), "=h"(*__ir)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y));
    return __r;
  }

  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y, int __layer,
                                             unsigned char* __ir) {
    uint4 __r;
    asm("{.reg .pred %%p0;\n\t"
        "tex.a2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w), "=h"(*__ir)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y));
    return __r;
  }

  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               int __layer,
                                               unsigned char* __ir) {
    float4 __r;
    asm("{.reg .pred %%p0;\n\t"
        "tex.a2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__r.x), "=f"(__r.y), "=f"(__r.z), "=f"(__r.w), "=h"(*__ir)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y));
    return __r;
  }
};
# 273 "__clang_cuda_texture_intrinsics.h" 3 | |
// Fetch for the "__tex2DLayeredGrad_v2" op: one `tex.grad.a2d.v4` instruction
// per result type. The specialization argument of __run picks the PTX
// destination type (.s32 for int4, .u32 for uint4, .f32 for float4) and the
// matching output constraint ("r" for the integer results, "f" for float).
//
// Operand map shared by all three bodies:
//   %0-%3    four result components (__r.x .. __r.w)
//   %4       __obj    ("l")
//   %5       __layer  ("r")
//   %6,%7    __x, __y ("f"); %7 is repeated to fill the fourth coordinate slot
//   %8,%9    __dPdx->x, __dPdx->y
//   %10,%11  __dPdy->x, __dPdy->y
// __dPdx and __dPdy are dereferenced unconditionally, so both must be valid.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DLayeredGrad_v2")>> {
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, int __layer,
                                         const float2* __dPdx,
                                         const float2* __dPdy);

  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x,
                                           float __y, int __layer,
                                           const float2* __dPdx,
                                           const float2* __dPdy) {
    int4 __r;
    asm("tex.grad.a2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], {%8, %9}, {%10, %11};"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x),
          "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y));
    return __r;
  }

  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y, int __layer,
                                             const float2* __dPdx,
                                             const float2* __dPdy) {
    uint4 __r;
    asm("tex.grad.a2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], {%8, %9}, {%10, %11};"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x),
          "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y));
    return __r;
  }

  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               int __layer,
                                               const float2* __dPdx,
                                               const float2* __dPdy) {
    float4 __r;
    asm("tex.grad.a2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], {%8, %9}, {%10, %11};"
        : "=f"(__r.x), "=f"(__r.y), "=f"(__r.z), "=f"(__r.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x),
          "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y));
    return __r;
  }
};
// Fetch for the "__tex2DLayeredGrad_rmnf_v2" op. Every __run specialization
// returns float4; the specialization argument (int4 / uint4) only selects the
// PTX destination type (.s32 / .u32) of the `tex.grad.a2d.v4` instruction.
//
// Operand map shared by both bodies:
//   %0-%3    four result components (__r.x .. __r.w)
//   %4       __obj    ("l")
//   %5       __layer  ("r")
//   %6,%7    __x, __y ("f"); %7 is repeated to fill the fourth coordinate slot
//   %8,%9    __dPdx->x, __dPdx->y
//   %10,%11  __dPdy->x, __dPdy->y
// __dPdx and __dPdy are dereferenced unconditionally, so both must be valid.
//
// NOTE(review): the results are bound to float4 fields through the integer
// register constraint "r" -- confirm this is the intended way to receive them.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DLayeredGrad_rmnf_v2")>> {
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y, int __layer,
                                              const float2* __dPdx,
                                              const float2* __dPdy);

  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y, int __layer,
                                             const float2* __dPdx,
                                             const float2* __dPdy) {
    float4 __r;
    asm("tex.grad.a2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], {%8, %9}, {%10, %11};"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x),
          "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y));
    return __r;
  }

  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y, int __layer,
                                              const float2* __dPdx,
                                              const float2* __dPdy) {
    float4 __r;
    asm("tex.grad.a2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], {%8, %9}, {%10, %11};"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x),
          "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y));
    return __r;
  }
};
// The "__itex2DLayeredGrad_v2" op has no code of its own: it inherits every
// __run overload from the "__tex2DLayeredGrad_v2" specialization.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2DLayeredGrad_v2")>>
    : __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DLayeredGrad_v2")>> {};
// Fetch for the "__itex2DLayeredGrad_sparse" op: a `tex.grad.a2d.v4` fetch
// that also reports the predicate produced by the instruction.
//
// Each asm body opens its own { } scope, declares a scratch predicate %p0
// (spelled %%p0 in the template), lets the tex instruction set it through the
// `|%%p0` destination, and converts it with `selp.u16` into 1 or 0, which is
// stored to *__ir. Presumably the predicate tells whether the texel data was
// resident -- confirm against the PTX ISA.
//
// Operand map shared by all three bodies:
//   %0-%3    four result components (__r.x .. __r.w)
//   %4       *__ir    (16-bit "h" output)
//   %5       __obj    ("l")
//   %6       __layer  ("r")
//   %7,%8    __x, __y ("f"); %8 is repeated to fill the fourth coordinate slot
//   %9,%10   __dPdx->x, __dPdx->y
//   %11,%12  __dPdy->x, __dPdy->y
// __dPdx, __dPdy and __ir are all dereferenced unconditionally.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2DLayeredGrad_sparse")>> {
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, int __layer,
                                         const float2* __dPdx,
                                         const float2* __dPdy,
                                         unsigned char* __ir);

  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x,
                                           float __y, int __layer,
                                           const float2* __dPdx,
                                           const float2* __dPdy,
                                           unsigned char* __ir) {
    int4 __r;
    asm("{.reg .pred %%p0;\n\t"
        "tex.grad.a2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], "
        "{%9, %10}, {%11, %12};\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w), "=h"(*__ir)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x),
          "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y));
    return __r;
  }

  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y, int __layer,
                                             const float2* __dPdx,
                                             const float2* __dPdy,
                                             unsigned char* __ir) {
    uint4 __r;
    asm("{.reg .pred %%p0;\n\t"
        "tex.grad.a2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], "
        "{%9, %10}, {%11, %12};\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w), "=h"(*__ir)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x),
          "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y));
    return __r;
  }

  template <>
  __attribute__((device)) float4 __run<float4>(
      cudaTextureObject_t __obj, float __x, float __y, int __layer,
      const float2* __dPdx, const float2* __dPdy, unsigned char* __ir) {
    float4 __r;
    asm("{.reg .pred %%p0;\n\t"
        "tex.grad.a2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], "
        "{%9, %10}, {%11, %12};\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__r.x), "=f"(__r.y), "=f"(__r.z), "=f"(__r.w), "=h"(*__ir)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x),
          "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y));
    return __r;
  }
};
# 293 "__clang_cuda_texture_intrinsics.h" 3 | |
// Fetch for the "__tex2DLayeredLod_v2" op: one `tex.level.a2d.v4` instruction
// per result type. The specialization argument of __run picks the PTX
// destination type (.s32 for int4, .u32 for uint4, .f32 for float4) and the
// matching output constraint ("r" for the integer results, "f" for float).
//
// Operand map shared by all three bodies:
//   %0-%3  four result components (__r.x .. __r.w)
//   %4     __obj    ("l")
//   %5     __layer  ("r")
//   %6,%7  __x, __y ("f"); %7 is repeated to fill the fourth coordinate slot
//   %8     __level  ("f")
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DLayeredLod_v2")>> {
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, int __layer, float __level);

  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x,
                                           float __y, int __layer,
                                           float __level) {
    int4 __r;
    asm("tex.level.a2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__level));
    return __r;
  }

  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y, int __layer,
                                             float __level) {
    uint4 __r;
    asm("tex.level.a2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__level));
    return __r;
  }

  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               int __layer, float __level) {
    float4 __r;
    asm("tex.level.a2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=f"(__r.x), "=f"(__r.y), "=f"(__r.z), "=f"(__r.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__level));
    return __r;
  }
};
// Fetch for the "__tex2DLayeredLod_rmnf_v2" op. Every __run specialization
// returns float4; the specialization argument (int4 / uint4) only selects the
// PTX destination type (.s32 / .u32) of the `tex.level.a2d.v4` instruction.
//
// Operand map shared by both bodies:
//   %0-%3  four result components (__r.x .. __r.w)
//   %4     __obj    ("l")
//   %5     __layer  ("r")
//   %6,%7  __x, __y ("f"); %7 is repeated to fill the fourth coordinate slot
//   %8     __level  ("f")
//
// NOTE(review): the results are bound to float4 fields through the integer
// register constraint "r" -- confirm this is the intended way to receive them.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DLayeredLod_rmnf_v2")>> {
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y, int __layer,
                                              float __level);

  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y, int __layer,
                                             float __level) {
    float4 __r;
    asm("tex.level.a2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__level));
    return __r;
  }

  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y, int __layer,
                                              float __level) {
    float4 __r;
    asm("tex.level.a2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__level));
    return __r;
  }
};
// The "__itex2DLayeredLod" op has no code of its own: it inherits every __run
// overload from the "__tex2DLayeredLod_v2" specialization.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2DLayeredLod")>>
    : __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DLayeredLod_v2")>> {};
// Fetch for the "__itex2DLayeredLod_sparse" op: a `tex.level.a2d.v4` fetch
// that also reports the predicate produced by the instruction.
//
// Each asm body opens its own { } scope, declares a scratch predicate %p0
// (spelled %%p0 in the template), lets the tex instruction set it through the
// `|%%p0` destination, and converts it with `selp.u16` into 1 or 0, which is
// stored to *__ir. Presumably the predicate tells whether the texel data was
// resident -- confirm against the PTX ISA.
//
// Operand map shared by all three bodies:
//   %0-%3  four result components (__r.x .. __r.w)
//   %4     *__ir    (16-bit "h" output)
//   %5     __obj    ("l")
//   %6     __layer  ("r")
//   %7,%8  __x, __y ("f"); %8 is repeated to fill the fourth coordinate slot
//   %9     __level  ("f")
// __ir is written unconditionally, so it must point to writable storage.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2DLayeredLod_sparse")>> {
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, int __layer, float __level,
                                         unsigned char* __ir);

  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x,
                                           float __y, int __layer,
                                           float __level, unsigned char* __ir) {
    int4 __r;
    asm("{.reg .pred %%p0;\n\t"
        "tex.level.a2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w), "=h"(*__ir)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__level));
    return __r;
  }

  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y, int __layer,
                                             float __level,
                                             unsigned char* __ir) {
    uint4 __r;
    asm("{.reg .pred %%p0;\n\t"
        "tex.level.a2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w), "=h"(*__ir)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__level));
    return __r;
  }

  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               int __layer, float __level,
                                               unsigned char* __ir) {
    float4 __r;
    asm("{.reg .pred %%p0;\n\t"
        "tex.level.a2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__r.x), "=f"(__r.y), "=f"(__r.z), "=f"(__r.w), "=h"(*__ir)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__level));
    return __r;
  }
};
# 308 "__clang_cuda_texture_intrinsics.h" 3 | |
// Fetch for the "__tex2DLod_v2" op: one `tex.level.2d.v4` instruction per
// result type. The specialization argument of __run picks the PTX destination
// type (.s32 for int4, .u32 for uint4, .f32 for float4) and the matching output
// constraint ("r" for the integer results, "f" for float).
//
// Operand map shared by all three bodies:
//   %0-%3  four result components (__r.x .. __r.w)
//   %4     __obj    ("l")
//   %5,%6  __x, __y ("f")
//   %7     __level  ("f")
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DLod_v2")>> {
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, float __level);

  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x,
                                           float __y, float __level) {
    int4 __r;
    asm("tex.level.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__level));
    return __r;
  }

  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             float __level) {
    uint4 __r;
    asm("tex.level.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__level));
    return __r;
  }

  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               float __level) {
    float4 __r;
    asm("tex.level.2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;"
        : "=f"(__r.x), "=f"(__r.y), "=f"(__r.z), "=f"(__r.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__level));
    return __r;
  }
};
// Fetch for the "__tex2DLod_rmnf_v2" op. Every __run specialization returns
// float4; the specialization argument (int4 / uint4) only selects the PTX
// destination type (.s32 / .u32) of the `tex.level.2d.v4` instruction.
//
// Operand map shared by both bodies:
//   %0-%3  four result components (__r.x .. __r.w)
//   %4     __obj    ("l")
//   %5,%6  __x, __y ("f")
//   %7     __level  ("f")
//
// NOTE(review): the results are bound to float4 fields through the integer
// register constraint "r" -- confirm this is the intended way to receive them.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DLod_rmnf_v2")>> {
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              float __level);

  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             float __level) {
    float4 __r;
    asm("tex.level.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__level));
    return __r;
  }

  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              float __level) {
    float4 __r;
    asm("tex.level.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__level));
    return __r;
  }
};
// The "__itex2DLod" op has no code of its own: it inherits every __run
// overload from the "__tex2DLod_v2" specialization.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2DLod")>>
    : __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DLod_v2")>> {};
// Fetch for the "__itex2DLod_sparse" op: a `tex.level.2d.v4` fetch that also
// reports the predicate produced by the instruction.
//
// Each asm body opens its own { } scope, declares a scratch predicate %p0
// (spelled %%p0 in the template), lets the tex instruction set it through the
// `|%%p0` destination, and converts it with `selp.u16` into 1 or 0, which is
// stored to *__ir. Presumably the predicate tells whether the texel data was
// resident -- confirm against the PTX ISA.
//
// Operand map shared by all three bodies:
//   %0-%3  four result components (__r.x .. __r.w)
//   %4     *__ir    (16-bit "h" output)
//   %5     __obj    ("l")
//   %6,%7  __x, __y ("f")
//   %8     __level  ("f")
// __ir is written unconditionally, so it must point to writable storage.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2DLod_sparse")>> {
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, float __level,
                                         unsigned char* __ir);

  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x,
                                           float __y, float __level,
                                           unsigned char* __ir) {
    int4 __r;
    asm("{.reg .pred %%p0;\n\t"
        "tex.level.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], %8;\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w), "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__level));
    return __r;
  }

  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             float __level,
                                             unsigned char* __ir) {
    uint4 __r;
    asm("{.reg .pred %%p0;\n\t"
        "tex.level.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], %8;\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w), "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__level));
    return __r;
  }

  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               float __level,
                                               unsigned char* __ir) {
    float4 __r;
    asm("{.reg .pred %%p0;\n\t"
        "tex.level.2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], %8;\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__r.x), "=f"(__r.y), "=f"(__r.z), "=f"(__r.w), "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__level));
    return __r;
  }
};
# 339 "__clang_cuda_texture_intrinsics.h" 3 | |
// Gather fetch for the "__tex2Dgather_v2" op, tag offset + 0: every body issues
// `tld4.r.2d.v4`. The component letter is fixed in the mnemonic, so the __comp
// parameter is accepted but never read here.
// The specialization argument of __run picks the PTX destination type (.s32
// for int4, .u32 for uint4, .f32 for float4) and the matching output
// constraint ("r" for the integer results, "f" for float).
//
// Operand map shared by all three bodies:
//   %0-%3  four result components (__r.x .. __r.w)
//   %4     __obj    ("l")
//   %5,%6  __x, __y ("f")
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 + 0>> {
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, int __comp);

  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x,
                                           float __y, int __comp) {
    int4 __r;
    asm("tld4.r.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }

  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y, int __comp) {
    uint4 __r;
    asm("tld4.r.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }

  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               int __comp) {
    float4 __r;
    asm("tld4.r.2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=f"(__r.x), "=f"(__r.y), "=f"(__r.z), "=f"(__r.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }
};
// Gather fetch for the "__tex2Dgather_rmnf_v2" op, tag offset + 0: every body
// issues `tld4.r.2d.v4`. The component letter is fixed in the mnemonic, so the
// __comp parameter is accepted but never read here.
// Every __run specialization returns float4; the specialization argument
// (int4 / uint4) only selects the PTX destination type (.s32 / .u32).
//
// Operand map shared by both bodies:
//   %0-%3  four result components (__r.x .. __r.w)
//   %4     __obj    ("l")
//   %5,%6  __x, __y ("f")
//
// NOTE(review): the results are bound to float4 fields through the integer
// register constraint "r" -- confirm this is the intended way to receive them.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__tex2Dgather_rmnf_v2") * 100 + 0>> {
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y, int __comp);

  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y, int __comp) {
    float4 __r;
    asm("tld4.r.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }

  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              int __comp) {
    float4 __r;
    asm("tld4.r.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }
};
// The "__itex2Dgather" op with tag offset + 0 has no code of its own: it
// inherits every __run overload from the "__tex2Dgather_v2" specialization
// with the same offset.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__itex2Dgather") * 100 + 0>>
    : __tex_fetch_v4<
          __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 + 0>> {};
// Gather fetch for the "__itex2Dgather_sparse" op, tag offset + 0: every body
// issues `tld4.r.2d.v4` and also reports the predicate produced by the
// instruction. The component letter is fixed in the mnemonic, so the __comp
// parameter is accepted but never read here.
//
// Each asm body opens its own { } scope, declares a scratch predicate %p0
// (spelled %%p0 in the template), lets the tld4 instruction set it through the
// `|%%p0` destination, and converts it with `selp.u16` into 1 or 0, which is
// stored to *__ir. Presumably the predicate tells whether the texel data was
// resident -- confirm against the PTX ISA.
//
// Operand map shared by all three bodies:
//   %0-%3  four result components (__r.x .. __r.w)
//   %4     *__ir    (16-bit "h" output)
//   %5     __obj    ("l")
//   %6,%7  __x, __y ("f")
// __ir is written unconditionally, so it must point to writable storage.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__itex2Dgather_sparse") * 100 + 0>> {
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, unsigned char* __ir,
                                         int __comp);

  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x,
                                           float __y, unsigned char* __ir,
                                           int __comp) {
    int4 __r;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.r.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w), "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }

  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             unsigned char* __ir, int __comp) {
    uint4 __r;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.r.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w), "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }

  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               unsigned char* __ir,
                                               int __comp) {
    float4 __r;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.r.2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__r.x), "=f"(__r.y), "=f"(__r.z), "=f"(__r.w), "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }
};
; | |
// Gather fetch for the "__tex2Dgather_v2" op, tag offset + 1: every body issues
// `tld4.g.2d.v4`. The component letter is fixed in the mnemonic, so the __comp
// parameter is accepted but never read here.
// The specialization argument of __run picks the PTX destination type (.s32
// for int4, .u32 for uint4, .f32 for float4) and the matching output
// constraint ("r" for the integer results, "f" for float).
//
// Operand map shared by all three bodies:
//   %0-%3  four result components (__r.x .. __r.w)
//   %4     __obj    ("l")
//   %5,%6  __x, __y ("f")
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 + 1>> {
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, int __comp);

  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x,
                                           float __y, int __comp) {
    int4 __r;
    asm("tld4.g.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }

  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y, int __comp) {
    uint4 __r;
    asm("tld4.g.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }

  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               int __comp) {
    float4 __r;
    asm("tld4.g.2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=f"(__r.x), "=f"(__r.y), "=f"(__r.z), "=f"(__r.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }
};
// Gather fetch for the "__tex2Dgather_rmnf_v2" op, tag offset + 1: every body
// issues `tld4.g.2d.v4`. The component letter is fixed in the mnemonic, so the
// __comp parameter is accepted but never read here.
// Every __run specialization returns float4; the specialization argument
// (int4 / uint4) only selects the PTX destination type (.s32 / .u32).
//
// Operand map shared by both bodies:
//   %0-%3  four result components (__r.x .. __r.w)
//   %4     __obj    ("l")
//   %5,%6  __x, __y ("f")
//
// NOTE(review): the results are bound to float4 fields through the integer
// register constraint "r" -- confirm this is the intended way to receive them.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__tex2Dgather_rmnf_v2") * 100 + 1>> {
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y, int __comp);

  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y, int __comp) {
    float4 __r;
    asm("tld4.g.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }

  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              int __comp) {
    float4 __r;
    asm("tld4.g.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }
};
// The "__itex2Dgather" op with tag offset + 1 has no code of its own: it
// inherits every __run overload from the "__tex2Dgather_v2" specialization
// with the same offset.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__itex2Dgather") * 100 + 1>>
    : __tex_fetch_v4<
          __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 + 1>> {};
// Gather fetch for the "__itex2Dgather_sparse" op, tag offset + 1: every body
// issues `tld4.g.2d.v4` and also reports the predicate produced by the
// instruction. The component letter is fixed in the mnemonic, so the __comp
// parameter is accepted but never read here.
//
// Each asm body opens its own { } scope, declares a scratch predicate %p0
// (spelled %%p0 in the template), lets the tld4 instruction set it through the
// `|%%p0` destination, and converts it with `selp.u16` into 1 or 0, which is
// stored to *__ir. Presumably the predicate tells whether the texel data was
// resident -- confirm against the PTX ISA.
//
// Operand map shared by all three bodies:
//   %0-%3  four result components (__r.x .. __r.w)
//   %4     *__ir    (16-bit "h" output)
//   %5     __obj    ("l")
//   %6,%7  __x, __y ("f")
// __ir is written unconditionally, so it must point to writable storage.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__itex2Dgather_sparse") * 100 + 1>> {
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, unsigned char* __ir,
                                         int __comp);

  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x,
                                           float __y, unsigned char* __ir,
                                           int __comp) {
    int4 __r;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.g.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w), "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }

  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             unsigned char* __ir, int __comp) {
    uint4 __r;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.g.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w), "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }

  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               unsigned char* __ir,
                                               int __comp) {
    float4 __r;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.g.2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__r.x), "=f"(__r.y), "=f"(__r.z), "=f"(__r.w), "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }
};
; | |
// Gather fetch for the "__tex2Dgather_v2" op, tag offset + 2: every body issues
// `tld4.b.2d.v4`. The component letter is fixed in the mnemonic, so the __comp
// parameter is accepted but never read here.
// The specialization argument of __run picks the PTX destination type (.s32
// for int4, .u32 for uint4, .f32 for float4) and the matching output
// constraint ("r" for the integer results, "f" for float).
//
// Operand map shared by all three bodies:
//   %0-%3  four result components (__r.x .. __r.w)
//   %4     __obj    ("l")
//   %5,%6  __x, __y ("f")
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 + 2>> {
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, int __comp);

  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x,
                                           float __y, int __comp) {
    int4 __r;
    asm("tld4.b.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }

  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y, int __comp) {
    uint4 __r;
    asm("tld4.b.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }

  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               int __comp) {
    float4 __r;
    asm("tld4.b.2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=f"(__r.x), "=f"(__r.y), "=f"(__r.z), "=f"(__r.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }
};
// Gather fetch for the "__tex2Dgather_rmnf_v2" op, tag offset + 2: every body
// issues `tld4.b.2d.v4`. The component letter is fixed in the mnemonic, so the
// __comp parameter is accepted but never read here.
// Every __run specialization returns float4; the specialization argument
// (int4 / uint4) only selects the PTX destination type (.s32 / .u32).
//
// Operand map shared by both bodies:
//   %0-%3  four result components (__r.x .. __r.w)
//   %4     __obj    ("l")
//   %5,%6  __x, __y ("f")
//
// NOTE(review): the results are bound to float4 fields through the integer
// register constraint "r" -- confirm this is the intended way to receive them.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__tex2Dgather_rmnf_v2") * 100 + 2>> {
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y, int __comp);

  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y, int __comp) {
    float4 __r;
    asm("tld4.b.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }

  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              int __comp) {
    float4 __r;
    asm("tld4.b.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }
};
// The "__itex2Dgather" op with tag offset + 2 has no code of its own: it
// inherits every __run overload from the "__tex2Dgather_v2" specialization
// with the same offset.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__itex2Dgather") * 100 + 2>>
    : __tex_fetch_v4<
          __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 + 2>> {};
// Gather fetch for the "__itex2Dgather_sparse" op, tag offset + 2: every body
// issues `tld4.b.2d.v4` and also reports the predicate produced by the
// instruction. The component letter is fixed in the mnemonic, so the __comp
// parameter is accepted but never read here.
//
// Each asm body opens its own { } scope, declares a scratch predicate %p0
// (spelled %%p0 in the template), lets the tld4 instruction set it through the
// `|%%p0` destination, and converts it with `selp.u16` into 1 or 0, which is
// stored to *__ir. Presumably the predicate tells whether the texel data was
// resident -- confirm against the PTX ISA.
//
// Operand map shared by all three bodies:
//   %0-%3  four result components (__r.x .. __r.w)
//   %4     *__ir    (16-bit "h" output)
//   %5     __obj    ("l")
//   %6,%7  __x, __y ("f")
// __ir is written unconditionally, so it must point to writable storage.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__itex2Dgather_sparse") * 100 + 2>> {
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, unsigned char* __ir,
                                         int __comp);

  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x,
                                           float __y, unsigned char* __ir,
                                           int __comp) {
    int4 __r;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.b.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w), "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }

  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             unsigned char* __ir, int __comp) {
    uint4 __r;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.b.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w), "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }

  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               unsigned char* __ir,
                                               int __comp) {
    float4 __r;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.b.2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__r.x), "=f"(__r.y), "=f"(__r.z), "=f"(__r.w), "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }
};
; | |
// Gather fetch for the "__tex2Dgather_v2" op, tag offset + 3: every body issues
// `tld4.a.2d.v4`. The component letter is fixed in the mnemonic, so the __comp
// parameter is accepted but never read here.
// The specialization argument of __run picks the PTX destination type (.s32
// for int4, .u32 for uint4, .f32 for float4) and the matching output
// constraint ("r" for the integer results, "f" for float).
//
// Operand map shared by all three bodies:
//   %0-%3  four result components (__r.x .. __r.w)
//   %4     __obj    ("l")
//   %5,%6  __x, __y ("f")
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 + 3>> {
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, int __comp);

  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x,
                                           float __y, int __comp) {
    int4 __r;
    asm("tld4.a.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }

  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y, int __comp) {
    uint4 __r;
    asm("tld4.a.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }

  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               int __comp) {
    float4 __r;
    asm("tld4.a.2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=f"(__r.x), "=f"(__r.y), "=f"(__r.z), "=f"(__r.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __r;
  }
};
// Fetch helper tagged 10000 + op("__tex2Dgather_rmnf_v2") * 100 + 3.
// Always returns float4; the template argument (int4 or uint4) only selects
// the .s32 / .u32 form of PTX `tld4.a.2d.v4`.
// NOTE(review): the float4 members are bound with integer "r" constraints;
// presumably the raw register bits are kept rather than converted - confirm.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__tex2Dgather_rmnf_v2") * 100 + 3>> {
  // Declaration only: definitions are supplied for int4 and uint4.
  template <class __T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              int __comp);

  // .s32 form.
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             int __comp) {
    float4 __res;
    asm("tld4.a.2d.v4.s32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }

  // .u32 form.
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              int __comp) {
    float4 __res;
    asm("tld4.a.2d.v4.u32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }
};
// The "__itex2Dgather" tag with suffix 3 adds nothing of its own: it inherits
// __run from the "__tex2Dgather_v2" specialization with the same suffix.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__itex2Dgather") * 100 + 3>>
    : public __tex_fetch_v4<
          __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 + 3>> {
};
// Fetch helper tagged 10000 + op("__itex2Dgather_sparse") * 100 + 3.
// Issues PTX `tld4.a.2d.v4.<type>.f32` with an extra predicate destination
// (%%p0); `selp.u16` turns that predicate into 1 or 0, which is stored through
// __ir (presumably the residency flag - confirm against the caller).
// __comp is not referenced in these bodies.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__itex2Dgather_sparse") * 100 + 3>> {
  // Declaration only: definitions are supplied for int4, uint4 and float4.
  template <class __T>
  __attribute__((device)) static __T __run(cudaTextureObject_t __obj,
                                           float __x, float __y,
                                           unsigned char* __ir, int __comp);

  // .s32 results.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y,
                                           unsigned char* __ir, int __comp) {
    int4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.a.2d.v4.s32.f32\t{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }

  // .u32 results.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             unsigned char* __ir,
                                             int __comp) {
    uint4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.a.2d.v4.u32.f32\t{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }

  // .f32 results.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               unsigned char* __ir,
                                               int __comp) {
    float4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.a.2d.v4.f32.f32\t{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }
};
; | |
// Run-time dispatcher for "__tex2Dgather_v2".
// __comp (0..3) picks the per-component helper tagged
// 10000 + op("__tex2Dgather_v2") * 100 + __comp and forwards every argument
// to its __run<__T>.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2Dgather_v2")>> {
  // Returns the selected helper's result, or a value-initialized __T when
  // __comp is outside 0..3.
  template <class __T>
  __attribute__((device)) static __T __run(cudaTextureObject_t __obj, float __x,
                                           float __y, int __comp) {
    switch (__comp) {
      case 0:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 +
                  0>>::__run<__T>(__obj, __x, __y, __comp);
      case 1:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 +
                  1>>::__run<__T>(__obj, __x, __y, __comp);
      case 2:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 +
                  2>>::__run<__T>(__obj, __x, __y, __comp);
      case 3:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 +
                  3>>::__run<__T>(__obj, __x, __y, __comp);
      default:
        // Without this branch an unexpected __comp would flow off the end of
        // a value-returning function, which is undefined behaviour.
        return __T();
    }
  }
};
// "__itex2Dgather" reuses the "__tex2Dgather_v2" implementation unchanged by
// inheriting from it.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2Dgather")>>
    : public __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2Dgather_v2")>> {
};
// Run-time dispatcher for "__tex2Dgather_rmnf_v2".
// __comp (0..3) picks the per-component helper tagged
// 10000 + op("__tex2Dgather_rmnf_v2") * 100 + __comp and forwards every
// argument to its __run<__T>. The result is always float4.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2Dgather_rmnf_v2")>> {
  // Returns the selected helper's result, or a value-initialized float4 when
  // __comp is outside 0..3.
  template <class __T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              int __comp) {
    switch (__comp) {
      case 0:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__tex2Dgather_rmnf_v2") * 100 +
                  0>>::__run<__T>(__obj, __x, __y, __comp);
      case 1:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__tex2Dgather_rmnf_v2") * 100 +
                  1>>::__run<__T>(__obj, __x, __y, __comp);
      case 2:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__tex2Dgather_rmnf_v2") * 100 +
                  2>>::__run<__T>(__obj, __x, __y, __comp);
      case 3:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__tex2Dgather_rmnf_v2") * 100 +
                  3>>::__run<__T>(__obj, __x, __y, __comp);
      default:
        // Without this branch an unexpected __comp would flow off the end of
        // a value-returning function, which is undefined behaviour.
        return float4();
    }
  }
};
// Run-time dispatcher for "__itex2Dgather_sparse".
// __comp (0..3) picks the per-component helper tagged
// 10000 + op("__itex2Dgather_sparse") * 100 + __comp and forwards every
// argument, including the __ir output pointer, to its __run<__T>.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2Dgather_sparse")>> {
  // Returns the selected helper's result. When __comp is outside 0..3 no
  // fetch is issued: *__ir is set to 0 and a value-initialized __T is
  // returned.
  template <class __T>
  __attribute__((device)) static __T __run(cudaTextureObject_t __obj, float __x,
                                           float __y, unsigned char* __ir,
                                           int __comp) {
    switch (__comp) {
      case 0:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__itex2Dgather_sparse") * 100 +
                  0>>::__run<__T>(__obj, __x, __y, __ir, __comp);
      case 1:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__itex2Dgather_sparse") * 100 +
                  1>>::__run<__T>(__obj, __x, __y, __ir, __comp);
      case 2:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__itex2Dgather_sparse") * 100 +
                  2>>::__run<__T>(__obj, __x, __y, __ir, __comp);
      case 3:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__itex2Dgather_sparse") * 100 +
                  3>>::__run<__T>(__obj, __x, __y, __ir, __comp);
      default:
        // Without this branch an unexpected __comp would flow off the end of
        // a value-returning function (undefined behaviour) and leave *__ir
        // unwritten for the caller to read.
        *__ir = 0;
        return __T();
    }
  }
};
// Fetch helper for "__tex3D_v2": PTX `tex.3d.v4.<type>.f32`.
// The coordinate vector has four slots; __z is passed in both the third and
// fourth slot.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex3D_v2")>> {
  // Declaration only: definitions are supplied for int4, uint4 and float4.
  template <class __T>
  __attribute__((device)) static __T __run(cudaTextureObject_t __obj,
                                           float __x, float __y, float __z);

  // .s32 results.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y, float __z) {
    int4 __res;
    asm("tex.3d.v4.s32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }

  // .u32 results.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             float __z) {
    uint4 __res;
    asm("tex.3d.v4.u32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }

  // .f32 results.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               float __z) {
    float4 __res;
    asm("tex.3d.v4.f32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }
};
// Fetch helper for "__tex3D_rmnf_v2": PTX `tex.3d.v4` in its .s32 / .u32 form,
// chosen by the template argument, with the result always returned as float4.
// NOTE(review): the float4 members are bound with integer "r" constraints;
// presumably the raw register bits are kept rather than converted - confirm.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex3D_rmnf_v2")>> {
  // Declaration only: definitions are supplied for int4 and uint4.
  template <class __T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              float __z);

  // .s32 form.
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             float __z) {
    float4 __res;
    asm("tex.3d.v4.s32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }

  // .u32 form.
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              float __z) {
    float4 __res;
    asm("tex.3d.v4.u32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }
};
// "__itex3D" reuses the "__tex3D_v2" implementation unchanged by inheriting
// from it.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex3D")>>
    : public __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex3D_v2")>> {
};
// Fetch helper for "__itex3D_sparse": PTX `tex.3d.v4.<type>.f32` with an extra
// predicate destination (%%p0). `selp.u16` turns the predicate into 1 or 0,
// which is stored through __ir (presumably the residency flag - confirm).
// __z is passed in both the third and fourth coordinate slot.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex3D_sparse")>> {
  // Declaration only: definitions are supplied for int4, uint4 and float4.
  template <class __T>
  __attribute__((device)) static __T __run(cudaTextureObject_t __obj,
                                           float __x, float __y, float __z,
                                           unsigned char* __ir);

  // .s32 results.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y, float __z,
                                           unsigned char* __ir) {
    int4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tex.3d.v4.s32.f32\t{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }

  // .u32 results.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y, float __z,
                                             unsigned char* __ir) {
    uint4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tex.3d.v4.u32.f32\t{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }

  // .f32 results.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               float __z,
                                               unsigned char* __ir) {
    float4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tex.3d.v4.f32.f32\t{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }
};
# 420 "__clang_cuda_texture_intrinsics.h" 3 | |
// Fetch helper for "__tex3DGrad_v2": PTX `tex.grad.3d.v4.<type>.f32`.
// The coordinate and both gradient vectors have four slots each; the z value
// (__z, __dPdx->z, __dPdy->z) is passed in both the third and fourth slot.
// Only the x, y and z members of *__dPdx and *__dPdy are read.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex3DGrad_v2")>> {
  // Declaration only: definitions are supplied for int4, uint4 and float4.
  template <class __T>
  __attribute__((device)) static __T __run(cudaTextureObject_t __obj,
                                           float __x, float __y, float __z,
                                           const float4* __dPdx,
                                           const float4* __dPdy);

  // .s32 results.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y, float __z,
                                           const float4* __dPdx,
                                           const float4* __dPdy) {
    int4 __res;
    asm("tex.grad.3d.v4.s32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
        "{%8, %9, %10, %10}, {%11, %12, %13, %13};"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __res;
  }

  // .u32 results.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y, float __z,
                                             const float4* __dPdx,
                                             const float4* __dPdy) {
    uint4 __res;
    asm("tex.grad.3d.v4.u32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
        "{%8, %9, %10, %10}, {%11, %12, %13, %13};"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __res;
  }

  // .f32 results.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               float __z,
                                               const float4* __dPdx,
                                               const float4* __dPdy) {
    float4 __res;
    asm("tex.grad.3d.v4.f32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
        "{%8, %9, %10, %10}, {%11, %12, %13, %13};"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __res;
  }
};
// Fetch helper for "__tex3DGrad_rmnf_v2": PTX `tex.grad.3d.v4` in its
// .s32 / .u32 form, chosen by the template argument, with the result always
// returned as float4. Only x, y and z of *__dPdx and *__dPdy are read.
// NOTE(review): the float4 members are bound with integer "r" constraints;
// presumably the raw register bits are kept rather than converted - confirm.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex3DGrad_rmnf_v2")>> {
  // Declaration only: definitions are supplied for int4 and uint4.
  template <class __T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              float __z,
                                              const float4* __dPdx,
                                              const float4* __dPdy);

  // .s32 form.
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y, float __z,
                                             const float4* __dPdx,
                                             const float4* __dPdy) {
    float4 __res;
    asm("tex.grad.3d.v4.s32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
        "{%8, %9, %10, %10}, {%11, %12, %13, %13};"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __res;
  }

  // .u32 form.
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              float __z,
                                              const float4* __dPdx,
                                              const float4* __dPdy) {
    float4 __res;
    asm("tex.grad.3d.v4.u32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
        "{%8, %9, %10, %10}, {%11, %12, %13, %13};"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __res;
  }
};
// "__itex3DGrad_v2" reuses the "__tex3DGrad_v2" implementation unchanged by
// inheriting from it.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex3DGrad_v2")>>
    : public __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex3DGrad_v2")>> {
};
// Fetch helper for "__itex3DGrad_sparse": PTX `tex.grad.3d.v4.<type>.f32` with
// an extra predicate destination (%%p0). `selp.u16` turns the predicate into
// 1 or 0, which is stored through __ir (presumably the residency flag -
// confirm). Only x, y and z of *__dPdx and *__dPdy are read; each z value is
// passed in both the third and fourth slot of its vector.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex3DGrad_sparse")>> {
  // Declaration only: definitions are supplied for int4, uint4 and float4.
  template <class __T>
  __attribute__((device)) static __T __run(cudaTextureObject_t __obj,
                                           float __x, float __y, float __z,
                                           const float4* __dPdx,
                                           const float4* __dPdy,
                                           unsigned char* __ir);

  // .s32 results.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y, float __z,
                                           const float4* __dPdx,
                                           const float4* __dPdy,
                                           unsigned char* __ir) {
    int4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tex.grad.3d.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], "
        "{%9, %10, %11, %11}, {%12, %13, %14, %14};\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __res;
  }

  // .u32 results.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y, float __z,
                                             const float4* __dPdx,
                                             const float4* __dPdy,
                                             unsigned char* __ir) {
    uint4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tex.grad.3d.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], "
        "{%9, %10, %11, %11}, {%12, %13, %14, %14};\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __res;
  }

  // .f32 results.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               float __z,
                                               const float4* __dPdx,
                                               const float4* __dPdy,
                                               unsigned char* __ir) {
    float4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tex.grad.3d.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], "
        "{%9, %10, %11, %11}, {%12, %13, %14, %14};\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __res;
  }
};
# 441 "__clang_cuda_texture_intrinsics.h" 3 | |
// Fetch helper for "__tex3DLod_v2": PTX `tex.level.3d.v4.<type>.f32`, with
// __level as the trailing operand. __z is passed in both the third and fourth
// coordinate slot.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex3DLod_v2")>> {
  // Declaration only: definitions are supplied for int4, uint4 and float4.
  template <class __T>
  __attribute__((device)) static __T __run(cudaTextureObject_t __obj,
                                           float __x, float __y, float __z,
                                           float __level);

  // .s32 results.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y, float __z,
                                           float __level) {
    int4 __res;
    asm("tex.level.3d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __res;
  }

  // .u32 results.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y, float __z,
                                             float __level) {
    uint4 __res;
    asm("tex.level.3d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __res;
  }

  // .f32 results.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               float __z, float __level) {
    float4 __res;
    asm("tex.level.3d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __res;
  }
};
// Fetch helper for "__tex3DLod_rmnf_v2": PTX `tex.level.3d.v4` in its
// .s32 / .u32 form, chosen by the template argument, with the result always
// returned as float4.
// NOTE(review): the float4 members are bound with integer "r" constraints;
// presumably the raw register bits are kept rather than converted - confirm.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex3DLod_rmnf_v2")>> {
  // Declaration only: definitions are supplied for int4 and uint4.
  template <class __T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              float __z, float __level);

  // .s32 form.
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y, float __z,
                                             float __level) {
    float4 __res;
    asm("tex.level.3d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __res;
  }

  // .u32 form.
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              float __z, float __level) {
    float4 __res;
    asm("tex.level.3d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __res;
  }
};
// "__itex3DLod" reuses the "__tex3DLod_v2" implementation unchanged by
// inheriting from it.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex3DLod")>>
    : public __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex3DLod_v2")>> {
};
// Fetch helper for "__itex3DLod_sparse": PTX `tex.level.3d.v4.<type>.f32` with
// an extra predicate destination (%%p0) and __level as the trailing operand.
// `selp.u16` turns the predicate into 1 or 0, which is stored through __ir
// (presumably the residency flag - confirm).
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex3DLod_sparse")>> {
  // Declaration only: definitions are supplied for int4, uint4 and float4.
  template <class __T>
  __attribute__((device)) static __T __run(cudaTextureObject_t __obj,
                                           float __x, float __y, float __z,
                                           float __level,
                                           unsigned char* __ir);

  // .s32 results.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y, float __z,
                                           float __level,
                                           unsigned char* __ir) {
    int4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tex.level.3d.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __res;
  }

  // .u32 results.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y, float __z,
                                             float __level,
                                             unsigned char* __ir) {
    uint4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tex.level.3d.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __res;
  }

  // .f32 results.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               float __z, float __level,
                                               unsigned char* __ir) {
    float4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tex.level.3d.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __res;
  }
};
# 457 "__clang_cuda_texture_intrinsics.h" 3 | |
// Fetch helper for "__texCubemap_v2": PTX `tex.cube.v4.<type>.f32`.
// The coordinate vector has four slots; __z is passed in both the third and
// fourth slot.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemap_v2")>> {
  // Declaration only: definitions are supplied for int4, uint4 and float4.
  template <class __T>
  __attribute__((device)) static __T __run(cudaTextureObject_t __obj,
                                           float __x, float __y, float __z);

  // .s32 results.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y, float __z) {
    int4 __res;
    asm("tex.cube.v4.s32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }

  // .u32 results.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             float __z) {
    uint4 __res;
    asm("tex.cube.v4.u32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }

  // .f32 results.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               float __z) {
    float4 __res;
    asm("tex.cube.v4.f32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }
};
// Fetch helper for "__texCubemap_rmnf_v2": PTX `tex.cube.v4` in its
// .s32 / .u32 form, chosen by the template argument, with the result always
// returned as float4.
// NOTE(review): the float4 members are bound with integer "r" constraints;
// presumably the raw register bits are kept rather than converted - confirm.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemap_rmnf_v2")>> {
  // Declaration only: definitions are supplied for int4 and uint4.
  template <class __T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              float __z);

  // .s32 form.
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             float __z) {
    float4 __res;
    asm("tex.cube.v4.s32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }

  // .u32 form.
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              float __z) {
    float4 __res;
    asm("tex.cube.v4.u32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }
};
// "__itexCubemap" reuses the "__texCubemap_v2" implementation unchanged by
// inheriting from it.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itexCubemap")>>
    : public __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemap_v2")>> {
};
// Fetch helper for "__itexCubemap_sparse": PTX `tex.cube.v4.<type>.f32` with
// an extra predicate destination (%%p0). `selp.u16` turns the predicate into
// 1 or 0, which is stored through __ir (presumably the residency flag -
// confirm). __z is passed in both the third and fourth coordinate slot.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itexCubemap_sparse")>> {
  // Declaration only: definitions are supplied for int4, uint4 and float4.
  template <class __T>
  __attribute__((device)) static __T __run(cudaTextureObject_t __obj,
                                           float __x, float __y, float __z,
                                           unsigned char* __ir);

  // .s32 results.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y, float __z,
                                           unsigned char* __ir) {
    int4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tex.cube.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }

  // .u32 results.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y, float __z,
                                             unsigned char* __ir) {
    uint4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tex.cube.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }

  // .f32 results.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               float __z,
                                               unsigned char* __ir) {
    float4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tex.cube.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }
};
// Fetch helper for "__texCubemapGrad_v2": PTX `tex.grad.cube.v4.<type>.f32`.
// The coordinate and both gradient vectors have four slots each; the z value
// (__z, __dPdx->z, __dPdy->z) is passed in both the third and fourth slot.
// Only the x, y and z members of *__dPdx and *__dPdy are read.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemapGrad_v2")>> {
  // Declaration only: definitions are supplied for int4, uint4 and float4.
  template <class __T>
  __attribute__((device)) static __T __run(cudaTextureObject_t __obj,
                                           float __x, float __y, float __z,
                                           const float4* __dPdx,
                                           const float4* __dPdy);

  // .s32 results.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y, float __z,
                                           const float4* __dPdx,
                                           const float4* __dPdy) {
    int4 __res;
    asm("tex.grad.cube.v4.s32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
        "{%8, %9, %10, %10}, {%11, %12, %13, %13};"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __res;
  }

  // .u32 results.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y, float __z,
                                             const float4* __dPdx,
                                             const float4* __dPdy) {
    uint4 __res;
    asm("tex.grad.cube.v4.u32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
        "{%8, %9, %10, %10}, {%11, %12, %13, %13};"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __res;
  }

  // .f32 results.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               float __z,
                                               const float4* __dPdx,
                                               const float4* __dPdy) {
    float4 __res;
    asm("tex.grad.cube.v4.f32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
        "{%8, %9, %10, %10}, {%11, %12, %13, %13};"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __res;
  }
};
// Fetch helper for "__texCubemapGrad_rmnf_v2": PTX `tex.grad.cube.v4` in its
// .s32 / .u32 form, chosen by the template argument, with the result always
// returned as float4. Only x, y and z of *__dPdx and *__dPdy are read.
// NOTE(review): the float4 members are bound with integer "r" constraints;
// presumably the raw register bits are kept rather than converted - confirm.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemapGrad_rmnf_v2")>> {
  // Declaration only: definitions are supplied for int4 and uint4.
  template <class __T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              float __z,
                                              const float4* __dPdx,
                                              const float4* __dPdy);

  // .s32 form.
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y, float __z,
                                             const float4* __dPdx,
                                             const float4* __dPdy) {
    float4 __res;
    asm("tex.grad.cube.v4.s32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
        "{%8, %9, %10, %10}, {%11, %12, %13, %13};"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __res;
  }

  // .u32 form.
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              float __z,
                                              const float4* __dPdx,
                                              const float4* __dPdy) {
    float4 __res;
    asm("tex.grad.cube.v4.u32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
        "{%8, %9, %10, %10}, {%11, %12, %13, %13};"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __res;
  }
};
// "__itexCubemapGrad_v2" reuses the "__texCubemapGrad_v2" implementation
// unchanged by inheriting from it.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itexCubemapGrad_v2")>>
    : public __tex_fetch_v4<
          __Tag<__nvvm_texture_op("__texCubemapGrad_v2")>> {
};
// Fetch helper for "__texCubemapLayered_v2": PTX `tex.acube.v4.<type>.f32`.
// The four-slot vector is filled as {__layer, __x, __y, __z}; __layer is bound
// with an integer ("r") constraint, the coordinates with float ("f") ones.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemapLayered_v2")>> {
  // Declaration only: definitions are supplied for int4, uint4 and float4.
  template <class __T>
  __attribute__((device)) static __T __run(cudaTextureObject_t __obj,
                                           float __x, float __y, float __z,
                                           int __layer);

  // .s32 results.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y, float __z,
                                           int __layer) {
    int4 __res;
    asm("tex.acube.v4.s32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }

  // .u32 results.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y, float __z,
                                             int __layer) {
    uint4 __res;
    asm("tex.acube.v4.u32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }

  // .f32 results.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               float __z, int __layer) {
    float4 __res;
    asm("tex.acube.v4.f32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }
};
// Fetchers for the "__texCubemapLayered_rmnf_v2" texture operation.
//
// Unlike the plain "__texCubemapLayered_v2" fetchers, __run here always
// returns float4; the template argument (int4 or uint4) only selects whether
// the PTX instruction is issued with the .s32 or the .u32 result type. The
// float members of the result are bound as "r" outputs of the asm statement.
// NOTE(review): "rmnf" presumably stands for "read mode normalized float" -
// confirm against the macro definitions in the original header.
//
// Operand layout shared by both specializations:
//   %0-%3  result components x, y, z, w
//   %4     __obj
//   %5     __layer
//   %6-%8  __x, __y, __z
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemapLayered_rmnf_v2")>> {
  // Primary template: declaration only, defined per data type below.
  template <class __DataT>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y, float __z,
                                              int __layer);

  // Selected with int4: instruction uses the .s32 result type.
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                              float __x, float __y, float __z,
                                              int __layer) {
    float4 __texel;
    asm("tex.acube.v4.s32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z));
    return __texel;
  }

  // Selected with uint4: instruction uses the .u32 result type.
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                               float __x, float __y, float __z,
                                               int __layer) {
    float4 __texel;
    asm("tex.acube.v4.u32.f32\t{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z));
    return __texel;
  }
};
// Alias: the "__itexCubemapLayered" tag adds no members of its own and
// inherits everything from the "__texCubemapLayered_v2" specialization.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itexCubemapLayered")>>
    : public __tex_fetch_v4<
          __Tag<__nvvm_texture_op("__texCubemapLayered_v2")>> {
};
// Fetchers for the "__texCubemapLayeredGrad_v2" texture operation.
//
// __run is declared for an arbitrary result type and explicitly specialized
// below for int4, uint4 and float4. Each specialization issues a single PTX
// `tex.grad.acube.v4` instruction through inline asm.
//
// Operand layout shared by all specializations:
//   %0-%3    result components x, y, z, w
//   %4       __obj
//   %5       __layer
//   %6-%8    __x, __y, __z
//   %9-%11   __dPdx->x, __dPdx->y, __dPdx->z
//   %12-%14  __dPdy->x, __dPdy->y, __dPdy->z
// Only the x/y/z members of __dPdx and __dPdy are read. In the instruction
// text the last operand of each gradient vector (%11, %14) is listed twice.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemapLayeredGrad_v2")>> {
  // Primary template: declaration only, defined per result type below.
  template <class __RetT>
  __attribute__((device)) static __RetT __run(cudaTextureObject_t __obj,
                                               float __x, float __y, float __z,
                                               int __layer,
                                               const float4* __dPdx,
                                               const float4* __dPdy);

  // int4 result: PTX result type .s32, outputs bound with "r" constraints.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                            float __x, float __y, float __z,
                                            int __layer, const float4* __dPdx,
                                            const float4* __dPdy) {
    int4 __texel;
    asm("tex.grad.acube.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], "
        "{%9, %10, %11, %11}, {%12, %13, %14, %14};"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __texel;
  }

  // uint4 result: PTX result type .u32, outputs bound with "r" constraints.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y, float __z,
                                              int __layer,
                                              const float4* __dPdx,
                                              const float4* __dPdy) {
    uint4 __texel;
    asm("tex.grad.acube.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], "
        "{%9, %10, %11, %11}, {%12, %13, %14, %14};"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __texel;
  }

  // float4 result: PTX result type .f32, outputs bound with "f" constraints.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                                float __x, float __y,
                                                float __z, int __layer,
                                                const float4* __dPdx,
                                                const float4* __dPdy) {
    float4 __texel;
    asm("tex.grad.acube.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], "
        "{%9, %10, %11, %11}, {%12, %13, %14, %14};"
        : "=f"(__texel.x), "=f"(__texel.y), "=f"(__texel.z), "=f"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __texel;
  }
};
// Fetchers for the "__texCubemapLayeredGrad_rmnf_v2" texture operation.
//
// __run always returns float4; the template argument (int4 or uint4) only
// selects whether the PTX `tex.grad.acube.v4` instruction is issued with the
// .s32 or the .u32 result type. The float members of the result are bound as
// "r" outputs of the asm statement.
// NOTE(review): "rmnf" presumably stands for "read mode normalized float" -
// confirm against the macro definitions in the original header.
//
// Operand layout shared by both specializations:
//   %0-%3    result components x, y, z, w
//   %4       __obj
//   %5       __layer
//   %6-%8    __x, __y, __z
//   %9-%11   __dPdx->x, __dPdx->y, __dPdx->z
//   %12-%14  __dPdy->x, __dPdy->y, __dPdy->z
// Only the x/y/z members of __dPdx and __dPdy are read. In the instruction
// text the last operand of each gradient vector (%11, %14) is listed twice.
template <>
struct __tex_fetch_v4<
    __Tag<__nvvm_texture_op("__texCubemapLayeredGrad_rmnf_v2")>> {
  // Primary template: declaration only, defined per data type below.
  template <class __DataT>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y, float __z,
                                              int __layer,
                                              const float4* __dPdx,
                                              const float4* __dPdy);

  // Selected with int4: instruction uses the .s32 result type.
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                              float __x, float __y, float __z,
                                              int __layer,
                                              const float4* __dPdx,
                                              const float4* __dPdy) {
    float4 __texel;
    asm("tex.grad.acube.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], "
        "{%9, %10, %11, %11}, {%12, %13, %14, %14};"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __texel;
  }

  // Selected with uint4: instruction uses the .u32 result type.
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                               float __x, float __y, float __z,
                                               int __layer,
                                               const float4* __dPdx,
                                               const float4* __dPdy) {
    float4 __texel;
    asm("tex.grad.acube.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], "
        "{%9, %10, %11, %11}, {%12, %13, %14, %14};"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __texel;
  }
};
# 493 "__clang_cuda_texture_intrinsics.h" 3 | |
// Alias: the "__itexCubemapLayeredGrad_v2" tag adds no members of its own and
// inherits everything from the "__texCubemapLayeredGrad_v2" specialization.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itexCubemapLayeredGrad_v2")>>
    : public __tex_fetch_v4<
          __Tag<__nvvm_texture_op("__texCubemapLayeredGrad_v2")>> {
};
// Fetchers for the "__texCubemapLayeredLod_v2" texture operation.
//
// __run is declared for an arbitrary result type and explicitly specialized
// below for int4, uint4 and float4. Each specialization issues a single PTX
// `tex.level.acube.v4` instruction through inline asm.
//
// Operand layout shared by all specializations:
//   %0-%3  result components x, y, z, w
//   %4     __obj
//   %5     __layer
//   %6-%8  __x, __y, __z
//   %9     __level
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemapLayeredLod_v2")>> {
  // Primary template: declaration only, defined per result type below.
  template <class __RetT>
  __attribute__((device)) static __RetT __run(cudaTextureObject_t __obj,
                                               float __x, float __y, float __z,
                                               int __layer, float __level);

  // int4 result: PTX result type .s32, outputs bound with "r" constraints.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                            float __x, float __y, float __z,
                                            int __layer, float __level) {
    int4 __texel;
    asm("tex.level.acube.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], %9;"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z),
          "f"(__level));
    return __texel;
  }

  // uint4 result: PTX result type .u32, outputs bound with "r" constraints.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y, float __z,
                                              int __layer, float __level) {
    uint4 __texel;
    asm("tex.level.acube.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], %9;"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z),
          "f"(__level));
    return __texel;
  }

  // float4 result: PTX result type .f32, outputs bound with "f" constraints.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                                float __x, float __y,
                                                float __z, int __layer,
                                                float __level) {
    float4 __texel;
    asm("tex.level.acube.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], %9;"
        : "=f"(__texel.x), "=f"(__texel.y), "=f"(__texel.z), "=f"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z),
          "f"(__level));
    return __texel;
  }
};
// Fetchers for the "__texCubemapLayeredLod_rmnf_v2" texture operation.
//
// __run always returns float4; the template argument (int4 or uint4) only
// selects whether the PTX `tex.level.acube.v4` instruction is issued with
// the .s32 or the .u32 result type. The float members of the result are
// bound as "r" outputs of the asm statement.
// NOTE(review): "rmnf" presumably stands for "read mode normalized float" -
// confirm against the macro definitions in the original header.
//
// Operand layout shared by both specializations:
//   %0-%3  result components x, y, z, w
//   %4     __obj
//   %5     __layer
//   %6-%8  __x, __y, __z
//   %9     __level
template <>
struct __tex_fetch_v4<
    __Tag<__nvvm_texture_op("__texCubemapLayeredLod_rmnf_v2")>> {
  // Primary template: declaration only, defined per data type below.
  template <class __DataT>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y, float __z,
                                              int __layer, float __level);

  // Selected with int4: instruction uses the .s32 result type.
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                              float __x, float __y, float __z,
                                              int __layer, float __level) {
    float4 __texel;
    asm("tex.level.acube.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], %9;"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z),
          "f"(__level));
    return __texel;
  }

  // Selected with uint4: instruction uses the .u32 result type.
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                               float __x, float __y, float __z,
                                               int __layer, float __level) {
    float4 __texel;
    asm("tex.level.acube.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], %9;"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z),
          "f"(__level));
    return __texel;
  }
};
// Alias: the "__itexCubemapLayeredLod" tag adds no members of its own and
// inherits everything from the "__texCubemapLayeredLod_v2" specialization.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itexCubemapLayeredLod")>>
    : public __tex_fetch_v4<
          __Tag<__nvvm_texture_op("__texCubemapLayeredLod_v2")>> {
};
// Fetchers for the "__texCubemapLod_v2" texture operation.
//
// __run is declared for an arbitrary result type and explicitly specialized
// below for int4, uint4 and float4. Each specialization issues a single PTX
// `tex.level.cube.v4` instruction through inline asm.
//
// Operand layout shared by all specializations:
//   %0-%3  result components x, y, z, w
//   %4     __obj
//   %5-%7  __x, __y, __z
//   %8     __level
// In the instruction text the coordinate vector has four slots and its last
// operand (%7, __z) is listed twice.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemapLod_v2")>> {
  // Primary template: declaration only, defined per result type below.
  template <class __RetT>
  __attribute__((device)) static __RetT __run(cudaTextureObject_t __obj,
                                               float __x, float __y, float __z,
                                               float __level);

  // int4 result: PTX result type .s32, outputs bound with "r" constraints.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                            float __x, float __y, float __z,
                                            float __level) {
    int4 __texel;
    asm("tex.level.cube.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __texel;
  }

  // uint4 result: PTX result type .u32, outputs bound with "r" constraints.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y, float __z,
                                              float __level) {
    uint4 __texel;
    asm("tex.level.cube.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __texel;
  }

  // float4 result: PTX result type .f32, outputs bound with "f" constraints.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                                float __x, float __y,
                                                float __z, float __level) {
    float4 __texel;
    asm("tex.level.cube.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=f"(__texel.x), "=f"(__texel.y), "=f"(__texel.z), "=f"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __texel;
  }
};
// Fetchers for the "__texCubemapLod_rmnf_v2" texture operation.
//
// __run always returns float4; the template argument (int4 or uint4) only
// selects whether the PTX `tex.level.cube.v4` instruction is issued with the
// .s32 or the .u32 result type. The float members of the result are bound as
// "r" outputs of the asm statement.
// NOTE(review): "rmnf" presumably stands for "read mode normalized float" -
// confirm against the macro definitions in the original header.
//
// Operand layout shared by both specializations:
//   %0-%3  result components x, y, z, w
//   %4     __obj
//   %5-%7  __x, __y, __z
//   %8     __level
// In the instruction text the coordinate vector has four slots and its last
// operand (%7, __z) is listed twice.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemapLod_rmnf_v2")>> {
  // Primary template: declaration only, defined per data type below.
  template <class __DataT>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y, float __z,
                                              float __level);

  // Selected with int4: instruction uses the .s32 result type.
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                              float __x, float __y, float __z,
                                              float __level) {
    float4 __texel;
    asm("tex.level.cube.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __texel;
  }

  // Selected with uint4: instruction uses the .u32 result type.
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                               float __x, float __y, float __z,
                                               float __level) {
    float4 __texel;
    asm("tex.level.cube.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __texel;
  }
};
// Alias: the "__itexCubemapLod" tag adds no members of its own and inherits
// everything from the "__texCubemapLod_v2" specialization.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itexCubemapLod")>>
    : public __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemapLod_v2")>> {
};
// Converts the value produced by a texture fetch (__SrcT) into the type the
// caller asked for (__DestT).
//
// __run<__IsConvertible, __N> is selected through its default template
// arguments:
//   __IsConvertible  whether __SrcT implicitly converts to __DestT;
//   __N              sizeof(__DestT) / sizeof(__FT<__DestT>::__bt), i.e. how
//                    many base-type elements fit in __DestT.
// The primary template returns __v unchanged and relies on the implicit
// conversion. When no implicit conversion exists, the <false, 1..4>
// specializations brace-initialize __DestT from the first __N members
// (x, y, z, w) of __v.
template <class __DestT, class __SrcT>
struct __convert {
  // std::is_convertible is declared as is_convertible<From, To>. The
  // conversion performed by `return __v;` goes from __SrcT to __DestT, so the
  // source type has to be the first argument.
  template <bool __IsConvertible = std::is_convertible<__SrcT, __DestT>::value,
            int __N = sizeof(__DestT) / sizeof(typename __FT<__DestT>::__bt)>
  __attribute__((device)) static __DestT __run(__SrcT __v) {
    return __v;
  }

  // One-element destination: keep x only.
  template <>
  __attribute__((device)) static __DestT __run<false, 1>(__SrcT __v) {
    return {__v.x};
  }

  // Two-element destination: keep x and y.
  template <>
  __attribute__((device)) static __DestT __run<false, 2>(__SrcT __v) {
    return {__v.x, __v.y};
  }

  // Three-element destination: keep x, y and z.
  template <>
  __attribute__((device)) static __DestT __run<false, 3>(__SrcT __v) {
    return {__v.x, __v.y, __v.z};
  }

  // Four-element destination: keep all of x, y, z and w.
  template <>
  __attribute__((device)) static __DestT __run<false, 4>(__SrcT __v) {
    return {__v.x, __v.y, __v.z, __v.w};
  }
};
// Fetches through a texture object and stores the converted result.
//
//   __op      tag type selecting which __tex_fetch_v4 specialization runs.
//   __ptr     destination; receives the fetched value converted to __T.
//   __handle  texture object, passed to the fetcher as is.
//   __args    remaining arguments, passed by value to the fetcher's __run.
template <class __op, class __T, class... __Args>
__attribute__((device)) static void __tex_fetch(__T* __ptr,
                                                cudaTextureObject_t __handle,
                                                __Args... __args) {
  // Type used for the fetch itself, looked up from __T through __FT.
  using __FetchT = typename __FT<__T>::__ft;
  using __Fetcher = __tex_fetch_v4<__op>;
  using __Converter = __convert<__T, __FetchT>;
  *__ptr = __Converter::__run(
      __Fetcher::template __run<__FetchT>(__handle, __args...));
}
// Produces a cudaTextureObject_t from an arbitrary handle value by moving it
// through a 64-bit PTX register copy (`mov.b64`); both the input handle and
// the output object are bound with the "l" constraint.
template <class __T>
__attribute__((device)) cudaTextureObject_t __tex_handle_to_obj(__T __handle) {
  cudaTextureObject_t __result;
  asm("mov.b64 %0, %1; "
      : "=l"(__result)
      : "l"(__handle));
  return __result;
}
// Fetches through a handle of arbitrary type and stores the converted result.
// The handle is first turned into a cudaTextureObject_t with
// __tex_handle_to_obj.
//
//   __op      tag type selecting which __tex_fetch_v4 specialization runs.
//   __ptr     destination; receives the fetched value converted to __T.
//   __handle  handle converted to a texture object before the fetch.
//   __args    remaining arguments, passed by value to the fetcher's __run.
template <class __op, class __T, class __HandleT, class... __Args>
__attribute__((device)) static void __tex_fetch(__T* __ptr, __HandleT __handle,
                                                __Args... __args) {
  // Type used for the fetch itself, looked up from __T through __FT.
  using __FetchT = typename __FT<__T>::__ft;
  using __Fetcher = __tex_fetch_v4<__op>;
  using __Converter = __convert<__T, __FetchT>;
  *__ptr = __Converter::__run(__Fetcher::template __run<__FetchT>(
      __tex_handle_to_obj(__handle), __args...));
}
// Fetch overload for texture references declared with
// cudaReadModeNormalizedFloat.
//
// The fetcher is instantiated with the type looked up from __DataT through
// __FT, but its result is converted to __RetT starting from float4.
//
//   __op        tag type selecting which __tex_fetch_v4 specialization runs.
//   (unnamed)   __DataT*, used only to deduce __DataT; never dereferenced.
//   __ptr       destination; receives the converted value.
//   __handle    texture reference, converted with __tex_handle_to_obj.
//   __args      remaining arguments, passed by value to the fetcher's __run.
template <class __op, class __DataT, class __RetT, int __TexT, class... __Args>
__attribute__((device)) static void __tex_fetch(
    __DataT*, __RetT* __ptr,
    texture<__DataT, __TexT, cudaReadModeNormalizedFloat> __handle,
    __Args... __args) {
  using __FetchT = typename __FT<__DataT>::__ft;
  using __Fetcher = __tex_fetch_v4<__op>;
  using __Converter = __convert<__RetT, float4>;
  *__ptr = __Converter::__run(__Fetcher::template __run<__FetchT>(
      __tex_handle_to_obj(__handle), __args...));
}
// Fetch overload for texture references declared with
// cudaReadModeElementType.
//
// Both the fetch and the conversion to __RetT use the type looked up from
// __DataT through __FT.
//
//   __op        tag type selecting which __tex_fetch_v4 specialization runs.
//   (unnamed)   __DataT*, used only to deduce __DataT; never dereferenced.
//   __ptr       destination; receives the converted value.
//   __handle    texture reference, converted with __tex_handle_to_obj.
//   __args      remaining arguments, passed by value to the fetcher's __run.
template <class __op, class __DataT, class __RetT, int __TexT, class... __Args>
__attribute__((device)) static void __tex_fetch(
    __DataT*, __RetT* __ptr,
    texture<__DataT, __TexT, cudaReadModeElementType> __handle,
    __Args... __args) {
  using __FetchT = typename __FT<__DataT>::__ft;
  using __Fetcher = __tex_fetch_v4<__op>;
  using __Converter = __convert<__RetT, __FetchT>;
  *__ptr = __Converter::__run(__Fetcher::template __run<__FetchT>(
      __tex_handle_to_obj(__handle), __args...));
}
} // namespace |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment