Created
November 3, 2020 20:01
-
-
Save BtbN/3ddad5d136208c45badd75f994cf7a70 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ---------------------------------------------------------------------------
// Subsample_Bicubic_uchar
//
// Bicubic (4x4 tap, separable) resampling kernel; every thread produces one
// 8-bit output sample.
//
// A thread derives its output coordinate (x, y) from the block/thread
// indices, maps the pixel centre into source space, fetches a 4x4
// neighbourhood through the texture handle (only the first component of each
// fetch is used) and blends it with cubic weights built from the constants
// -0.75 / 3.75 / -6 / 3 and 1.25 / -2.25 (i.e. cubic parameter A = -0.75).
//
// Parameters -- roles are read off from how each value is used below; the
// names in parentheses are assumptions to confirm against the host launcher:
//   param_0 (u64)  texture handle used by every tex.2d fetch
//   param_1 (u64)  generic address of the output buffer
//   param_2 (u32)  exclusive upper bound for x, divisor of the x scale
//                  (presumably destination width)
//   param_3 (u32)  exclusive upper bound for y, divisor of the y scale
//                  (presumably destination height)
//   param_4 (u32)  output row stride; store index = y * param_4 + x
//   param_5 (u32)  numerator of the x scale (presumably source width)
//   param_6 (u32)  numerator of the y scale (presumably source height)
//   param_7 (u32)  > 8 selects a scale of 65535.0 instead of 255.0
//                  (presumably bit depth)
//
// Change vs. the generated code: the converted sample is clamped to 255
// before the byte store.  The cubic weights have negative lobes, so the
// blended value can overshoot; st.global.u8 keeps only the low 8 bits, which
// made any overshoot wrap around (e.g. 256 -> 0) instead of saturating.
// ---------------------------------------------------------------------------
.visible .entry Subsample_Bicubic_uchar(
    .param .u64 Subsample_Bicubic_uchar_param_0,
    .param .u64 Subsample_Bicubic_uchar_param_1,
    .param .u32 Subsample_Bicubic_uchar_param_2,
    .param .u32 Subsample_Bicubic_uchar_param_3,
    .param .u32 Subsample_Bicubic_uchar_param_4,
    .param .u32 Subsample_Bicubic_uchar_param_5,
    .param .u32 Subsample_Bicubic_uchar_param_6,
    .param .u32 Subsample_Bicubic_uchar_param_7
)
{
    .reg .pred %p<5>;
    .reg .f32 %f<138>;
    .reg .b32 %r<17>;
    .reg .b64 %rd<6>;

    // ---- load kernel arguments -------------------------------------------
    ld.param.u64 %rd1, [Subsample_Bicubic_uchar_param_0];   // texture handle
    ld.param.u64 %rd2, [Subsample_Bicubic_uchar_param_1];   // output buffer
    ld.param.u32 %r3, [Subsample_Bicubic_uchar_param_2];    // x bound
    ld.param.u32 %r4, [Subsample_Bicubic_uchar_param_3];    // y bound
    ld.param.u32 %r5, [Subsample_Bicubic_uchar_param_4];    // output row stride
    ld.param.u32 %r6, [Subsample_Bicubic_uchar_param_5];    // x scale numerator
    ld.param.u32 %r7, [Subsample_Bicubic_uchar_param_6];    // y scale numerator
    ld.param.u32 %r8, [Subsample_Bicubic_uchar_param_7];    // selects output scale

    // ---- output coordinate: x = ntid.x*ctaid.x + tid.x, same for y --------
    mov.u32 %r9, %ctaid.x;
    mov.u32 %r10, %ntid.x;
    mov.u32 %r11, %tid.x;
    mad.lo.s32 %r1, %r10, %r9, %r11;                        // x
    mov.u32 %r12, %ntid.y;
    mov.u32 %r13, %ctaid.y;
    mov.u32 %r14, %tid.y;
    mad.lo.s32 %r2, %r12, %r13, %r14;                       // y

    // ---- threads outside the output rectangle do nothing ------------------
    setp.ge.s32 %p1, %r2, %r4;
    setp.ge.s32 %p2, %r1, %r3;
    or.pred %p3, %p1, %p2;
    @%p3 bra BB0_2;

    // ---- map the output pixel centre into source space --------------------
    // sx = (x + 0.5) * (param_5 / param_2) - 0.5, sy likewise with param_6/3
    cvt.rn.f32.s32 %f1, %r6;
    cvt.rn.f32.s32 %f2, %r3;
    div.rn.f32 %f3, %f1, %f2;                               // x scale
    cvt.rn.f32.s32 %f4, %r4;
    cvt.rn.f32.s32 %f5, %r7;
    div.rn.f32 %f6, %f5, %f4;                               // y scale
    cvt.rn.f32.s32 %f7, %r1;
    add.f32 %f8, %f7, 0f3F000000;                           // x + 0.5
    fma.rn.f32 %f9, %f8, %f3, 0fBF000000;                   // sx
    cvt.rn.f32.s32 %f10, %r2;
    add.f32 %f11, %f10, 0f3F000000;                         // y + 0.5
    fma.rn.f32 %f12, %f11, %f6, 0fBF000000;                 // sy
    cvt.rmi.f32.f32 %f13, %f9;                              // px = floor(sx)
    cvt.rmi.f32.f32 %f14, %f12;                             // py = floor(sy)
    add.f32 %f15, %f13, 0fBF800000;                         // px - 1
    add.f32 %f16, %f14, 0fBF800000;                         // py - 1

    // ---- row py-1: four horizontal taps -----------------------------------
    tex.2d.v4.f32.f32 {%f17, %f18, %f19, %f20}, [%rd1, {%f15, %f16}];
    tex.2d.v4.f32.f32 {%f21, %f22, %f23, %f24}, [%rd1, {%f13, %f16}];
    add.f32 %f25, %f13, 0f3F800000;                         // px + 1
    tex.2d.v4.f32.f32 {%f26, %f27, %f28, %f29}, [%rd1, {%f25, %f16}];
    add.f32 %f30, %f13, 0f40000000;                         // px + 2
    tex.2d.v4.f32.f32 {%f31, %f32, %f33, %f34}, [%rd1, {%f30, %f16}];

    // ---- horizontal cubic weights from fx = sx - px -----------------------
    //   wx0 = ((-0.75*(fx+1) + 3.75)*(fx+1) - 6)*(fx+1) + 3
    //   wx1 = (1.25*fx - 2.25)*fx*fx + 1
    //   wx2 = (1.25*(1-fx) - 2.25)*(1-fx)*(1-fx) + 1
    //   wx3 = 1 - wx0 - wx1 - wx2
    sub.f32 %f35, %f9, %f13;                                // fx
    add.f32 %f36, %f35, 0f3F800000;                         // fx + 1
    fma.rn.f32 %f37, %f36, 0fBF400000, 0f40700000;
    mov.f32 %f38, 0f3F800000;                               // 1.0
    sub.f32 %f39, %f38, %f35;                               // 1 - fx
    fma.rn.f32 %f40, %f35, 0f3FA00000, 0fC0100000;
    fma.rn.f32 %f41, %f36, %f37, 0fC0C00000;
    mul.f32 %f42, %f35, %f40;
    fma.rn.f32 %f43, %f39, 0f3FA00000, 0fC0100000;
    fma.rn.f32 %f44, %f36, %f41, 0f40400000;                // wx0
    mul.f32 %f45, %f39, %f43;
    fma.rn.f32 %f46, %f35, %f42, 0f3F800000;                // wx1
    sub.f32 %f47, %f38, %f44;
    sub.f32 %f48, %f47, %f46;
    fma.rn.f32 %f49, %f39, %f45, 0f3F800000;                // wx2
    sub.f32 %f50, %f48, %f49;                               // wx3

    // blended row py-1
    mul.f32 %f51, %f21, %f46;
    fma.rn.f32 %f52, %f17, %f44, %f51;
    fma.rn.f32 %f53, %f26, %f49, %f52;
    fma.rn.f32 %f54, %f31, %f50, %f53;                      // row(py-1)

    // ---- row py -----------------------------------------------------------
    tex.2d.v4.f32.f32 {%f55, %f56, %f57, %f58}, [%rd1, {%f15, %f14}];
    tex.2d.v4.f32.f32 {%f59, %f60, %f61, %f62}, [%rd1, {%f13, %f14}];
    tex.2d.v4.f32.f32 {%f63, %f64, %f65, %f66}, [%rd1, {%f25, %f14}];
    tex.2d.v4.f32.f32 {%f67, %f68, %f69, %f70}, [%rd1, {%f30, %f14}];
    mul.f32 %f71, %f59, %f46;
    fma.rn.f32 %f72, %f55, %f44, %f71;
    fma.rn.f32 %f73, %f63, %f49, %f72;
    fma.rn.f32 %f74, %f67, %f50, %f73;                      // row(py)

    // ---- row py+1 ---------------------------------------------------------
    add.f32 %f75, %f14, 0f3F800000;                         // py + 1
    tex.2d.v4.f32.f32 {%f76, %f77, %f78, %f79}, [%rd1, {%f15, %f75}];
    tex.2d.v4.f32.f32 {%f80, %f81, %f82, %f83}, [%rd1, {%f13, %f75}];
    tex.2d.v4.f32.f32 {%f84, %f85, %f86, %f87}, [%rd1, {%f25, %f75}];
    tex.2d.v4.f32.f32 {%f88, %f89, %f90, %f91}, [%rd1, {%f30, %f75}];
    mul.f32 %f92, %f80, %f46;
    fma.rn.f32 %f93, %f76, %f44, %f92;
    fma.rn.f32 %f94, %f84, %f49, %f93;
    fma.rn.f32 %f95, %f50, %f88, %f94;                      // row(py+1)

    // ---- row py+2: fetch now, blend after the vertical weights ------------
    add.f32 %f96, %f14, 0f40000000;                         // py + 2
    tex.2d.v4.f32.f32 {%f97, %f98, %f99, %f100}, [%rd1, {%f15, %f96}];
    tex.2d.v4.f32.f32 {%f101, %f102, %f103, %f104}, [%rd1, {%f13, %f96}];
    tex.2d.v4.f32.f32 {%f105, %f106, %f107, %f108}, [%rd1, {%f25, %f96}];
    tex.2d.v4.f32.f32 {%f109, %f110, %f111, %f112}, [%rd1, {%f30, %f96}];

    // ---- vertical cubic weights from fy = sy - py (same formulas as wx*) --
    sub.f32 %f113, %f12, %f14;                              // fy
    add.f32 %f114, %f113, 0f3F800000;                       // fy + 1
    fma.rn.f32 %f115, %f114, 0fBF400000, 0f40700000;
    sub.f32 %f116, %f38, %f113;                             // 1 - fy
    fma.rn.f32 %f117, %f113, 0f3FA00000, 0fC0100000;
    fma.rn.f32 %f118, %f114, %f115, 0fC0C00000;
    mul.f32 %f119, %f113, %f117;
    fma.rn.f32 %f120, %f116, 0f3FA00000, 0fC0100000;
    fma.rn.f32 %f121, %f114, %f118, 0f40400000;             // wy0
    mul.f32 %f122, %f116, %f120;
    fma.rn.f32 %f123, %f113, %f119, 0f3F800000;             // wy1
    sub.f32 %f124, %f38, %f121;
    sub.f32 %f125, %f124, %f123;
    fma.rn.f32 %f126, %f116, %f122, 0f3F800000;             // wy2
    sub.f32 %f127, %f125, %f126;                            // wy3

    // ---- output scale: 65535.0 when param_7 > 8, otherwise 255.0 ----------
    setp.gt.s32 %p4, %r8, 8;
    selp.f32 %f128, 0f477FFF00, 0f437F0000, %p4;

    // blended row py+2
    mul.f32 %f129, %f46, %f101;
    fma.rn.f32 %f130, %f44, %f97, %f129;
    fma.rn.f32 %f131, %f49, %f105, %f130;
    fma.rn.f32 %f132, %f50, %f109, %f131;                   // row(py+2)

    // ---- vertical blend of the four rows ----------------------------------
    mul.f32 %f133, %f123, %f74;
    fma.rn.f32 %f134, %f121, %f54, %f133;
    fma.rn.f32 %f135, %f126, %f95, %f134;
    fma.rn.f32 %f136, %f127, %f132, %f135;

    // ---- scale, convert, saturate, store ----------------------------------
    mul.f32 %f137, %f128, %f136;
    // Float-to-integer cvt clamps to the destination range (PTX ISA), so a
    // negative undershoot already becomes 0 here.
    cvt.rzi.u32.f32 %r15, %f137;
    // Saturate overshoot: without this the byte store below keeps only the
    // low 8 bits and values above 255 wrap around.
    min.u32 %r15, %r15, 255;
    mad.lo.s32 %r16, %r2, %r5, %r1;                         // y * stride + x
    cvta.to.global.u64 %rd3, %rd2;
    cvt.s64.s32 %rd4, %r16;
    add.s64 %rd5, %rd3, %rd4;
    st.global.u8 [%rd5], %r15;

BB0_2:
    ret;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// .globl Subsample_Lanczos_uchar | |
.visible .entry Subsample_Lanczos_uchar( | |
.param .u64 Subsample_Lanczos_uchar_param_0, | |
.param .u64 Subsample_Lanczos_uchar_param_1, | |
.param .u32 Subsample_Lanczos_uchar_param_2, | |
.param .u32 Subsample_Lanczos_uchar_param_3, | |
.param .u32 Subsample_Lanczos_uchar_param_4, | |
.param .u32 Subsample_Lanczos_uchar_param_5, | |
.param .u32 Subsample_Lanczos_uchar_param_6, | |
.param .u32 Subsample_Lanczos_uchar_param_7 | |
) | |
{ | |
.local .align 4 .b8 __local_depot6[28]; | |
.reg .b64 %SP; | |
.reg .b64 %SPL; | |
.reg .pred %p<221>; | |
.reg .f32 %f<953>; | |
.reg .b32 %r<1480>; | |
.reg .b64 %rd<179>; | |
mov.u64 %SPL, __local_depot6; | |
cvta.local.u64 %SP, %SPL; | |
ld.param.u64 %rd97, [Subsample_Lanczos_uchar_param_0]; | |
ld.param.u64 %rd98, [Subsample_Lanczos_uchar_param_1]; | |
ld.param.u32 %r562, [Subsample_Lanczos_uchar_param_2]; | |
ld.param.u32 %r563, [Subsample_Lanczos_uchar_param_3]; | |
ld.param.u32 %r564, [Subsample_Lanczos_uchar_param_4]; | |
ld.param.u32 %r565, [Subsample_Lanczos_uchar_param_5]; | |
ld.param.u32 %r566, [Subsample_Lanczos_uchar_param_6]; | |
ld.param.u32 %r567, [Subsample_Lanczos_uchar_param_7]; | |
mov.u32 %r568, %ntid.x; | |
mov.u32 %r569, %ctaid.x; | |
mov.u32 %r570, %tid.x; | |
mad.lo.s32 %r1, %r568, %r569, %r570; | |
mov.u32 %r571, %ntid.y; | |
mov.u32 %r572, %ctaid.y; | |
mov.u32 %r573, %tid.y; | |
mad.lo.s32 %r2, %r571, %r572, %r573; | |
setp.ge.s32 %p1, %r2, %r563; | |
setp.ge.s32 %p2, %r1, %r562; | |
or.pred %p3, %p1, %p2; | |
@%p3 bra BB6_386; | |
cvt.rn.f32.s32 %f309, %r565; | |
cvt.rn.f32.s32 %f310, %r562; | |
div.rn.f32 %f311, %f309, %f310; | |
cvt.rn.f32.s32 %f312, %r563; | |
cvt.rn.f32.s32 %f313, %r566; | |
div.rn.f32 %f314, %f313, %f312; | |
cvt.rn.f32.s32 %f315, %r1; | |
add.f32 %f316, %f315, 0f3F000000; | |
fma.rn.f32 %f317, %f316, %f311, 0fBF000000; | |
cvt.rn.f32.s32 %f318, %r2; | |
add.f32 %f319, %f318, 0f3F000000; | |
fma.rn.f32 %f1, %f319, %f314, 0fBF000000; | |
cvt.rmi.f32.f32 %f2, %f317; | |
cvt.rmi.f32.f32 %f3, %f1; | |
sub.f32 %f320, %f317, %f2; | |
add.f32 %f321, %f320, 0f3F800000; | |
mul.f32 %f4, %f321, 0f40490FDB; | |
mul.f32 %f5, %f320, 0f40490FDB; | |
add.f32 %f322, %f320, 0fBF800000; | |
mul.f32 %f6, %f322, 0f40490FDB; | |
add.f32 %f323, %f320, 0fC0000000; | |
mul.f32 %f7, %f323, 0f40490FDB; | |
setp.eq.f32 %p4, %f4, 0f00000000; | |
mov.f32 %f952, 0f3F800000; | |
mov.f32 %f861, %f952; | |
@%p4 bra BB6_49; | |
abs.f32 %f324, %f4; | |
setp.neu.f32 %p5, %f324, 0f7F800000; | |
mov.f32 %f849, %f4; | |
@%p5 bra BB6_4; | |
mov.f32 %f325, 0f00000000; | |
mul.rn.f32 %f849, %f4, %f325; | |
BB6_4: | |
mul.f32 %f326, %f849, 0f3F22F983; | |
cvt.rni.s32.f32 %r1329, %f326; | |
cvt.rn.f32.s32 %f327, %r1329; | |
neg.f32 %f328, %f327; | |
mov.f32 %f329, 0f3FC90FDA; | |
fma.rn.f32 %f330, %f328, %f329, %f849; | |
mov.f32 %f331, 0f33A22168; | |
fma.rn.f32 %f332, %f328, %f331, %f330; | |
mov.f32 %f333, 0f27C234C5; | |
fma.rn.f32 %f850, %f328, %f333, %f332; | |
abs.f32 %f334, %f849; | |
setp.leu.f32 %p6, %f334, 0f47CE4780; | |
@%p6 bra BB6_15; | |
mov.b32 %r4, %f849; | |
shl.b32 %r576, %r4, 8; | |
or.b32 %r5, %r576, -2147483648; | |
add.u64 %rd100, %SP, 0; | |
add.u64 %rd148, %SPL, 0; | |
mov.u32 %r1321, 0; | |
mov.u64 %rd147, __cudart_i2opi_f; | |
mov.u32 %r1320, -6; | |
BB6_6: | |
.pragma "nounroll"; | |
ld.const.u32 %r579, [%rd147]; | |
// inline asm | |
{ | |
mad.lo.cc.u32 %r577, %r579, %r5, %r1321; | |
madc.hi.u32 %r1321, %r579, %r5, 0; | |
} | |
// inline asm | |
st.local.u32 [%rd148], %r577; | |
add.s64 %rd148, %rd148, 4; | |
add.s64 %rd147, %rd147, 4; | |
add.s32 %r1320, %r1320, 1; | |
setp.ne.s32 %p7, %r1320, 0; | |
@%p7 bra BB6_6; | |
bfe.u32 %r582, %r4, 23, 8; | |
add.s32 %r583, %r582, -128; | |
shr.u32 %r584, %r583, 5; | |
and.b32 %r10, %r4, -2147483648; | |
cvta.to.local.u64 %rd102, %rd100; | |
st.local.u32 [%rd102+24], %r1321; | |
bfe.u32 %r11, %r4, 23, 5; | |
mov.u32 %r585, 6; | |
sub.s32 %r586, %r585, %r584; | |
mul.wide.s32 %rd103, %r586, 4; | |
add.s64 %rd6, %rd102, %rd103; | |
ld.local.u32 %r1322, [%rd6]; | |
ld.local.u32 %r1323, [%rd6+-4]; | |
setp.eq.s32 %p8, %r11, 0; | |
@%p8 bra BB6_9; | |
mov.u32 %r587, 32; | |
sub.s32 %r588, %r587, %r11; | |
shr.u32 %r589, %r1323, %r588; | |
shl.b32 %r590, %r1322, %r11; | |
add.s32 %r1322, %r589, %r590; | |
ld.local.u32 %r591, [%rd6+-8]; | |
shr.u32 %r592, %r591, %r588; | |
shl.b32 %r593, %r1323, %r11; | |
add.s32 %r1323, %r592, %r593; | |
BB6_9: | |
shr.u32 %r594, %r1323, 30; | |
shl.b32 %r595, %r1322, 2; | |
add.s32 %r1324, %r594, %r595; | |
shl.b32 %r19, %r1323, 2; | |
shr.u32 %r596, %r1324, 31; | |
shr.u32 %r597, %r1322, 30; | |
add.s32 %r20, %r596, %r597; | |
setp.eq.s32 %p9, %r596, 0; | |
@%p9 bra BB6_10; | |
not.b32 %r598, %r1324; | |
neg.s32 %r1326, %r19; | |
setp.eq.s32 %p10, %r19, 0; | |
selp.u32 %r599, 1, 0, %p10; | |
add.s32 %r1324, %r599, %r598; | |
xor.b32 %r1325, %r10, -2147483648; | |
bra.uni BB6_12; | |
BB6_10: | |
mov.u32 %r1325, %r10; | |
mov.u32 %r1326, %r19; | |
BB6_12: | |
clz.b32 %r1328, %r1324; | |
setp.eq.s32 %p11, %r1328, 0; | |
shl.b32 %r600, %r1324, %r1328; | |
mov.u32 %r601, 32; | |
sub.s32 %r602, %r601, %r1328; | |
shr.u32 %r603, %r1326, %r602; | |
add.s32 %r604, %r603, %r600; | |
selp.b32 %r28, %r1324, %r604, %p11; | |
mov.u32 %r605, -921707870; | |
mul.hi.u32 %r1327, %r28, %r605; | |
setp.eq.s32 %p12, %r10, 0; | |
neg.s32 %r606, %r20; | |
selp.b32 %r1329, %r20, %r606, %p12; | |
setp.lt.s32 %p13, %r1327, 1; | |
@%p13 bra BB6_14; | |
mul.lo.s32 %r607, %r28, -921707870; | |
shr.u32 %r608, %r607, 31; | |
shl.b32 %r609, %r1327, 1; | |
add.s32 %r1327, %r608, %r609; | |
add.s32 %r1328, %r1328, 1; | |
BB6_14: | |
mov.u32 %r610, 126; | |
sub.s32 %r611, %r610, %r1328; | |
shl.b32 %r612, %r611, 23; | |
add.s32 %r613, %r1327, 1; | |
shr.u32 %r614, %r613, 7; | |
add.s32 %r615, %r614, 1; | |
shr.u32 %r616, %r615, 1; | |
add.s32 %r617, %r616, %r612; | |
or.b32 %r618, %r617, %r1325; | |
mov.b32 %f850, %r618; | |
BB6_15: | |
mul.rn.f32 %f13, %f850, %f850; | |
and.b32 %r36, %r1329, 1; | |
setp.eq.s32 %p14, %r36, 0; | |
@%p14 bra BB6_17; | |
mov.f32 %f335, 0fBAB6061A; | |
mov.f32 %f336, 0f37CCF5CE; | |
fma.rn.f32 %f851, %f336, %f13, %f335; | |
bra.uni BB6_18; | |
BB6_17: | |
mov.f32 %f337, 0f3C08839E; | |
mov.f32 %f338, 0fB94CA1F9; | |
fma.rn.f32 %f851, %f338, %f13, %f337; | |
BB6_18: | |
@%p14 bra BB6_20; | |
mov.f32 %f339, 0f3D2AAAA5; | |
fma.rn.f32 %f340, %f851, %f13, %f339; | |
mov.f32 %f341, 0fBF000000; | |
fma.rn.f32 %f852, %f340, %f13, %f341; | |
bra.uni BB6_21; | |
BB6_20: | |
mov.f32 %f342, 0fBE2AAAA3; | |
fma.rn.f32 %f343, %f851, %f13, %f342; | |
mov.f32 %f344, 0f00000000; | |
fma.rn.f32 %f852, %f343, %f13, %f344; | |
BB6_21: | |
fma.rn.f32 %f853, %f852, %f850, %f850; | |
@%p14 bra BB6_23; | |
mov.f32 %f345, 0f3F800000; | |
fma.rn.f32 %f853, %f852, %f13, %f345; | |
BB6_23: | |
and.b32 %r619, %r1329, 2; | |
setp.eq.s32 %p17, %r619, 0; | |
@%p17 bra BB6_25; | |
mov.f32 %f346, 0f00000000; | |
mov.f32 %f347, 0fBF800000; | |
fma.rn.f32 %f853, %f853, %f347, %f346; | |
BB6_25: | |
mul.f32 %f855, %f4, 0f3F000000; | |
abs.f32 %f348, %f855; | |
setp.neu.f32 %p18, %f348, 0f7F800000; | |
@%p18 bra BB6_27; | |
mov.f32 %f349, 0f00000000; | |
mul.rn.f32 %f855, %f855, %f349; | |
BB6_27: | |
mul.f32 %f350, %f855, 0f3F22F983; | |
cvt.rni.s32.f32 %r1339, %f350; | |
cvt.rn.f32.s32 %f351, %r1339; | |
neg.f32 %f352, %f351; | |
fma.rn.f32 %f354, %f352, %f329, %f855; | |
fma.rn.f32 %f356, %f352, %f331, %f354; | |
fma.rn.f32 %f856, %f352, %f333, %f356; | |
abs.f32 %f358, %f855; | |
setp.leu.f32 %p19, %f358, 0f47CE4780; | |
@%p19 bra BB6_38; | |
mov.b32 %r38, %f855; | |
shr.u32 %r39, %r38, 23; | |
shl.b32 %r622, %r38, 8; | |
or.b32 %r40, %r622, -2147483648; | |
add.u64 %rd105, %SP, 0; | |
add.u64 %rd150, %SPL, 0; | |
mov.u32 %r1331, 0; | |
mov.u64 %rd149, __cudart_i2opi_f; | |
mov.u32 %r1330, -6; | |
BB6_29: | |
.pragma "nounroll"; | |
ld.const.u32 %r625, [%rd149]; | |
// inline asm | |
{ | |
mad.lo.cc.u32 %r623, %r625, %r40, %r1331; | |
madc.hi.u32 %r1331, %r625, %r40, 0; | |
} | |
// inline asm | |
st.local.u32 [%rd150], %r623; | |
add.s64 %rd150, %rd150, 4; | |
add.s64 %rd149, %rd149, 4; | |
add.s32 %r1330, %r1330, 1; | |
setp.ne.s32 %p20, %r1330, 0; | |
@%p20 bra BB6_29; | |
and.b32 %r628, %r39, 255; | |
add.s32 %r629, %r628, -128; | |
shr.u32 %r630, %r629, 5; | |
and.b32 %r45, %r38, -2147483648; | |
cvta.to.local.u64 %rd107, %rd105; | |
st.local.u32 [%rd107+24], %r1331; | |
mov.u32 %r631, 6; | |
sub.s32 %r632, %r631, %r630; | |
mul.wide.s32 %rd108, %r632, 4; | |
add.s64 %rd12, %rd107, %rd108; | |
ld.local.u32 %r1332, [%rd12]; | |
ld.local.u32 %r1333, [%rd12+-4]; | |
and.b32 %r48, %r39, 31; | |
setp.eq.s32 %p21, %r48, 0; | |
@%p21 bra BB6_32; | |
mov.u32 %r633, 32; | |
sub.s32 %r634, %r633, %r48; | |
shr.u32 %r635, %r1333, %r634; | |
shl.b32 %r636, %r1332, %r48; | |
add.s32 %r1332, %r635, %r636; | |
ld.local.u32 %r637, [%rd12+-8]; | |
shr.u32 %r638, %r637, %r634; | |
shl.b32 %r639, %r1333, %r48; | |
add.s32 %r1333, %r638, %r639; | |
BB6_32: | |
shr.u32 %r640, %r1333, 30; | |
shl.b32 %r641, %r1332, 2; | |
add.s32 %r1334, %r640, %r641; | |
shl.b32 %r54, %r1333, 2; | |
shr.u32 %r642, %r1334, 31; | |
shr.u32 %r643, %r1332, 30; | |
add.s32 %r55, %r642, %r643; | |
setp.eq.s32 %p22, %r642, 0; | |
@%p22 bra BB6_33; | |
not.b32 %r644, %r1334; | |
neg.s32 %r1336, %r54; | |
setp.eq.s32 %p23, %r54, 0; | |
selp.u32 %r645, 1, 0, %p23; | |
add.s32 %r1334, %r645, %r644; | |
xor.b32 %r1335, %r45, -2147483648; | |
bra.uni BB6_35; | |
BB6_33: | |
mov.u32 %r1335, %r45; | |
mov.u32 %r1336, %r54; | |
BB6_35: | |
clz.b32 %r1338, %r1334; | |
setp.eq.s32 %p24, %r1338, 0; | |
shl.b32 %r646, %r1334, %r1338; | |
mov.u32 %r647, 32; | |
sub.s32 %r648, %r647, %r1338; | |
shr.u32 %r649, %r1336, %r648; | |
add.s32 %r650, %r649, %r646; | |
selp.b32 %r63, %r1334, %r650, %p24; | |
mov.u32 %r651, -921707870; | |
mul.hi.u32 %r1337, %r63, %r651; | |
setp.eq.s32 %p25, %r45, 0; | |
neg.s32 %r652, %r55; | |
selp.b32 %r1339, %r55, %r652, %p25; | |
setp.lt.s32 %p26, %r1337, 1; | |
@%p26 bra BB6_37; | |
mul.lo.s32 %r653, %r63, -921707870; | |
shr.u32 %r654, %r653, 31; | |
shl.b32 %r655, %r1337, 1; | |
add.s32 %r1337, %r654, %r655; | |
add.s32 %r1338, %r1338, 1; | |
BB6_37: | |
mov.u32 %r656, 126; | |
sub.s32 %r657, %r656, %r1338; | |
shl.b32 %r658, %r657, 23; | |
add.s32 %r659, %r1337, 1; | |
shr.u32 %r660, %r659, 7; | |
add.s32 %r661, %r660, 1; | |
shr.u32 %r662, %r661, 1; | |
add.s32 %r663, %r662, %r658; | |
or.b32 %r664, %r663, %r1335; | |
mov.b32 %f856, %r664; | |
BB6_38: | |
mul.rn.f32 %f31, %f856, %f856; | |
and.b32 %r71, %r1339, 1; | |
setp.eq.s32 %p27, %r71, 0; | |
@%p27 bra BB6_40; | |
mov.f32 %f359, 0fBAB6061A; | |
mov.f32 %f360, 0f37CCF5CE; | |
fma.rn.f32 %f857, %f360, %f31, %f359; | |
bra.uni BB6_41; | |
BB6_40: | |
mov.f32 %f361, 0f3C08839E; | |
mov.f32 %f362, 0fB94CA1F9; | |
fma.rn.f32 %f857, %f362, %f31, %f361; | |
BB6_41: | |
@%p27 bra BB6_43; | |
mov.f32 %f363, 0f3D2AAAA5; | |
fma.rn.f32 %f364, %f857, %f31, %f363; | |
mov.f32 %f365, 0fBF000000; | |
fma.rn.f32 %f858, %f364, %f31, %f365; | |
bra.uni BB6_44; | |
BB6_43: | |
mov.f32 %f366, 0fBE2AAAA3; | |
fma.rn.f32 %f367, %f857, %f31, %f366; | |
mov.f32 %f368, 0f00000000; | |
fma.rn.f32 %f858, %f367, %f31, %f368; | |
BB6_44: | |
fma.rn.f32 %f859, %f858, %f856, %f856; | |
@%p27 bra BB6_46; | |
mov.f32 %f369, 0f3F800000; | |
fma.rn.f32 %f859, %f858, %f31, %f369; | |
BB6_46: | |
and.b32 %r665, %r1339, 2; | |
setp.eq.s32 %p30, %r665, 0; | |
@%p30 bra BB6_48; | |
mov.f32 %f370, 0f00000000; | |
mov.f32 %f371, 0fBF800000; | |
fma.rn.f32 %f859, %f859, %f371, %f370; | |
BB6_48: | |
mul.f32 %f372, %f4, %f4; | |
mul.f32 %f373, %f372, 0f3F000000; | |
mul.f32 %f374, %f853, %f859; | |
div.rn.f32 %f861, %f374, %f373; | |
BB6_49: | |
setp.eq.f32 %p31, %f5, 0f00000000; | |
mov.f32 %f874, %f952; | |
@%p31 bra BB6_97; | |
add.u64 %rd13, %SPL, 0; | |
abs.f32 %f376, %f5; | |
setp.neu.f32 %p32, %f376, 0f7F800000; | |
mov.f32 %f862, %f5; | |
@%p32 bra BB6_52; | |
mov.f32 %f377, 0f00000000; | |
mul.rn.f32 %f862, %f5, %f377; | |
BB6_52: | |
mul.f32 %f378, %f862, 0f3F22F983; | |
cvt.rni.s32.f32 %r1349, %f378; | |
cvt.rn.f32.s32 %f379, %r1349; | |
neg.f32 %f380, %f379; | |
mov.f32 %f381, 0f3FC90FDA; | |
fma.rn.f32 %f382, %f380, %f381, %f862; | |
mov.f32 %f383, 0f33A22168; | |
fma.rn.f32 %f384, %f380, %f383, %f382; | |
mov.f32 %f385, 0f27C234C5; | |
fma.rn.f32 %f863, %f380, %f385, %f384; | |
abs.f32 %f386, %f862; | |
add.s64 %rd14, %rd13, 24; | |
setp.leu.f32 %p33, %f386, 0f47CE4780; | |
@%p33 bra BB6_63; | |
mov.b32 %r73, %f862; | |
shr.u32 %r74, %r73, 23; | |
shl.b32 %r668, %r73, 8; | |
or.b32 %r75, %r668, -2147483648; | |
mov.u32 %r1341, 0; | |
mov.u64 %rd151, __cudart_i2opi_f; | |
mov.u32 %r1340, -6; | |
mov.u64 %rd152, %rd13; | |
BB6_54: | |
.pragma "nounroll"; | |
ld.const.u32 %r671, [%rd151]; | |
// inline asm | |
{ | |
mad.lo.cc.u32 %r669, %r671, %r75, %r1341; | |
madc.hi.u32 %r1341, %r671, %r75, 0; | |
} | |
// inline asm | |
st.local.u32 [%rd152], %r669; | |
add.s64 %rd152, %rd152, 4; | |
add.s64 %rd151, %rd151, 4; | |
add.s32 %r1340, %r1340, 1; | |
setp.ne.s32 %p34, %r1340, 0; | |
@%p34 bra BB6_54; | |
and.b32 %r674, %r74, 255; | |
add.s32 %r675, %r674, -128; | |
shr.u32 %r676, %r675, 5; | |
and.b32 %r80, %r73, -2147483648; | |
st.local.u32 [%rd14], %r1341; | |
mov.u32 %r677, 6; | |
sub.s32 %r678, %r677, %r676; | |
mul.wide.s32 %rd111, %r678, 4; | |
add.s64 %rd19, %rd13, %rd111; | |
ld.local.u32 %r1342, [%rd19]; | |
ld.local.u32 %r1343, [%rd19+-4]; | |
and.b32 %r83, %r74, 31; | |
setp.eq.s32 %p35, %r83, 0; | |
@%p35 bra BB6_57; | |
mov.u32 %r679, 32; | |
sub.s32 %r680, %r679, %r83; | |
shr.u32 %r681, %r1343, %r680; | |
shl.b32 %r682, %r1342, %r83; | |
add.s32 %r1342, %r681, %r682; | |
ld.local.u32 %r683, [%rd19+-8]; | |
shr.u32 %r684, %r683, %r680; | |
shl.b32 %r685, %r1343, %r83; | |
add.s32 %r1343, %r684, %r685; | |
BB6_57: | |
shr.u32 %r686, %r1343, 30; | |
shl.b32 %r687, %r1342, 2; | |
add.s32 %r1344, %r686, %r687; | |
shl.b32 %r89, %r1343, 2; | |
shr.u32 %r688, %r1344, 31; | |
shr.u32 %r689, %r1342, 30; | |
add.s32 %r90, %r688, %r689; | |
setp.eq.s32 %p36, %r688, 0; | |
@%p36 bra BB6_58; | |
not.b32 %r690, %r1344; | |
neg.s32 %r1346, %r89; | |
setp.eq.s32 %p37, %r89, 0; | |
selp.u32 %r691, 1, 0, %p37; | |
add.s32 %r1344, %r691, %r690; | |
xor.b32 %r1345, %r80, -2147483648; | |
bra.uni BB6_60; | |
BB6_58: | |
mov.u32 %r1345, %r80; | |
mov.u32 %r1346, %r89; | |
BB6_60: | |
clz.b32 %r1348, %r1344; | |
setp.eq.s32 %p38, %r1348, 0; | |
shl.b32 %r692, %r1344, %r1348; | |
mov.u32 %r693, 32; | |
sub.s32 %r694, %r693, %r1348; | |
shr.u32 %r695, %r1346, %r694; | |
add.s32 %r696, %r695, %r692; | |
selp.b32 %r98, %r1344, %r696, %p38; | |
mov.u32 %r697, -921707870; | |
mul.hi.u32 %r1347, %r98, %r697; | |
setp.eq.s32 %p39, %r80, 0; | |
neg.s32 %r698, %r90; | |
selp.b32 %r1349, %r90, %r698, %p39; | |
setp.lt.s32 %p40, %r1347, 1; | |
@%p40 bra BB6_62; | |
mul.lo.s32 %r699, %r98, -921707870; | |
shr.u32 %r700, %r699, 31; | |
shl.b32 %r701, %r1347, 1; | |
add.s32 %r1347, %r700, %r701; | |
add.s32 %r1348, %r1348, 1; | |
BB6_62: | |
mov.u32 %r702, 126; | |
sub.s32 %r703, %r702, %r1348; | |
shl.b32 %r704, %r703, 23; | |
add.s32 %r705, %r1347, 1; | |
shr.u32 %r706, %r705, 7; | |
add.s32 %r707, %r706, 1; | |
shr.u32 %r708, %r707, 1; | |
add.s32 %r709, %r708, %r704; | |
or.b32 %r710, %r709, %r1345; | |
mov.b32 %f863, %r710; | |
BB6_63: | |
mul.rn.f32 %f50, %f863, %f863; | |
and.b32 %r106, %r1349, 1; | |
setp.eq.s32 %p41, %r106, 0; | |
@%p41 bra BB6_65; | |
mov.f32 %f387, 0fBAB6061A; | |
mov.f32 %f388, 0f37CCF5CE; | |
fma.rn.f32 %f864, %f388, %f50, %f387; | |
bra.uni BB6_66; | |
BB6_65: | |
mov.f32 %f389, 0f3C08839E; | |
mov.f32 %f390, 0fB94CA1F9; | |
fma.rn.f32 %f864, %f390, %f50, %f389; | |
BB6_66: | |
@%p41 bra BB6_68; | |
mov.f32 %f391, 0f3D2AAAA5; | |
fma.rn.f32 %f392, %f864, %f50, %f391; | |
mov.f32 %f393, 0fBF000000; | |
fma.rn.f32 %f865, %f392, %f50, %f393; | |
bra.uni BB6_69; | |
BB6_68: | |
mov.f32 %f394, 0fBE2AAAA3; | |
fma.rn.f32 %f395, %f864, %f50, %f394; | |
mov.f32 %f396, 0f00000000; | |
fma.rn.f32 %f865, %f395, %f50, %f396; | |
BB6_69: | |
fma.rn.f32 %f866, %f865, %f863, %f863; | |
@%p41 bra BB6_71; | |
mov.f32 %f397, 0f3F800000; | |
fma.rn.f32 %f866, %f865, %f50, %f397; | |
BB6_71: | |
and.b32 %r711, %r1349, 2; | |
setp.eq.s32 %p44, %r711, 0; | |
@%p44 bra BB6_73; | |
mov.f32 %f398, 0f00000000; | |
mov.f32 %f399, 0fBF800000; | |
fma.rn.f32 %f866, %f866, %f399, %f398; | |
BB6_73: | |
mul.f32 %f868, %f5, 0f3F000000; | |
abs.f32 %f400, %f868; | |
setp.neu.f32 %p45, %f400, 0f7F800000; | |
@%p45 bra BB6_75; | |
mov.f32 %f401, 0f00000000; | |
mul.rn.f32 %f868, %f868, %f401; | |
BB6_75: | |
mul.f32 %f402, %f868, 0f3F22F983; | |
cvt.rni.s32.f32 %r1359, %f402; | |
cvt.rn.f32.s32 %f403, %r1359; | |
neg.f32 %f404, %f403; | |
fma.rn.f32 %f406, %f404, %f381, %f868; | |
fma.rn.f32 %f408, %f404, %f383, %f406; | |
fma.rn.f32 %f869, %f404, %f385, %f408; | |
abs.f32 %f410, %f868; | |
setp.leu.f32 %p46, %f410, 0f47CE4780; | |
@%p46 bra BB6_86; | |
mov.b32 %r108, %f868; | |
shr.u32 %r109, %r108, 23; | |
shl.b32 %r714, %r108, 8; | |
or.b32 %r110, %r714, -2147483648; | |
mov.u32 %r1351, 0; | |
mov.u64 %rd153, __cudart_i2opi_f; | |
mov.u32 %r1350, -6; | |
mov.u64 %rd154, %rd13; | |
BB6_77: | |
.pragma "nounroll"; | |
ld.const.u32 %r717, [%rd153]; | |
// inline asm | |
{ | |
mad.lo.cc.u32 %r715, %r717, %r110, %r1351; | |
madc.hi.u32 %r1351, %r717, %r110, 0; | |
} | |
// inline asm | |
st.local.u32 [%rd154], %r715; | |
add.s64 %rd154, %rd154, 4; | |
add.s64 %rd153, %rd153, 4; | |
add.s32 %r1350, %r1350, 1; | |
setp.ne.s32 %p47, %r1350, 0; | |
@%p47 bra BB6_77; | |
and.b32 %r720, %r109, 255; | |
add.s32 %r721, %r720, -128; | |
shr.u32 %r722, %r721, 5; | |
and.b32 %r115, %r108, -2147483648; | |
st.local.u32 [%rd14], %r1351; | |
mov.u32 %r723, 6; | |
sub.s32 %r724, %r723, %r722; | |
mul.wide.s32 %rd113, %r724, 4; | |
add.s64 %rd24, %rd13, %rd113; | |
ld.local.u32 %r1352, [%rd24]; | |
ld.local.u32 %r1353, [%rd24+-4]; | |
and.b32 %r118, %r109, 31; | |
setp.eq.s32 %p48, %r118, 0; | |
@%p48 bra BB6_80; | |
mov.u32 %r725, 32; | |
sub.s32 %r726, %r725, %r118; | |
shr.u32 %r727, %r1353, %r726; | |
shl.b32 %r728, %r1352, %r118; | |
add.s32 %r1352, %r727, %r728; | |
ld.local.u32 %r729, [%rd24+-8]; | |
shr.u32 %r730, %r729, %r726; | |
shl.b32 %r731, %r1353, %r118; | |
add.s32 %r1353, %r730, %r731; | |
BB6_80: | |
shr.u32 %r732, %r1353, 30; | |
shl.b32 %r733, %r1352, 2; | |
add.s32 %r1354, %r732, %r733; | |
shl.b32 %r124, %r1353, 2; | |
shr.u32 %r734, %r1354, 31; | |
shr.u32 %r735, %r1352, 30; | |
add.s32 %r125, %r734, %r735; | |
setp.eq.s32 %p49, %r734, 0; | |
@%p49 bra BB6_81; | |
not.b32 %r736, %r1354; | |
neg.s32 %r1356, %r124; | |
setp.eq.s32 %p50, %r124, 0; | |
selp.u32 %r737, 1, 0, %p50; | |
add.s32 %r1354, %r737, %r736; | |
xor.b32 %r1355, %r115, -2147483648; | |
bra.uni BB6_83; | |
BB6_81: | |
mov.u32 %r1355, %r115; | |
mov.u32 %r1356, %r124; | |
BB6_83: | |
clz.b32 %r1358, %r1354; | |
setp.eq.s32 %p51, %r1358, 0; | |
shl.b32 %r738, %r1354, %r1358; | |
mov.u32 %r739, 32; | |
sub.s32 %r740, %r739, %r1358; | |
shr.u32 %r741, %r1356, %r740; | |
add.s32 %r742, %r741, %r738; | |
selp.b32 %r133, %r1354, %r742, %p51; | |
mov.u32 %r743, -921707870; | |
mul.hi.u32 %r1357, %r133, %r743; | |
setp.eq.s32 %p52, %r115, 0; | |
neg.s32 %r744, %r125; | |
selp.b32 %r1359, %r125, %r744, %p52; | |
setp.lt.s32 %p53, %r1357, 1; | |
@%p53 bra BB6_85; | |
mul.lo.s32 %r745, %r133, -921707870; | |
shr.u32 %r746, %r745, 31; | |
shl.b32 %r747, %r1357, 1; | |
add.s32 %r1357, %r746, %r747; | |
add.s32 %r1358, %r1358, 1; | |
BB6_85: | |
mov.u32 %r748, 126; | |
sub.s32 %r749, %r748, %r1358; | |
shl.b32 %r750, %r749, 23; | |
add.s32 %r751, %r1357, 1; | |
shr.u32 %r752, %r751, 7; | |
add.s32 %r753, %r752, 1; | |
shr.u32 %r754, %r753, 1; | |
add.s32 %r755, %r754, %r750; | |
or.b32 %r756, %r755, %r1355; | |
mov.b32 %f869, %r756; | |
BB6_86: | |
mul.rn.f32 %f68, %f869, %f869; | |
and.b32 %r141, %r1359, 1; | |
setp.eq.s32 %p54, %r141, 0; | |
@%p54 bra BB6_88; | |
mov.f32 %f411, 0fBAB6061A; | |
mov.f32 %f412, 0f37CCF5CE; | |
fma.rn.f32 %f870, %f412, %f68, %f411; | |
bra.uni BB6_89; | |
BB6_88: | |
mov.f32 %f413, 0f3C08839E; | |
mov.f32 %f414, 0fB94CA1F9; | |
fma.rn.f32 %f870, %f414, %f68, %f413; | |
BB6_89: | |
@%p54 bra BB6_91; | |
mov.f32 %f415, 0f3D2AAAA5; | |
fma.rn.f32 %f416, %f870, %f68, %f415; | |
mov.f32 %f417, 0fBF000000; | |
fma.rn.f32 %f871, %f416, %f68, %f417; | |
bra.uni BB6_92; | |
BB6_91: | |
mov.f32 %f418, 0fBE2AAAA3; | |
fma.rn.f32 %f419, %f870, %f68, %f418; | |
mov.f32 %f420, 0f00000000; | |
fma.rn.f32 %f871, %f419, %f68, %f420; | |
BB6_92: | |
fma.rn.f32 %f872, %f871, %f869, %f869; | |
@%p54 bra BB6_94; | |
mov.f32 %f421, 0f3F800000; | |
fma.rn.f32 %f872, %f871, %f68, %f421; | |
BB6_94: | |
and.b32 %r757, %r1359, 2; | |
setp.eq.s32 %p57, %r757, 0; | |
@%p57 bra BB6_96; | |
mov.f32 %f422, 0f00000000; | |
mov.f32 %f423, 0fBF800000; | |
fma.rn.f32 %f872, %f872, %f423, %f422; | |
BB6_96: | |
mul.f32 %f424, %f5, %f5; | |
mul.f32 %f425, %f424, 0f3F000000; | |
mul.f32 %f426, %f866, %f872; | |
div.rn.f32 %f874, %f426, %f425; | |
BB6_97: | |
setp.eq.f32 %p58, %f6, 0f00000000; | |
mov.f32 %f887, %f952; | |
@%p58 bra BB6_145; | |
add.u64 %rd25, %SPL, 0; | |
abs.f32 %f428, %f6; | |
setp.neu.f32 %p59, %f428, 0f7F800000; | |
mov.f32 %f875, %f6; | |
@%p59 bra BB6_100; | |
mov.f32 %f429, 0f00000000; | |
mul.rn.f32 %f875, %f6, %f429; | |
BB6_100: | |
mul.f32 %f430, %f875, 0f3F22F983; | |
cvt.rni.s32.f32 %r1369, %f430; | |
cvt.rn.f32.s32 %f431, %r1369; | |
neg.f32 %f432, %f431; | |
mov.f32 %f433, 0f3FC90FDA; | |
fma.rn.f32 %f434, %f432, %f433, %f875; | |
mov.f32 %f435, 0f33A22168; | |
fma.rn.f32 %f436, %f432, %f435, %f434; | |
mov.f32 %f437, 0f27C234C5; | |
fma.rn.f32 %f876, %f432, %f437, %f436; | |
abs.f32 %f438, %f875; | |
add.s64 %rd26, %rd25, 24; | |
setp.leu.f32 %p60, %f438, 0f47CE4780; | |
@%p60 bra BB6_111; | |
mov.b32 %r143, %f875; | |
shr.u32 %r144, %r143, 23; | |
shl.b32 %r760, %r143, 8; | |
or.b32 %r145, %r760, -2147483648; | |
mov.u32 %r1361, 0; | |
mov.u64 %rd155, __cudart_i2opi_f; | |
mov.u32 %r1360, -6; | |
mov.u64 %rd156, %rd25; | |
BB6_102: | |
.pragma "nounroll"; | |
ld.const.u32 %r763, [%rd155]; | |
// inline asm | |
{ | |
mad.lo.cc.u32 %r761, %r763, %r145, %r1361; | |
madc.hi.u32 %r1361, %r763, %r145, 0; | |
} | |
// inline asm | |
st.local.u32 [%rd156], %r761; | |
add.s64 %rd156, %rd156, 4; | |
add.s64 %rd155, %rd155, 4; | |
add.s32 %r1360, %r1360, 1; | |
setp.ne.s32 %p61, %r1360, 0; | |
@%p61 bra BB6_102; | |
and.b32 %r766, %r144, 255; | |
add.s32 %r767, %r766, -128; | |
shr.u32 %r768, %r767, 5; | |
and.b32 %r150, %r143, -2147483648; | |
st.local.u32 [%rd26], %r1361; | |
mov.u32 %r769, 6; | |
sub.s32 %r770, %r769, %r768; | |
mul.wide.s32 %rd116, %r770, 4; | |
add.s64 %rd31, %rd25, %rd116; | |
ld.local.u32 %r1362, [%rd31]; | |
ld.local.u32 %r1363, [%rd31+-4]; | |
and.b32 %r153, %r144, 31; | |
setp.eq.s32 %p62, %r153, 0; | |
@%p62 bra BB6_105; | |
mov.u32 %r771, 32; | |
sub.s32 %r772, %r771, %r153; | |
shr.u32 %r773, %r1363, %r772; | |
shl.b32 %r774, %r1362, %r153; | |
add.s32 %r1362, %r773, %r774; | |
ld.local.u32 %r775, [%rd31+-8]; | |
shr.u32 %r776, %r775, %r772; | |
shl.b32 %r777, %r1363, %r153; | |
add.s32 %r1363, %r776, %r777; | |
BB6_105: | |
shr.u32 %r778, %r1363, 30; | |
shl.b32 %r779, %r1362, 2; | |
add.s32 %r1364, %r778, %r779; | |
shl.b32 %r159, %r1363, 2; | |
shr.u32 %r780, %r1364, 31; | |
shr.u32 %r781, %r1362, 30; | |
add.s32 %r160, %r780, %r781; | |
setp.eq.s32 %p63, %r780, 0; | |
@%p63 bra BB6_106; | |
not.b32 %r782, %r1364; | |
neg.s32 %r1366, %r159; | |
setp.eq.s32 %p64, %r159, 0; | |
selp.u32 %r783, 1, 0, %p64; | |
add.s32 %r1364, %r783, %r782; | |
xor.b32 %r1365, %r150, -2147483648; | |
bra.uni BB6_108; | |
BB6_106: | |
mov.u32 %r1365, %r150; | |
mov.u32 %r1366, %r159; | |
BB6_108: | |
clz.b32 %r1368, %r1364; | |
setp.eq.s32 %p65, %r1368, 0; | |
shl.b32 %r784, %r1364, %r1368; | |
mov.u32 %r785, 32; | |
sub.s32 %r786, %r785, %r1368; | |
shr.u32 %r787, %r1366, %r786; | |
add.s32 %r788, %r787, %r784; | |
selp.b32 %r168, %r1364, %r788, %p65; | |
mov.u32 %r789, -921707870; | |
mul.hi.u32 %r1367, %r168, %r789; | |
setp.eq.s32 %p66, %r150, 0; | |
neg.s32 %r790, %r160; | |
selp.b32 %r1369, %r160, %r790, %p66; | |
setp.lt.s32 %p67, %r1367, 1; | |
@%p67 bra BB6_110; | |
mul.lo.s32 %r791, %r168, -921707870; | |
shr.u32 %r792, %r791, 31; | |
shl.b32 %r793, %r1367, 1; | |
add.s32 %r1367, %r792, %r793; | |
add.s32 %r1368, %r1368, 1; | |
BB6_110: | |
mov.u32 %r794, 126; | |
sub.s32 %r795, %r794, %r1368; | |
shl.b32 %r796, %r795, 23; | |
add.s32 %r797, %r1367, 1; | |
shr.u32 %r798, %r797, 7; | |
add.s32 %r799, %r798, 1; | |
shr.u32 %r800, %r799, 1; | |
add.s32 %r801, %r800, %r796; | |
or.b32 %r802, %r801, %r1365; | |
mov.b32 %f876, %r802; | |
BB6_111: | |
mul.rn.f32 %f87, %f876, %f876; | |
and.b32 %r176, %r1369, 1; | |
setp.eq.s32 %p68, %r176, 0; | |
@%p68 bra BB6_113; | |
mov.f32 %f439, 0fBAB6061A; | |
mov.f32 %f440, 0f37CCF5CE; | |
fma.rn.f32 %f877, %f440, %f87, %f439; | |
bra.uni BB6_114; | |
BB6_113: | |
mov.f32 %f441, 0f3C08839E; | |
mov.f32 %f442, 0fB94CA1F9; | |
fma.rn.f32 %f877, %f442, %f87, %f441; | |
BB6_114: | |
@%p68 bra BB6_116; | |
mov.f32 %f443, 0f3D2AAAA5; | |
fma.rn.f32 %f444, %f877, %f87, %f443; | |
mov.f32 %f445, 0fBF000000; | |
fma.rn.f32 %f878, %f444, %f87, %f445; | |
bra.uni BB6_117; | |
BB6_116: | |
mov.f32 %f446, 0fBE2AAAA3; | |
fma.rn.f32 %f447, %f877, %f87, %f446; | |
mov.f32 %f448, 0f00000000; | |
fma.rn.f32 %f878, %f447, %f87, %f448; | |
BB6_117: | |
fma.rn.f32 %f879, %f878, %f876, %f876; | |
@%p68 bra BB6_119; | |
mov.f32 %f449, 0f3F800000; | |
fma.rn.f32 %f879, %f878, %f87, %f449; | |
BB6_119: | |
and.b32 %r803, %r1369, 2; | |
setp.eq.s32 %p71, %r803, 0; | |
@%p71 bra BB6_121; | |
mov.f32 %f450, 0f00000000; | |
mov.f32 %f451, 0fBF800000; | |
fma.rn.f32 %f879, %f879, %f451, %f450; | |
BB6_121: | |
mul.f32 %f881, %f6, 0f3F000000; | |
abs.f32 %f452, %f881; | |
setp.neu.f32 %p72, %f452, 0f7F800000; | |
@%p72 bra BB6_123; | |
mov.f32 %f453, 0f00000000; | |
mul.rn.f32 %f881, %f881, %f453; | |
BB6_123: | |
mul.f32 %f454, %f881, 0f3F22F983; | |
cvt.rni.s32.f32 %r1379, %f454; | |
cvt.rn.f32.s32 %f455, %r1379; | |
neg.f32 %f456, %f455; | |
fma.rn.f32 %f458, %f456, %f433, %f881; | |
fma.rn.f32 %f460, %f456, %f435, %f458; | |
fma.rn.f32 %f882, %f456, %f437, %f460; | |
abs.f32 %f462, %f881; | |
setp.leu.f32 %p73, %f462, 0f47CE4780; | |
@%p73 bra BB6_134; | |
mov.b32 %r178, %f881; | |
shr.u32 %r179, %r178, 23; | |
shl.b32 %r806, %r178, 8; | |
or.b32 %r180, %r806, -2147483648; | |
mov.u32 %r1371, 0; | |
mov.u64 %rd157, __cudart_i2opi_f; | |
mov.u32 %r1370, -6; | |
mov.u64 %rd158, %rd25; | |
BB6_125: | |
.pragma "nounroll"; | |
ld.const.u32 %r809, [%rd157]; | |
// inline asm | |
{ | |
mad.lo.cc.u32 %r807, %r809, %r180, %r1371; | |
madc.hi.u32 %r1371, %r809, %r180, 0; | |
} | |
// inline asm | |
st.local.u32 [%rd158], %r807; | |
add.s64 %rd158, %rd158, 4; | |
add.s64 %rd157, %rd157, 4; | |
add.s32 %r1370, %r1370, 1; | |
setp.ne.s32 %p74, %r1370, 0; | |
@%p74 bra BB6_125; | |
and.b32 %r812, %r179, 255; | |
add.s32 %r813, %r812, -128; | |
shr.u32 %r814, %r813, 5; | |
and.b32 %r185, %r178, -2147483648; | |
st.local.u32 [%rd26], %r1371; | |
mov.u32 %r815, 6; | |
sub.s32 %r816, %r815, %r814; | |
mul.wide.s32 %rd118, %r816, 4; | |
add.s64 %rd36, %rd25, %rd118; | |
ld.local.u32 %r1372, [%rd36]; | |
ld.local.u32 %r1373, [%rd36+-4]; | |
and.b32 %r188, %r179, 31; | |
setp.eq.s32 %p75, %r188, 0; | |
@%p75 bra BB6_128; | |
mov.u32 %r817, 32; | |
sub.s32 %r818, %r817, %r188; | |
shr.u32 %r819, %r1373, %r818; | |
shl.b32 %r820, %r1372, %r188; | |
add.s32 %r1372, %r819, %r820; | |
ld.local.u32 %r821, [%rd36+-8]; | |
shr.u32 %r822, %r821, %r818; | |
shl.b32 %r823, %r1373, %r188; | |
add.s32 %r1373, %r822, %r823; | |
BB6_128: | |
shr.u32 %r824, %r1373, 30; | |
shl.b32 %r825, %r1372, 2; | |
add.s32 %r1374, %r824, %r825; | |
shl.b32 %r194, %r1373, 2; | |
shr.u32 %r826, %r1374, 31; | |
shr.u32 %r827, %r1372, 30; | |
add.s32 %r195, %r826, %r827; | |
setp.eq.s32 %p76, %r826, 0; | |
@%p76 bra BB6_129; | |
not.b32 %r828, %r1374; | |
neg.s32 %r1376, %r194; | |
setp.eq.s32 %p77, %r194, 0; | |
selp.u32 %r829, 1, 0, %p77; | |
add.s32 %r1374, %r829, %r828; | |
xor.b32 %r1375, %r185, -2147483648; | |
bra.uni BB6_131; | |
BB6_129: | |
mov.u32 %r1375, %r185; | |
mov.u32 %r1376, %r194; | |
BB6_131: | |
clz.b32 %r1378, %r1374; | |
setp.eq.s32 %p78, %r1378, 0; | |
shl.b32 %r830, %r1374, %r1378; | |
mov.u32 %r831, 32; | |
sub.s32 %r832, %r831, %r1378; | |
shr.u32 %r833, %r1376, %r832; | |
add.s32 %r834, %r833, %r830; | |
selp.b32 %r203, %r1374, %r834, %p78; | |
mov.u32 %r835, -921707870; | |
mul.hi.u32 %r1377, %r203, %r835; | |
setp.eq.s32 %p79, %r185, 0; | |
neg.s32 %r836, %r195; | |
selp.b32 %r1379, %r195, %r836, %p79; | |
setp.lt.s32 %p80, %r1377, 1; | |
@%p80 bra BB6_133; | |
mul.lo.s32 %r837, %r203, -921707870; | |
shr.u32 %r838, %r837, 31; | |
shl.b32 %r839, %r1377, 1; | |
add.s32 %r1377, %r838, %r839; | |
add.s32 %r1378, %r1378, 1; | |
BB6_133: | |
mov.u32 %r840, 126; | |
sub.s32 %r841, %r840, %r1378; | |
shl.b32 %r842, %r841, 23; | |
add.s32 %r843, %r1377, 1; | |
shr.u32 %r844, %r843, 7; | |
add.s32 %r845, %r844, 1; | |
shr.u32 %r846, %r845, 1; | |
add.s32 %r847, %r846, %r842; | |
or.b32 %r848, %r847, %r1375; | |
mov.b32 %f882, %r848; | |
BB6_134: | |
mul.rn.f32 %f105, %f882, %f882; | |
and.b32 %r211, %r1379, 1; | |
setp.eq.s32 %p81, %r211, 0; | |
@%p81 bra BB6_136; | |
mov.f32 %f463, 0fBAB6061A; | |
mov.f32 %f464, 0f37CCF5CE; | |
fma.rn.f32 %f883, %f464, %f105, %f463; | |
bra.uni BB6_137; | |
BB6_136: | |
mov.f32 %f465, 0f3C08839E; | |
mov.f32 %f466, 0fB94CA1F9; | |
fma.rn.f32 %f883, %f466, %f105, %f465; | |
BB6_137: | |
@%p81 bra BB6_139; | |
mov.f32 %f467, 0f3D2AAAA5; | |
fma.rn.f32 %f468, %f883, %f105, %f467; | |
mov.f32 %f469, 0fBF000000; | |
fma.rn.f32 %f884, %f468, %f105, %f469; | |
bra.uni BB6_140; | |
BB6_139: | |
mov.f32 %f470, 0fBE2AAAA3; | |
fma.rn.f32 %f471, %f883, %f105, %f470; | |
mov.f32 %f472, 0f00000000; | |
fma.rn.f32 %f884, %f471, %f105, %f472; | |
BB6_140: | |
fma.rn.f32 %f885, %f884, %f882, %f882; | |
@%p81 bra BB6_142; | |
mov.f32 %f473, 0f3F800000; | |
fma.rn.f32 %f885, %f884, %f105, %f473; | |
BB6_142: | |
and.b32 %r849, %r1379, 2; | |
setp.eq.s32 %p84, %r849, 0; | |
@%p84 bra BB6_144; | |
mov.f32 %f474, 0f00000000; | |
mov.f32 %f475, 0fBF800000; | |
fma.rn.f32 %f885, %f885, %f475, %f474; | |
BB6_144: | |
mul.f32 %f476, %f6, %f6; | |
mul.f32 %f477, %f476, 0f3F000000; | |
mul.f32 %f478, %f879, %f885; | |
div.rn.f32 %f887, %f478, %f477; | |
// ---- Filter weight for argument %f7, part 1 of 2: zero guard + first sine-shaped factor -> %f892 ----
// If %f7 == 0 the weight %f900 keeps the value of %f952 (assigned outside this excerpt;
// presumably 1.0, the x -> 0 limit of the expression below - TODO confirm) and control
// jumps straight to BB6_193. Otherwise this block evaluates a sine-shaped function of
// %f7 into %f892; BB6_169..BB6_192 then do the same for %f7 * 0.5 and divide.
BB6_145: | |
setp.eq.f32 %p85, %f7, 0f00000000; | |
mov.f32 %f900, %f952; | |
@%p85 bra BB6_193; | |
// %rd37 = base of the local-memory scratch buffer used by the large-argument path.
add.u64 %rd37, %SPL, 0; | |
// An infinite argument is multiplied by 0 (inf * 0 = NaN) so that NaN propagates.
abs.f32 %f480, %f7; | |
setp.neu.f32 %p86, %f480, 0f7F800000; | |
mov.f32 %f888, %f7; | |
@%p86 bra BB6_148; | |
mov.f32 %f481, 0f00000000; | |
mul.rn.f32 %f888, %f7, %f481; | |
// Fast reduction: q = round-to-nearest-int(x * 0f3F22F983 [~0.6366198 = 2/pi]) -> %r1389,
// then r = x - q * (pi/2), with pi/2 applied as three addends
// (0f3FC90FDA ~1.5707963, 0f33A22168 ~7.55e-8, 0f27C234C5 ~5.39e-15) -> %f889.
BB6_148: | |
mul.f32 %f482, %f888, 0f3F22F983; | |
cvt.rni.s32.f32 %r1389, %f482; | |
cvt.rn.f32.s32 %f483, %r1389; | |
neg.f32 %f484, %f483; | |
mov.f32 %f485, 0f3FC90FDA; | |
fma.rn.f32 %f486, %f484, %f485, %f888; | |
mov.f32 %f487, 0f33A22168; | |
fma.rn.f32 %f488, %f484, %f487, %f486; | |
mov.f32 %f489, 0f27C234C5; | |
fma.rn.f32 %f889, %f484, %f489, %f488; | |
abs.f32 %f490, %f888; | |
// %rd38 = scratch base + 24 bytes (the 7th 32-bit word).
add.s64 %rd38, %rd37, 24; | |
// |x| <= 0f47CE4780 (105615.0): the fast reduction above is kept; skip to the polynomial.
setp.leu.f32 %p87, %f490, 0f47CE4780; | |
@%p87 bra BB6_159; | |
// Large-argument path: redo the reduction in integer arithmetic. Take the raw float
// bits, keep sign+exponent in %r214, and build the 24-bit significand left-aligned in
// 32 bits with the implicit leading 1 restored (%r215).
mov.b32 %r213, %f888; | |
shr.u32 %r214, %r213, 23; | |
shl.b32 %r852, %r213, 8; | |
or.b32 %r215, %r852, -2147483648; | |
mov.u32 %r1381, 0; | |
mov.u64 %rd159, __cudart_i2opi_f; | |
mov.u32 %r1380, -6; | |
mov.u64 %rd160, %rd37; | |
// Six iterations (counter runs -6 .. -1): multiply each 32-bit word of the constant
// table __cudart_i2opi_f (contents not visible here; presumably bits of 2/pi, going by
// the symbol name) by the significand, carrying the high half into the next word and
// storing each low half to the scratch buffer.
BB6_150: | |
.pragma "nounroll"; | |
ld.const.u32 %r855, [%rd159]; | |
// inline asm | |
{ | |
mad.lo.cc.u32 %r853, %r855, %r215, %r1381; | |
madc.hi.u32 %r1381, %r855, %r215, 0; | |
} | |
// inline asm | |
st.local.u32 [%rd160], %r853; | |
add.s64 %rd160, %rd160, 4; | |
add.s64 %rd159, %rd159, 4; | |
add.s32 %r1380, %r1380, 1; | |
setp.ne.s32 %p88, %r1380, 0; | |
@%p88 bra BB6_150; | |
// Store the final carry as the 7th scratch word, then pick the words addressed by the
// exponent: word index = 6 - ((exponent - 128) >> 5), bit offset = exponent & 31.
// %r220 keeps the input sign bit.
and.b32 %r858, %r214, 255; | |
add.s32 %r859, %r858, -128; | |
shr.u32 %r860, %r859, 5; | |
and.b32 %r220, %r213, -2147483648; | |
st.local.u32 [%rd38], %r1381; | |
mov.u32 %r861, 6; | |
sub.s32 %r862, %r861, %r860; | |
mul.wide.s32 %rd121, %r862, 4; | |
add.s64 %rd43, %rd37, %rd121; | |
ld.local.u32 %r1382, [%rd43]; | |
ld.local.u32 %r1383, [%rd43+-4]; | |
and.b32 %r223, %r214, 31; | |
setp.eq.s32 %p89, %r223, 0; | |
@%p89 bra BB6_153; | |
// Non-zero bit offset: shift both selected words left by it, pulling bits from the
// next lower word (a third word is loaded for the low half).
mov.u32 %r863, 32; | |
sub.s32 %r864, %r863, %r223; | |
shr.u32 %r865, %r1383, %r864; | |
shl.b32 %r866, %r1382, %r223; | |
add.s32 %r1382, %r865, %r866; | |
ld.local.u32 %r867, [%rd43+-8]; | |
shr.u32 %r868, %r867, %r864; | |
shl.b32 %r869, %r1383, %r223; | |
add.s32 %r1383, %r868, %r869; | |
// Shift left by 2 more: the two bits shifted out of the high word plus the new top
// bit (round to nearest) give the quadrant count %r230; %r1384:%r229 is the fraction.
BB6_153: | |
shr.u32 %r870, %r1383, 30; | |
shl.b32 %r871, %r1382, 2; | |
add.s32 %r1384, %r870, %r871; | |
shl.b32 %r229, %r1383, 2; | |
shr.u32 %r872, %r1384, 31; | |
shr.u32 %r873, %r1382, 30; | |
add.s32 %r230, %r872, %r873; | |
setp.eq.s32 %p90, %r872, 0; | |
@%p90 bra BB6_154; | |
// Top bit of the fraction set: two's-complement negate the 64-bit fraction
// (~hi + carry, -lo) and flip the sign bit.
not.b32 %r874, %r1384; | |
neg.s32 %r1386, %r229; | |
setp.eq.s32 %p91, %r229, 0; | |
selp.u32 %r875, 1, 0, %p91; | |
add.s32 %r1384, %r875, %r874; | |
xor.b32 %r1385, %r220, -2147483648; | |
bra.uni BB6_156; | |
// Top bit clear: keep the fraction and the original sign.
BB6_154: | |
mov.u32 %r1385, %r220; | |
mov.u32 %r1386, %r229; | |
// Normalise the fraction (clz + shift), take the high 32 bits of its product with
// -921707870 (0xC90FDAA2 as unsigned, ~pi * 2^30), and negate the quadrant count when
// the input sign bit was set.
BB6_156: | |
clz.b32 %r1388, %r1384; | |
setp.eq.s32 %p92, %r1388, 0; | |
shl.b32 %r876, %r1384, %r1388; | |
mov.u32 %r877, 32; | |
sub.s32 %r878, %r877, %r1388; | |
shr.u32 %r879, %r1386, %r878; | |
add.s32 %r880, %r879, %r876; | |
selp.b32 %r238, %r1384, %r880, %p92; | |
mov.u32 %r881, -921707870; | |
mul.hi.u32 %r1387, %r238, %r881; | |
setp.eq.s32 %p93, %r220, 0; | |
neg.s32 %r882, %r230; | |
selp.b32 %r1389, %r230, %r882, %p93; | |
setp.lt.s32 %p94, %r1387, 1; | |
@%p94 bra BB6_158; | |
// Product is positive as a signed value (top bit clear, non-zero): shift it left one
// more bit, bringing in the top bit of the low product, and bump the shift count.
mul.lo.s32 %r883, %r238, -921707870; | |
shr.u32 %r884, %r883, 31; | |
shl.b32 %r885, %r1387, 1; | |
add.s32 %r1387, %r884, %r885; | |
add.s32 %r1388, %r1388, 1; | |
// Assemble the reduced argument as float bits: exponent field (126 - shift) << 23,
// plus the rounded mantissa, OR-ed with the sign -> %f889.
BB6_158: | |
mov.u32 %r886, 126; | |
sub.s32 %r887, %r886, %r1388; | |
shl.b32 %r888, %r887, 23; | |
add.s32 %r889, %r1387, 1; | |
shr.u32 %r890, %r889, 7; | |
add.s32 %r891, %r890, 1; | |
shr.u32 %r892, %r891, 1; | |
add.s32 %r893, %r892, %r888; | |
or.b32 %r894, %r893, %r1385; | |
mov.b32 %f889, %r894; | |
// Polynomial stage on r = %f889, r2 = r*r = %f124. Quadrant bit 0 (%r246) picks the series:
//   bit 0 set   -> 1 + r2*(-0.5 + r2*(0f3D2AAAA5 + r2*(0fBAB6061A + r2*0f37CCF5CE)))   (even, cosine-shaped)
//   bit 0 clear -> r + r*r2*(0fBE2AAAA3 + r2*(0f3C08839E + r2*0fB94CA1F9))             (odd, sine-shaped)
// Approximate coefficient values: 0f3D2AAAA5 ~4.16667e-2, 0fBAB6061A ~-1.3887e-3,
// 0f37CCF5CE ~2.443e-5, 0fBE2AAAA3 ~-1.66667e-1, 0f3C08839E ~8.332e-3, 0fB94CA1F9 ~-1.952e-4.
BB6_159: | |
mul.rn.f32 %f124, %f889, %f889; | |
and.b32 %r246, %r1389, 1; | |
setp.eq.s32 %p95, %r246, 0; | |
@%p95 bra BB6_161; | |
mov.f32 %f491, 0fBAB6061A; | |
mov.f32 %f492, 0f37CCF5CE; | |
fma.rn.f32 %f890, %f492, %f124, %f491; | |
bra.uni BB6_162; | |
BB6_161: | |
mov.f32 %f493, 0f3C08839E; | |
mov.f32 %f494, 0fB94CA1F9; | |
fma.rn.f32 %f890, %f494, %f124, %f493; | |
BB6_162: | |
@%p95 bra BB6_164; | |
mov.f32 %f495, 0f3D2AAAA5; | |
fma.rn.f32 %f496, %f890, %f124, %f495; | |
mov.f32 %f497, 0fBF000000; | |
fma.rn.f32 %f891, %f496, %f124, %f497; | |
bra.uni BB6_165; | |
BB6_164: | |
mov.f32 %f498, 0fBE2AAAA3; | |
fma.rn.f32 %f499, %f890, %f124, %f498; | |
mov.f32 %f500, 0f00000000; | |
fma.rn.f32 %f891, %f499, %f124, %f500; | |
// The odd form r + r*p is computed unconditionally, then overwritten with the even
// form 1 + r2*p when quadrant bit 0 is set.
BB6_165: | |
fma.rn.f32 %f892, %f891, %f889, %f889; | |
@%p95 bra BB6_167; | |
mov.f32 %f501, 0f3F800000; | |
fma.rn.f32 %f892, %f891, %f124, %f501; | |
// Quadrant bit 1 set -> negate the result (multiply by -1.0, add 0).
BB6_167: | |
and.b32 %r895, %r1389, 2; | |
setp.eq.s32 %p98, %r895, 0; | |
@%p98 bra BB6_169; | |
mov.f32 %f502, 0f00000000; | |
mov.f32 %f503, 0fBF800000; | |
fma.rn.f32 %f892, %f892, %f503, %f502; | |
// ---- Filter weight for argument %f7, part 2 of 2: second sine-shaped factor + combine -> %f900 ----
// Evaluates the same sine-shaped function as the preceding code, this time for
// x = %f7 * 0.5, into %f898, then forms
//   %f900 = (%f892 * %f898) / (%f7 * %f7 * 0.5)
// where %f892 is the factor computed just before this label. That is the shape
// sin(x)/x * sin(x/2)/(x/2) - presumably a 2-lobe Lanczos window; TODO confirm against
// the CUDA source. %rd37/%rd38 (scratch base and base+24) and %f485/%f487/%f489
// (three-part pi/2 constant) are set earlier in this function, before the first reduction.
BB6_169: | |
mul.f32 %f894, %f7, 0f3F000000; | |
// An infinite argument is multiplied by 0 (inf * 0 = NaN) so that NaN propagates.
abs.f32 %f504, %f894; | |
setp.neu.f32 %p99, %f504, 0f7F800000; | |
@%p99 bra BB6_171; | |
mov.f32 %f505, 0f00000000; | |
mul.rn.f32 %f894, %f894, %f505; | |
// Fast reduction: q = round-to-nearest-int(x * 0f3F22F983 [~0.6366198 = 2/pi]) -> %r1399,
// then r = x - q * (pi/2) in three fma steps -> %f895.
BB6_171: | |
mul.f32 %f506, %f894, 0f3F22F983; | |
cvt.rni.s32.f32 %r1399, %f506; | |
cvt.rn.f32.s32 %f507, %r1399; | |
neg.f32 %f508, %f507; | |
fma.rn.f32 %f510, %f508, %f485, %f894; | |
fma.rn.f32 %f512, %f508, %f487, %f510; | |
fma.rn.f32 %f895, %f508, %f489, %f512; | |
abs.f32 %f514, %f894; | |
// |x| <= 0f47CE4780 (105615.0): the fast reduction above is kept; skip to the polynomial.
setp.leu.f32 %p100, %f514, 0f47CE4780; | |
@%p100 bra BB6_182; | |
// Large-argument path: redo the reduction in integer arithmetic. Take the raw float
// bits, keep sign+exponent in %r249, and build the 24-bit significand left-aligned in
// 32 bits with the implicit leading 1 restored (%r250).
mov.b32 %r248, %f894; | |
shr.u32 %r249, %r248, 23; | |
shl.b32 %r898, %r248, 8; | |
or.b32 %r250, %r898, -2147483648; | |
mov.u32 %r1391, 0; | |
mov.u64 %rd161, __cudart_i2opi_f; | |
mov.u32 %r1390, -6; | |
mov.u64 %rd162, %rd37; | |
// Six iterations (counter runs -6 .. -1): multiply each 32-bit word of the constant
// table __cudart_i2opi_f (contents not visible here; presumably bits of 2/pi, going by
// the symbol name) by the significand, carrying the high half into the next word and
// storing each low half to the scratch buffer.
BB6_173: | |
.pragma "nounroll"; | |
ld.const.u32 %r901, [%rd161]; | |
// inline asm | |
{ | |
mad.lo.cc.u32 %r899, %r901, %r250, %r1391; | |
madc.hi.u32 %r1391, %r901, %r250, 0; | |
} | |
// inline asm | |
st.local.u32 [%rd162], %r899; | |
add.s64 %rd162, %rd162, 4; | |
add.s64 %rd161, %rd161, 4; | |
add.s32 %r1390, %r1390, 1; | |
setp.ne.s32 %p101, %r1390, 0; | |
@%p101 bra BB6_173; | |
// Store the final carry as the 7th scratch word, then pick the words addressed by the
// exponent: word index = 6 - ((exponent - 128) >> 5), bit offset = exponent & 31.
// %r255 keeps the input sign bit.
and.b32 %r904, %r249, 255; | |
add.s32 %r905, %r904, -128; | |
shr.u32 %r906, %r905, 5; | |
and.b32 %r255, %r248, -2147483648; | |
st.local.u32 [%rd38], %r1391; | |
mov.u32 %r907, 6; | |
sub.s32 %r908, %r907, %r906; | |
mul.wide.s32 %rd123, %r908, 4; | |
add.s64 %rd48, %rd37, %rd123; | |
ld.local.u32 %r1392, [%rd48]; | |
ld.local.u32 %r1393, [%rd48+-4]; | |
and.b32 %r258, %r249, 31; | |
setp.eq.s32 %p102, %r258, 0; | |
@%p102 bra BB6_176; | |
// Non-zero bit offset: shift both selected words left by it, pulling bits from the
// next lower word (a third word is loaded for the low half).
mov.u32 %r909, 32; | |
sub.s32 %r910, %r909, %r258; | |
shr.u32 %r911, %r1393, %r910; | |
shl.b32 %r912, %r1392, %r258; | |
add.s32 %r1392, %r911, %r912; | |
ld.local.u32 %r913, [%rd48+-8]; | |
shr.u32 %r914, %r913, %r910; | |
shl.b32 %r915, %r1393, %r258; | |
add.s32 %r1393, %r914, %r915; | |
// Shift left by 2 more: the two bits shifted out of the high word plus the new top
// bit (round to nearest) give the quadrant count %r265; %r1394:%r264 is the fraction.
BB6_176: | |
shr.u32 %r916, %r1393, 30; | |
shl.b32 %r917, %r1392, 2; | |
add.s32 %r1394, %r916, %r917; | |
shl.b32 %r264, %r1393, 2; | |
shr.u32 %r918, %r1394, 31; | |
shr.u32 %r919, %r1392, 30; | |
add.s32 %r265, %r918, %r919; | |
setp.eq.s32 %p103, %r918, 0; | |
@%p103 bra BB6_177; | |
// Top bit of the fraction set: two's-complement negate the 64-bit fraction
// (~hi + carry, -lo) and flip the sign bit.
not.b32 %r920, %r1394; | |
neg.s32 %r1396, %r264; | |
setp.eq.s32 %p104, %r264, 0; | |
selp.u32 %r921, 1, 0, %p104; | |
add.s32 %r1394, %r921, %r920; | |
xor.b32 %r1395, %r255, -2147483648; | |
bra.uni BB6_179; | |
// Top bit clear: keep the fraction and the original sign.
BB6_177: | |
mov.u32 %r1395, %r255; | |
mov.u32 %r1396, %r264; | |
// Normalise the fraction (clz + shift), take the high 32 bits of its product with
// -921707870 (0xC90FDAA2 as unsigned, ~pi * 2^30), and negate the quadrant count when
// the input sign bit was set.
BB6_179: | |
clz.b32 %r1398, %r1394; | |
setp.eq.s32 %p105, %r1398, 0; | |
shl.b32 %r922, %r1394, %r1398; | |
mov.u32 %r923, 32; | |
sub.s32 %r924, %r923, %r1398; | |
shr.u32 %r925, %r1396, %r924; | |
add.s32 %r926, %r925, %r922; | |
selp.b32 %r273, %r1394, %r926, %p105; | |
mov.u32 %r927, -921707870; | |
mul.hi.u32 %r1397, %r273, %r927; | |
setp.eq.s32 %p106, %r255, 0; | |
neg.s32 %r928, %r265; | |
selp.b32 %r1399, %r265, %r928, %p106; | |
setp.lt.s32 %p107, %r1397, 1; | |
@%p107 bra BB6_181; | |
// Product is positive as a signed value (top bit clear, non-zero): shift it left one
// more bit, bringing in the top bit of the low product, and bump the shift count.
mul.lo.s32 %r929, %r273, -921707870; | |
shr.u32 %r930, %r929, 31; | |
shl.b32 %r931, %r1397, 1; | |
add.s32 %r1397, %r930, %r931; | |
add.s32 %r1398, %r1398, 1; | |
// Assemble the reduced argument as float bits: exponent field (126 - shift) << 23,
// plus the rounded mantissa, OR-ed with the sign -> %f895.
BB6_181: | |
mov.u32 %r932, 126; | |
sub.s32 %r933, %r932, %r1398; | |
shl.b32 %r934, %r933, 23; | |
add.s32 %r935, %r1397, 1; | |
shr.u32 %r936, %r935, 7; | |
add.s32 %r937, %r936, 1; | |
shr.u32 %r938, %r937, 1; | |
add.s32 %r939, %r938, %r934; | |
or.b32 %r940, %r939, %r1395; | |
mov.b32 %f895, %r940; | |
// Polynomial stage on r = %f895, r2 = r*r = %f142. Quadrant bit 0 (%r281) picks the series:
//   bit 0 set   -> 1 + r2*(-0.5 + r2*(0f3D2AAAA5 + r2*(0fBAB6061A + r2*0f37CCF5CE)))   (even, cosine-shaped)
//   bit 0 clear -> r + r*r2*(0fBE2AAAA3 + r2*(0f3C08839E + r2*0fB94CA1F9))             (odd, sine-shaped)
BB6_182: | |
mul.rn.f32 %f142, %f895, %f895; | |
and.b32 %r281, %r1399, 1; | |
setp.eq.s32 %p108, %r281, 0; | |
@%p108 bra BB6_184; | |
mov.f32 %f515, 0fBAB6061A; | |
mov.f32 %f516, 0f37CCF5CE; | |
fma.rn.f32 %f896, %f516, %f142, %f515; | |
bra.uni BB6_185; | |
BB6_184: | |
mov.f32 %f517, 0f3C08839E; | |
mov.f32 %f518, 0fB94CA1F9; | |
fma.rn.f32 %f896, %f518, %f142, %f517; | |
BB6_185: | |
@%p108 bra BB6_187; | |
mov.f32 %f519, 0f3D2AAAA5; | |
fma.rn.f32 %f520, %f896, %f142, %f519; | |
mov.f32 %f521, 0fBF000000; | |
fma.rn.f32 %f897, %f520, %f142, %f521; | |
bra.uni BB6_188; | |
BB6_187: | |
mov.f32 %f522, 0fBE2AAAA3; | |
fma.rn.f32 %f523, %f896, %f142, %f522; | |
mov.f32 %f524, 0f00000000; | |
fma.rn.f32 %f897, %f523, %f142, %f524; | |
// The odd form r + r*p is computed unconditionally, then overwritten with the even
// form 1 + r2*p when quadrant bit 0 is set.
BB6_188: | |
fma.rn.f32 %f898, %f897, %f895, %f895; | |
@%p108 bra BB6_190; | |
mov.f32 %f525, 0f3F800000; | |
fma.rn.f32 %f898, %f897, %f142, %f525; | |
// Quadrant bit 1 set -> negate the result (multiply by -1.0, add 0).
BB6_190: | |
and.b32 %r941, %r1399, 2; | |
setp.eq.s32 %p111, %r941, 0; | |
@%p111 bra BB6_192; | |
mov.f32 %f526, 0f00000000; | |
mov.f32 %f527, 0fBF800000; | |
fma.rn.f32 %f898, %f898, %f527, %f526; | |
// Combine: %f900 = (%f892 * %f898) / (%f7 * %f7 * 0.5), with an IEEE round-to-nearest divide.
BB6_192: | |
mul.f32 %f528, %f7, %f7; | |
mul.f32 %f529, %f528, 0f3F000000; | |
mul.f32 %f530, %f892, %f898; | |
div.rn.f32 %f900, %f530, %f529; | |
// ---- Four pi-scaled tap arguments, then weight for %f156, part 1 of 2 -> %f905 ----
// d = %f1 - %f3 (both produced outside this excerpt). With 0f40490FDB ~3.1415927 (pi):
//   %f156 = (d + 1) * pi,  %f157 = d * pi,  %f158 = (d - 1) * pi,  %f159 = (d - 2) * pi
BB6_193: | |
sub.f32 %f532, %f1, %f3; | |
add.f32 %f533, %f532, 0f3F800000; | |
mul.f32 %f156, %f533, 0f40490FDB; | |
mul.f32 %f157, %f532, 0f40490FDB; | |
add.f32 %f534, %f532, 0fBF800000; | |
mul.f32 %f158, %f534, 0f40490FDB; | |
add.f32 %f535, %f532, 0fC0000000; | |
mul.f32 %f159, %f535, 0f40490FDB; | |
// If %f156 == 0 the weight %f913 keeps the value of %f952 (assigned outside this
// excerpt; presumably 1.0, the x -> 0 limit - TODO confirm) and control jumps to
// BB6_241. Otherwise a sine-shaped function of %f156 is evaluated into %f905;
// BB6_217..BB6_240 then do the same for %f156 * 0.5 and divide.
setp.eq.f32 %p112, %f156, 0f00000000; | |
mov.f32 %f913, %f952; | |
@%p112 bra BB6_241; | |
// %rd49 = base of the local-memory scratch buffer used by the large-argument path.
add.u64 %rd49, %SPL, 0; | |
// An infinite argument is multiplied by 0 (inf * 0 = NaN) so that NaN propagates.
abs.f32 %f536, %f156; | |
setp.neu.f32 %p113, %f536, 0f7F800000; | |
mov.f32 %f901, %f156; | |
@%p113 bra BB6_196; | |
mov.f32 %f537, 0f00000000; | |
mul.rn.f32 %f901, %f156, %f537; | |
// Fast reduction: q = round-to-nearest-int(x * 0f3F22F983 [~0.6366198 = 2/pi]) -> %r1409,
// then r = x - q * (pi/2), with pi/2 applied as three addends
// (0f3FC90FDA ~1.5707963, 0f33A22168 ~7.55e-8, 0f27C234C5 ~5.39e-15) -> %f902.
BB6_196: | |
mul.f32 %f538, %f901, 0f3F22F983; | |
cvt.rni.s32.f32 %r1409, %f538; | |
cvt.rn.f32.s32 %f539, %r1409; | |
neg.f32 %f540, %f539; | |
mov.f32 %f541, 0f3FC90FDA; | |
fma.rn.f32 %f542, %f540, %f541, %f901; | |
mov.f32 %f543, 0f33A22168; | |
fma.rn.f32 %f544, %f540, %f543, %f542; | |
mov.f32 %f545, 0f27C234C5; | |
fma.rn.f32 %f902, %f540, %f545, %f544; | |
abs.f32 %f546, %f901; | |
// %rd50 = scratch base + 24 bytes (the 7th 32-bit word).
add.s64 %rd50, %rd49, 24; | |
// |x| <= 0f47CE4780 (105615.0): the fast reduction above is kept; skip to the polynomial.
setp.leu.f32 %p114, %f546, 0f47CE4780; | |
@%p114 bra BB6_207; | |
// Large-argument path: redo the reduction in integer arithmetic. Take the raw float
// bits, keep sign+exponent in %r284, and build the 24-bit significand left-aligned in
// 32 bits with the implicit leading 1 restored (%r285).
mov.b32 %r283, %f901; | |
shr.u32 %r284, %r283, 23; | |
shl.b32 %r944, %r283, 8; | |
or.b32 %r285, %r944, -2147483648; | |
mov.u32 %r1401, 0; | |
mov.u64 %rd163, __cudart_i2opi_f; | |
mov.u32 %r1400, -6; | |
mov.u64 %rd164, %rd49; | |
// Six iterations (counter runs -6 .. -1): multiply each 32-bit word of the constant
// table __cudart_i2opi_f (contents not visible here; presumably bits of 2/pi, going by
// the symbol name) by the significand, carrying the high half into the next word and
// storing each low half to the scratch buffer.
BB6_198: | |
.pragma "nounroll"; | |
ld.const.u32 %r947, [%rd163]; | |
// inline asm | |
{ | |
mad.lo.cc.u32 %r945, %r947, %r285, %r1401; | |
madc.hi.u32 %r1401, %r947, %r285, 0; | |
} | |
// inline asm | |
st.local.u32 [%rd164], %r945; | |
add.s64 %rd164, %rd164, 4; | |
add.s64 %rd163, %rd163, 4; | |
add.s32 %r1400, %r1400, 1; | |
setp.ne.s32 %p115, %r1400, 0; | |
@%p115 bra BB6_198; | |
// Store the final carry as the 7th scratch word, then pick the words addressed by the
// exponent: word index = 6 - ((exponent - 128) >> 5), bit offset = exponent & 31.
// %r290 keeps the input sign bit.
and.b32 %r950, %r284, 255; | |
add.s32 %r951, %r950, -128; | |
shr.u32 %r952, %r951, 5; | |
and.b32 %r290, %r283, -2147483648; | |
st.local.u32 [%rd50], %r1401; | |
mov.u32 %r953, 6; | |
sub.s32 %r954, %r953, %r952; | |
mul.wide.s32 %rd126, %r954, 4; | |
add.s64 %rd55, %rd49, %rd126; | |
ld.local.u32 %r1402, [%rd55]; | |
ld.local.u32 %r1403, [%rd55+-4]; | |
and.b32 %r293, %r284, 31; | |
setp.eq.s32 %p116, %r293, 0; | |
@%p116 bra BB6_201; | |
// Non-zero bit offset: shift both selected words left by it, pulling bits from the
// next lower word (a third word is loaded for the low half).
mov.u32 %r955, 32; | |
sub.s32 %r956, %r955, %r293; | |
shr.u32 %r957, %r1403, %r956; | |
shl.b32 %r958, %r1402, %r293; | |
add.s32 %r1402, %r957, %r958; | |
ld.local.u32 %r959, [%rd55+-8]; | |
shr.u32 %r960, %r959, %r956; | |
shl.b32 %r961, %r1403, %r293; | |
add.s32 %r1403, %r960, %r961; | |
// Shift left by 2 more: the two bits shifted out of the high word plus the new top
// bit (round to nearest) give the quadrant count %r300; %r1404:%r299 is the fraction.
BB6_201: | |
shr.u32 %r962, %r1403, 30; | |
shl.b32 %r963, %r1402, 2; | |
add.s32 %r1404, %r962, %r963; | |
shl.b32 %r299, %r1403, 2; | |
shr.u32 %r964, %r1404, 31; | |
shr.u32 %r965, %r1402, 30; | |
add.s32 %r300, %r964, %r965; | |
setp.eq.s32 %p117, %r964, 0; | |
@%p117 bra BB6_202; | |
// Top bit of the fraction set: two's-complement negate the 64-bit fraction
// (~hi + carry, -lo) and flip the sign bit.
not.b32 %r966, %r1404; | |
neg.s32 %r1406, %r299; | |
setp.eq.s32 %p118, %r299, 0; | |
selp.u32 %r967, 1, 0, %p118; | |
add.s32 %r1404, %r967, %r966; | |
xor.b32 %r1405, %r290, -2147483648; | |
bra.uni BB6_204; | |
// Top bit clear: keep the fraction and the original sign.
BB6_202: | |
mov.u32 %r1405, %r290; | |
mov.u32 %r1406, %r299; | |
// Normalise the fraction (clz + shift), take the high 32 bits of its product with
// -921707870 (0xC90FDAA2 as unsigned, ~pi * 2^30), and negate the quadrant count when
// the input sign bit was set.
BB6_204: | |
clz.b32 %r1408, %r1404; | |
setp.eq.s32 %p119, %r1408, 0; | |
shl.b32 %r968, %r1404, %r1408; | |
mov.u32 %r969, 32; | |
sub.s32 %r970, %r969, %r1408; | |
shr.u32 %r971, %r1406, %r970; | |
add.s32 %r972, %r971, %r968; | |
selp.b32 %r308, %r1404, %r972, %p119; | |
mov.u32 %r973, -921707870; | |
mul.hi.u32 %r1407, %r308, %r973; | |
setp.eq.s32 %p120, %r290, 0; | |
neg.s32 %r974, %r300; | |
selp.b32 %r1409, %r300, %r974, %p120; | |
setp.lt.s32 %p121, %r1407, 1; | |
@%p121 bra BB6_206; | |
// Product is positive as a signed value (top bit clear, non-zero): shift it left one
// more bit, bringing in the top bit of the low product, and bump the shift count.
mul.lo.s32 %r975, %r308, -921707870; | |
shr.u32 %r976, %r975, 31; | |
shl.b32 %r977, %r1407, 1; | |
add.s32 %r1407, %r976, %r977; | |
add.s32 %r1408, %r1408, 1; | |
// Assemble the reduced argument as float bits: exponent field (126 - shift) << 23,
// plus the rounded mantissa, OR-ed with the sign -> %f902.
BB6_206: | |
mov.u32 %r978, 126; | |
sub.s32 %r979, %r978, %r1408; | |
shl.b32 %r980, %r979, 23; | |
add.s32 %r981, %r1407, 1; | |
shr.u32 %r982, %r981, 7; | |
add.s32 %r983, %r982, 1; | |
shr.u32 %r984, %r983, 1; | |
add.s32 %r985, %r984, %r980; | |
or.b32 %r986, %r985, %r1405; | |
mov.b32 %f902, %r986; | |
// Polynomial stage on r = %f902, r2 = r*r = %f165. Quadrant bit 0 (%r316) picks the series:
//   bit 0 set   -> 1 + r2*(-0.5 + r2*(0f3D2AAAA5 + r2*(0fBAB6061A + r2*0f37CCF5CE)))   (even, cosine-shaped)
//   bit 0 clear -> r + r*r2*(0fBE2AAAA3 + r2*(0f3C08839E + r2*0fB94CA1F9))             (odd, sine-shaped)
BB6_207: | |
mul.rn.f32 %f165, %f902, %f902; | |
and.b32 %r316, %r1409, 1; | |
setp.eq.s32 %p122, %r316, 0; | |
@%p122 bra BB6_209; | |
mov.f32 %f547, 0fBAB6061A; | |
mov.f32 %f548, 0f37CCF5CE; | |
fma.rn.f32 %f903, %f548, %f165, %f547; | |
bra.uni BB6_210; | |
BB6_209: | |
mov.f32 %f549, 0f3C08839E; | |
mov.f32 %f550, 0fB94CA1F9; | |
fma.rn.f32 %f903, %f550, %f165, %f549; | |
BB6_210: | |
@%p122 bra BB6_212; | |
mov.f32 %f551, 0f3D2AAAA5; | |
fma.rn.f32 %f552, %f903, %f165, %f551; | |
mov.f32 %f553, 0fBF000000; | |
fma.rn.f32 %f904, %f552, %f165, %f553; | |
bra.uni BB6_213; | |
BB6_212: | |
mov.f32 %f554, 0fBE2AAAA3; | |
fma.rn.f32 %f555, %f903, %f165, %f554; | |
mov.f32 %f556, 0f00000000; | |
fma.rn.f32 %f904, %f555, %f165, %f556; | |
// The odd form r + r*p is computed unconditionally, then overwritten with the even
// form 1 + r2*p when quadrant bit 0 is set.
BB6_213: | |
fma.rn.f32 %f905, %f904, %f902, %f902; | |
@%p122 bra BB6_215; | |
mov.f32 %f557, 0f3F800000; | |
fma.rn.f32 %f905, %f904, %f165, %f557; | |
// Quadrant bit 1 set -> negate the result (multiply by -1.0, add 0).
BB6_215: | |
and.b32 %r987, %r1409, 2; | |
setp.eq.s32 %p125, %r987, 0; | |
@%p125 bra BB6_217; | |
mov.f32 %f558, 0f00000000; | |
mov.f32 %f559, 0fBF800000; | |
fma.rn.f32 %f905, %f905, %f559, %f558; | |
// ---- Filter weight for argument %f156, part 2 of 2: second sine-shaped factor + combine -> %f913 ----
// Evaluates the same sine-shaped function as the preceding code, this time for
// x = %f156 * 0.5, into %f911, then forms
//   %f913 = (%f905 * %f911) / (%f156 * %f156 * 0.5)
// where %f905 is the factor computed just before this label. That is the shape
// sin(x)/x * sin(x/2)/(x/2) - presumably a 2-lobe Lanczos window; TODO confirm against
// the CUDA source. %rd49/%rd50 (scratch base and base+24) and %f541/%f543/%f545
// (three-part pi/2 constant) are set earlier in this function, before the first reduction.
BB6_217: | |
mul.f32 %f907, %f156, 0f3F000000; | |
// An infinite argument is multiplied by 0 (inf * 0 = NaN) so that NaN propagates.
abs.f32 %f560, %f907; | |
setp.neu.f32 %p126, %f560, 0f7F800000; | |
@%p126 bra BB6_219; | |
mov.f32 %f561, 0f00000000; | |
mul.rn.f32 %f907, %f907, %f561; | |
// Fast reduction: q = round-to-nearest-int(x * 0f3F22F983 [~0.6366198 = 2/pi]) -> %r1419,
// then r = x - q * (pi/2) in three fma steps -> %f908.
BB6_219: | |
mul.f32 %f562, %f907, 0f3F22F983; | |
cvt.rni.s32.f32 %r1419, %f562; | |
cvt.rn.f32.s32 %f563, %r1419; | |
neg.f32 %f564, %f563; | |
fma.rn.f32 %f566, %f564, %f541, %f907; | |
fma.rn.f32 %f568, %f564, %f543, %f566; | |
fma.rn.f32 %f908, %f564, %f545, %f568; | |
abs.f32 %f570, %f907; | |
// |x| <= 0f47CE4780 (105615.0): the fast reduction above is kept; skip to the polynomial.
setp.leu.f32 %p127, %f570, 0f47CE4780; | |
@%p127 bra BB6_230; | |
// Large-argument path: redo the reduction in integer arithmetic. Take the raw float
// bits, keep sign+exponent in %r319, and build the 24-bit significand left-aligned in
// 32 bits with the implicit leading 1 restored (%r320).
mov.b32 %r318, %f907; | |
shr.u32 %r319, %r318, 23; | |
shl.b32 %r990, %r318, 8; | |
or.b32 %r320, %r990, -2147483648; | |
mov.u32 %r1411, 0; | |
mov.u64 %rd165, __cudart_i2opi_f; | |
mov.u32 %r1410, -6; | |
mov.u64 %rd166, %rd49; | |
// Six iterations (counter runs -6 .. -1): multiply each 32-bit word of the constant
// table __cudart_i2opi_f (contents not visible here; presumably bits of 2/pi, going by
// the symbol name) by the significand, carrying the high half into the next word and
// storing each low half to the scratch buffer.
BB6_221: | |
.pragma "nounroll"; | |
ld.const.u32 %r993, [%rd165]; | |
// inline asm | |
{ | |
mad.lo.cc.u32 %r991, %r993, %r320, %r1411; | |
madc.hi.u32 %r1411, %r993, %r320, 0; | |
} | |
// inline asm | |
st.local.u32 [%rd166], %r991; | |
add.s64 %rd166, %rd166, 4; | |
add.s64 %rd165, %rd165, 4; | |
add.s32 %r1410, %r1410, 1; | |
setp.ne.s32 %p128, %r1410, 0; | |
@%p128 bra BB6_221; | |
// Store the final carry as the 7th scratch word, then pick the words addressed by the
// exponent: word index = 6 - ((exponent - 128) >> 5), bit offset = exponent & 31.
// %r325 keeps the input sign bit.
and.b32 %r996, %r319, 255; | |
add.s32 %r997, %r996, -128; | |
shr.u32 %r998, %r997, 5; | |
and.b32 %r325, %r318, -2147483648; | |
st.local.u32 [%rd50], %r1411; | |
mov.u32 %r999, 6; | |
sub.s32 %r1000, %r999, %r998; | |
mul.wide.s32 %rd128, %r1000, 4; | |
add.s64 %rd60, %rd49, %rd128; | |
ld.local.u32 %r1412, [%rd60]; | |
ld.local.u32 %r1413, [%rd60+-4]; | |
and.b32 %r328, %r319, 31; | |
setp.eq.s32 %p129, %r328, 0; | |
@%p129 bra BB6_224; | |
// Non-zero bit offset: shift both selected words left by it, pulling bits from the
// next lower word (a third word is loaded for the low half).
mov.u32 %r1001, 32; | |
sub.s32 %r1002, %r1001, %r328; | |
shr.u32 %r1003, %r1413, %r1002; | |
shl.b32 %r1004, %r1412, %r328; | |
add.s32 %r1412, %r1003, %r1004; | |
ld.local.u32 %r1005, [%rd60+-8]; | |
shr.u32 %r1006, %r1005, %r1002; | |
shl.b32 %r1007, %r1413, %r328; | |
add.s32 %r1413, %r1006, %r1007; | |
// Shift left by 2 more: the two bits shifted out of the high word plus the new top
// bit (round to nearest) give the quadrant count %r335; %r1414:%r334 is the fraction.
BB6_224: | |
shr.u32 %r1008, %r1413, 30; | |
shl.b32 %r1009, %r1412, 2; | |
add.s32 %r1414, %r1008, %r1009; | |
shl.b32 %r334, %r1413, 2; | |
shr.u32 %r1010, %r1414, 31; | |
shr.u32 %r1011, %r1412, 30; | |
add.s32 %r335, %r1010, %r1011; | |
setp.eq.s32 %p130, %r1010, 0; | |
@%p130 bra BB6_225; | |
// Top bit of the fraction set: two's-complement negate the 64-bit fraction
// (~hi + carry, -lo) and flip the sign bit.
not.b32 %r1012, %r1414; | |
neg.s32 %r1416, %r334; | |
setp.eq.s32 %p131, %r334, 0; | |
selp.u32 %r1013, 1, 0, %p131; | |
add.s32 %r1414, %r1013, %r1012; | |
xor.b32 %r1415, %r325, -2147483648; | |
bra.uni BB6_227; | |
// Top bit clear: keep the fraction and the original sign.
BB6_225: | |
mov.u32 %r1415, %r325; | |
mov.u32 %r1416, %r334; | |
// Normalise the fraction (clz + shift), take the high 32 bits of its product with
// -921707870 (0xC90FDAA2 as unsigned, ~pi * 2^30), and negate the quadrant count when
// the input sign bit was set.
BB6_227: | |
clz.b32 %r1418, %r1414; | |
setp.eq.s32 %p132, %r1418, 0; | |
shl.b32 %r1014, %r1414, %r1418; | |
mov.u32 %r1015, 32; | |
sub.s32 %r1016, %r1015, %r1418; | |
shr.u32 %r1017, %r1416, %r1016; | |
add.s32 %r1018, %r1017, %r1014; | |
selp.b32 %r343, %r1414, %r1018, %p132; | |
mov.u32 %r1019, -921707870; | |
mul.hi.u32 %r1417, %r343, %r1019; | |
setp.eq.s32 %p133, %r325, 0; | |
neg.s32 %r1020, %r335; | |
selp.b32 %r1419, %r335, %r1020, %p133; | |
setp.lt.s32 %p134, %r1417, 1; | |
@%p134 bra BB6_229; | |
// Product is positive as a signed value (top bit clear, non-zero): shift it left one
// more bit, bringing in the top bit of the low product, and bump the shift count.
mul.lo.s32 %r1021, %r343, -921707870; | |
shr.u32 %r1022, %r1021, 31; | |
shl.b32 %r1023, %r1417, 1; | |
add.s32 %r1417, %r1022, %r1023; | |
add.s32 %r1418, %r1418, 1; | |
// Assemble the reduced argument as float bits: exponent field (126 - shift) << 23,
// plus the rounded mantissa, OR-ed with the sign -> %f908.
BB6_229: | |
mov.u32 %r1024, 126; | |
sub.s32 %r1025, %r1024, %r1418; | |
shl.b32 %r1026, %r1025, 23; | |
add.s32 %r1027, %r1417, 1; | |
shr.u32 %r1028, %r1027, 7; | |
add.s32 %r1029, %r1028, 1; | |
shr.u32 %r1030, %r1029, 1; | |
add.s32 %r1031, %r1030, %r1026; | |
or.b32 %r1032, %r1031, %r1415; | |
mov.b32 %f908, %r1032; | |
// Polynomial stage on r = %f908, r2 = r*r = %f183. Quadrant bit 0 (%r351) picks the series:
//   bit 0 set   -> 1 + r2*(-0.5 + r2*(0f3D2AAAA5 + r2*(0fBAB6061A + r2*0f37CCF5CE)))   (even, cosine-shaped)
//   bit 0 clear -> r + r*r2*(0fBE2AAAA3 + r2*(0f3C08839E + r2*0fB94CA1F9))             (odd, sine-shaped)
BB6_230: | |
mul.rn.f32 %f183, %f908, %f908; | |
and.b32 %r351, %r1419, 1; | |
setp.eq.s32 %p135, %r351, 0; | |
@%p135 bra BB6_232; | |
mov.f32 %f571, 0fBAB6061A; | |
mov.f32 %f572, 0f37CCF5CE; | |
fma.rn.f32 %f909, %f572, %f183, %f571; | |
bra.uni BB6_233; | |
BB6_232: | |
mov.f32 %f573, 0f3C08839E; | |
mov.f32 %f574, 0fB94CA1F9; | |
fma.rn.f32 %f909, %f574, %f183, %f573; | |
BB6_233: | |
@%p135 bra BB6_235; | |
mov.f32 %f575, 0f3D2AAAA5; | |
fma.rn.f32 %f576, %f909, %f183, %f575; | |
mov.f32 %f577, 0fBF000000; | |
fma.rn.f32 %f910, %f576, %f183, %f577; | |
bra.uni BB6_236; | |
BB6_235: | |
mov.f32 %f578, 0fBE2AAAA3; | |
fma.rn.f32 %f579, %f909, %f183, %f578; | |
mov.f32 %f580, 0f00000000; | |
fma.rn.f32 %f910, %f579, %f183, %f580; | |
// The odd form r + r*p is computed unconditionally, then overwritten with the even
// form 1 + r2*p when quadrant bit 0 is set.
BB6_236: | |
fma.rn.f32 %f911, %f910, %f908, %f908; | |
@%p135 bra BB6_238; | |
mov.f32 %f581, 0f3F800000; | |
fma.rn.f32 %f911, %f910, %f183, %f581; | |
// Quadrant bit 1 set -> negate the result (multiply by -1.0, add 0).
BB6_238: | |
and.b32 %r1033, %r1419, 2; | |
setp.eq.s32 %p138, %r1033, 0; | |
@%p138 bra BB6_240; | |
mov.f32 %f582, 0f00000000; | |
mov.f32 %f583, 0fBF800000; | |
fma.rn.f32 %f911, %f911, %f583, %f582; | |
// Combine: %f913 = (%f905 * %f911) / (%f156 * %f156 * 0.5), with an IEEE round-to-nearest divide.
BB6_240: | |
mul.f32 %f584, %f156, %f156; | |
mul.f32 %f585, %f584, 0f3F000000; | |
mul.f32 %f586, %f905, %f911; | |
div.rn.f32 %f913, %f586, %f585; | |
// ---- Filter weight for argument %f157, part 1 of 2: zero guard + first sine-shaped factor -> %f918 ----
// If %f157 == 0 the weight %f926 keeps the value of %f952 (assigned outside this excerpt;
// presumably 1.0, the x -> 0 limit of the expression below - TODO confirm) and control
// jumps straight to BB6_289. Otherwise this block evaluates a sine-shaped function of
// %f157 into %f918; BB6_265..BB6_288 then do the same for %f157 * 0.5 and divide.
BB6_241: | |
setp.eq.f32 %p139, %f157, 0f00000000; | |
mov.f32 %f926, %f952; | |
@%p139 bra BB6_289; | |
// %rd61 = base of the local-memory scratch buffer used by the large-argument path.
add.u64 %rd61, %SPL, 0; | |
// An infinite argument is multiplied by 0 (inf * 0 = NaN) so that NaN propagates.
abs.f32 %f588, %f157; | |
setp.neu.f32 %p140, %f588, 0f7F800000; | |
mov.f32 %f914, %f157; | |
@%p140 bra BB6_244; | |
mov.f32 %f589, 0f00000000; | |
mul.rn.f32 %f914, %f157, %f589; | |
// Fast reduction: q = round-to-nearest-int(x * 0f3F22F983 [~0.6366198 = 2/pi]) -> %r1429,
// then r = x - q * (pi/2), with pi/2 applied as three addends
// (0f3FC90FDA ~1.5707963, 0f33A22168 ~7.55e-8, 0f27C234C5 ~5.39e-15) -> %f915.
BB6_244: | |
mul.f32 %f590, %f914, 0f3F22F983; | |
cvt.rni.s32.f32 %r1429, %f590; | |
cvt.rn.f32.s32 %f591, %r1429; | |
neg.f32 %f592, %f591; | |
mov.f32 %f593, 0f3FC90FDA; | |
fma.rn.f32 %f594, %f592, %f593, %f914; | |
mov.f32 %f595, 0f33A22168; | |
fma.rn.f32 %f596, %f592, %f595, %f594; | |
mov.f32 %f597, 0f27C234C5; | |
fma.rn.f32 %f915, %f592, %f597, %f596; | |
abs.f32 %f598, %f914; | |
// %rd62 = scratch base + 24 bytes (the 7th 32-bit word).
add.s64 %rd62, %rd61, 24; | |
// |x| <= 0f47CE4780 (105615.0): the fast reduction above is kept; skip to the polynomial.
setp.leu.f32 %p141, %f598, 0f47CE4780; | |
@%p141 bra BB6_255; | |
// Large-argument path: redo the reduction in integer arithmetic. Take the raw float
// bits, keep sign+exponent in %r354, and build the 24-bit significand left-aligned in
// 32 bits with the implicit leading 1 restored (%r355).
mov.b32 %r353, %f914; | |
shr.u32 %r354, %r353, 23; | |
shl.b32 %r1036, %r353, 8; | |
or.b32 %r355, %r1036, -2147483648; | |
mov.u32 %r1421, 0; | |
mov.u64 %rd167, __cudart_i2opi_f; | |
mov.u32 %r1420, -6; | |
mov.u64 %rd168, %rd61; | |
// Six iterations (counter runs -6 .. -1): multiply each 32-bit word of the constant
// table __cudart_i2opi_f (contents not visible here; presumably bits of 2/pi, going by
// the symbol name) by the significand, carrying the high half into the next word and
// storing each low half to the scratch buffer.
BB6_246: | |
.pragma "nounroll"; | |
ld.const.u32 %r1039, [%rd167]; | |
// inline asm | |
{ | |
mad.lo.cc.u32 %r1037, %r1039, %r355, %r1421; | |
madc.hi.u32 %r1421, %r1039, %r355, 0; | |
} | |
// inline asm | |
st.local.u32 [%rd168], %r1037; | |
add.s64 %rd168, %rd168, 4; | |
add.s64 %rd167, %rd167, 4; | |
add.s32 %r1420, %r1420, 1; | |
setp.ne.s32 %p142, %r1420, 0; | |
@%p142 bra BB6_246; | |
// Store the final carry as the 7th scratch word, then pick the words addressed by the
// exponent: word index = 6 - ((exponent - 128) >> 5), bit offset = exponent & 31.
// %r360 keeps the input sign bit.
and.b32 %r1042, %r354, 255; | |
add.s32 %r1043, %r1042, -128; | |
shr.u32 %r1044, %r1043, 5; | |
and.b32 %r360, %r353, -2147483648; | |
st.local.u32 [%rd62], %r1421; | |
mov.u32 %r1045, 6; | |
sub.s32 %r1046, %r1045, %r1044; | |
mul.wide.s32 %rd131, %r1046, 4; | |
add.s64 %rd67, %rd61, %rd131; | |
ld.local.u32 %r1422, [%rd67]; | |
ld.local.u32 %r1423, [%rd67+-4]; | |
and.b32 %r363, %r354, 31; | |
setp.eq.s32 %p143, %r363, 0; | |
@%p143 bra BB6_249; | |
// Non-zero bit offset: shift both selected words left by it, pulling bits from the
// next lower word (a third word is loaded for the low half).
mov.u32 %r1047, 32; | |
sub.s32 %r1048, %r1047, %r363; | |
shr.u32 %r1049, %r1423, %r1048; | |
shl.b32 %r1050, %r1422, %r363; | |
add.s32 %r1422, %r1049, %r1050; | |
ld.local.u32 %r1051, [%rd67+-8]; | |
shr.u32 %r1052, %r1051, %r1048; | |
shl.b32 %r1053, %r1423, %r363; | |
add.s32 %r1423, %r1052, %r1053; | |
// Shift left by 2 more: the two bits shifted out of the high word plus the new top
// bit (round to nearest) give the quadrant count %r370; %r1424:%r369 is the fraction.
BB6_249: | |
shr.u32 %r1054, %r1423, 30; | |
shl.b32 %r1055, %r1422, 2; | |
add.s32 %r1424, %r1054, %r1055; | |
shl.b32 %r369, %r1423, 2; | |
shr.u32 %r1056, %r1424, 31; | |
shr.u32 %r1057, %r1422, 30; | |
add.s32 %r370, %r1056, %r1057; | |
setp.eq.s32 %p144, %r1056, 0; | |
@%p144 bra BB6_250; | |
// Top bit of the fraction set: two's-complement negate the 64-bit fraction
// (~hi + carry, -lo) and flip the sign bit.
not.b32 %r1058, %r1424; | |
neg.s32 %r1426, %r369; | |
setp.eq.s32 %p145, %r369, 0; | |
selp.u32 %r1059, 1, 0, %p145; | |
add.s32 %r1424, %r1059, %r1058; | |
xor.b32 %r1425, %r360, -2147483648; | |
bra.uni BB6_252; | |
// Top bit clear: keep the fraction and the original sign.
BB6_250: | |
mov.u32 %r1425, %r360; | |
mov.u32 %r1426, %r369; | |
// Normalise the fraction (clz + shift), take the high 32 bits of its product with
// -921707870 (0xC90FDAA2 as unsigned, ~pi * 2^30), and negate the quadrant count when
// the input sign bit was set.
BB6_252: | |
clz.b32 %r1428, %r1424; | |
setp.eq.s32 %p146, %r1428, 0; | |
shl.b32 %r1060, %r1424, %r1428; | |
mov.u32 %r1061, 32; | |
sub.s32 %r1062, %r1061, %r1428; | |
shr.u32 %r1063, %r1426, %r1062; | |
add.s32 %r1064, %r1063, %r1060; | |
selp.b32 %r378, %r1424, %r1064, %p146; | |
mov.u32 %r1065, -921707870; | |
mul.hi.u32 %r1427, %r378, %r1065; | |
setp.eq.s32 %p147, %r360, 0; | |
neg.s32 %r1066, %r370; | |
selp.b32 %r1429, %r370, %r1066, %p147; | |
setp.lt.s32 %p148, %r1427, 1; | |
@%p148 bra BB6_254; | |
// Product is positive as a signed value (top bit clear, non-zero): shift it left one
// more bit, bringing in the top bit of the low product, and bump the shift count.
mul.lo.s32 %r1067, %r378, -921707870; | |
shr.u32 %r1068, %r1067, 31; | |
shl.b32 %r1069, %r1427, 1; | |
add.s32 %r1427, %r1068, %r1069; | |
add.s32 %r1428, %r1428, 1; | |
// Assemble the reduced argument as float bits: exponent field (126 - shift) << 23,
// plus the rounded mantissa, OR-ed with the sign -> %f915.
BB6_254: | |
mov.u32 %r1070, 126; | |
sub.s32 %r1071, %r1070, %r1428; | |
shl.b32 %r1072, %r1071, 23; | |
add.s32 %r1073, %r1427, 1; | |
shr.u32 %r1074, %r1073, 7; | |
add.s32 %r1075, %r1074, 1; | |
shr.u32 %r1076, %r1075, 1; | |
add.s32 %r1077, %r1076, %r1072; | |
or.b32 %r1078, %r1077, %r1425; | |
mov.b32 %f915, %r1078; | |
// Polynomial stage on r = %f915, r2 = r*r = %f202. Quadrant bit 0 (%r386) picks the series:
//   bit 0 set   -> 1 + r2*(-0.5 + r2*(0f3D2AAAA5 + r2*(0fBAB6061A + r2*0f37CCF5CE)))   (even, cosine-shaped)
//   bit 0 clear -> r + r*r2*(0fBE2AAAA3 + r2*(0f3C08839E + r2*0fB94CA1F9))             (odd, sine-shaped)
BB6_255: | |
mul.rn.f32 %f202, %f915, %f915; | |
and.b32 %r386, %r1429, 1; | |
setp.eq.s32 %p149, %r386, 0; | |
@%p149 bra BB6_257; | |
mov.f32 %f599, 0fBAB6061A; | |
mov.f32 %f600, 0f37CCF5CE; | |
fma.rn.f32 %f916, %f600, %f202, %f599; | |
bra.uni BB6_258; | |
BB6_257: | |
mov.f32 %f601, 0f3C08839E; | |
mov.f32 %f602, 0fB94CA1F9; | |
fma.rn.f32 %f916, %f602, %f202, %f601; | |
BB6_258: | |
@%p149 bra BB6_260; | |
mov.f32 %f603, 0f3D2AAAA5; | |
fma.rn.f32 %f604, %f916, %f202, %f603; | |
mov.f32 %f605, 0fBF000000; | |
fma.rn.f32 %f917, %f604, %f202, %f605; | |
bra.uni BB6_261; | |
BB6_260: | |
mov.f32 %f606, 0fBE2AAAA3; | |
fma.rn.f32 %f607, %f916, %f202, %f606; | |
mov.f32 %f608, 0f00000000; | |
fma.rn.f32 %f917, %f607, %f202, %f608; | |
// The odd form r + r*p is computed unconditionally, then overwritten with the even
// form 1 + r2*p when quadrant bit 0 is set.
BB6_261: | |
fma.rn.f32 %f918, %f917, %f915, %f915; | |
@%p149 bra BB6_263; | |
mov.f32 %f609, 0f3F800000; | |
fma.rn.f32 %f918, %f917, %f202, %f609; | |
// Quadrant bit 1 set -> negate the result (multiply by -1.0, add 0).
BB6_263: | |
and.b32 %r1079, %r1429, 2; | |
setp.eq.s32 %p152, %r1079, 0; | |
@%p152 bra BB6_265; | |
mov.f32 %f610, 0f00000000; | |
mov.f32 %f611, 0fBF800000; | |
fma.rn.f32 %f918, %f918, %f611, %f610; | |
// ---- Filter weight for argument %f157, part 2 of 2: second sine-shaped factor + combine -> %f926 ----
// Evaluates the same sine-shaped function as the preceding code, this time for
// x = %f157 * 0.5, into %f924, then forms
//   %f926 = (%f918 * %f924) / (%f157 * %f157 * 0.5)
// where %f918 is the factor computed just before this label. That is the shape
// sin(x)/x * sin(x/2)/(x/2) - presumably a 2-lobe Lanczos window; TODO confirm against
// the CUDA source. %rd61/%rd62 (scratch base and base+24) and %f593/%f595/%f597
// (three-part pi/2 constant) are set earlier in this function, before the first reduction.
BB6_265: | |
mul.f32 %f920, %f157, 0f3F000000; | |
// An infinite argument is multiplied by 0 (inf * 0 = NaN) so that NaN propagates.
abs.f32 %f612, %f920; | |
setp.neu.f32 %p153, %f612, 0f7F800000; | |
@%p153 bra BB6_267; | |
mov.f32 %f613, 0f00000000; | |
mul.rn.f32 %f920, %f920, %f613; | |
// Fast reduction: q = round-to-nearest-int(x * 0f3F22F983 [~0.6366198 = 2/pi]) -> %r1439,
// then r = x - q * (pi/2) in three fma steps -> %f921.
BB6_267: | |
mul.f32 %f614, %f920, 0f3F22F983; | |
cvt.rni.s32.f32 %r1439, %f614; | |
cvt.rn.f32.s32 %f615, %r1439; | |
neg.f32 %f616, %f615; | |
fma.rn.f32 %f618, %f616, %f593, %f920; | |
fma.rn.f32 %f620, %f616, %f595, %f618; | |
fma.rn.f32 %f921, %f616, %f597, %f620; | |
abs.f32 %f622, %f920; | |
// |x| <= 0f47CE4780 (105615.0): the fast reduction above is kept; skip to the polynomial.
setp.leu.f32 %p154, %f622, 0f47CE4780; | |
@%p154 bra BB6_278; | |
// Large-argument path: redo the reduction in integer arithmetic. Take the raw float
// bits, keep sign+exponent in %r389, and build the 24-bit significand left-aligned in
// 32 bits with the implicit leading 1 restored (%r390).
mov.b32 %r388, %f920; | |
shr.u32 %r389, %r388, 23; | |
shl.b32 %r1082, %r388, 8; | |
or.b32 %r390, %r1082, -2147483648; | |
mov.u32 %r1431, 0; | |
mov.u64 %rd169, __cudart_i2opi_f; | |
mov.u32 %r1430, -6; | |
mov.u64 %rd170, %rd61; | |
// Six iterations (counter runs -6 .. -1): multiply each 32-bit word of the constant
// table __cudart_i2opi_f (contents not visible here; presumably bits of 2/pi, going by
// the symbol name) by the significand, carrying the high half into the next word and
// storing each low half to the scratch buffer.
BB6_269: | |
.pragma "nounroll"; | |
ld.const.u32 %r1085, [%rd169]; | |
// inline asm | |
{ | |
mad.lo.cc.u32 %r1083, %r1085, %r390, %r1431; | |
madc.hi.u32 %r1431, %r1085, %r390, 0; | |
} | |
// inline asm | |
st.local.u32 [%rd170], %r1083; | |
add.s64 %rd170, %rd170, 4; | |
add.s64 %rd169, %rd169, 4; | |
add.s32 %r1430, %r1430, 1; | |
setp.ne.s32 %p155, %r1430, 0; | |
@%p155 bra BB6_269; | |
// Store the final carry as the 7th scratch word, then pick the words addressed by the
// exponent: word index = 6 - ((exponent - 128) >> 5), bit offset = exponent & 31.
// %r395 keeps the input sign bit.
and.b32 %r1088, %r389, 255; | |
add.s32 %r1089, %r1088, -128; | |
shr.u32 %r1090, %r1089, 5; | |
and.b32 %r395, %r388, -2147483648; | |
st.local.u32 [%rd62], %r1431; | |
mov.u32 %r1091, 6; | |
sub.s32 %r1092, %r1091, %r1090; | |
mul.wide.s32 %rd133, %r1092, 4; | |
add.s64 %rd72, %rd61, %rd133; | |
ld.local.u32 %r1432, [%rd72]; | |
ld.local.u32 %r1433, [%rd72+-4]; | |
and.b32 %r398, %r389, 31; | |
setp.eq.s32 %p156, %r398, 0; | |
@%p156 bra BB6_272; | |
// Non-zero bit offset: shift both selected words left by it, pulling bits from the
// next lower word (a third word is loaded for the low half).
mov.u32 %r1093, 32; | |
sub.s32 %r1094, %r1093, %r398; | |
shr.u32 %r1095, %r1433, %r1094; | |
shl.b32 %r1096, %r1432, %r398; | |
add.s32 %r1432, %r1095, %r1096; | |
ld.local.u32 %r1097, [%rd72+-8]; | |
shr.u32 %r1098, %r1097, %r1094; | |
shl.b32 %r1099, %r1433, %r398; | |
add.s32 %r1433, %r1098, %r1099; | |
// Shift left by 2 more: the two bits shifted out of the high word plus the new top
// bit (round to nearest) give the quadrant count %r405; %r1434:%r404 is the fraction.
BB6_272: | |
shr.u32 %r1100, %r1433, 30; | |
shl.b32 %r1101, %r1432, 2; | |
add.s32 %r1434, %r1100, %r1101; | |
shl.b32 %r404, %r1433, 2; | |
shr.u32 %r1102, %r1434, 31; | |
shr.u32 %r1103, %r1432, 30; | |
add.s32 %r405, %r1102, %r1103; | |
setp.eq.s32 %p157, %r1102, 0; | |
@%p157 bra BB6_273; | |
// Top bit of the fraction set: two's-complement negate the 64-bit fraction
// (~hi + carry, -lo) and flip the sign bit.
not.b32 %r1104, %r1434; | |
neg.s32 %r1436, %r404; | |
setp.eq.s32 %p158, %r404, 0; | |
selp.u32 %r1105, 1, 0, %p158; | |
add.s32 %r1434, %r1105, %r1104; | |
xor.b32 %r1435, %r395, -2147483648; | |
bra.uni BB6_275; | |
// Top bit clear: keep the fraction and the original sign.
BB6_273: | |
mov.u32 %r1435, %r395; | |
mov.u32 %r1436, %r404; | |
// Normalise the fraction (clz + shift), take the high 32 bits of its product with
// -921707870 (0xC90FDAA2 as unsigned, ~pi * 2^30), and negate the quadrant count when
// the input sign bit was set.
BB6_275: | |
clz.b32 %r1438, %r1434; | |
setp.eq.s32 %p159, %r1438, 0; | |
shl.b32 %r1106, %r1434, %r1438; | |
mov.u32 %r1107, 32; | |
sub.s32 %r1108, %r1107, %r1438; | |
shr.u32 %r1109, %r1436, %r1108; | |
add.s32 %r1110, %r1109, %r1106; | |
selp.b32 %r413, %r1434, %r1110, %p159; | |
mov.u32 %r1111, -921707870; | |
mul.hi.u32 %r1437, %r413, %r1111; | |
setp.eq.s32 %p160, %r395, 0; | |
neg.s32 %r1112, %r405; | |
selp.b32 %r1439, %r405, %r1112, %p160; | |
setp.lt.s32 %p161, %r1437, 1; | |
@%p161 bra BB6_277; | |
// Product is positive as a signed value (top bit clear, non-zero): shift it left one
// more bit, bringing in the top bit of the low product, and bump the shift count.
mul.lo.s32 %r1113, %r413, -921707870; | |
shr.u32 %r1114, %r1113, 31; | |
shl.b32 %r1115, %r1437, 1; | |
add.s32 %r1437, %r1114, %r1115; | |
add.s32 %r1438, %r1438, 1; | |
// Assemble the reduced argument as float bits: exponent field (126 - shift) << 23,
// plus the rounded mantissa, OR-ed with the sign -> %f921.
BB6_277: | |
mov.u32 %r1116, 126; | |
sub.s32 %r1117, %r1116, %r1438; | |
shl.b32 %r1118, %r1117, 23; | |
add.s32 %r1119, %r1437, 1; | |
shr.u32 %r1120, %r1119, 7; | |
add.s32 %r1121, %r1120, 1; | |
shr.u32 %r1122, %r1121, 1; | |
add.s32 %r1123, %r1122, %r1118; | |
or.b32 %r1124, %r1123, %r1435; | |
mov.b32 %f921, %r1124; | |
// Polynomial stage on r = %f921, r2 = r*r = %f220. Quadrant bit 0 (%r421) picks the series:
//   bit 0 set   -> 1 + r2*(-0.5 + r2*(0f3D2AAAA5 + r2*(0fBAB6061A + r2*0f37CCF5CE)))   (even, cosine-shaped)
//   bit 0 clear -> r + r*r2*(0fBE2AAAA3 + r2*(0f3C08839E + r2*0fB94CA1F9))             (odd, sine-shaped)
BB6_278: | |
mul.rn.f32 %f220, %f921, %f921; | |
and.b32 %r421, %r1439, 1; | |
setp.eq.s32 %p162, %r421, 0; | |
@%p162 bra BB6_280; | |
mov.f32 %f623, 0fBAB6061A; | |
mov.f32 %f624, 0f37CCF5CE; | |
fma.rn.f32 %f922, %f624, %f220, %f623; | |
bra.uni BB6_281; | |
BB6_280: | |
mov.f32 %f625, 0f3C08839E; | |
mov.f32 %f626, 0fB94CA1F9; | |
fma.rn.f32 %f922, %f626, %f220, %f625; | |
BB6_281: | |
@%p162 bra BB6_283; | |
mov.f32 %f627, 0f3D2AAAA5; | |
fma.rn.f32 %f628, %f922, %f220, %f627; | |
mov.f32 %f629, 0fBF000000; | |
fma.rn.f32 %f923, %f628, %f220, %f629; | |
bra.uni BB6_284; | |
BB6_283: | |
mov.f32 %f630, 0fBE2AAAA3; | |
fma.rn.f32 %f631, %f922, %f220, %f630; | |
mov.f32 %f632, 0f00000000; | |
fma.rn.f32 %f923, %f631, %f220, %f632; | |
// The odd form r + r*p is computed unconditionally, then overwritten with the even
// form 1 + r2*p when quadrant bit 0 is set.
BB6_284: | |
fma.rn.f32 %f924, %f923, %f921, %f921; | |
@%p162 bra BB6_286; | |
mov.f32 %f633, 0f3F800000; | |
fma.rn.f32 %f924, %f923, %f220, %f633; | |
// Quadrant bit 1 set -> negate the result (multiply by -1.0, add 0).
BB6_286: | |
and.b32 %r1125, %r1439, 2; | |
setp.eq.s32 %p165, %r1125, 0; | |
@%p165 bra BB6_288; | |
mov.f32 %f634, 0f00000000; | |
mov.f32 %f635, 0fBF800000; | |
fma.rn.f32 %f924, %f924, %f635, %f634; | |
// Combine: %f926 = (%f918 * %f924) / (%f157 * %f157 * 0.5), with an IEEE round-to-nearest divide.
BB6_288: | |
mul.f32 %f636, %f157, %f157; | |
mul.f32 %f637, %f636, 0f3F000000; | |
mul.f32 %f638, %f918, %f924; | |
div.rn.f32 %f926, %f638, %f637; | |
BB6_289: | |
setp.eq.f32 %p166, %f158, 0f00000000; | |
mov.f32 %f939, %f952; | |
@%p166 bra BB6_337; | |
add.u64 %rd73, %SPL, 0; | |
abs.f32 %f640, %f158; | |
setp.neu.f32 %p167, %f640, 0f7F800000; | |
mov.f32 %f927, %f158; | |
@%p167 bra BB6_292; | |
mov.f32 %f641, 0f00000000; | |
mul.rn.f32 %f927, %f158, %f641; | |
BB6_292: | |
mul.f32 %f642, %f927, 0f3F22F983; | |
cvt.rni.s32.f32 %r1449, %f642; | |
cvt.rn.f32.s32 %f643, %r1449; | |
neg.f32 %f644, %f643; | |
mov.f32 %f645, 0f3FC90FDA; | |
fma.rn.f32 %f646, %f644, %f645, %f927; | |
mov.f32 %f647, 0f33A22168; | |
fma.rn.f32 %f648, %f644, %f647, %f646; | |
mov.f32 %f649, 0f27C234C5; | |
fma.rn.f32 %f928, %f644, %f649, %f648; | |
abs.f32 %f650, %f927; | |
add.s64 %rd74, %rd73, 24; | |
setp.leu.f32 %p168, %f650, 0f47CE4780; | |
@%p168 bra BB6_303; | |
mov.b32 %r423, %f927; | |
shr.u32 %r424, %r423, 23; | |
shl.b32 %r1128, %r423, 8; | |
or.b32 %r425, %r1128, -2147483648; | |
mov.u32 %r1441, 0; | |
mov.u64 %rd171, __cudart_i2opi_f; | |
mov.u32 %r1440, -6; | |
mov.u64 %rd172, %rd73; | |
BB6_294: | |
.pragma "nounroll"; | |
ld.const.u32 %r1131, [%rd171]; | |
// inline asm | |
{ | |
mad.lo.cc.u32 %r1129, %r1131, %r425, %r1441; | |
madc.hi.u32 %r1441, %r1131, %r425, 0; | |
} | |
// inline asm | |
st.local.u32 [%rd172], %r1129; | |
add.s64 %rd172, %rd172, 4; | |
add.s64 %rd171, %rd171, 4; | |
add.s32 %r1440, %r1440, 1; | |
setp.ne.s32 %p169, %r1440, 0; | |
@%p169 bra BB6_294; | |
and.b32 %r1134, %r424, 255; | |
add.s32 %r1135, %r1134, -128; | |
shr.u32 %r1136, %r1135, 5; | |
and.b32 %r430, %r423, -2147483648; | |
st.local.u32 [%rd74], %r1441; | |
mov.u32 %r1137, 6; | |
sub.s32 %r1138, %r1137, %r1136; | |
mul.wide.s32 %rd136, %r1138, 4; | |
add.s64 %rd79, %rd73, %rd136; | |
ld.local.u32 %r1442, [%rd79]; | |
ld.local.u32 %r1443, [%rd79+-4]; | |
and.b32 %r433, %r424, 31; | |
setp.eq.s32 %p170, %r433, 0; | |
@%p170 bra BB6_297; | |
mov.u32 %r1139, 32; | |
sub.s32 %r1140, %r1139, %r433; | |
shr.u32 %r1141, %r1443, %r1140; | |
shl.b32 %r1142, %r1442, %r433; | |
add.s32 %r1442, %r1141, %r1142; | |
ld.local.u32 %r1143, [%rd79+-8]; | |
shr.u32 %r1144, %r1143, %r1140; | |
shl.b32 %r1145, %r1443, %r433; | |
add.s32 %r1443, %r1144, %r1145; | |
BB6_297: | |
shr.u32 %r1146, %r1443, 30; | |
shl.b32 %r1147, %r1442, 2; | |
add.s32 %r1444, %r1146, %r1147; | |
shl.b32 %r439, %r1443, 2; | |
shr.u32 %r1148, %r1444, 31; | |
shr.u32 %r1149, %r1442, 30; | |
add.s32 %r440, %r1148, %r1149; | |
setp.eq.s32 %p171, %r1148, 0; | |
@%p171 bra BB6_298; | |
not.b32 %r1150, %r1444; | |
neg.s32 %r1446, %r439; | |
setp.eq.s32 %p172, %r439, 0; | |
selp.u32 %r1151, 1, 0, %p172; | |
add.s32 %r1444, %r1151, %r1150; | |
xor.b32 %r1445, %r430, -2147483648; | |
bra.uni BB6_300; | |
BB6_298: | |
mov.u32 %r1445, %r430; | |
mov.u32 %r1446, %r439; | |
BB6_300: | |
clz.b32 %r1448, %r1444; | |
setp.eq.s32 %p173, %r1448, 0; | |
shl.b32 %r1152, %r1444, %r1448; | |
mov.u32 %r1153, 32; | |
sub.s32 %r1154, %r1153, %r1448; | |
shr.u32 %r1155, %r1446, %r1154; | |
add.s32 %r1156, %r1155, %r1152; | |
selp.b32 %r448, %r1444, %r1156, %p173; | |
mov.u32 %r1157, -921707870; | |
mul.hi.u32 %r1447, %r448, %r1157; | |
setp.eq.s32 %p174, %r430, 0; | |
neg.s32 %r1158, %r440; | |
selp.b32 %r1449, %r440, %r1158, %p174; | |
setp.lt.s32 %p175, %r1447, 1; | |
@%p175 bra BB6_302; | |
mul.lo.s32 %r1159, %r448, -921707870; | |
shr.u32 %r1160, %r1159, 31; | |
shl.b32 %r1161, %r1447, 1; | |
add.s32 %r1447, %r1160, %r1161; | |
add.s32 %r1448, %r1448, 1; | |
BB6_302: | |
mov.u32 %r1162, 126; | |
sub.s32 %r1163, %r1162, %r1448; | |
shl.b32 %r1164, %r1163, 23; | |
add.s32 %r1165, %r1447, 1; | |
shr.u32 %r1166, %r1165, 7; | |
add.s32 %r1167, %r1166, 1; | |
shr.u32 %r1168, %r1167, 1; | |
add.s32 %r1169, %r1168, %r1164; | |
or.b32 %r1170, %r1169, %r1445; | |
mov.b32 %f928, %r1170; | |
BB6_303: | |
mul.rn.f32 %f239, %f928, %f928; | |
and.b32 %r456, %r1449, 1; | |
setp.eq.s32 %p176, %r456, 0; | |
@%p176 bra BB6_305; | |
mov.f32 %f651, 0fBAB6061A; | |
mov.f32 %f652, 0f37CCF5CE; | |
fma.rn.f32 %f929, %f652, %f239, %f651; | |
bra.uni BB6_306; | |
BB6_305: | |
mov.f32 %f653, 0f3C08839E; | |
mov.f32 %f654, 0fB94CA1F9; | |
fma.rn.f32 %f929, %f654, %f239, %f653; | |
BB6_306: | |
@%p176 bra BB6_308; | |
mov.f32 %f655, 0f3D2AAAA5; | |
fma.rn.f32 %f656, %f929, %f239, %f655; | |
mov.f32 %f657, 0fBF000000; | |
fma.rn.f32 %f930, %f656, %f239, %f657; | |
bra.uni BB6_309; | |
BB6_308: | |
mov.f32 %f658, 0fBE2AAAA3; | |
fma.rn.f32 %f659, %f929, %f239, %f658; | |
mov.f32 %f660, 0f00000000; | |
fma.rn.f32 %f930, %f659, %f239, %f660; | |
BB6_309: | |
fma.rn.f32 %f931, %f930, %f928, %f928; | |
@%p176 bra BB6_311; | |
mov.f32 %f661, 0f3F800000; | |
fma.rn.f32 %f931, %f930, %f239, %f661; | |
BB6_311: | |
and.b32 %r1171, %r1449, 2; | |
setp.eq.s32 %p179, %r1171, 0; | |
@%p179 bra BB6_313; | |
mov.f32 %f662, 0f00000000; | |
mov.f32 %f663, 0fBF800000; | |
fma.rn.f32 %f931, %f931, %f663, %f662; | |
BB6_313: | |
mul.f32 %f933, %f158, 0f3F000000; | |
abs.f32 %f664, %f933; | |
setp.neu.f32 %p180, %f664, 0f7F800000; | |
@%p180 bra BB6_315; | |
mov.f32 %f665, 0f00000000; | |
mul.rn.f32 %f933, %f933, %f665; | |
BB6_315: | |
mul.f32 %f666, %f933, 0f3F22F983; | |
cvt.rni.s32.f32 %r1459, %f666; | |
cvt.rn.f32.s32 %f667, %r1459; | |
neg.f32 %f668, %f667; | |
fma.rn.f32 %f670, %f668, %f645, %f933; | |
fma.rn.f32 %f672, %f668, %f647, %f670; | |
fma.rn.f32 %f934, %f668, %f649, %f672; | |
abs.f32 %f674, %f933; | |
setp.leu.f32 %p181, %f674, 0f47CE4780; | |
@%p181 bra BB6_326; | |
mov.b32 %r458, %f933; | |
shr.u32 %r459, %r458, 23; | |
shl.b32 %r1174, %r458, 8; | |
or.b32 %r460, %r1174, -2147483648; | |
mov.u32 %r1451, 0; | |
mov.u64 %rd173, __cudart_i2opi_f; | |
mov.u32 %r1450, -6; | |
mov.u64 %rd174, %rd73; | |
BB6_317: | |
.pragma "nounroll"; | |
ld.const.u32 %r1177, [%rd173]; | |
// inline asm | |
{ | |
mad.lo.cc.u32 %r1175, %r1177, %r460, %r1451; | |
madc.hi.u32 %r1451, %r1177, %r460, 0; | |
} | |
// inline asm | |
st.local.u32 [%rd174], %r1175; | |
add.s64 %rd174, %rd174, 4; | |
add.s64 %rd173, %rd173, 4; | |
add.s32 %r1450, %r1450, 1; | |
setp.ne.s32 %p182, %r1450, 0; | |
@%p182 bra BB6_317; | |
and.b32 %r1180, %r459, 255; | |
add.s32 %r1181, %r1180, -128; | |
shr.u32 %r1182, %r1181, 5; | |
and.b32 %r465, %r458, -2147483648; | |
st.local.u32 [%rd74], %r1451; | |
mov.u32 %r1183, 6; | |
sub.s32 %r1184, %r1183, %r1182; | |
mul.wide.s32 %rd138, %r1184, 4; | |
add.s64 %rd84, %rd73, %rd138; | |
ld.local.u32 %r1452, [%rd84]; | |
ld.local.u32 %r1453, [%rd84+-4]; | |
and.b32 %r468, %r459, 31; | |
setp.eq.s32 %p183, %r468, 0; | |
@%p183 bra BB6_320; | |
mov.u32 %r1185, 32; | |
sub.s32 %r1186, %r1185, %r468; | |
shr.u32 %r1187, %r1453, %r1186; | |
shl.b32 %r1188, %r1452, %r468; | |
add.s32 %r1452, %r1187, %r1188; | |
ld.local.u32 %r1189, [%rd84+-8]; | |
shr.u32 %r1190, %r1189, %r1186; | |
shl.b32 %r1191, %r1453, %r468; | |
add.s32 %r1453, %r1190, %r1191; | |
BB6_320: | |
shr.u32 %r1192, %r1453, 30; | |
shl.b32 %r1193, %r1452, 2; | |
add.s32 %r1454, %r1192, %r1193; | |
shl.b32 %r474, %r1453, 2; | |
shr.u32 %r1194, %r1454, 31; | |
shr.u32 %r1195, %r1452, 30; | |
add.s32 %r475, %r1194, %r1195; | |
setp.eq.s32 %p184, %r1194, 0; | |
@%p184 bra BB6_321; | |
not.b32 %r1196, %r1454; | |
neg.s32 %r1456, %r474; | |
setp.eq.s32 %p185, %r474, 0; | |
selp.u32 %r1197, 1, 0, %p185; | |
add.s32 %r1454, %r1197, %r1196; | |
xor.b32 %r1455, %r465, -2147483648; | |
bra.uni BB6_323; | |
BB6_321: | |
mov.u32 %r1455, %r465; | |
mov.u32 %r1456, %r474; | |
BB6_323: | |
clz.b32 %r1458, %r1454; | |
setp.eq.s32 %p186, %r1458, 0; | |
shl.b32 %r1198, %r1454, %r1458; | |
mov.u32 %r1199, 32; | |
sub.s32 %r1200, %r1199, %r1458; | |
shr.u32 %r1201, %r1456, %r1200; | |
add.s32 %r1202, %r1201, %r1198; | |
selp.b32 %r483, %r1454, %r1202, %p186; | |
mov.u32 %r1203, -921707870; | |
mul.hi.u32 %r1457, %r483, %r1203; | |
setp.eq.s32 %p187, %r465, 0; | |
neg.s32 %r1204, %r475; | |
selp.b32 %r1459, %r475, %r1204, %p187; | |
setp.lt.s32 %p188, %r1457, 1; | |
@%p188 bra BB6_325; | |
mul.lo.s32 %r1205, %r483, -921707870; | |
shr.u32 %r1206, %r1205, 31; | |
shl.b32 %r1207, %r1457, 1; | |
add.s32 %r1457, %r1206, %r1207; | |
add.s32 %r1458, %r1458, 1; | |
BB6_325: | |
mov.u32 %r1208, 126; | |
sub.s32 %r1209, %r1208, %r1458; | |
shl.b32 %r1210, %r1209, 23; | |
add.s32 %r1211, %r1457, 1; | |
shr.u32 %r1212, %r1211, 7; | |
add.s32 %r1213, %r1212, 1; | |
shr.u32 %r1214, %r1213, 1; | |
add.s32 %r1215, %r1214, %r1210; | |
or.b32 %r1216, %r1215, %r1455; | |
mov.b32 %f934, %r1216; | |
BB6_326: | |
mul.rn.f32 %f257, %f934, %f934; | |
and.b32 %r491, %r1459, 1; | |
setp.eq.s32 %p189, %r491, 0; | |
@%p189 bra BB6_328; | |
mov.f32 %f675, 0fBAB6061A; | |
mov.f32 %f676, 0f37CCF5CE; | |
fma.rn.f32 %f935, %f676, %f257, %f675; | |
bra.uni BB6_329; | |
BB6_328: | |
mov.f32 %f677, 0f3C08839E; | |
mov.f32 %f678, 0fB94CA1F9; | |
fma.rn.f32 %f935, %f678, %f257, %f677; | |
BB6_329: | |
@%p189 bra BB6_331; | |
mov.f32 %f679, 0f3D2AAAA5; | |
fma.rn.f32 %f680, %f935, %f257, %f679; | |
mov.f32 %f681, 0fBF000000; | |
fma.rn.f32 %f936, %f680, %f257, %f681; | |
bra.uni BB6_332; | |
BB6_331: | |
mov.f32 %f682, 0fBE2AAAA3; | |
fma.rn.f32 %f683, %f935, %f257, %f682; | |
mov.f32 %f684, 0f00000000; | |
fma.rn.f32 %f936, %f683, %f257, %f684; | |
BB6_332: | |
fma.rn.f32 %f937, %f936, %f934, %f934; | |
@%p189 bra BB6_334; | |
mov.f32 %f685, 0f3F800000; | |
fma.rn.f32 %f937, %f936, %f257, %f685; | |
BB6_334: | |
and.b32 %r1217, %r1459, 2; | |
setp.eq.s32 %p192, %r1217, 0; | |
@%p192 bra BB6_336; | |
mov.f32 %f686, 0f00000000; | |
mov.f32 %f687, 0fBF800000; | |
fma.rn.f32 %f937, %f937, %f687, %f686; | |
BB6_336: | |
mul.f32 %f688, %f158, %f158; | |
mul.f32 %f689, %f688, 0f3F000000; | |
mul.f32 %f690, %f931, %f937; | |
div.rn.f32 %f939, %f690, %f689; | |
BB6_337: | |
setp.eq.f32 %p193, %f159, 0f00000000; | |
@%p193 bra BB6_385; | |
add.u64 %rd85, %SPL, 0; | |
abs.f32 %f692, %f159; | |
setp.neu.f32 %p194, %f692, 0f7F800000; | |
mov.f32 %f940, %f159; | |
@%p194 bra BB6_340; | |
mov.f32 %f693, 0f00000000; | |
mul.rn.f32 %f940, %f159, %f693; | |
BB6_340: | |
mul.f32 %f694, %f940, 0f3F22F983; | |
cvt.rni.s32.f32 %r1469, %f694; | |
cvt.rn.f32.s32 %f695, %r1469; | |
neg.f32 %f696, %f695; | |
mov.f32 %f697, 0f3FC90FDA; | |
fma.rn.f32 %f698, %f696, %f697, %f940; | |
mov.f32 %f699, 0f33A22168; | |
fma.rn.f32 %f700, %f696, %f699, %f698; | |
mov.f32 %f701, 0f27C234C5; | |
fma.rn.f32 %f941, %f696, %f701, %f700; | |
abs.f32 %f702, %f940; | |
add.s64 %rd86, %rd85, 24; | |
setp.leu.f32 %p195, %f702, 0f47CE4780; | |
@%p195 bra BB6_351; | |
mov.b32 %r493, %f940; | |
shr.u32 %r494, %r493, 23; | |
shl.b32 %r1220, %r493, 8; | |
or.b32 %r495, %r1220, -2147483648; | |
mov.u32 %r1461, 0; | |
mov.u64 %rd175, __cudart_i2opi_f; | |
mov.u32 %r1460, -6; | |
mov.u64 %rd176, %rd85; | |
BB6_342: | |
.pragma "nounroll"; | |
ld.const.u32 %r1223, [%rd175]; | |
// inline asm | |
{ | |
mad.lo.cc.u32 %r1221, %r1223, %r495, %r1461; | |
madc.hi.u32 %r1461, %r1223, %r495, 0; | |
} | |
// inline asm | |
st.local.u32 [%rd176], %r1221; | |
add.s64 %rd176, %rd176, 4; | |
add.s64 %rd175, %rd175, 4; | |
add.s32 %r1460, %r1460, 1; | |
setp.ne.s32 %p196, %r1460, 0; | |
@%p196 bra BB6_342; | |
and.b32 %r1226, %r494, 255; | |
add.s32 %r1227, %r1226, -128; | |
shr.u32 %r1228, %r1227, 5; | |
and.b32 %r500, %r493, -2147483648; | |
st.local.u32 [%rd86], %r1461; | |
mov.u32 %r1229, 6; | |
sub.s32 %r1230, %r1229, %r1228; | |
mul.wide.s32 %rd141, %r1230, 4; | |
add.s64 %rd91, %rd85, %rd141; | |
ld.local.u32 %r1462, [%rd91]; | |
ld.local.u32 %r1463, [%rd91+-4]; | |
and.b32 %r503, %r494, 31; | |
setp.eq.s32 %p197, %r503, 0; | |
@%p197 bra BB6_345; | |
mov.u32 %r1231, 32; | |
sub.s32 %r1232, %r1231, %r503; | |
shr.u32 %r1233, %r1463, %r1232; | |
shl.b32 %r1234, %r1462, %r503; | |
add.s32 %r1462, %r1233, %r1234; | |
ld.local.u32 %r1235, [%rd91+-8]; | |
shr.u32 %r1236, %r1235, %r1232; | |
shl.b32 %r1237, %r1463, %r503; | |
add.s32 %r1463, %r1236, %r1237; | |
BB6_345: | |
shr.u32 %r1238, %r1463, 30; | |
shl.b32 %r1239, %r1462, 2; | |
add.s32 %r1464, %r1238, %r1239; | |
shl.b32 %r509, %r1463, 2; | |
shr.u32 %r1240, %r1464, 31; | |
shr.u32 %r1241, %r1462, 30; | |
add.s32 %r510, %r1240, %r1241; | |
setp.eq.s32 %p198, %r1240, 0; | |
@%p198 bra BB6_346; | |
not.b32 %r1242, %r1464; | |
neg.s32 %r1466, %r509; | |
setp.eq.s32 %p199, %r509, 0; | |
selp.u32 %r1243, 1, 0, %p199; | |
add.s32 %r1464, %r1243, %r1242; | |
xor.b32 %r1465, %r500, -2147483648; | |
bra.uni BB6_348; | |
BB6_346: | |
mov.u32 %r1465, %r500; | |
mov.u32 %r1466, %r509; | |
BB6_348: | |
clz.b32 %r1468, %r1464; | |
setp.eq.s32 %p200, %r1468, 0; | |
shl.b32 %r1244, %r1464, %r1468; | |
mov.u32 %r1245, 32; | |
sub.s32 %r1246, %r1245, %r1468; | |
shr.u32 %r1247, %r1466, %r1246; | |
add.s32 %r1248, %r1247, %r1244; | |
selp.b32 %r518, %r1464, %r1248, %p200; | |
mov.u32 %r1249, -921707870; | |
mul.hi.u32 %r1467, %r518, %r1249; | |
setp.eq.s32 %p201, %r500, 0; | |
neg.s32 %r1250, %r510; | |
selp.b32 %r1469, %r510, %r1250, %p201; | |
setp.lt.s32 %p202, %r1467, 1; | |
@%p202 bra BB6_350; | |
mul.lo.s32 %r1251, %r518, -921707870; | |
shr.u32 %r1252, %r1251, 31; | |
shl.b32 %r1253, %r1467, 1; | |
add.s32 %r1467, %r1252, %r1253; | |
add.s32 %r1468, %r1468, 1; | |
BB6_350: | |
mov.u32 %r1254, 126; | |
sub.s32 %r1255, %r1254, %r1468; | |
shl.b32 %r1256, %r1255, 23; | |
add.s32 %r1257, %r1467, 1; | |
shr.u32 %r1258, %r1257, 7; | |
add.s32 %r1259, %r1258, 1; | |
shr.u32 %r1260, %r1259, 1; | |
add.s32 %r1261, %r1260, %r1256; | |
or.b32 %r1262, %r1261, %r1465; | |
mov.b32 %f941, %r1262; | |
BB6_351: | |
mul.rn.f32 %f276, %f941, %f941; | |
and.b32 %r526, %r1469, 1; | |
setp.eq.s32 %p203, %r526, 0; | |
@%p203 bra BB6_353; | |
mov.f32 %f703, 0fBAB6061A; | |
mov.f32 %f704, 0f37CCF5CE; | |
fma.rn.f32 %f942, %f704, %f276, %f703; | |
bra.uni BB6_354; | |
BB6_353: | |
mov.f32 %f705, 0f3C08839E; | |
mov.f32 %f706, 0fB94CA1F9; | |
fma.rn.f32 %f942, %f706, %f276, %f705; | |
BB6_354: | |
@%p203 bra BB6_356; | |
mov.f32 %f707, 0f3D2AAAA5; | |
fma.rn.f32 %f708, %f942, %f276, %f707; | |
mov.f32 %f709, 0fBF000000; | |
fma.rn.f32 %f943, %f708, %f276, %f709; | |
bra.uni BB6_357; | |
BB6_356: | |
mov.f32 %f710, 0fBE2AAAA3; | |
fma.rn.f32 %f711, %f942, %f276, %f710; | |
mov.f32 %f712, 0f00000000; | |
fma.rn.f32 %f943, %f711, %f276, %f712; | |
BB6_357: | |
fma.rn.f32 %f944, %f943, %f941, %f941; | |
@%p203 bra BB6_359; | |
mov.f32 %f713, 0f3F800000; | |
fma.rn.f32 %f944, %f943, %f276, %f713; | |
BB6_359: | |
and.b32 %r1263, %r1469, 2; | |
setp.eq.s32 %p206, %r1263, 0; | |
@%p206 bra BB6_361; | |
mov.f32 %f714, 0f00000000; | |
mov.f32 %f715, 0fBF800000; | |
fma.rn.f32 %f944, %f944, %f715, %f714; | |
BB6_361: | |
mul.f32 %f946, %f159, 0f3F000000; | |
abs.f32 %f716, %f946; | |
setp.neu.f32 %p207, %f716, 0f7F800000; | |
@%p207 bra BB6_363; | |
mov.f32 %f717, 0f00000000; | |
mul.rn.f32 %f946, %f946, %f717; | |
BB6_363: | |
mul.f32 %f718, %f946, 0f3F22F983; | |
cvt.rni.s32.f32 %r1479, %f718; | |
cvt.rn.f32.s32 %f719, %r1479; | |
neg.f32 %f720, %f719; | |
fma.rn.f32 %f722, %f720, %f697, %f946; | |
fma.rn.f32 %f724, %f720, %f699, %f722; | |
fma.rn.f32 %f947, %f720, %f701, %f724; | |
abs.f32 %f726, %f946; | |
setp.leu.f32 %p208, %f726, 0f47CE4780; | |
@%p208 bra BB6_374; | |
mov.b32 %r528, %f946; | |
shr.u32 %r529, %r528, 23; | |
shl.b32 %r1266, %r528, 8; | |
or.b32 %r530, %r1266, -2147483648; | |
mov.u32 %r1471, 0; | |
mov.u64 %rd177, __cudart_i2opi_f; | |
mov.u32 %r1470, -6; | |
mov.u64 %rd178, %rd85; | |
BB6_365: | |
.pragma "nounroll"; | |
ld.const.u32 %r1269, [%rd177]; | |
// inline asm | |
{ | |
mad.lo.cc.u32 %r1267, %r1269, %r530, %r1471; | |
madc.hi.u32 %r1471, %r1269, %r530, 0; | |
} | |
// inline asm | |
st.local.u32 [%rd178], %r1267; | |
add.s64 %rd178, %rd178, 4; | |
add.s64 %rd177, %rd177, 4; | |
add.s32 %r1470, %r1470, 1; | |
setp.ne.s32 %p209, %r1470, 0; | |
@%p209 bra BB6_365; | |
and.b32 %r1272, %r529, 255; | |
add.s32 %r1273, %r1272, -128; | |
shr.u32 %r1274, %r1273, 5; | |
and.b32 %r535, %r528, -2147483648; | |
st.local.u32 [%rd86], %r1471; | |
mov.u32 %r1275, 6; | |
sub.s32 %r1276, %r1275, %r1274; | |
mul.wide.s32 %rd143, %r1276, 4; | |
add.s64 %rd96, %rd85, %rd143; | |
ld.local.u32 %r1472, [%rd96]; | |
ld.local.u32 %r1473, [%rd96+-4]; | |
and.b32 %r538, %r529, 31; | |
setp.eq.s32 %p210, %r538, 0; | |
@%p210 bra BB6_368; | |
mov.u32 %r1277, 32; | |
sub.s32 %r1278, %r1277, %r538; | |
shr.u32 %r1279, %r1473, %r1278; | |
shl.b32 %r1280, %r1472, %r538; | |
add.s32 %r1472, %r1279, %r1280; | |
ld.local.u32 %r1281, [%rd96+-8]; | |
shr.u32 %r1282, %r1281, %r1278; | |
shl.b32 %r1283, %r1473, %r538; | |
add.s32 %r1473, %r1282, %r1283; | |
BB6_368: | |
shr.u32 %r1284, %r1473, 30; | |
shl.b32 %r1285, %r1472, 2; | |
add.s32 %r1474, %r1284, %r1285; | |
shl.b32 %r544, %r1473, 2; | |
shr.u32 %r1286, %r1474, 31; | |
shr.u32 %r1287, %r1472, 30; | |
add.s32 %r545, %r1286, %r1287; | |
setp.eq.s32 %p211, %r1286, 0; | |
@%p211 bra BB6_369; | |
not.b32 %r1288, %r1474; | |
neg.s32 %r1476, %r544; | |
setp.eq.s32 %p212, %r544, 0; | |
selp.u32 %r1289, 1, 0, %p212; | |
add.s32 %r1474, %r1289, %r1288; | |
xor.b32 %r1475, %r535, -2147483648; | |
bra.uni BB6_371; | |
BB6_369: | |
mov.u32 %r1475, %r535; | |
mov.u32 %r1476, %r544; | |
BB6_371: | |
clz.b32 %r1478, %r1474; | |
setp.eq.s32 %p213, %r1478, 0; | |
shl.b32 %r1290, %r1474, %r1478; | |
mov.u32 %r1291, 32; | |
sub.s32 %r1292, %r1291, %r1478; | |
shr.u32 %r1293, %r1476, %r1292; | |
add.s32 %r1294, %r1293, %r1290; | |
selp.b32 %r553, %r1474, %r1294, %p213; | |
mov.u32 %r1295, -921707870; | |
mul.hi.u32 %r1477, %r553, %r1295; | |
setp.eq.s32 %p214, %r535, 0; | |
neg.s32 %r1296, %r545; | |
selp.b32 %r1479, %r545, %r1296, %p214; | |
setp.lt.s32 %p215, %r1477, 1; | |
@%p215 bra BB6_373; | |
mul.lo.s32 %r1297, %r553, -921707870; | |
shr.u32 %r1298, %r1297, 31; | |
shl.b32 %r1299, %r1477, 1; | |
add.s32 %r1477, %r1298, %r1299; | |
add.s32 %r1478, %r1478, 1; | |
BB6_373: | |
mov.u32 %r1300, 126; | |
sub.s32 %r1301, %r1300, %r1478; | |
shl.b32 %r1302, %r1301, 23; | |
add.s32 %r1303, %r1477, 1; | |
shr.u32 %r1304, %r1303, 7; | |
add.s32 %r1305, %r1304, 1; | |
shr.u32 %r1306, %r1305, 1; | |
add.s32 %r1307, %r1306, %r1302; | |
or.b32 %r1308, %r1307, %r1475; | |
mov.b32 %f947, %r1308; | |
BB6_374: | |
mul.rn.f32 %f294, %f947, %f947; | |
and.b32 %r561, %r1479, 1; | |
setp.eq.s32 %p216, %r561, 0; | |
@%p216 bra BB6_376; | |
mov.f32 %f727, 0fBAB6061A; | |
mov.f32 %f728, 0f37CCF5CE; | |
fma.rn.f32 %f948, %f728, %f294, %f727; | |
bra.uni BB6_377; | |
BB6_376: | |
mov.f32 %f729, 0f3C08839E; | |
mov.f32 %f730, 0fB94CA1F9; | |
fma.rn.f32 %f948, %f730, %f294, %f729; | |
BB6_377: | |
@%p216 bra BB6_379; | |
mov.f32 %f731, 0f3D2AAAA5; | |
fma.rn.f32 %f732, %f948, %f294, %f731; | |
mov.f32 %f733, 0fBF000000; | |
fma.rn.f32 %f949, %f732, %f294, %f733; | |
bra.uni BB6_380; | |
BB6_379: | |
mov.f32 %f734, 0fBE2AAAA3; | |
fma.rn.f32 %f735, %f948, %f294, %f734; | |
mov.f32 %f736, 0f00000000; | |
fma.rn.f32 %f949, %f735, %f294, %f736; | |
BB6_380: | |
fma.rn.f32 %f950, %f949, %f947, %f947; | |
@%p216 bra BB6_382; | |
mov.f32 %f737, 0f3F800000; | |
fma.rn.f32 %f950, %f949, %f294, %f737; | |
BB6_382: | |
and.b32 %r1309, %r1479, 2; | |
setp.eq.s32 %p219, %r1309, 0; | |
@%p219 bra BB6_384; | |
mov.f32 %f738, 0f00000000; | |
mov.f32 %f739, 0fBF800000; | |
fma.rn.f32 %f950, %f950, %f739, %f738; | |
BB6_384: | |
mul.f32 %f740, %f159, %f159; | |
mul.f32 %f741, %f740, 0f3F000000; | |
mul.f32 %f742, %f944, %f950; | |
div.rn.f32 %f952, %f742, %f741; | |
BB6_385: | |
add.f32 %f743, %f861, %f874; | |
add.f32 %f744, %f743, %f887; | |
add.f32 %f745, %f744, %f900; | |
div.rn.f32 %f746, %f861, %f745; | |
div.rn.f32 %f747, %f874, %f745; | |
div.rn.f32 %f748, %f887, %f745; | |
div.rn.f32 %f749, %f900, %f745; | |
add.f32 %f750, %f3, 0fBF800000; | |
add.f32 %f751, %f2, 0fBF800000; | |
tex.2d.v4.f32.f32 {%f752, %f753, %f754, %f755}, [%rd97, {%f751, %f750}]; | |
tex.2d.v4.f32.f32 {%f756, %f757, %f758, %f759}, [%rd97, {%f2, %f750}]; | |
add.f32 %f760, %f2, 0f3F800000; | |
tex.2d.v4.f32.f32 {%f761, %f762, %f763, %f764}, [%rd97, {%f760, %f750}]; | |
add.f32 %f765, %f2, 0f40000000; | |
tex.2d.v4.f32.f32 {%f766, %f767, %f768, %f769}, [%rd97, {%f765, %f750}]; | |
mul.f32 %f770, %f747, %f756; | |
fma.rn.f32 %f771, %f746, %f752, %f770; | |
fma.rn.f32 %f772, %f748, %f761, %f771; | |
fma.rn.f32 %f773, %f749, %f766, %f772; | |
tex.2d.v4.f32.f32 {%f774, %f775, %f776, %f777}, [%rd97, {%f751, %f3}]; | |
tex.2d.v4.f32.f32 {%f778, %f779, %f780, %f781}, [%rd97, {%f2, %f3}]; | |
tex.2d.v4.f32.f32 {%f782, %f783, %f784, %f785}, [%rd97, {%f760, %f3}]; | |
tex.2d.v4.f32.f32 {%f786, %f787, %f788, %f789}, [%rd97, {%f765, %f3}]; | |
mul.f32 %f790, %f747, %f778; | |
fma.rn.f32 %f791, %f746, %f774, %f790; | |
fma.rn.f32 %f792, %f748, %f782, %f791; | |
fma.rn.f32 %f793, %f749, %f786, %f792; | |
add.f32 %f794, %f3, 0f3F800000; | |
tex.2d.v4.f32.f32 {%f795, %f796, %f797, %f798}, [%rd97, {%f751, %f794}]; | |
tex.2d.v4.f32.f32 {%f799, %f800, %f801, %f802}, [%rd97, {%f2, %f794}]; | |
tex.2d.v4.f32.f32 {%f803, %f804, %f805, %f806}, [%rd97, {%f760, %f794}]; | |
tex.2d.v4.f32.f32 {%f807, %f808, %f809, %f810}, [%rd97, {%f765, %f794}]; | |
mul.f32 %f811, %f747, %f799; | |
fma.rn.f32 %f812, %f746, %f795, %f811; | |
fma.rn.f32 %f813, %f748, %f803, %f812; | |
fma.rn.f32 %f814, %f749, %f807, %f813; | |
add.f32 %f815, %f3, 0f40000000; | |
tex.2d.v4.f32.f32 {%f816, %f817, %f818, %f819}, [%rd97, {%f751, %f815}]; | |
tex.2d.v4.f32.f32 {%f820, %f821, %f822, %f823}, [%rd97, {%f2, %f815}]; | |
tex.2d.v4.f32.f32 {%f824, %f825, %f826, %f827}, [%rd97, {%f760, %f815}]; | |
tex.2d.v4.f32.f32 {%f828, %f829, %f830, %f831}, [%rd97, {%f765, %f815}]; | |
add.f32 %f832, %f913, %f926; | |
add.f32 %f833, %f832, %f939; | |
add.f32 %f834, %f833, %f952; | |
div.rn.f32 %f835, %f913, %f834; | |
div.rn.f32 %f836, %f926, %f834; | |
div.rn.f32 %f837, %f939, %f834; | |
div.rn.f32 %f838, %f952, %f834; | |
mul.f32 %f839, %f747, %f820; | |
fma.rn.f32 %f840, %f746, %f816, %f839; | |
fma.rn.f32 %f841, %f748, %f824, %f840; | |
fma.rn.f32 %f842, %f749, %f828, %f841; | |
mul.f32 %f843, %f836, %f793; | |
fma.rn.f32 %f844, %f835, %f773, %f843; | |
fma.rn.f32 %f845, %f837, %f814, %f844; | |
fma.rn.f32 %f846, %f838, %f842, %f845; | |
setp.gt.s32 %p220, %r567, 8; | |
selp.f32 %f847, 0f477FFF00, 0f437F0000, %p220; | |
mul.f32 %f848, %f847, %f846; | |
cvt.rzi.u32.f32 %r1310, %f848; | |
mad.lo.s32 %r1319, %r2, %r564, %r1; | |
cvta.to.global.u64 %rd144, %rd98; | |
cvt.s64.s32 %rd145, %r1319; | |
add.s64 %rd146, %rd144, %rd145; | |
st.global.u8 [%rd146], %r1310; | |
BB6_386: | |
ret; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment