Created
March 15, 2024 04:22
-
-
Save raphlinus/5aca9de53f9d6b24933cb24d8a60df63 to your computer and use it in GitHub Desktop.
apparent miscompilation of flatten.wgsl
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1 s_version 0x4004 4 0.01 2 | |
2 s_inst_prefetch 0x3 4 0.01 1 | |
3 s_getpc_b64 s[0:1] 4 0.03 5 | |
4 s_mov_b32 s0, s2 4 0.05 9 | |
5 s_load_dwordx4 s[4:7], s[0:1], null 4 0.01 1 | |
6 s_load_dwordx4 s[12:15], s[0:1], 0x20 4 0.01 1 | |
7 s_load_dwordx4 s[16:19], s[0:1], 0x40 4 0.01 1 | |
8 v_lshl_add_u32 v3, s8, 8, v0 4 0.03 5 | |
9 v_lshrrev_b32_e32 v0, 2, v3 4 0.01 1 | |
10 s_waitcnt lgkmcnt(0) 4 2.30 406 | |
11 s_buffer_load_dwordx2 s[2:3], s[4:7], 0x24 4 0.01 1 | |
12 s_buffer_load_dwordx2 s[4:5], s[4:7], 0x34 4 0.01 1 | |
13 s_waitcnt lgkmcnt(0) 4 2.61 463 | |
14 v_add_lshl_u32 v1, v0, s2, 2 4 0.01 1 | |
15 v_mul_lo_u32 v0, v0, 20 4 0.11 19 | |
16 tbuffer_load_format_x v2, v1, s[12:15], 0 format:[BUF_FMT_32_FLOAT] offen 4 0.01 1 | |
17 s_clause 0x1 4 0.01 1 | |
18 tbuffer_load_format_xyz v[4:6], v0, s[16:19], 0 format:[BUF_FMT_32_32_32_FLOAT] offen offset:8 4 0.01 1 | |
19 tbuffer_load_format_x v1, v0, s[16:19], 0 format:[BUF_FMT_32_FLOAT] offen 4 0.08 14 | |
20 v_lshlrev_b32_e32 v0, 3, v3 4 0.03 5 | |
21 v_and_b32_e32 v7, 24, v0 4 0.01 1 | |
22 s_waitcnt vmcnt(2) 4 2.78 491 | |
23 v_bfe_u32 v8, v2, 0, v7 4 0.01 1 | |
24 v_lshrrev_b32_e32 v2, v7, v2 4 0.02 4 | |
25 v_and_b32_e32 v9, 0x404040, v8 4 0.01 1 | |
26 v_and_b32_e32 v7, 0x101010, v8 4 0.01 1 | |
27 v_and_b32_e32 v11, 16, v2 4 0.01 2 | |
28 v_bcnt_u32_b32 v10, v9, 0 4 0.01 1 | |
29 s_waitcnt vmcnt(1) 4 0.02 3 | |
30 v_bcnt_u32_b32 v7, v7, v6 4 0.18 32 | |
31 v_cmp_ne_i32_e32 vcc_lo, 0, v11 4 0.01 1 | |
32 v_lshl_add_u32 v5, v10, 1, v5 4 0.01 2 | |
33 v_and_b32_e32 v10, 0x202020, v8 4 0.02 3 | |
34 v_add_lshl_u32 v5, v5, s5, 2 4 0.01 1 | |
35 s_waitcnt vmcnt(0) 4 0.01 2 | |
36 v_bcnt_u32_b32 v1, v10, v1 4 0.03 5 | |
37 v_add_nc_u32_e32 v9, -8, v5 4 0.09 15 | |
38 tbuffer_load_format_x v6, v9, s[12:15], 0 format:[BUF_FMT_32_FLOAT] offen 4 0.01 1 | |
39 s_waitcnt_depctr 0xffe3 4 0.06 11 | |
40 s_and_saveexec_b32 s5, vcc_lo 4 0.05 9 | |
41 s_cbranch_execz _L0 4 0.01 1 | |
42 BBF0_0: | |
43 s_load_dwordx4 s[8:11], s[0:1], 0x60 0 0.00 | |
44 v_mul_lo_u32 v10, v7, 24 0 0.00 | |
45 s_waitcnt vmcnt(0) 0 0.00 | |
46 v_and_b32_e32 v9, 2.0, v6 0 0.00 | |
47 v_add_nc_u32_e32 v12, -1, v1 0 0.00 | |
48 v_cmp_ne_i32_e32 vcc_lo, 0, v9 0 0.00 | |
49 v_cndmask_b32_e64 v11, 0, 1, vcc_lo 0 0.00 | |
50 s_waitcnt lgkmcnt(0) 0 0.00 | |
51 buffer_store_dwordx2 v[11:12], v10, s[8:11], 0 offen offset:16 glc 0 0.00 | |
52 _L0: | |
53 s_waitcnt_depctr 0xffe3 4 0.23 40 | |
54 s_mov_b32 exec_lo, s5 4 0.05 9 | |
55 v_and_b32_e32 v9, 3, v2 4 0.03 5 | |
56 v_cmp_ne_i32_e32 vcc_lo, 0, v9 4 0.25 45 | |
57 s_and_b32 exec_lo, s5, vcc_lo 4 0.05 9 | |
58 s_cbranch_execz _L1 4 0.18 32 | |
59 BBF0_1: | |
60 v_lshrrev_b32_e32 v10, 2, v8 4 0.01 1 | |
61 v_lshrrev_b32_e32 v11, 3, v8 4 0.01 1 | |
62 v_and_b32_e32 v8, 0x30303, v8 4 0.01 1 | |
63 v_mul_lo_u32 v1, v1, 6 4 0.02 4 | |
64 v_and_b32_e32 v10, 0x10101, v10 4 0.01 1 | |
65 v_and_b32_e32 v11, 0x10101, v11 4 0.02 3 | |
66 v_add_nc_u32_e32 v1, -6, v1 4 0.01 1 | |
67 v_add_nc_u32_e32 v8, v8, v10 4 0.01 1 | |
68 v_mul_u32_u24_e32 v10, 15, v11 4 0.02 3 | |
69 v_add_lshl_u32 v1, s4, v1, 2 4 0.01 1 | |
70 s_waitcnt_depctr 0xffe3 4 1.43 253 | |
71 s_clause 0x1 4 0.01 1 | |
72 tbuffer_load_format_xyzw v[11:14], v1, s[12:15], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen 4 0.01 1 | |
73 tbuffer_load_format_xy v[18:19], v1, s[12:15], 0 format:[BUF_FMT_32_32_FLOAT] offen offset:16 4 0.07 11 | |
74 v_and_b32_e32 v10, v8, v10 4 0.03 5 | |
75 v_add_nc_u32_e32 v8, v8, v10 4 0.03 5 | |
76 v_lshrrev_b32_e32 v10, 8, v8 4 0.03 5 | |
77 v_add_nc_u32_e32 v8, v8, v10 4 0.01 1 | |
78 v_and_b32_e32 v10, 8, v2 4 0.02 3 | |
79 v_add_nc_u32_sdwa v8, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4 0.01 1 | |
80 v_cmp_eq_i32_e32 vcc_lo, 0, v10 4 0.02 3 | |
81 v_and_b32_e32 v1, 63, v8 4 0.12 22 | |
82 v_add3_u32 v1, v4, v1, s3 4 0.04 7 | |
83 s_and_saveexec_b32 s4, vcc_lo 4 0.05 9 | |
84 s_cbranch_execz _L2 4 0.13 22 | |
85 BBF0_2: | |
86 v_lshlrev_b32_e32 v1, 2, v1 0 0.00 | |
87 tbuffer_load_format_xy v[15:16], v1, s[12:15], 0 format:[BUF_FMT_32_32_FLOAT] offen 0 0.00 | |
88 v_cmp_gt_u32_e32 vcc_lo, 2, v9 0 0.00 | |
89 s_waitcnt vmcnt(0) 0 0.00 | |
90 v_cvt_f32_i32_sdwa v26, sext(v15) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 0 0.00 | |
91 v_cvt_f32_i32_sdwa v27, sext(v15) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 0 0.00 | |
92 v_cvt_f32_i32_sdwa v28, sext(v16) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 0 0.00 | |
93 v_cvt_f32_i32_sdwa v29, sext(v16) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 0 0.00 | |
94 s_and_saveexec_b32 s6, vcc_lo 0 0.00 | |
95 v_mov_b32_e32 v25, 0 0 0.00 | |
96 v_mov_b32_e32 v111, 0 0 0.00 | |
97 v_mov_b32_e32 v110, 0 0 0.00 | |
98 v_mov_b32_e32 v109, 0 0 0.00 | |
99 s_andn2_b32 exec_lo, s6, exec_lo 0 0.00 | |
100 s_cbranch_execz _L3 0 0.00 | |
101 BBF0_3: | |
102 tbuffer_load_format_x v16, v1, s[12:15], 0 format:[BUF_FMT_32_FLOAT] offen offset:8 0 0.00 | |
103 v_cmp_ne_i32_e32 vcc_lo, 3, v9 0 0.00 | |
104 s_waitcnt vmcnt(0) 0 0.00 | |
105 v_cvt_f32_i32_sdwa v109, sext(v16) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 0 0.00 | |
106 v_cvt_f32_i32_sdwa v110, sext(v16) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 0 0.00 | |
107 s_and_saveexec_b32 s7, vcc_lo 0 0.00 | |
108 v_mov_b32_e32 v25, 0 0 0.00 | |
109 v_mov_b32_e32 v111, 0 0 0.00 | |
110 s_andn2_b32 exec_lo, s7, exec_lo 0 0.00 | |
111 s_cbranch_execz _L3 0 0.00 | |
112 BBF0_4: | |
113 tbuffer_load_format_x v1, v1, s[12:15], 0 format:[BUF_FMT_32_FLOAT] offen offset:12 0 0.00 | |
114 s_waitcnt vmcnt(0) 0 0.00 | |
115 v_cvt_f32_i32_sdwa v111, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 0 0.00 | |
116 v_cvt_f32_i32_sdwa v25, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 0 0.00 | |
117 _L3: | |
118 s_mov_b32 exec_lo, s6 0 0.00 | |
119 _L2: | |
120 s_andn2_b32 exec_lo, s4, exec_lo 4 0.05 9 | |
121 s_cbranch_execz _L4 4 0.02 3 | |
122 BBF0_5: | |
123 v_lshlrev_b32_e32 v1, 2, v1 4 0.01 1 | |
124 s_waitcnt_depctr 0xffe3 4 1.94 343 | |
125 tbuffer_load_format_xyzw v[26:29], v1, s[12:15], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen 4 0.06 10 | |
126 v_cmp_gt_u32_e32 vcc_lo, 2, v9 4 0.09 16 | |
127 s_and_saveexec_b32 s6, vcc_lo 4 0.05 9 | |
128 v_mov_b32_e32 v25, 0 4 0.01 1 | |
129 v_mov_b32_e32 v111, 0 4 0.01 1 | |
130 v_mov_b32_e32 v110, 0 4 0.01 1 | |
131 v_mov_b32_e32 v109, 0 4 0.01 1 | |
132 s_andn2_b32 exec_lo, s6, exec_lo 4 0.05 9 | |
133 s_cbranch_execz _L4 4 0.03 5 | |
134 BBF0_6: | |
135 tbuffer_load_format_xy v[109:110], v1, s[12:15], 0 format:[BUF_FMT_32_32_FLOAT] offen offset:16 4 0.06 10 | |
136 v_cmp_ne_i32_e32 vcc_lo, 3, v9 4 0.09 16 | |
137 s_and_saveexec_b32 s7, vcc_lo 4 0.05 9 | |
138 v_mov_b32_e32 v25, 0 4 0.01 1 | |
139 v_mov_b32_e32 v111, 0 4 0.01 1 | |
140 s_andn2_b32 exec_lo, s7, exec_lo 4 0.05 9 | |
141 s_cbranch_execz _L4 4 0.06 10 | |
142 BBF0_7: | |
143 tbuffer_load_format_xy v[24:25], v1, s[12:15], 0 format:[BUF_FMT_32_32_FLOAT] offen offset:24 0 0.00 | |
144 s_waitcnt vmcnt(0) 0 0.00 | |
145 v_mov_b32_e32 v111, v24 0 0.00 | |
146 _L4: | |
147 s_mov_b32 exec_lo, s4 4 0.05 9 | |
148 v_and_b32_e32 v2, 4, v2 4 0.01 1 | |
149 s_waitcnt vmcnt(3) 4 0.01 2 | |
150 v_cmp_lt_i32_e64 s4, v6, 0 4 0.01 1 | |
151 v_cmp_eq_i32_e64 s8, v9, 2 4 0.01 2 | |
152 v_cmp_ne_i32_e64 s6, v2, 0 4 0.09 16 | |
153 s_and_b32 s7, s4, s6 4 0.01 2 | |
154 s_nand_b32 s9, s7, s8 4 0.01 1 | |
155 s_and_b32 vcc_lo, s7, s8 4 0.01 1 | |
156 s_waitcnt vmcnt(0) 4 2.64 467 | |
157 v_cndmask_b32_e32 v23, v26, v28, vcc_lo 4 0.01 1 | |
158 v_cndmask_b32_e32 v22, v27, v29, vcc_lo 4 0.01 1 | |
159 v_cndmask_b32_e32 v16, v28, v109, vcc_lo 4 0.01 1 | |
160 v_cndmask_b32_e32 v15, v29, v110, vcc_lo 4 0.01 1 | |
161 v_cndmask_b32_e64 v2, 1, v9, s9 4 0.03 5 | |
162 v_cmp_ne_i32_e32 vcc_lo, 1, v2 4 0.10 17 | |
163 s_and_saveexec_b32 s7, vcc_lo 4 0.05 9 | |
164 s_cbranch_execz _L5 4 0.02 3 | |
165 BBF0_8: | |
166 v_cmp_eq_i32_e32 vcc_lo, 2, v2 4 0.09 16 | |
167 s_and_saveexec_b32 s8, vcc_lo 4 0.05 9 | |
168 s_cbranch_execz _L6 4 0.06 10 | |
169 BBF0_9: | |
170 v_sub_f32_e32 v1, v109, v16 4 0.01 1 | |
171 v_sub_f32_e32 v2, v110, v15 4 0.01 1 | |
172 v_sub_f32_e32 v17, v23, v16 4 0.01 1 | |
173 v_sub_f32_e32 v4, v22, v15 4 0.01 1 | |
174 v_mov_b32_e32 v25, v110 4 0.02 3 | |
175 v_madmk_f32 v1, v1, 0x3eaaaaab, v16 4 0.01 1 | |
176 v_madmk_f32 v2, v2, 0x3eaaaaab, v15 4 0.01 1 | |
177 v_mov_b32_e32 v111, v109 4 0.05 9 | |
178 v_madmk_f32 v16, v17, 0x3eaaaaab, v16 4 0.01 1 | |
179 v_madmk_f32 v108, v4, 0x3eaaaaab, v15 4 0.01 1 | |
180 v_mov_b32_e32 v109, v1 4 0.01 1 | |
181 v_mov_b32_e32 v110, v2 4 0.01 1 | |
182 _L6: | |
183 s_andn2_b32 exec_lo, s8, exec_lo 4 0.05 9 | |
184 v_mov_b32_e32 v108, v15 4 0.01 1 | |
185 s_mov_b32 exec_lo, s8 4 0.08 14 | |
186 v_mov_b32_e32 v4, v16 4 0.02 3 | |
187 v_mov_b32_e32 v15, v25 4 0.01 1 | |
188 _L5: | |
189 s_andn2_b32 exec_lo, s7, exec_lo 4 0.05 9 | |
190 s_cbranch_execz _L7 4 0.03 4 | |
191 BBF0_10: | |
192 v_sub_f32_e32 v1, v23, v16 4 0.01 1 | |
193 v_sub_f32_e32 v2, v22, v15 4 0.01 1 | |
194 v_sub_f32_e32 v4, v16, v23 4 0.01 1 | |
195 v_sub_f32_e32 v8, v15, v22 4 0.01 1 | |
196 v_mov_b32_e32 v111, v16 4 0.01 1 | |
197 v_madmk_f32 v109, v1, 0x3eaaaaab, v16 4 0.01 1 | |
198 v_madmk_f32 v110, v2, 0x3eaaaaab, v15 4 0.01 1 | |
199 v_madmk_f32 v4, v4, 0x3eaaaaab, v23 4 0.01 1 | |
200 v_madmk_f32 v108, v8, 0x3eaaaaab, v22 4 0.01 1 | |
201 _L7: | |
202 s_andn2_b32 exec_lo, s7, s4 4 0.05 9 | |
203 s_cbranch_execz _L8 4 0.02 3 | |
204 BBF0_11: | |
205 v_mul_f32_e32 v1, v14, v108 4 0.01 1 | |
206 v_mul_f32_e32 v6, v13, v108 4 0.01 1 | |
207 v_mul_f32_e32 v0, v13, v22 4 0.01 1 | |
208 v_mul_f32_e32 v5, v14, v22 4 0.01 1 | |
209 v_mul_f32_e32 v10, v13, v110 4 0.01 1 | |
210 v_mac_f32_e32 v1, v12, v4 4 0.01 1 | |
211 v_mac_f32_e32 v6, v11, v4 4 0.01 1 | |
212 v_mul_f32_e32 v4, v13, v15 4 0.01 1 | |
213 v_mac_f32_e32 v0, v11, v23 4 0.01 1 | |
214 v_mac_f32_e32 v5, v12, v23 4 0.01 1 | |
215 v_add_f32_e32 v27, v19, v1 4 1.83 323 | |
216 v_mul_f32_e32 v1, v14, v110 4 0.01 1 | |
217 v_mac_f32_e32 v10, v11, v109 4 0.01 1 | |
218 v_mac_f32_e32 v4, v11, v111 4 0.01 1 | |
219 v_add_f32_e32 v0, v18, v0 4 0.01 1 | |
220 v_add_f32_e32 v5, v19, v5 4 0.01 1 | |
221 v_mac_f32_e32 v1, v12, v109 4 0.01 1 | |
222 v_add_f32_e32 v23, v18, v6 4 0.01 1 | |
223 v_add_f32_e32 v6, v18, v10 4 0.01 1 | |
224 v_mul_f32_e32 v9, v14, v15 4 0.01 1 | |
225 v_cmp_eq_f32_sdwa s8, v5, v27 src0_sel:DWORD src1_sel:DWORD 4 0.01 1 | |
226 v_add_f32_e32 v8, v19, v1 4 0.01 1 | |
227 v_add_f32_e32 v1, v18, v4 4 0.01 1 | |
228 v_cmp_eq_f32_sdwa s4, v0, v23 src0_sel:DWORD src1_sel:DWORD 4 0.01 1 | |
229 v_cmp_eq_f32_sdwa s9, v0, v6 src0_sel:DWORD src1_sel:DWORD 4 0.01 1 | |
230 v_mac_f32_e32 v9, v12, v111 4 0.01 1 | |
231 v_cmp_eq_f32_e32 vcc_lo, v5, v8 4 0.01 1 | |
232 v_cmp_eq_f32_sdwa s10, v0, v1 src0_sel:DWORD src1_sel:DWORD 4 0.02 2 | |
233 v_add_f32_e32 v2, v19, v9 4 0.08 14 | |
234 s_and_b32 s4, s4, s8 4 0.01 1 | |
235 s_and_b32 vcc_lo, s9, vcc_lo 4 0.01 1 | |
236 s_and_b32 s4, s4, vcc_lo 4 0.06 10 | |
237 v_cmp_eq_f32_e32 vcc_lo, v5, v2 4 0.10 17 | |
238 s_and_b32 vcc_lo, s10, vcc_lo 4 0.01 2 | |
239 s_and_b32 vcc_lo, s4, vcc_lo 4 0.01 2 | |
240 s_and_saveexec_b32 s4, vcc_lo 4 0.06 10 | |
241 v_mov_b32_e32 v106, 0xf2fc6f7c 4 0.01 1 | |
242 v_mov_b32_e32 v107, 0xf2fc6f7c 4 0.01 1 | |
243 v_mov_b32_e32 v42, 0x72fc6f7c 4 0.01 1 | |
244 v_mov_b32_e32 v48, 0x72fc6f7c 4 0.01 1 | |
245 s_andn2_b32 exec_lo, s4, exec_lo 4 0.05 9 | |
246 s_cbranch_execz _L9 4 0.02 3 | |
247 BBF0_12: | |
248 v_sub_f32_e32 v9, v23, v0 4 0.01 1 | |
249 v_sub_f32_e32 v11, v27, v5 4 0.02 4 | |
250 v_mul_f32_e32 v4, v9, v9 4 0.03 5 | |
251 v_mac_f32_e32 v4, v11, v11 4 0.03 5 | |
252 v_cmp_gt_f32_e32 vcc_lo, 0x2b8cbccc, v4 4 0.09 16 | |
253 s_and_saveexec_b32 s8, vcc_lo 4 0.05 9 | |
254 s_cbranch_execz _L10 4 0.06 10 | |
255 BBF0_13: | |
256 v_sub_f32_e32 v4, v6, v23 0 0.00 | |
257 v_sub_f32_e32 v10, v8, v27 0 0.00 | |
258 v_sub_f32_e32 v13, v2, v8 0 0.00 | |
259 v_mul_f32_e32 v14, 0x360637b4, v4 0 0.00 | |
260 v_mul_f32_e32 v4, 0x360637b4, v10 0 0.00 | |
261 v_sub_f32_e32 v10, v1, v6 0 0.00 | |
262 v_madmk_f32 v12, v9, 0x3f7fffde, v14 0 0.00 | |
263 v_madmk_f32 v4, v11, 0x3f7fffde, v4 0 0.00 | |
264 v_madmk_f32 v26, v10, 0x2b8cbccc, v12 0 0.00 | |
265 v_madmk_f32 v25, v13, 0x2b8cbccc, v4 0 0.00 | |
266 _L10: | |
267 s_andn2_b32 exec_lo, s8, exec_lo 4 1.51 267 | |
268 v_mov_b32_e32 v26, v9 4 0.01 1 | |
269 v_mov_b32_e32 v25, v11 4 0.01 2 | |
270 s_mov_b32 exec_lo, s8 4 0.06 10 | |
271 v_sub_f32_e32 v12, v6, v23 4 0.01 1 | |
272 v_sub_f32_e32 v14, v8, v27 4 0.01 1 | |
273 v_sub_f32_e32 v13, v1, v6 4 0.01 1 | |
274 v_sub_f32_e32 v17, v2, v8 4 0.01 1 | |
275 v_add_nc_u32_e32 v64, 0x2000, v3 4 0.03 5 | |
276 v_add_nc_u32_e32 v78, 0x1000, v3 4 0.01 1 | |
277 s_load_dwordx4 s[8:11], s[0:1], 0x80 4 0.01 1 | |
278 s_load_dwordx4 s[20:23], s[0:1], 0xc0 4 0.01 1 | |
279 s_load_dwordx4 s[24:27], s[0:1], 0xa0 4 0.01 1 | |
280 s_mov_b32 s28, exec_lo 4 0.01 1 | |
281 s_mov_b32 s29, exec_lo 4 0.01 1 | |
282 v_mov_b32_e32 v82, v5 4 0.01 1 | |
283 v_mov_b32_e32 v81, v0 4 0.01 1 | |
284 v_mov_b32_e32 v19, 0 4 0.01 1 | |
285 v_mov_b32_e32 v106, 0xf2fc6f7c 4 0.01 1 | |
286 v_mov_b32_e32 v107, 0xf2fc6f7c 4 0.01 1 | |
287 v_mov_b32_e32 v21, 0x72fc6f7c 4 0.01 1 | |
288 v_mov_b32_e32 v22, 0x72fc6f7c 4 0.01 1 | |
289 v_mov_b32_e32 v29, v5 4 0.01 1 | |
290 v_mov_b32_e32 v24, v0 4 0.31 55 | |
291 v_mov_b32_e32 v80, 1.0 4 0.01 1 | |
292 v_mov_b32_e32 v72, 0 4 0.03 5 | |
293 _L30: | |
294 v_cvt_f32_u32_e32 v34, v72 8 0.06 5 | |
295 v_mul_f32_e32 v34, v80, v34 8 0.06 5 | |
296 v_readfirstlane_b32 s30, v34 8 0.06 5 | |
297 v_cmp_eq_f32_e64 vcc_lo, s30, 1.0 8 0.19 16 | |
298 s_andn1_saveexec_b32 s31, vcc_lo 8 0.02 2 | |
299 s_andn2_b32 exec_lo, s31, exec_lo 8 0.03 2 | |
300 s_andn2_b32 s29, s29, exec_lo 8 0.12 10 | |
301 s_cbranch_scc0 _L11 8 0.14 12 | |
302 BBF0_14: | |
303 s_mov_b32 exec_lo, s31 4 0.01 1 | |
304 s_and_b32 exec_lo, exec_lo, s29 4 0.01 1 | |
305 s_mov_b32 s31, exec_lo 4 0.01 1 | |
306 s_mov_b32 s32, exec_lo 4 0.07 11 | |
307 v_mul_f32_e32 v34, v26, v26 4 0.04 6 | |
308 v_mac_f32_e32 v34, v25, v25 4 0.01 2 | |
309 _L22: | |
310 v_add_f32_e32 v35, s30, v80 5 0.04 5 | |
311 v_sub_f32_e32 v36, 1.0, v35 5 0.02 2 | |
312 v_mul_f32_e32 v40, v35, v35 5 0.02 3 | |
313 v_mul_f32_e32 v37, v35, v36 5 0.01 1 | |
314 v_mul_f32_e32 v38, v36, v36 5 0.03 4 | |
315 v_mul_f32_e32 v39, 0x40400000, v37 5 0.01 1 | |
316 v_mul_f32_e64 v43, v37, v12 mul:2 5 0.01 1 | |
317 v_mul_f32_e32 v42, 0x40400000, v38 5 0.01 1 | |
318 v_mul_f32_e64 v48, v37, v14 mul:2 5 0.01 2 | |
319 v_mul_f32_e32 v41, v6, v39 5 0.01 1 | |
320 v_mac_f32_e32 v43, v9, v38 5 1.30 183 | |
321 v_mul_f32_e32 v44, v8, v39 5 0.01 2 | |
322 v_mac_f32_e32 v48, v11, v38 5 0.01 1 | |
323 v_mul_f32_e32 v38, v36, v38 5 0.01 2 | |
324 v_mac_f32_e32 v41, v23, v42 5 0.01 1 | |
325 v_mad_f32 v75, v13, v40, v43 5 0.01 2 | |
326 v_mac_f32_e32 v44, v27, v42 5 0.01 1 | |
327 v_mad_f32 v74, v17, v40, v48 5 0.01 1 | |
328 v_mac_f32_e32 v41, v1, v40 5 0.01 1 | |
329 v_mul_f32_e32 v37, v75, v75 5 0.01 1 | |
330 v_mac_f32_e32 v44, v2, v40 5 0.02 2 | |
331 v_mul_f32_e32 v39, v35, v41 5 0.01 1 | |
332 v_mac_f32_e32 v37, v74, v74 5 0.01 1 | |
333 v_mul_f32_e32 v36, v35, v44 5 0.02 2 | |
334 v_mad_f32 v76, v0, v38, v39 5 0.01 1 | |
335 v_cmp_gt_f32_e32 vcc_lo, 0x2b8cbccc, v37 5 0.01 1 | |
336 v_mad_f32 v73, v5, v38, v36 5 0.10 14 | |
337 s_and_saveexec_b32 s33, vcc_lo 5 0.06 9 | |
338 s_cbranch_execz _L12 5 0.53 75 | |
339 BBF0_15: | |
340 v_add_f32_e32 v40, 0xb58637bd, v35 0 0.00 | |
341 v_cmp_gt_f32_e32 vcc_lo, 1.0, v35 0 0.00 | |
342 v_sub_f32_e32 v37, 1.0, v40 0 0.00 | |
343 v_mul_f32_e32 v47, v40, v40 0 0.00 | |
344 v_mul_f32_e32 v41, v40, v37 0 0.00 | |
345 v_mul_f32_e32 v38, v37, v37 0 0.00 | |
346 v_mul_f32_e64 v43, v41, v12 mul:2 0 0.00 | |
347 v_mul_f32_e64 v44, v41, v14 mul:2 0 0.00 | |
348 v_mac_f32_e32 v43, v9, v38 0 0.00 | |
349 v_mac_f32_e32 v44, v11, v38 0 0.00 | |
350 v_mad_f32 v75, v13, v47, v43 0 0.00 | |
351 v_mad_f32 v74, v17, v47, v44 0 0.00 | |
352 s_and_saveexec_b32 s34, vcc_lo 0 0.00 | |
353 s_cbranch_execz _L13 0 0.00 | |
354 BBF0_16: | |
355 v_mul_f32_e32 v35, 0x40400000, v41 0 0.00 | |
356 v_mul_f32_e32 v36, 0x40400000, v38 0 0.00 | |
357 v_mul_f32_e32 v41, v6, v35 0 0.00 | |
358 v_mul_f32_e32 v42, v8, v35 0 0.00 | |
359 v_mac_f32_e32 v41, v23, v36 0 0.00 | |
360 v_mac_f32_e32 v42, v27, v36 0 0.00 | |
361 v_mac_f32_e32 v41, v1, v47 0 0.00 | |
362 v_mac_f32_e32 v42, v2, v47 0 0.00 | |
363 v_mul_f32_e32 v38, v37, v38 0 0.00 | |
364 v_mul_f32_e32 v35, v40, v41 0 0.00 | |
365 v_mul_f32_e32 v36, v40, v42 0 0.00 | |
366 v_mad_f32 v76, v0, v38, v35 0 0.00 | |
367 v_mad_f32 v73, v5, v38, v36 0 0.00 | |
368 _L13: | |
369 s_andn2_b32 exec_lo, s34, exec_lo 0 0.00 | |
370 v_mov_b32_e32 v40, v35 0 0.00 | |
371 s_mov_b32 exec_lo, s34 0 0.00 | |
372 _L12: | |
373 s_andn2_b32 exec_lo, s33, exec_lo 5 0.06 9 | |
374 v_mov_b32_e32 v40, v35 5 1.45 205 | |
375 s_mov_b32 exec_lo, s33 5 0.01 2 | |
376 s_ff1_i32_b32 s34, exec_lo 5 0.01 1 | |
377 s_mov_b32 s33, exec_lo 5 0.01 1 | |
378 s_lshl_b32 s35, 1, s34 5 0.02 2 | |
379 s_and_b32 s35, s35, exec_lo 5 0.02 2 | |
380 s_and_saveexec_b32 s35, s35 5 0.07 9 | |
381 s_cbranch_execz _L14 5 0.02 3 | |
382 BBF0_17: | |
383 s_bcnt1_i32_b32 s36, s33 5 0.01 2 | |
384 v_mov_b32_e32 v39, s36 5 0.01 1 | |
385 s_waitcnt lgkmcnt(0) 5 0.01 2 | |
386 s_waitcnt_depctr 0xffe3 5 0.10 14 | |
387 buffer_atomic_add v39, off, s[8:11], 0 offset:32 glc 5 0.01 1 | |
388 _L14: | |
389 s_waitcnt_depctr 0xffe3 5 0.08 11 | |
390 s_mov_b32 exec_lo, s35 5 0.01 1 | |
391 s_waitcnt vmcnt(0) 5 2.84 402 | |
392 v_readlane_b32 s34, v39, s34 5 0.01 1 | |
393 v_mbcnt_lo_u32_b32 v39, s33, 0 5 0.01 1 | |
394 v_mov_b32_e32 v46, v3 5 0.02 2 | |
395 v_mov_b32_e32 v47, s30 5 0.01 1 | |
396 v_mov_b32_e32 v48, v40 5 0.01 1 | |
397 v_sub_f32_e32 v41, v73, v29 5 0.01 1 | |
398 v_sub_f32_e32 v43, v76, v24 5 0.01 1 | |
399 v_sub_f32_e32 v44, v40, v19 5 0.01 1 | |
400 v_mul_f32_e32 v45, v75, v75 5 0.02 3 | |
401 v_mul_f32_e32 v42, v41, v41 5 0.02 3 | |
402 v_mac_f32_e32 v45, v74, v74 5 0.01 2 | |
403 v_mad_f32 v18, v43, v43, v42 5 0.01 1 | |
404 v_add_nc_i32 v39, s34, v39 5 0.04 6 | |
405 v_mul_lo_u32 v39, v39, 12 5 0.03 4 | |
406 s_waitcnt lgkmcnt(0) 5 0.01 2 | |
407 s_waitcnt_depctr 0xffe3 5 0.12 17 | |
408 buffer_store_dwordx3 v[46:48], v39, s[20:23], 0 offen glc 5 0.09 13 | |
409 v_mul_f32_e32 v48, v44, v44 5 0.01 1 | |
410 v_sqrt_f32_e32 v46, v18 5 0.03 4 | |
411 v_mul_f32_e32 v39, v34, v48 5 0.01 1 | |
412 v_mul_f32_e32 v47, v48, v45 5 0.03 4 | |
413 v_cmp_lt_f32_e64 s33, v39, 0x2b8cbccc 5 0.01 1 | |
414 v_cmp_lt_f32_e64 s34, v47, 0x2b8cbccc 5 0.01 1 | |
415 v_cmp_ge_f32_e64 s35, v46, 0x358637bd 5 0.12 16 | |
416 s_and_b32 vcc_lo, s33, s34 5 0.01 2 | |
417 s_or_b32 vcc_lo, s35, vcc_lo 5 0.01 2 | |
418 s_and_saveexec_b32 s34, vcc_lo 5 0.06 9 | |
419 s_cbranch_execz _L15 5 0.02 3 | |
420 BBF0_18: | |
421 v_cmp_lt_f32_e64 s33, v18, 0x358637bd 5 0.11 16 | |
422 s_andn1_saveexec_b32 s35, s33 5 0.06 9 | |
423 s_cbranch_execz _L16 5 0.02 3 | |
424 BBF0_19: | |
425 v_mul_f32_e32 v28, v25, v41 5 0.01 1 | |
426 v_mul_f32_e32 v30, v26, v41 5 0.01 1 | |
427 v_max_f32_e32 v39, 0x358637bd, v18 5 0.02 2 | |
428 v_mac_f32_e32 v28, v26, v43 5 0.01 1 | |
429 v_mad_f32 v30, v25, v43, -v30 5 0.01 1 | |
430 v_rcp_f32_e32 v39, v39 5 0.03 3 | |
431 v_mul_f32_e32 v31, v28, v28 5 0.04 5 | |
432 v_mac_f32_e32 v31, v30, v30 5 0.02 2 | |
433 v_mul_f32_e32 v42, v44, v39 5 0.02 2 | |
434 v_sqrt_f32_e32 v31, v31 5 0.07 10 | |
435 v_cmp_nlt_f32_e32 vcc_lo, 0x358637bd, v31 5 0.12 16 | |
436 s_and_saveexec_b32 s36, vcc_lo 5 0.06 9 | |
437 v_mov_b32_e32 v71, 0x3eaaaaab 5 0.01 1 | |
438 v_mov_b32_e32 v35, 0 5 0.01 1 | |
439 s_andn2_b32 exec_lo, s36, exec_lo 5 0.06 9 | |
440 s_cbranch_execz _L17 5 0.02 3 | |
441 BBF0_20: | |
442 v_max_f32_e64 v39, |v28|, |v30| 5 0.01 1 | |
443 v_min_f32_e64 v44, |v28|, |v30| 5 0.04 5 | |
444 s_mov_b32 s37, 0x3caaae5f 5 0.01 1 | |
445 v_min_f32_e32 v48, v28, v30 5 0.04 6 | |
446 v_cmp_gt_f32_e64 vcc_lo, |v30|, |v28| 5 0.01 1 | |
447 v_rcp_f32_e32 v39, v39 5 0.02 3 | |
448 v_mul_f32_e32 v71, v42, v31 5 0.06 8 | |
449 v_mul_f32_e32 v45, v44, v39 5 0.04 5 | |
450 v_mul_f32_e32 v39, v45, v45 5 0.04 5 | |
451 v_madak_f32 v49, s37, v39, 0xbdae5a36 5 1.25 177 | |
452 v_cmp_gt_f32_e64 s37, -v48, v48 5 0.18 25 | |
453 v_madak_f32 v49, v39, v49, 0x3e3876e2 5 0.04 5 | |
454 v_madak_f32 v49, v39, v49, 0xbea91d04 5 0.21 29 | |
455 v_madak_f32 v44, v39, v49, 0x3f7ff738 5 0.21 30 | |
456 v_mul_f32_e32 v39, v45, v44 5 0.21 29 | |
457 v_madak_f32 v39, -2.0, v39, 0x3fc90fdb 5 0.04 5 | |
458 v_cndmask_b32_e32 v50, 0, v39, vcc_lo 5 0.01 1 | |
459 v_max_f32_e32 v39, v28, v30 5 0.01 1 | |
460 v_cmp_gt_f32_e64 vcc_lo, -v28, v28 5 0.01 1 | |
461 v_cndmask_b32_e64 v28, 0, 0xc0490fdb, vcc_lo 5 0.01 1 | |
462 v_mac_f32_e32 v50, v45, v44 5 0.01 2 | |
463 v_cmp_ge_f32_e64 vcc_lo, v39, -v39 5 0.02 3 | |
464 v_add_f32_e32 v28, v50, v28 5 0.10 14 | |
465 s_and_b32 vcc_lo, s37, vcc_lo 5 0.01 1 | |
466 v_cndmask_b32_e64 v30, 0, 0x80000000, vcc_lo 5 0.04 5 | |
467 v_xor_b32_e32 v35, v28, v30 5 0.02 3 | |
468 _L17: | |
469 s_mov_b32 exec_lo, s36 5 0.06 9 | |
470 v_mul_f32_e32 v44, v74, v41 5 0.01 1 | |
471 v_mul_f32_e32 v48, v74, v43 5 0.03 4 | |
472 v_mac_f32_e32 v44, v75, v43 5 0.24 34 | |
473 v_mad_f32 v48, v75, v41, -v48 5 0.01 1 | |
474 v_mul_f32_e32 v31, v44, v44 5 0.04 5 | |
475 v_mac_f32_e32 v31, v48, v48 5 0.04 6 | |
476 v_sqrt_f32_e32 v18, v31 5 0.07 10 | |
477 v_cmp_nlt_f32_e32 vcc_lo, 0x358637bd, v18 5 0.12 16 | |
478 s_and_b32 exec_lo, s36, vcc_lo 5 0.06 9 | |
479 v_mov_b32_e32 v70, 0x3eaaaaab 5 0.01 1 | |
480 v_mov_b32_e32 v77, 0 5 0.01 1 | |
481 s_andn2_b32 exec_lo, s36, exec_lo 5 0.06 9 | |
482 s_cbranch_execz _L18 5 0.03 4 | |
483 BBF0_21: | |
484 v_max_f32_e64 v39, |v44|, |v48| 5 0.04 5 | |
485 s_mov_b32 s37, 0x3caaae5f 5 0.05 7 | |
486 v_cmp_gt_f32_e64 vcc_lo, |v48|, |v44| 5 0.01 1 | |
487 v_mul_f32_e32 v70, v42, v18 5 0.01 1 | |
488 v_rcp_f32_e32 v45, v39 5 0.02 3 | |
489 v_min_f32_e64 v39, |v44|, |v48| 5 0.06 8 | |
490 v_mul_f32_e32 v45, v39, v45 5 0.04 5 | |
491 v_mul_f32_e32 v39, v45, v45 5 0.04 5 | |
492 v_madak_f32 v49, s37, v39, 0xbdae5a36 5 0.04 5 | |
493 v_madak_f32 v49, v39, v49, 0x3e3876e2 5 0.04 5 | |
494 v_madak_f32 v49, v39, v49, 0xbea91d04 5 0.04 5 | |
495 v_madak_f32 v39, v39, v49, 0x3f7ff738 5 1.10 155 | |
496 v_max_f32_e32 v49, v44, v48 5 0.01 1 | |
497 v_mul_f32_e32 v47, v45, v39 5 0.06 9 | |
498 v_madak_f32 v47, -2.0, v47, 0x3fc90fdb 5 0.04 6 | |
499 v_cndmask_b32_e32 v52, 0, v47, vcc_lo 5 0.01 1 | |
500 v_min_f32_e32 v47, v44, v48 5 0.01 1 | |
501 v_cmp_gt_f32_e64 vcc_lo, -v44, v44 5 0.01 1 | |
502 v_cndmask_b32_e64 v50, 0, 0xc0490fdb, vcc_lo 5 0.01 1 | |
503 v_cmp_ge_f32_e64 vcc_lo, v49, -v49 5 0.01 1 | |
504 v_mac_f32_e32 v52, v45, v39 5 0.01 1 | |
505 v_cmp_gt_f32_e64 s37, -v47, v47 5 0.03 4 | |
506 v_add_f32_e32 v39, v52, v50 5 0.11 15 | |
507 s_and_b32 vcc_lo, s37, vcc_lo 5 0.01 1 | |
508 v_cndmask_b32_e64 v45, 0, 0x80000000, vcc_lo 5 0.04 5 | |
509 v_xor_b32_e32 v77, v39, v45 5 0.03 4 | |
510 _L18: | |
511 s_mov_b32 exec_lo, s36 5 0.01 1 | |
512 _L16: | |
513 s_andn2_b32 exec_lo, s35, exec_lo 5 0.07 10 | |
514 v_cndmask_b32_e64 v35, v45, 0, s33 5 0.01 1 | |
515 v_cndmask_b32_e64 v77, v77, 0, s33 5 0.01 1 | |
516 v_cndmask_b32_e64 v71, v71, 0x3eaaaaab, s33 5 0.61 86 | |
517 v_cndmask_b32_e64 v70, v70, 0x3eaaaaab, s33 5 0.01 1 | |
518 s_mov_b32 exec_lo, s35 5 0.07 9 | |
519 v_mul_f32_e32 v42, 0.15915494, v35 5 0.01 1 | |
520 v_mul_f32_e32 v44, 0.15915494, v77 5 0.05 7 | |
521 v_cos_f32_e32 v47, v42 5 0.03 4 | |
522 v_cos_f32_e32 v42, v44 5 0.07 10 | |
523 v_mul_f32_e32 v44, v47, v42 5 0.04 5 | |
524 v_cmp_lt_f32_e64 s33, v44, 0 5 0.11 16 | |
525 s_andn2_b32 exec_lo, s35, s33 5 0.06 9 | |
526 s_cbranch_execz _L19 5 0.06 8 | |
527 BBF0_22: | |
528 v_add_f32_e32 v39, 1.0, v47 5 0.01 1 | |
529 v_add_f32_e32 v44, 1.0, v42 5 0.01 1 | |
530 v_mul_f32_e32 v48, 0.15915494, v35 5 0.01 1 | |
531 v_mul_f32_e32 v49, 0.15915494, v77 5 0.01 1 | |
532 v_mul_f32_e32 v51, v70, v71 5 0.01 1 | |
533 v_max_f32_e32 v39, 0x3089705f, v39 5 0.02 2 | |
534 v_max_f32_e32 v44, 0x3089705f, v44 5 0.01 1 | |
535 v_sin_f32_e32 v48, v48 5 0.02 3 | |
536 v_add_f32_e32 v53, v77, v35 5 0.02 2 | |
537 v_rcp_f32_e32 v39, v39 5 0.03 4 | |
538 v_sin_f32_e32 v49, v49 5 0.03 3 | |
539 v_mul_f32_e64 v30, v48, v71 mul:2 5 0.01 1 | |
540 v_mul_f32_e32 v42, v42, v48 5 0.02 3 | |
541 v_mul_f32_e32 v50, 0x3f2aaaab, v39 5 0.01 1 | |
542 v_madmk_f32 v52, v39, 0xbf2aaaab, v71 5 1.03 146 | |
543 v_rcp_f32_e32 v39, v44 5 0.05 7 | |
544 v_mul_f32_e32 v71, v53, v53 5 0.01 1 | |
545 v_mac_f32_e32 v42, v47, v49 5 0.01 1 | |
546 v_mul_f32_e32 v55, v50, v48 5 0.01 1 | |
547 v_mac_f32_e32 v30, v49, v70 5 0.01 1 | |
548 v_mul_f32_e32 v54, v52, v52 5 0.02 3 | |
549 v_mac_f32_e32 v55, v48, v50 5 0.01 1 | |
550 v_mac_f32_e32 v30, v49, v70 5 0.01 1 | |
551 v_mul_f32_e32 v44, 0x3f2aaaab, v39 5 0.01 1 | |
552 v_madmk_f32 v57, v39, 0xbf2aaaab, v70 5 0.01 1 | |
553 v_mul_f32_e64 v70, |v53|, v71 5 0.01 2 | |
554 v_mad_f32 v30, -v51, v42, v30 5 0.01 2 | |
555 v_mac_f32_e32 v55, v49, v44 5 0.01 1 | |
556 v_mul_f32_e32 v28, v50, v44 5 0.01 1 | |
557 v_mac_f32_e32 v54, v57, v57 5 0.02 2 | |
558 v_mac_f32_e32 v55, v49, v44 5 0.02 2 | |
559 v_sqrt_f32_e32 v49, v54 5 0.03 3 | |
560 v_mad_f32 v55, -v28, v42, v55 5 0.01 1 | |
561 v_sub_f32_e32 v28, v35, v77 5 0.03 4 | |
562 v_mul_f32_e32 v39, 0x3e19999a, v55 5 0.01 1 | |
563 v_mul_f32_e64 v50, |v28|, 0x3bf5c28f 5 0.01 1 | |
564 v_mul_f32_e64 v47, |v28|, 0x3d8f5c29 5 0.90 128 | |
565 v_mad_f32 v39, v30, 0x3e19999a, -v39 5 0.01 1 | |
566 v_madmk_f32 v51, v70, 0x369b3073, v50 5 0.01 1 | |
567 v_mad_f32 v47, |v53|, 0x3ba3d70a, v47 5 0.02 2 | |
568 v_mul_f32_e64 v28, |v39|, 0x3fc66666 5 0.04 5 | |
569 v_mac_f32_e32 v28, v51, v71 5 0.04 5 | |
570 v_mad_f32 v39, v47, v49, v28 5 0.01 1 | |
571 _L19: | |
572 s_andn2_b32 exec_lo, s35, exec_lo 5 0.06 9 | |
573 v_cndmask_b32_e64 v39, v18, 2.0, s33 5 0.01 1 | |
574 s_mov_b32 exec_lo, s35 5 0.06 9 | |
575 v_mul_f32_e32 v39, v46, v39 5 0.04 5 | |
576 v_cmp_le_f32_e64 s33, v39, 0x3e800000 5 0.01 1 | |
577 v_cmp_ge_f32_e32 vcc_lo, 0x37800000, v80 5 0.12 17 | |
578 s_or_b32 vcc_lo, s33, vcc_lo 5 0.01 2 | |
579 s_andn1_saveexec_b32 s35, vcc_lo 5 0.01 2 | |
580 s_andn2_b32 exec_lo, s35, exec_lo 5 0.01 1 | |
581 s_andn2_b32 s32, s32, exec_lo 5 0.07 10 | |
582 s_cbranch_scc0 _L20 5 0.90 128 | |
583 BBF0_23: | |
584 s_and_b32 exec_lo, s35, s32 1 0.00 1 | |
585 _L15: | |
586 s_andn2_b32 exec_lo, s34, exec_lo 1 0.00 1 | |
587 s_and_b32 exec_lo, s34, s32 1 0.28 197 | |
588 s_ff1_i32_b32 s34, exec_lo 1 0.00 1 | |
589 s_mov_b32 s33, exec_lo 1 0.00 1 | |
590 s_lshl_b32 s35, 1, s34 1 0.00 1 | |
591 v_lshlrev_b32_e32 v72, 1, v72 1 0.00 1 | |
592 s_and_b32 s35, s35, exec_lo 1 0.00 1 | |
593 v_ldexp_f32 v80, v80, -1 1 0.01 5 | |
594 s_and_saveexec_b32 s35, s35 1 0.01 9 | |
595 s_cbranch_execz _L21 1 0.00 3 | |
596 BBF0_24: | |
597 s_bcnt1_i32_b32 s36, s33 1 0.00 2 | |
598 v_mov_b32_e32 v32, s36 1 0.00 1 | |
599 s_waitcnt_depctr 0xffe3 1 0.02 16 | |
600 buffer_atomic_add v32, off, s[8:11], 0 offset:32 glc 1 0.00 1 | |
601 _L21: | |
602 s_waitcnt_depctr 0xffe3 1 0.02 11 | |
603 s_mov_b32 exec_lo, s35 1 0.01 9 | |
604 v_mbcnt_lo_u32_b32 v35, s33, 0 1 0.00 1 | |
605 s_waitcnt vmcnt(0) 1 0.28 197 | |
606 v_readlane_b32 s33, v32, s34 1 0.00 1 | |
607 v_cvt_f32_u32_e32 v65, v72 1 0.00 1 | |
608 v_mov_b32_e32 v66, v80 1 0.00 3 | |
609 v_add_nc_i32 v32, s33, v35 1 0.01 5 | |
610 v_mul_lo_u32 v35, v32, 12 1 0.01 4 | |
611 s_waitcnt_depctr 0xffe3 1 0.03 19 | |
612 buffer_store_dwordx3 v[64:66], v35, s[20:23], 0 offen glc 1 0.00 1 | |
613 s_branch _L22 1 0.03 21 | |
614 _L20: | |
615 s_mov_b32 exec_lo, s31 4 0.71 126 | |
616 v_add_nc_u32_e32 v19, 1, v72 4 0.01 1 | |
617 s_mov_b32 s30, exec_lo 4 0.01 1 | |
618 s_ff1_i32_b32 s31, exec_lo 4 0.01 2 | |
619 v_ffbl_b32_e32 v25, v19 4 0.01 1 | |
620 s_lshl_b32 s32, 1, s31 4 0.01 2 | |
621 s_and_b32 s32, s32, exec_lo 4 0.01 2 | |
622 v_min_u32_e32 v25, 32, v25 4 0.03 5 | |
623 v_lshlrev_b32_e64 v26, v25, 1 4 0.03 5 | |
624 v_cvt_f32_u32_e32 v26, v26 4 0.01 1 | |
625 v_lshrrev_b32_e32 v72, v25, v19 4 0.02 4 | |
626 v_mul_f32_e32 v80, v80, v26 4 0.01 1 | |
627 s_and_saveexec_b32 s32, s32 4 0.05 9 | |
628 s_cbranch_execz _L23 4 0.02 3 | |
629 BBF0_25: | |
630 s_bcnt1_i32_b32 s33, s30 4 0.01 2 | |
631 v_mov_b32_e32 v25, s33 4 0.01 1 | |
632 s_waitcnt_depctr 0xffe3 4 0.09 16 | |
633 buffer_atomic_add v25, off, s[8:11], 0 offset:32 glc 4 0.01 1 | |
634 _L23: | |
635 s_waitcnt_depctr 0xffe3 4 0.06 11 | |
636 s_mov_b32 exec_lo, s32 4 0.07 12 | |
637 v_sub_f32_e32 v28, v77, v35 4 0.01 1 | |
638 v_add_f32_e32 v26, v35, v77 4 0.01 1 | |
639 s_mov_b32 s32, 0x3b21e3b8 4 0.01 1 | |
640 s_mov_b32 s33, 0xb84c68e7 4 0.01 1 | |
641 v_cvt_f32_u32_e32 v79, v72 4 0.01 1 | |
642 v_mul_f32_e32 v31, v28, v28 4 0.01 1 | |
643 v_mul_f32_e32 v30, v26, v26 4 0.02 4 | |
644 v_mad_f32 v33, v31, 0xbccccccd, 1.0 4 0.01 1 | |
645 s_waitcnt vmcnt(0) 4 1.06 187 | |
646 v_readlane_b32 s31, v25, s31 4 0.01 2 | |
647 v_mul_f32_e32 v32, v31, v31 4 0.01 2 | |
648 v_madak_f32 v34, s32, v31, 0xbd2aaaab 4 0.01 1 | |
649 v_madak_f32 v44, s33, v31, 0x3a088889 4 0.01 1 | |
650 v_mbcnt_lo_u32_b32 v25, s30, 0 4 0.01 1 | |
651 v_madmk_f32 v33, v32, 0x39b3719e, v33 4 0.01 1 | |
652 v_madmk_f32 v47, v32, 0xb81c6fca, v34 4 0.01 1 | |
653 v_mul_f32_e32 v34, v31, v32 4 0.01 1 | |
654 v_madmk_f32 v44, v30, 0xb6500cec, v44 4 0.02 4 | |
655 v_madmk_f32 v48, v34, 0xb601da25, v33 4 0.01 1 | |
656 v_mac_f32_e32 v47, v44, v30 4 0.01 2 | |
657 s_mov_b32 s30, 0xbc6a0ea1 4 0.01 1 | |
658 s_mov_b32 s32, 0x3979a934 4 0.01 1 | |
659 s_mov_b32 s33, 0x388fa325 4 0.01 1 | |
660 v_mac_f32_e32 v48, v47, v30 4 0.03 5 | |
661 v_ldexp_f32 v42, v48, -2 4 0.01 1 | |
662 v_add_nc_i32 v25, s31, v25 4 0.72 127 | |
663 v_madak_f32 v33, s30, v31, 0x40c00000 4 0.01 2 | |
664 v_madak_f32 v44, s33, v31, 0xba3b3ee7 4 0.01 1 | |
665 v_madak_f32 v31, s32, v31, 0xbdcccccd 4 0.01 1 | |
666 v_mul_lo_u32 v25, v25, 12 4 0.04 6 | |
667 v_madmk_f32 v33, v32, 0xb8c28a7f, v33 4 0.01 1 | |
668 v_madmk_f32 v44, v30, 0xb70526e7, v44 4 0.01 1 | |
669 v_madmk_f32 v31, v32, 0x378e44a1, v31 4 0.01 1 | |
670 v_rcp_f32_e32 v32, v42 4 0.02 4 | |
671 v_ldexp_f32 v42, v46, -3 4 0.01 1 | |
672 v_madmk_f32 v47, v34, 0x3494ab4c, v33 4 0.01 2 | |
673 v_mac_f32_e32 v31, v44, v30 4 0.03 5 | |
674 v_mac_f32_e32 v47, v31, v30 4 0.01 1 | |
675 s_waitcnt_depctr 0xffe3 4 0.09 16 | |
676 buffer_store_dwordx3 v[78:80], v25, s[20:23], 0 offen glc 4 0.09 16 | |
677 v_mul_f32_e32 v25, v28, v47 4 0.01 1 | |
678 v_mul_f32_e32 v28, v42, v32 4 0.02 4 | |
679 v_ldexp_f32 v33, v25, -1 4 0.01 1 | |
680 v_sqrt_f32_e32 v28, v28 4 0.02 4 | |
681 v_cmp_gt_f32_e64 s30, 0x3a83126f, |v25| 4 0.82 144 | |
682 s_andn1_saveexec_b32 s31, s30 4 0.05 9 | |
683 s_cbranch_execz _L24 4 0.02 3 | |
684 BBF0_26: | |
685 v_mad_f32 v44, v25, -0.5, v26 4 0.01 1 | |
686 v_mov_b32_e32 v77, v25 4 0.03 5 | |
687 v_sqrt_f32_e64 v31, |v44| 4 0.02 4 | |
688 v_add_f32_e32 v32, v25, v44 4 0.05 9 | |
689 v_sqrt_f32_e64 v47, |v32| 4 0.02 4 | |
690 v_mul_f32_e32 v70, v44, v31 4 0.04 7 | |
691 v_mad_f32 v34, v32, v47, -v70 4 0.02 3 | |
692 v_rcp_f32_e32 v32, v25 4 0.03 5 | |
693 v_mul_f32_e32 v42, 0x3f2aaaab, v34 4 0.05 8 | |
694 v_mul_f32_e32 v18, v42, v32 4 0.01 1 | |
695 _L24: | |
696 s_andn2_b32 exec_lo, s31, exec_lo 4 0.05 9 | |
697 v_mov_b32_e32 v70, 0 4 0.01 1 | |
698 v_mov_b32_e32 v77, 0 4 0.01 1 | |
699 v_mov_b32_e32 v44, 0 4 0.01 1 | |
700 v_mov_b32_e32 v34, 0 4 0.01 2 | |
701 v_sqrt_f32_e64 v18, |v26| 4 0.03 5 | |
702 s_mov_b32 exec_lo, s31 4 0.05 9 | |
703 v_mul_f32_e32 v28, v28, v18 4 0.01 2 | |
704 v_mov_b32_e32 v18, 0 4 0.01 1 | |
705 s_movk_i32 s34, 0xffff 4 0.01 1 | |
706 v_rcp_f32_e32 v49, v77 4 0.02 4 | |
707 s_mov_b32 s32, exec_lo 4 0.01 1 | |
708 v_ceil_f32_e32 v28, v28 4 0.01 1 | |
709 s_mov_b32 s33, exec_lo 4 0.24 43 | |
710 v_rcp_f32_e32 v48, v48 4 0.02 4 | |
711 v_max_f32_e32 v28, 1.0, v28 4 0.01 2 | |
712 v_cmp_eq_f32_e64 s31, v40, 1.0 4 0.02 2 | |
713 v_cvt_u32_f32_e32 v71, v28 4 0.01 1 | |
714 v_rcp_f32_e32 v28, v28 4 0.02 4 | |
715 s_nop 0 4 0.01 1 | |
716 s_nop 0 4 0.01 1 | |
717 _L29: | |
718 v_cmp_eq_i32_e64 s34, s34, 0 13 0.03 1 | |
719 v_add_co_ci_u32_e64 v20, vcc_lo, v18, 0, s34 13 0.09 5 | |
720 v_cmp_gt_u32_e32 vcc_lo, v71, v20 13 0.31 17 | |
721 s_and_saveexec_b32 s35, vcc_lo 13 0.04 2 | |
722 s_andn2_b32 exec_lo, s35, exec_lo 13 0.02 1 | |
723 s_andn2_b32 s33, s33, exec_lo 13 0.18 10 | |
724 s_cbranch_scc0 _L25 13 0.18 9 | |
725 BBF0_27: | |
726 s_and_b32 exec_lo, s35, s33 9 0.12 9 | |
727 v_add_co_ci_u32_e64 v18, vcc_lo, v18, 1, s34 9 0.06 5 | |
728 v_cmp_eq_i32_e32 vcc_lo, v71, v18 9 0.22 17 | |
729 s_and_b32 vcc_lo, vcc_lo, s31 9 0.03 2 | |
730 s_andn1_saveexec_b32 s34, vcc_lo 9 0.11 9 | |
731 s_cbranch_execz _L26 9 0.12 9 | |
732 BBF0_28: | |
733 v_cvt_f32_u32_e32 v18, v18 6 0.06 7 | |
734 v_mul_f32_e32 v57, v18, v28 6 0.01 1 | |
735 s_andn1_saveexec_b32 s35, s30 6 0.08 9 | |
736 s_cbranch_execz _L27 6 0.04 4 | |
737 BBF0_29: | |
738 v_mad_f32 v18, v34, v57, v70 6 0.05 6 | |
739 v_log_f32_e64 v30, |v18| 6 0.67 79 | |
740 v_cmp_gt_f32_e32 vcc_lo, 0, v18 6 0.01 1 | |
741 v_cndmask_b32_e64 v42, 0, -1, vcc_lo 6 0.01 1 | |
742 v_cmp_lt_f32_e32 vcc_lo, 0, v18 6 0.03 3 | |
743 v_mul_f32_e32 v18, 0x3f2aaaab, v30 6 0.02 1 | |
744 v_add_co_ci_u32_e64 v30, vcc_lo, v42, 0, vcc_lo 6 0.03 3 | |
745 v_exp_f32_e32 v18, v18 6 0.03 3 | |
746 v_cvt_f32_i32_e32 v52, v30 6 0.07 8 | |
747 v_mul_f32_e32 v18, v52, v18 6 0.05 5 | |
748 v_sub_f32_e32 v18, v18, v44 6 0.05 5 | |
749 v_mul_f32_e32 v57, v18, v49 6 0.01 1 | |
750 _L27: | |
751 s_mov_b32 exec_lo, s35 6 0.08 9 | |
752 v_add_f32_e64 v55, v57, -1.0 div:2 6 0.03 3 | |
753 v_mul_f32_e32 v18, v57, v57 6 0.01 1 | |
754 v_add_f32_e64 v59, v57, -2.0 div:2 6 0.01 1 | |
755 v_ldexp_f32 v54, v57, -1 6 0.02 2 | |
756 v_mad_f32 v52, v25, v55, v26 6 0.01 1 | |
757 v_mul_f32_e32 v18, v25, v18 6 0.03 3 | |
758 v_mad_f32 v59, v33, v59, v26 6 0.04 5 | |
759 v_mul_f32_e32 v55, v57, v52 6 0.01 1 | |
760 v_ldexp_f32 v52, v18, -1 6 0.02 1 | |
761 v_mad_f32 v54, v59, v54, -v35 6 0.02 2 | |
762 v_mul_f32_e32 v56, v55, v55 6 0.48 57 | |
763 v_mul_f32_e64 v61, v18, v52 div:2 6 0.01 1 | |
764 v_mul_f32_e64 v30, v18, v55 div:2 6 0.01 1 | |
765 v_mul_f32_e32 v54, 0.15915494, v54 6 0.02 2 | |
766 v_mul_f32_e32 v46, v56, v56 6 0.01 1 | |
767 v_mul_f32_e32 v42, v56, v61 6 0.01 1 | |
768 v_mac_f32_e32 v30, v55, v52 6 0.02 2 | |
769 v_mul_f32_e32 v58, v61, v61 6 0.02 2 | |
770 v_mad_f32 v62, v61, 0xbbcccccd, 1.0 6 0.01 1 | |
771 v_mac_f32_e32 v42, v56, v61 6 0.01 1 | |
772 v_mul_f32_e64 v65, v30, v56 mul:2 6 0.01 1 | |
773 v_mul_f32_e32 v50, v30, v61 6 0.02 2 | |
774 v_mac_f32_e32 v42, v30, v30 6 0.01 1 | |
775 v_mul_f32_e32 v63, v30, v65 6 0.01 1 | |
776 v_mul_f32_e32 v69, v55, v50 6 0.02 2 | |
777 v_mul_f32_e32 v60, 0x38c30c31, v42 6 0.01 1 | |
778 v_mac_f32_e32 v63, v46, v61 6 0.01 1 | |
779 v_mac_f32_e32 v69, v55, v50 6 0.02 2 | |
780 v_madmk_f32 v60, v46, 0x3a088889, v60 6 0.01 1 | |
781 v_mac_f32_e32 v63, v42, v56 6 0.01 1 | |
782 v_mac_f32_e32 v69, v42, v52 6 0.01 1 | |
783 v_mul_f32_e64 v42, v18, v61 div:2 6 0.01 1 | |
784 v_mul_f32_e32 v61, v55, v65 6 0.01 1 | |
785 v_madmk_f32 v50, v58, 0x3797b426, v60 6 1.04 122 | |
786 v_madmk_f32 v60, v56, 0xbd2aaaab, v62 6 0.02 2 | |
787 v_mad_f32 v58, v52, v55, v30 6 0.01 1 | |
788 v_mac_f32_e32 v65, v30, v56 6 0.01 1 | |
789 v_mul_f32_e32 v67, 0x39c30c31, v42 6 0.01 1 | |
790 v_mul_f32_e32 v59, 0x3672b9d6, v69 6 0.01 1 | |
791 v_add_f32_e32 v50, v60, v50 6 0.01 1 | |
792 v_mul_f32_e32 v58, v58, v55 6 0.01 1 | |
793 v_mul_f32_e32 v55, v55, v65 6 0.01 1 | |
794 v_mul_f32_e32 v42, v56, v46 6 0.01 1 | |
795 v_mac_f32_e32 v61, v46, v52 6 0.01 1 | |
796 v_madmk_f32 v60, v63, 0xb521d13a, v50 6 0.01 1 | |
797 v_madmk_f32 v63, v58, 0x3b088889, v67 6 0.01 1 | |
798 v_mac_f32_e32 v55, v52, v46 6 0.01 1 | |
799 v_sin_f32_e32 v52, v54 6 0.03 3 | |
800 v_madmk_f32 v30, v61, 0x379c09c1, v59 6 0.01 1 | |
801 v_mul_f32_e32 v59, v46, v46 6 0.01 1 | |
802 v_mad_f32 v63, v18, 0x3d2aaaab, -v63 6 0.01 1 | |
803 v_madmk_f32 v18, v42, 0xb6500d01, v60 6 0.01 1 | |
804 v_mul_f32_e32 v42, v57, v48 6 0.01 1 | |
805 v_mul_f32_e32 v55, v55, v56 6 0.01 1 | |
806 v_cos_f32_e32 v56, v54 6 0.03 3 | |
807 v_add_f32_e32 v30, v63, v30 6 0.71 84 | |
808 v_madmk_f32 v18, v59, 0x3238ef1d, v18 6 0.03 3 | |
809 v_mul_f32_e32 v52, v42, v52 6 0.03 3 | |
810 v_madmk_f32 v55, v55, 0xb3b8ef1d, v30 6 0.02 2 | |
811 v_mul_f32_e32 v30, v18, v52 6 0.02 2 | |
812 v_mul_f32_e32 v56, v42, v56 6 0.02 2 | |
813 v_mul_f32_e32 v59, v55, v52 6 0.03 3 | |
814 v_mad_f32 v30, -v55, v56, -v30 6 0.02 1 | |
815 v_mad_f32 v59, v18, v56, -v59 6 0.03 3 | |
816 v_mul_f32_e32 v18, v41, v30 6 0.01 1 | |
817 v_mul_f32_e32 v30, v43, v30 6 0.07 8 | |
818 v_mad_f32 v18, v43, v59, -v18 6 0.01 1 | |
819 v_mac_f32_e32 v30, v41, v59 6 0.04 4 | |
820 v_add_f32_e32 v83, v24, v18 6 0.01 1 | |
821 v_add_f32_e32 v84, v29, v30 6 0.01 1 | |
822 _L26: | |
823 s_andn2_b32 exec_lo, s34, exec_lo 9 0.12 9 | |
824 v_mov_b32_e32 v84, v2 9 0.01 1 | |
825 v_mov_b32_e32 v83, v1 9 0.01 1 | |
826 s_mov_b32 exec_lo, s34 9 0.01 1 | |
827 s_ff1_i32_b32 s35, exec_lo 9 0.01 1 | |
828 s_mov_b32 s34, exec_lo 9 0.18 14 | |
829 s_lshl_b32 s36, 1, s35 9 0.03 2 | |
830 s_and_b32 s36, s36, exec_lo 9 0.03 2 | |
831 s_and_saveexec_b32 s36, s36 9 0.13 10 | |
832 s_cbranch_execz _L28 9 0.04 3 | |
833 BBF0_30: | |
834 s_bcnt1_i32_b32 s37, s34 9 0.03 2 | |
835 v_mov_b32_e32 v18, s37 9 0.01 1 | |
836 s_waitcnt_depctr 0xffe3 9 0.21 16 | |
837 buffer_atomic_add v18, off, s[8:11], 0 offset:28 glc 9 0.01 1 | |
838 _L28: | |
839 s_waitcnt_depctr 0xffe3 9 0.14 11 | |
840 s_mov_b32 exec_lo, s36 9 0.12 9 | |
841 v_mbcnt_lo_u32_b32 v30, s34, 0 9 0.01 1 | |
842 s_waitcnt vmcnt(0) 9 2.99 235 | |
843 v_readlane_b32 s34, v18, s35 9 0.01 1 | |
844 v_min3_f32 v22, v81, v83, v22 9 0.02 1 | |
845 v_min3_f32 v21, v82, v84, v21 9 0.01 1 | |
846 v_max3_f32 v107, v81, v83, v107 9 0.01 1 | |
847 v_max3_f32 v106, v82, v84, v106 9 0.02 1 | |
848 v_add_nc_i32 v18, s34, v30 9 0.14 11 | |
849 s_movk_i32 s34, 0x0 9 0.01 1 | |
850 v_mul_lo_u32 v18, v18, 24 9 0.05 4 | |
851 s_waitcnt_depctr 0xffe3 9 0.24 18 | |
852 buffer_store_dword v7, v18, s[24:27], 0 offen glc 9 0.01 1 | |
853 buffer_store_dwordx4 v[81:84], v18, s[24:27], 0 offen offset:8 glc 9 0.21 16 | |
854 v_mov_b32_e32 v82, v84 9 0.01 1 | |
855 v_mov_b32_e32 v18, v20 9 0.01 1 | |
856 v_mov_b32_e32 v81, v83 9 0.01 1 | |
857 s_branch _L29 9 0.32 24 | |
858 _L25: | |
859 s_mov_b32 exec_lo, s32 4 0.05 9 | |
860 v_mov_b32_e32 v19, v40 4 0.01 1 | |
861 v_mov_b32_e32 v25, v74 4 0.01 1 | |
862 v_mov_b32_e32 v26, v75 4 0.01 1 | |
863 v_mov_b32_e32 v29, v73 4 0.01 1 | |
864 v_mov_b32_e32 v24, v76 4 0.01 1 | |
865 s_branch _L30 4 0.48 84 | |
866 _L11: | |
867 s_mov_b32 exec_lo, s28 4 0.05 9 | |
868 v_mov_b32_e32 v42, v21 4 0.01 1 | |
869 v_mov_b32_e32 v48, v22 4 0.01 1 | |
870 _L9: | |
871 s_mov_b32 exec_lo, s4 4 0.01 1 | |
872 _L8: | |
873 s_andn2_b32 exec_lo, s7, exec_lo 4 0.06 10 | |
874 s_cbranch_execz _L31 4 0.01 1 | |
875 BBF0_31: | |
876 v_add_nc_u32_e32 v5, -4, v5 0 0.00 | |
877 s_waitcnt_depctr 0xffe3 0 0.00 | |
878 tbuffer_load_format_x v5, v5, s[12:15], 0 format:[BUF_FMT_32_FLOAT] offen 0 0.00 | |
879 s_waitcnt vmcnt(0) 0 0.00 | |
880 v_ldexp_f32 v20, v5, -1 0 0.00 | |
881 s_andn1_saveexec_b32 s4, s6 0 0.00 | |
882 s_cbranch_execz _L32 0 0.00 | |
883 BBF0_32: | |
884 v_add_nc_u32_e32 v9, 1, v3 0 0.00 | |
885 v_lshrrev_b32_e32 v9, 2, v9 0 0.00 | |
886 v_add_lshl_u32 v10, s2, v9, 2 0 0.00 | |
887 tbuffer_load_format_x v10, v10, s[12:15], 0 format:[BUF_FMT_32_FLOAT] offen 0 0.00 | |
888 v_add_nc_u32_e32 v0, 8, v0 0 0.00 | |
889 v_and_b32_e32 v17, 24, v0 0 0.00 | |
890 v_mul_lo_u32 v0, v9, 20 0 0.00 | |
891 tbuffer_load_format_x v0, v0, s[16:19], 0 format:[BUF_FMT_32_FLOAT] offen offset:8 0 0.00 | |
892 s_waitcnt vmcnt(1) 0 0.00 | |
893 v_bfe_u32 v9, v10, 0, v17 0 0.00 | |
894 v_lshrrev_b32_e32 v10, v17, v10 0 0.00 | |
895 v_and_b32_e32 v21, 0x30303, v9 0 0.00 | |
896 v_lshrrev_b32_e32 v24, 2, v9 0 0.00 | |
897 v_lshrrev_b32_e32 v9, 3, v9 0 0.00 | |
898 v_and_b32_e32 v17, 3, v10 0 0.00 | |
899 v_and_b32_e32 v24, 0x10101, v24 0 0.00 | |
900 v_and_b32_e32 v9, 0x10101, v9 0 0.00 | |
901 v_add_nc_u32_e32 v21, v21, v24 0 0.00 | |
902 v_mul_u32_u24_e32 v24, 15, v9 0 0.00 | |
903 v_and_b32_e32 v24, v21, v24 0 0.00 | |
904 v_add_nc_u32_e32 v9, v21, v24 0 0.00 | |
905 v_and_b32_e32 v21, 8, v10 0 0.00 | |
906 v_lshrrev_b32_e32 v24, 8, v9 0 0.00 | |
907 v_cmp_eq_i32_e32 vcc_lo, 0, v21 0 0.00 | |
908 v_add_nc_u32_e32 v9, v9, v24 0 0.00 | |
909 v_add_nc_u32_sdwa v9, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 0 0.00 | |
910 v_and_b32_e32 v9, 63, v9 0 0.00 | |
911 s_waitcnt vmcnt(0) 0 0.00 | |
912 v_add3_u32 v0, v9, v0, s3 0 0.00 | |
913 s_and_saveexec_b32 s2, vcc_lo 0 0.00 | |
914 s_cbranch_execz _L33 0 0.00 | |
915 BBF0_33: | |
916 v_lshlrev_b32_e32 v0, 2, v0 0 0.00 | |
917 tbuffer_load_format_xy v[24:25], v0, s[12:15], 0 format:[BUF_FMT_32_32_FLOAT] offen 0 0.00 | |
918 v_cmp_gt_u32_e32 vcc_lo, 2, v17 0 0.00 | |
919 s_waitcnt vmcnt(0) 0 0.00 | |
920 v_cvt_f32_i32_sdwa v30, sext(v24) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 0 0.00 | |
921 v_cvt_f32_i32_sdwa v31, sext(v24) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 0 0.00 | |
922 v_cvt_f32_i32_sdwa v32, sext(v25) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 0 0.00 | |
923 v_cvt_f32_i32_sdwa v33, sext(v25) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 0 0.00 | |
924 s_and_saveexec_b32 s3, vcc_lo 0 0.00 | |
925 v_mov_b32_e32 v2, 0 0 0.00 | |
926 v_mov_b32_e32 v1, 0 0 0.00 | |
927 v_mov_b32_e32 v42, 0 0 0.00 | |
928 v_mov_b32_e32 v41, 0 0 0.00 | |
929 s_andn2_b32 exec_lo, s3, exec_lo 0 0.00 | |
930 s_cbranch_execz _L34 0 0.00 | |
931 BBF0_34: | |
932 tbuffer_load_format_x v26, v0, s[12:15], 0 format:[BUF_FMT_32_FLOAT] offen offset:8 0 0.00 | |
933 v_cmp_ne_i32_e32 vcc_lo, 3, v17 0 0.00 | |
934 s_waitcnt vmcnt(0) 0 0.00 | |
935 v_cvt_f32_i32_sdwa v1, sext(v26) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 0 0.00 | |
936 v_cvt_f32_i32_sdwa v2, sext(v26) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 0 0.00 | |
937 s_and_saveexec_b32 s6, vcc_lo 0 0.00 | |
938 v_mov_b32_e32 v42, 0 0 0.00 | |
939 v_mov_b32_e32 v41, 0 0 0.00 | |
940 s_andn2_b32 exec_lo, s6, exec_lo 0 0.00 | |
941 s_cbranch_execz _L34 0 0.00 | |
942 BBF0_35: | |
943 tbuffer_load_format_x v0, v0, s[12:15], 0 format:[BUF_FMT_32_FLOAT] offen offset:12 0 0.00 | |
944 s_waitcnt vmcnt(0) 0 0.00 | |
945 v_cvt_f32_i32_sdwa v41, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 0 0.00 | |
946 v_cvt_f32_i32_sdwa v42, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 0 0.00 | |
947 _L34: | |
948 s_mov_b32 exec_lo, s3 0 0.00 | |
949 _L33: | |
950 s_andn2_b32 exec_lo, s2, exec_lo 0 0.00 | |
951 s_cbranch_execz _L35 0 0.00 | |
952 BBF0_36: | |
953 v_lshlrev_b32_e32 v0, 2, v0 0 0.00 | |
954 s_waitcnt_depctr 0xffe3 0 0.00 | |
955 tbuffer_load_format_xyzw v[30:33], v0, s[12:15], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen 0 0.00 | |
956 v_cmp_gt_u32_e32 vcc_lo, 2, v17 0 0.00 | |
957 s_and_saveexec_b32 s3, vcc_lo 0 0.00 | |
958 v_mov_b32_e32 v2, 0 0 0.00 | |
959 v_mov_b32_e32 v1, 0 0 0.00 | |
960 v_mov_b32_e32 v42, 0 0 0.00 | |
961 v_mov_b32_e32 v41, 0 0 0.00 | |
962 s_andn2_b32 exec_lo, s3, exec_lo 0 0.00 | |
963 s_cbranch_execz _L35 0 0.00 | |
964 BBF0_37: | |
965 tbuffer_load_format_xy v[1:2], v0, s[12:15], 0 format:[BUF_FMT_32_32_FLOAT] offen offset:16 0 0.00 | |
966 v_cmp_ne_i32_e32 vcc_lo, 3, v17 0 0.00 | |
967 s_and_saveexec_b32 s6, vcc_lo 0 0.00 | |
968 v_mov_b32_e32 v42, 0 0 0.00 | |
969 v_mov_b32_e32 v41, 0 0 0.00 | |
970 s_andn2_b32 exec_lo, s6, exec_lo 0 0.00 | |
971 s_cbranch_execz _L35 0 0.00 | |
972 BBF0_38: | |
973 tbuffer_load_format_xy v[41:42], v0, s[12:15], 0 format:[BUF_FMT_32_32_FLOAT] offen offset:24 0 0.00 | |
974 _L35: | |
975 s_waitcnt_depctr 0xffe3 0 0.00 | |
976 s_mov_b32 exec_lo, s2 0 0.00 | |
977 v_and_b32_e32 v10, 4, v10 0 0.00 | |
978 v_cmp_eq_i32_e64 s6, v17, 2 0 0.00 | |
979 v_cmp_ne_i32_e64 s2, v10, 0 0 0.00 | |
980 s_and_b32 vcc_lo, s2, s6 0 0.00 | |
981 s_nand_b32 s2, s2, s6 0 0.00 | |
982 s_waitcnt vmcnt(0) 0 0.00 | |
983 v_cndmask_b32_e32 v30, v30, v32, vcc_lo 0 0.00 | |
984 v_cndmask_b32_e32 v9, v31, v33, vcc_lo 0 0.00 | |
985 v_cndmask_b32_e32 v0, v32, v1, vcc_lo 0 0.00 | |
986 v_cndmask_b32_e32 v16, v33, v2, vcc_lo 0 0.00 | |
987 v_cndmask_b32_e64 v29, 1, v17, s2 0 0.00 | |
988 v_cmp_ne_i32_e32 vcc_lo, 1, v29 0 0.00 | |
989 s_and_saveexec_b32 s2, vcc_lo 0 0.00 | |
990 s_cbranch_execz _L36 0 0.00 | |
991 BBF0_39: | |
992 v_cmp_eq_i32_e32 vcc_lo, 2, v29 0 0.00 | |
993 s_and_saveexec_b32 s3, vcc_lo 0 0.00 | |
994 s_cbranch_execz _L37 0 0.00 | |
995 BBF0_40: | |
996 v_sub_f32_e32 v21, v1, v0 0 0.00 | |
997 v_sub_f32_e32 v26, v2, v16 0 0.00 | |
998 v_sub_f32_e32 v27, v30, v0 0 0.00 | |
999 v_sub_f32_e32 v28, v9, v16 0 0.00 | |
1000 v_mov_b32_e32 v42, v2 0 0.00 | |
1001 v_madmk_f32 v21, v21, 0x3eaaaaab, v0 0 0.00 | |
1002 v_mov_b32_e32 v41, v1 0 0.00 | |
1003 v_madmk_f32 v24, v26, 0x3eaaaaab, v16 0 0.00 | |
1004 v_madmk_f32 v25, v27, 0x3eaaaaab, v0 0 0.00 | |
1005 v_madmk_f32 v16, v28, 0x3eaaaaab, v16 0 0.00 | |
1006 v_mov_b32_e32 v1, v21 0 0.00 | |
1007 _L37: | |
1008 s_andn2_b32 exec_lo, s3, exec_lo 0 0.00 | |
1009 v_mov_b32_e32 v25, v0 0 0.00 | |
1010 v_mov_b32_e32 v24, v2 0 0.00 | |
1011 s_mov_b32 exec_lo, s3 0 0.00 | |
1012 _L36: | |
1013 s_andn2_b32 exec_lo, s2, exec_lo 0 0.00 | |
1014 s_cbranch_execz _L38 0 0.00 | |
1015 BBF0_41: | |
1016 v_sub_f32_e32 v26, v16, v9 0 0.00 | |
1017 v_sub_f32_e32 v24, v9, v16 0 0.00 | |
1018 v_sub_f32_e32 v21, v30, v0 0 0.00 | |
1019 v_sub_f32_e32 v25, v0, v30 0 0.00 | |
1020 v_mov_b32_e32 v42, v16 0 0.00 | |
1021 v_madmk_f32 v26, v26, 0x3eaaaaab, v9 0 0.00 | |
1022 v_madmk_f32 v24, v24, 0x3eaaaaab, v16 0 0.00 | |
1023 v_mov_b32_e32 v41, v0 0 0.00 | |
1024 v_madmk_f32 v1, v21, 0x3eaaaaab, v0 0 0.00 | |
1025 v_madmk_f32 v25, v25, 0x3eaaaaab, v30 0 0.00 | |
1026 v_mov_b32_e32 v16, v26 0 0.00 | |
1027 _L38: | |
1028 s_mov_b32 exec_lo, s2 0 0.00 | |
1029 v_sub_f32_e32 v29, v111, v4 0 0.00 | |
1030 v_sub_f32_e32 v33, v15, v108 0 0.00 | |
1031 v_sub_f32_e32 v31, v111, v109 0 0.00 | |
1032 v_sub_f32_e32 v32, v15, v110 0 0.00 | |
1033 v_subrev_f32_e32 v34, v23, v109 0 0.00 | |
1034 v_mul_f32_e32 v28, v29, v29 0 0.00 | |
1035 v_subrev_f32_e32 v36, v23, v111 0 0.00 | |
1036 v_mul_f32_e32 v35, v31, v31 0 0.00 | |
1037 v_subrev_f32_e32 v37, v22, v110 0 0.00 | |
1038 v_mul_f32_e32 v40, v34, v34 0 0.00 | |
1039 v_mac_f32_e32 v28, v33, v33 0 0.00 | |
1040 v_subrev_f32_e32 v44, v22, v15 0 0.00 | |
1041 v_mac_f32_e32 v35, v32, v32 0 0.00 | |
1042 v_subrev_f32_e32 v39, v23, v4 0 0.00 | |
1043 v_mac_f32_e32 v40, v37, v37 0 0.00 | |
1044 v_cmp_gt_f32_e64 s3, v28, 0x2b8cbccc 0 0.00 | |
1045 v_subrev_f32_e32 v27, v30, v1 0 0.00 | |
1046 v_cmp_lt_f32_e32 vcc_lo, 0x2b8cbccc, v35 0 0.00 | |
1047 v_subrev_f32_e32 v35, v22, v108 0 0.00 | |
1048 v_subrev_f32_e32 v25, v30, v25 0 0.00 | |
1049 v_subrev_f32_e32 v30, v30, v41 0 0.00 | |
1050 v_subrev_f32_e32 v0, v9, v42 0 0.00 | |
1051 v_cndmask_b32_e64 v28, v36, v29, s3 0 0.00 | |
1052 v_mul_f32_e32 v29, v39, v39 0 0.00 | |
1053 v_cndmask_b32_e32 v28, v28, v31, vcc_lo 0 0.00 | |
1054 v_cndmask_b32_e64 v31, v44, v33, s3 0 0.00 | |
1055 v_mac_f32_e32 v29, v35, v35 0 0.00 | |
1056 v_cndmask_b32_e32 v33, v31, v32, vcc_lo 0 0.00 | |
1057 v_cmp_lt_f32_e32 vcc_lo, 0x2b8cbccc, v40 0 0.00 | |
1058 v_cndmask_b32_e32 v32, v36, v34, vcc_lo 0 0.00 | |
1059 v_mul_f32_e32 v34, v28, v28 0 0.00 | |
1060 v_cndmask_b32_e32 v36, v44, v37, vcc_lo 0 0.00 | |
1061 v_cmp_lt_f32_e32 vcc_lo, 0x2b8cbccc, v29 0 0.00 | |
1062 v_subrev_f32_e32 v37, v9, v24 0 0.00 | |
1063 v_cndmask_b32_e32 v31, v32, v39, vcc_lo 0 0.00 | |
1064 v_mac_f32_e32 v34, v33, v33 0 0.00 | |
1065 v_cndmask_b32_e32 v29, v36, v35, vcc_lo 0 0.00 | |
1066 v_subrev_f32_e32 v24, v9, v16 0 0.00 | |
1067 v_cmp_ngt_f32_e32 vcc_lo, 0x2b8cbccc, v34 0 0.00 | |
1068 v_cndmask_b32_e32 v35, 0x358637bd, v28, vcc_lo 0 0.00 | |
1069 v_mul_f32_e32 v28, v27, v27 0 0.00 | |
1070 v_mul_f32_e32 v32, v29, v29 0 0.00 | |
1071 v_cndmask_b32_e32 v33, 0, v33, vcc_lo 0 0.00 | |
1072 v_mul_f32_e32 v34, v25, v25 0 0.00 | |
1073 v_mul_f32_e32 v26, v35, v35 0 0.00 | |
1074 v_mac_f32_e32 v28, v37, v37 0 0.00 | |
1075 v_mac_f32_e32 v32, v31, v31 0 0.00 | |
1076 v_mac_f32_e32 v34, v24, v24 0 0.00 | |
1077 v_mac_f32_e32 v26, v33, v33 0 0.00 | |
1078 v_cmp_gt_f32_e64 s3, v28, 0x2b8cbccc 0 0.00 | |
1079 v_cmp_ngt_f32_e32 vcc_lo, 0x2b8cbccc, v32 0 0.00 | |
1080 v_cndmask_b32_e32 v29, 0, v29, vcc_lo 0 0.00 | |
1081 v_cndmask_b32_e32 v31, 0x358637bd, v31, vcc_lo 0 0.00 | |
1082 v_rsq_f32_e32 v28, v26 0 0.00 | |
1083 v_cmp_lt_f32_e32 vcc_lo, 0x2b8cbccc, v34 0 0.00 | |
1084 v_cndmask_b32_e64 v26, v30, v27, s3 0 0.00 | |
1085 v_mul_f32_e32 v30, v29, v29 0 0.00 | |
1086 v_cndmask_b32_e64 v9, v0, v37, s3 0 0.00 | |
1087 v_mul_f32_e32 v27, v33, v28 0 0.00 | |
1088 v_cndmask_b32_e32 v26, v26, v25, vcc_lo 0 0.00 | |
1089 v_mac_f32_e32 v30, v31, v31 0 0.00 | |
1090 v_cndmask_b32_e32 v0, v9, v24, vcc_lo 0 0.00 | |
1091 v_mul_f32_e64 v25, v5, v27 div:2 0 0.00 | |
1092 v_mul_f32_e32 v9, v26, v26 0 0.00 | |
1093 v_rsq_f32_e32 v24, v30 0 0.00 | |
1094 v_mul_f32_e32 v30, v35, v28 0 0.00 | |
1095 v_subrev_f32_e32 v39, v25, v111 0 0.00 | |
1096 v_mac_f32_e32 v9, v0, v0 0 0.00 | |
1097 v_mul_f32_e64 v21, v5, v30 div:2 0 0.00 | |
1098 v_cmp_ngt_f32_e32 vcc_lo, 0x2b8cbccc, v9 0 0.00 | |
1099 v_cndmask_b32_e32 v26, 0x358637bd, v26, vcc_lo 0 0.00 | |
1100 v_cndmask_b32_e32 v9, 0, v0, vcc_lo 0 0.00 | |
1101 v_add_f32_e32 v37, v21, v15 0 0.00 | |
1102 v_mul_f32_e64 v29, -v29, v24 0 0.00 | |
1103 v_mul_f32_e32 v31, v31, v24 0 0.00 | |
1104 v_xor_b32_e32 v0, 0x80000000, v25 0 0.00 | |
1105 v_cmp_neq_f32_e32 vcc_lo, 0, v5 0 0.00 | |
1106 s_and_b32 exec_lo, s2, vcc_lo 0 0.00 | |
1107 s_cbranch_execz _L39 0 0.00 | |
1108 BBF0_42: | |
1109 v_add_f32_e32 v24, v11, v14 0 0.00 | |
1110 v_add_f32_e32 v34, v12, v13 0 0.00 | |
1111 v_sub_f32_e32 v28, v12, v13 0 0.00 | |
1112 v_sub_f32_e32 v41, v11, v14 0 0.00 | |
1113 v_mov_b32_e32 v1, v22 0 0.00 | |
1114 v_mul_f32_e32 v38, v24, v24 0 0.00 | |
1115 v_mul_f32_e32 v24, v34, v34 0 0.00 | |
1116 v_mov_b32_e32 v2, v23 0 0.00 | |
1117 v_mov_b32_e32 v43, v15 0 0.00 | |
1118 v_mov_b32_e32 v8, v111 0 0.00 | |
1119 v_mac_f32_e32 v38, v28, v28 0 0.00 | |
1120 v_mac_f32_e32 v24, v41, v41 0 0.00 | |
1121 v_mov_b32_e32 v28, v18 0 0.00 | |
1122 v_mov_b32_e32 v45, v11 0 0.00 | |
1123 v_mov_b32_e32 v42, v110 0 0.00 | |
1124 v_sqrt_f32_e32 v34, v38 0 0.00 | |
1125 v_mov_b32_e32 v38, v37 0 0.00 | |
1126 v_mov_b32_e32 v40, v14 0 0.00 | |
1127 v_mov_b32_e32 v44, v13 0 0.00 | |
1128 v_sqrt_f32_e32 v24, v24 0 0.00 | |
1129 v_mov_b32_e32 v46, v109 0 0.00 | |
1130 v_mov_b32_e32 v47, v108 0 0.00 | |
1131 v_mov_b32_e32 v48, v39 0 0.00 | |
1132 v_mov_b32_e32 v51, v4 0 0.00 | |
1133 v_mad_f32 v41, v20, v29, v23 0 0.00 | |
1134 v_mad_f32 v36, v20, v31, v22 0 0.00 | |
1135 v_mad_f32 v52, v34, 0.5, v24 0 0.00 | |
1136 v_mov_b32_e32 v24, v19 0 0.00 | |
1137 v_mov_b32_e32 v34, v12 0 0.00 | |
1138 _L39: | |
1139 s_andn2_b32 exec_lo, s2, exec_lo 0 0.00 | |
1140 s_cbranch_execz _L40 0 0.00 | |
1141 BBF0_43: | |
1142 v_mul_f32_e32 v41, v14, v22 0 0.00 | |
1143 v_mul_f32_e32 v24, v13, v22 0 0.00 | |
1144 v_mul_f32_e32 v43, v14, v108 0 0.00 | |
1145 v_mul_f32_e32 v40, v13, v15 0 0.00 | |
1146 v_mul_f32_e32 v45, v14, v15 0 0.00 | |
1147 v_mac_f32_e32 v41, v12, v23 0 0.00 | |
1148 v_mul_f32_e32 v34, v13, v108 0 0.00 | |
1149 v_mul_f32_e32 v38, v13, v110 0 0.00 | |
1150 v_mac_f32_e32 v24, v11, v23 0 0.00 | |
1151 v_mac_f32_e32 v43, v12, v4 0 0.00 | |
1152 v_add_f32_e32 v1, v19, v41 0 0.00 | |
1153 v_mul_f32_e32 v41, v14, v110 0 0.00 | |
1154 v_mac_f32_e32 v40, v11, v111 0 0.00 | |
1155 v_mac_f32_e32 v45, v12, v111 0 0.00 | |
1156 v_mac_f32_e32 v34, v11, v4 0 0.00 | |
1157 v_mac_f32_e32 v38, v11, v109 0 0.00 | |
1158 v_add_f32_e32 v2, v18, v24 0 0.00 | |
1159 v_add_f32_e32 v47, v19, v43 0 0.00 | |
1160 v_mac_f32_e32 v41, v12, v109 0 0.00 | |
1161 v_add_f32_e32 v8, v18, v40 0 0.00 | |
1162 v_add_f32_e32 v43, v19, v45 0 0.00 | |
1163 v_add_f32_e32 v51, v18, v34 0 0.00 | |
1164 v_add_f32_e32 v46, v18, v38 0 0.00 | |
1165 v_add_f32_e32 v42, v19, v41 0 0.00 | |
1166 v_mov_b32_e32 v24, 0 0 0.00 | |
1167 v_mov_b32_e32 v28, 0 0 0.00 | |
1168 v_mov_b32_e32 v34, 0 0 0.00 | |
1169 v_mov_b32_e32 v45, 1.0 0 0.00 | |
1170 v_mov_b32_e32 v38, v43 0 0.00 | |
1171 v_mov_b32_e32 v40, 1.0 0 0.00 | |
1172 v_mov_b32_e32 v44, 0 0 0.00 | |
1173 v_mov_b32_e32 v48, v8 0 0.00 | |
1174 v_mov_b32_e32 v36, v1 0 0.00 | |
1175 v_mov_b32_e32 v41, v2 0 0.00 | |
1176 v_mov_b32_e32 v52, 1.0 0 0.00 | |
1177 _L40: | |
1178 s_mov_b32 exec_lo, s2 0 0.00 | |
1179 v_cmp_eq_f32_sdwa s2, v2, v51 src0_sel:DWORD src1_sel:DWORD 0 0.00 | |
1180 v_cmp_eq_f32_sdwa s6, v1, v47 src0_sel:DWORD src1_sel:DWORD 0 0.00 | |
1181 v_cmp_eq_f32_sdwa s3, v2, v46 src0_sel:DWORD src1_sel:DWORD 0 0.00 | |
1182 v_cmp_eq_f32_e32 vcc_lo, v1, v42 0 0.00 | |
1183 s_and_b32 s2, s2, s6 0 0.00 | |
1184 s_and_b32 vcc_lo, s3, vcc_lo 0 0.00 | |
1185 s_and_b32 s2, s2, vcc_lo 0 0.00 | |
1186 v_cmp_eq_f32_sdwa s3, v2, v8 src0_sel:DWORD src1_sel:DWORD 0 0.00 | |
1187 v_cmp_eq_f32_e32 vcc_lo, v1, v43 0 0.00 | |
1188 s_and_b32 vcc_lo, s3, vcc_lo 0 0.00 | |
1189 s_and_b32 vcc_lo, s2, vcc_lo 0 0.00 | |
1190 s_and_saveexec_b32 s2, vcc_lo 0 0.00 | |
1191 v_mov_b32_e32 v67, 0xf2fc6f7c 0 0.00 | |
1192 v_mov_b32_e32 v68, 0xf2fc6f7c 0 0.00 | |
1193 v_mov_b32_e32 v71, 0x72fc6f7c 0 0.00 | |
1194 v_mov_b32_e32 v72, 0x72fc6f7c 0 0.00 | |
1195 s_andn2_b32 exec_lo, s2, exec_lo 0 0.00 | |
1196 s_cbranch_execz _L41 0 0.00 | |
1197 BBF0_44: | |
1198 v_sub_f32_e32 v55, v51, v2 0 0.00 | |
1199 v_sub_f32_e32 v59, v47, v1 0 0.00 | |
1200 v_mul_f32_e32 v53, v55, v55 0 0.00 | |
1201 v_mac_f32_e32 v53, v59, v59 0 0.00 | |
1202 v_cmp_gt_f32_e32 vcc_lo, 0x2b8cbccc, v53 0 0.00 | |
1203 s_and_saveexec_b32 s3, vcc_lo 0 0.00 | |
1204 s_cbranch_execz _L42 0 0.00 | |
1205 BBF0_45: | |
1206 v_sub_f32_e32 v53, v46, v51 0 0.00 | |
1207 v_sub_f32_e32 v54, v42, v47 0 0.00 | |
1208 v_sub_f32_e32 v56, v8, v46 0 0.00 | |
1209 v_sub_f32_e32 v57, v43, v42 0 0.00 | |
1210 v_mul_f32_e32 v53, 0x360637b4, v53 0 0.00 | |
1211 v_mul_f32_e32 v54, 0x360637b4, v54 0 0.00 | |
1212 v_madmk_f32 v53, v55, 0x3f7fffde, v53 0 0.00 | |
1213 v_madmk_f32 v54, v59, 0x3f7fffde, v54 0 0.00 | |
1214 v_madmk_f32 v32, v56, 0x2b8cbccc, v53 0 0.00 | |
1215 v_madmk_f32 v53, v57, 0x2b8cbccc, v54 0 0.00 | |
1216 _L42: | |
1217 s_andn2_b32 exec_lo, s3, exec_lo 0 0.00 | |
1218 v_mov_b32_e32 v53, v59 0 0.00 | |
1219 v_mov_b32_e32 v32, v55 0 0.00 | |
1220 s_mov_b32 exec_lo, s3 0 0.00 | |
1221 s_waitcnt lgkmcnt(0) 0 0.00 | |
1222 s_load_dwordx4 s[8:11], s[0:1], 0x80 0 0.00 | |
1223 s_load_dwordx4 s[12:15], s[0:1], 0xa0 0 0.00 | |
1224 s_load_dwordx4 s[16:19], s[0:1], 0xc0 0 0.00 | |
1225 v_mov_b32_e32 v16, 1.0 0 0.00 | |
1226 v_mov_b32_e32 v49, 0 0 0.00 | |
1227 v_mov_b32_e32 v64, 0 0 0.00 | |
1228 v_mov_b32_e32 v70, v1 0 0.00 | |
1229 v_mov_b32_e32 v65, v2 0 0.00 | |
1230 v_mov_b32_e32 v67, 0xf2fc6f7c 0 0.00 | |
1231 v_mov_b32_e32 v68, 0xf2fc6f7c 0 0.00 | |
1232 v_mov_b32_e32 v71, 0x72fc6f7c 0 0.00 | |
1233 v_mov_b32_e32 v72, 0x72fc6f7c 0 0.00 | |
1234 v_sub_f32_e32 v56, v46, v51 0 0.00 | |
1235 v_sub_f32_e32 v57, v42, v47 0 0.00 | |
1236 v_sub_f32_e32 v58, v8, v46 0 0.00 | |
1237 v_sub_f32_e32 v60, v43, v42 0 0.00 | |
1238 v_add_nc_u32_e32 v61, 0x2000, v3 0 0.00 | |
1239 v_add_nc_u32_e32 v62, 0x1000, v3 0 0.00 | |
1240 v_ldexp_f32 v63, v52, -3 0 0.00 | |
1241 v_cmp_ge_f32_e64 s3, v5, 0 0 0.00 | |
1242 s_mov_b32 s6, exec_lo 0 0.00 | |
1243 s_mov_b32 s20, exec_lo 0 0.00 | |
1244 _L72: | |
1245 v_cvt_f32_u32_e32 v77, v49 0 0.00 | |
1246 v_mul_f32_e32 v77, v16, v77 0 0.00 | |
1247 v_cmp_eq_f32_e32 vcc_lo, 1.0, v77 0 0.00 | |
1248 s_andn1_saveexec_b32 s21, vcc_lo 0 0.00 | |
1249 s_andn2_b32 exec_lo, s21, exec_lo 0 0.00 | |
1250 s_andn2_b32 s20, s20, exec_lo 0 0.00 | |
1251 s_cbranch_scc0 _L41 0 0.00 | |
1252 BBF0_46: | |
1253 s_mov_b32 exec_lo, s21 0 0.00 | |
1254 s_and_b32 exec_lo, exec_lo, s20 0 0.00 | |
1255 s_mov_b32 s21, exec_lo 0 0.00 | |
1256 s_mov_b32 s22, exec_lo 0 0.00 | |
1257 v_mul_f32_e32 v73, v32, v32 0 0.00 | |
1258 v_mov_b32_e32 v54, v91 0 0.00 | |
1259 v_mov_b32_e32 v76, v49 0 0.00 | |
1260 v_mov_b32_e32 v49, v86 0 0.00 | |
1261 v_mac_f32_e32 v73, v53, v53 0 0.00 | |
1262 s_nop 0 0 0.00 | |
1263 s_nop 0 0 0.00 | |
1264 s_nop 0 0 0.00 | |
1265 _L53: | |
1266 v_add_f32_e32 v78, v77, v16 0 0.00 | |
1267 v_sub_f32_e32 v79, 1.0, v78 0 0.00 | |
1268 v_mul_f32_e32 v82, v78, v78 0 0.00 | |
1269 v_mul_f32_e32 v83, v78, v79 0 0.00 | |
1270 v_mul_f32_e32 v81, v79, v79 0 0.00 | |
1271 v_mul_f32_e32 v80, 0x40400000, v83 0 0.00 | |
1272 v_mul_f32_e64 v84, v83, v56 mul:2 0 0.00 | |
1273 v_mul_f32_e32 v86, 0x40400000, v81 0 0.00 | |
1274 v_mul_f32_e64 v90, v83, v57 mul:2 0 0.00 | |
1275 v_mul_f32_e32 v79, v79, v81 0 0.00 | |
1276 v_mul_f32_e32 v85, v46, v80 0 0.00 | |
1277 v_mac_f32_e32 v84, v55, v81 0 0.00 | |
1278 v_mul_f32_e32 v80, v42, v80 0 0.00 | |
1279 v_mac_f32_e32 v90, v59, v81 0 0.00 | |
1280 v_mac_f32_e32 v85, v51, v86 0 0.00 | |
1281 v_mac_f32_e32 v84, v58, v82 0 0.00 | |
1282 v_mac_f32_e32 v80, v47, v86 0 0.00 | |
1283 v_mac_f32_e32 v90, v60, v82 0 0.00 | |
1284 v_mac_f32_e32 v85, v8, v82 0 0.00 | |
1285 v_mul_f32_e32 v86, v84, v84 0 0.00 | |
1286 v_mac_f32_e32 v80, v43, v82 0 0.00 | |
1287 v_mul_f32_e32 v88, v78, v85 0 0.00 | |
1288 v_mac_f32_e32 v86, v90, v90 0 0.00 | |
1289 v_mul_f32_e32 v81, v78, v80 0 0.00 | |
1290 v_mad_f32 v66, v2, v79, v88 0 0.00 | |
1291 v_cmp_gt_f32_e32 vcc_lo, 0x2b8cbccc, v86 0 0.00 | |
1292 v_mad_f32 v79, v1, v79, v81 0 0.00 | |
1293 s_and_saveexec_b32 s23, vcc_lo 0 0.00 | |
1294 s_cbranch_execz _L43 0 0.00 | |
1295 BBF0_47: | |
1296 v_add_f32_e32 v86, 0xb58637bd, v78 0 0.00 | |
1297 v_cmp_gt_f32_e32 vcc_lo, 1.0, v78 0 0.00 | |
1298 v_sub_f32_e32 v80, 1.0, v86 0 0.00 | |
1299 v_mul_f32_e32 v89, v86, v86 0 0.00 | |
1300 v_mul_f32_e32 v83, v86, v80 0 0.00 | |
1301 v_mul_f32_e32 v85, v80, v80 0 0.00 | |
1302 v_mul_f32_e64 v84, v83, v56 mul:2 0 0.00 | |
1303 v_mul_f32_e64 v82, v83, v57 mul:2 0 0.00 | |
1304 v_mac_f32_e32 v84, v55, v85 0 0.00 | |
1305 v_mac_f32_e32 v82, v59, v85 0 0.00 | |
1306 v_mac_f32_e32 v84, v58, v89 0 0.00 | |
1307 v_mac_f32_e32 v82, v60, v89 0 0.00 | |
1308 s_and_saveexec_b32 s24, vcc_lo 0 0.00 | |
1309 s_cbranch_execz _L44 0 0.00 | |
1310 BBF0_48: | |
1311 v_mul_f32_e32 v83, 0x40400000, v83 0 0.00 | |
1312 v_mul_f32_e32 v88, 0x40400000, v85 0 0.00 | |
1313 v_mul_f32_e32 v78, v46, v83 0 0.00 | |
1314 v_mul_f32_e32 v81, v42, v83 0 0.00 | |
1315 v_mac_f32_e32 v78, v51, v88 0 0.00 | |
1316 v_mac_f32_e32 v81, v47, v88 0 0.00 | |
1317 v_mac_f32_e32 v78, v8, v89 0 0.00 | |
1318 v_mac_f32_e32 v81, v43, v89 0 0.00 | |
1319 v_mul_f32_e32 v83, v80, v85 0 0.00 | |
1320 v_mul_f32_e32 v78, v86, v78 0 0.00 | |
1321 v_mul_f32_e32 v81, v86, v81 0 0.00 | |
1322 v_mad_f32 v66, v2, v83, v78 0 0.00 | |
1323 v_mad_f32 v79, v1, v83, v81 0 0.00 | |
1324 _L44: | |
1325 s_andn2_b32 exec_lo, s24, exec_lo 0 0.00 | |
1326 v_mov_b32_e32 v86, v78 0 0.00 | |
1327 s_mov_b32 exec_lo, s24 0 0.00 | |
1328 v_mov_b32_e32 v78, v66 0 0.00 | |
1329 v_mov_b32_e32 v80, v82 0 0.00 | |
1330 v_mov_b32_e32 v82, v84 0 0.00 | |
1331 _L43: | |
1332 s_andn2_b32 exec_lo, s23, exec_lo 0 0.00 | |
1333 v_mov_b32_e32 v80, v90 0 0.00 | |
1334 v_mov_b32_e32 v82, v84 0 0.00 | |
1335 v_mov_b32_e32 v86, v78 0 0.00 | |
1336 v_mov_b32_e32 v78, v66 0 0.00 | |
1337 s_mov_b32 exec_lo, s23 0 0.00 | |
1338 s_ff1_i32_b32 s24, exec_lo 0 0.00 | |
1339 s_mov_b32 s23, exec_lo 0 0.00 | |
1340 s_lshl_b32 s25, 1, s24 0 0.00 | |
1341 s_and_b32 s25, s25, exec_lo 0 0.00 | |
1342 s_and_saveexec_b32 s25, s25 0 0.00 | |
1343 s_cbranch_execz _L45 0 0.00 | |
1344 BBF0_49: | |
1345 s_bcnt1_i32_b32 s26, s23 0 0.00 | |
1346 v_mov_b32_e32 v81, s26 0 0.00 | |
1347 s_waitcnt lgkmcnt(0) 0 0.00 | |
1348 s_waitcnt_depctr 0xffe3 0 0.00 | |
1349 buffer_atomic_add v81, off, s[8:11], 0 offset:32 glc 0 0.00 | |
1350 _L45: | |
1351 s_waitcnt_depctr 0xffe3 0 0.00 | |
1352 s_mov_b32 exec_lo, s25 0 0.00 | |
1353 s_waitcnt vmcnt(0) 0 0.00 | |
1354 v_readlane_b32 s24, v81, s24 0 0.00 | |
1355 v_mbcnt_lo_u32_b32 v81, s23, 0 0 0.00 | |
1356 v_mov_b32_e32 v90, v3 0 0.00 | |
1357 v_mov_b32_e32 v91, v77 0 0.00 | |
1358 v_mov_b32_e32 v92, v86 0 0.00 | |
1359 v_sub_f32_e32 v84, v78, v65 0 0.00 | |
1360 v_sub_f32_e32 v85, v79, v70 0 0.00 | |
1361 v_sub_f32_e32 v87, v86, v64 0 0.00 | |
1362 v_mul_f32_e32 v89, v82, v82 0 0.00 | |
1363 v_mul_f32_e32 v83, v84, v84 0 0.00 | |
1364 v_mul_f32_e32 v88, v87, v87 0 0.00 | |
1365 v_mac_f32_e32 v89, v80, v80 0 0.00 | |
1366 v_mac_f32_e32 v83, v85, v85 0 0.00 | |
1367 v_add_nc_i32 v81, s24, v81 0 0.00 | |
1368 v_mul_lo_u32 v81, v81, 12 0 0.00 | |
1369 s_waitcnt lgkmcnt(0) 0 0.00 | |
1370 s_waitcnt_depctr 0xffe3 0 0.00 | |
1371 buffer_store_dwordx3 v[90:92], v81, s[16:19], 0 offen glc 0 0.00 | |
1372 v_sqrt_f32_e32 v81, v83 0 0.00 | |
1373 v_mul_f32_e32 v90, v73, v88 0 0.00 | |
1374 v_mul_f32_e32 v88, v88, v89 0 0.00 | |
1375 v_cmp_lt_f32_e64 s23, v90, 0x2b8cbccc 0 0.00 | |
1376 v_cmp_lt_f32_e64 s24, v88, 0x2b8cbccc 0 0.00 | |
1377 v_cmp_ge_f32_e64 s25, v81, 0x358637bd 0 0.00 | |
1378 s_and_b32 vcc_lo, s23, s24 0 0.00 | |
1379 s_or_b32 s23, s25, vcc_lo 0 0.00 | |
1380 v_cndmask_b32_e64 v66, 0, -1, vcc_lo 0 0.00 | |
1381 s_and_saveexec_b32 s24, s23 0 0.00 | |
1382 s_cbranch_execz _L46 0 0.00 | |
1383 BBF0_50: | |
1384 v_cmp_lt_f32_e64 s23, v83, 0x358637bd 0 0.00 | |
1385 s_andn1_saveexec_b32 s25, s23 0 0.00 | |
1386 s_cbranch_execz _L47 0 0.00 | |
1387 BBF0_51: | |
1388 v_mul_f32_e32 v92, v32, v84 0 0.00 | |
1389 v_mul_f32_e32 v49, v32, v85 0 0.00 | |
1390 v_max_f32_e32 v74, 0x358637bd, v83 0 0.00 | |
1391 v_mac_f32_e32 v92, v53, v85 0 0.00 | |
1392 v_mad_f32 v49, v53, v84, -v49 0 0.00 | |
1393 v_rcp_f32_e32 v74, v74 0 0.00 | |
1394 v_mul_f32_e32 v50, v92, v92 0 0.00 | |
1395 v_mac_f32_e32 v50, v49, v49 0 0.00 | |
1396 v_sqrt_f32_e32 v83, v50 0 0.00 | |
1397 v_mul_f32_e32 v50, v87, v74 0 0.00 | |
1398 v_cmp_nlt_f32_e32 vcc_lo, 0x358637bd, v83 0 0.00 | |
1399 s_and_saveexec_b32 s26, vcc_lo 0 0.00 | |
1400 v_mov_b32_e32 v54, 0x3eaaaaab 0 0.00 | |
1401 v_mov_b32_e32 v74, 0 0 0.00 | |
1402 s_andn2_b32 exec_lo, s26, exec_lo 0 0.00 | |
1403 s_cbranch_execz _L48 0 0.00 | |
1404 BBF0_52: | |
1405 v_max_f32_e64 v74, |v92|, |v49| 0 0.00 | |
1406 v_min_f32_e64 v88, |v92|, |v49| 0 0.00 | |
1407 s_mov_b32 s27, 0x3caaae5f 0 0.00 | |
1408 v_min_f32_e32 v90, v92, v49 0 0.00 | |
1409 v_cmp_gt_f32_e64 vcc_lo, |v49|, |v92| 0 0.00 | |
1410 v_rcp_f32_e32 v74, v74 0 0.00 | |
1411 v_max_f32_e32 v49, v92, v49 0 0.00 | |
1412 v_mul_f32_e32 v54, v50, v83 0 0.00 | |
1413 v_mul_f32_e32 v89, v88, v74 0 0.00 | |
1414 v_mul_f32_e32 v74, v89, v89 0 0.00 | |
1415 v_madak_f32 v87, s27, v74, 0xbdae5a36 0 0.00 | |
1416 v_cmp_gt_f32_e64 s27, -v90, v90 0 0.00 | |
1417 v_madak_f32 v87, v74, v87, 0x3e3876e2 0 0.00 | |
1418 v_madak_f32 v87, v74, v87, 0xbea91d04 0 0.00 | |
1419 v_madak_f32 v88, v74, v87, 0x3f7ff738 0 0.00 | |
1420 v_mul_f32_e32 v74, v89, v88 0 0.00 | |
1421 v_madak_f32 v74, -2.0, v74, 0x3fc90fdb 0 0.00 | |
1422 v_cndmask_b32_e32 v74, 0, v74, vcc_lo 0 0.00 | |
1423 v_cmp_gt_f32_e64 vcc_lo, -v92, v92 0 0.00 | |
1424 v_cndmask_b32_e64 v87, 0, 0xc0490fdb, vcc_lo 0 0.00 | |
1425 v_cmp_ge_f32_e64 vcc_lo, v49, -v49 0 0.00 | |
1426 v_mac_f32_e32 v74, v89, v88 0 0.00 | |
1427 v_add_f32_e32 v49, v74, v87 0 0.00 | |
1428 s_and_b32 vcc_lo, s27, vcc_lo 0 0.00 | |
1429 v_cndmask_b32_e64 v74, 0, 0x80000000, vcc_lo 0 0.00 | |
1430 v_cndmask_b32_e64 v66, 0, -1, vcc_lo 0 0.00 | |
1431 v_xor_b32_e32 v74, v49, v74 0 0.00 | |
1432 _L48: | |
1433 s_mov_b32 exec_lo, s26 0 0.00 | |
1434 v_mul_f32_e32 v90, v82, v84 0 0.00 | |
1435 v_mul_f32_e32 v87, v80, v84 0 0.00 | |
1436 v_mac_f32_e32 v90, v80, v85 0 0.00 | |
1437 v_mad_f32 v87, v82, v85, -v87 0 0.00 | |
1438 v_mul_f32_e32 v88, v90, v90 0 0.00 | |
1439 v_mac_f32_e32 v88, v87, v87 0 0.00 | |
1440 v_sqrt_f32_e32 v88, v88 0 0.00 | |
1441 v_cmp_nlt_f32_e32 vcc_lo, 0x358637bd, v88 0 0.00 | |
1442 s_and_b32 exec_lo, s26, vcc_lo 0 0.00 | |
1443 v_mov_b32_e32 v49, 0x3eaaaaab 0 0.00 | |
1444 v_mov_b32_e32 v69, 0 0 0.00 | |
1445 s_andn2_b32 exec_lo, s26, exec_lo 0 0.00 | |
1446 s_cbranch_execz _L49 0 0.00 | |
1447 BBF0_53: | |
1448 v_max_f32_e64 v83, |v90|, |v87| 0 0.00 | |
1449 v_min_f32_e64 v92, |v90|, |v87| 0 0.00 | |
1450 s_mov_b32 s27, 0x3caaae5f 0 0.00 | |
1451 v_cmp_gt_f32_e64 vcc_lo, |v87|, |v90| 0 0.00 | |
1452 v_max_f32_e32 v93, v90, v87 0 0.00 | |
1453 v_rcp_f32_e32 v83, v83 0 0.00 | |
1454 v_mul_f32_e32 v49, v50, v88 0 0.00 | |
1455 v_mul_f32_e32 v83, v92, v83 0 0.00 | |
1456 v_mul_f32_e32 v89, v83, v83 0 0.00 | |
1457 v_madak_f32 v91, s27, v89, 0xbdae5a36 0 0.00 | |
1458 v_madak_f32 v91, v89, v91, 0x3e3876e2 0 0.00 | |
1459 v_madak_f32 v91, v89, v91, 0xbea91d04 0 0.00 | |
1460 v_madak_f32 v92, v89, v91, 0x3f7ff738 0 0.00 | |
1461 v_mul_f32_e32 v89, v83, v92 0 0.00 | |
1462 v_madak_f32 v89, -2.0, v89, 0x3fc90fdb 0 0.00 | |
1463 v_cndmask_b32_e32 v94, 0, v89, vcc_lo 0 0.00 | |
1464 v_min_f32_e32 v89, v90, v87 0 0.00 | |
1465 v_cmp_gt_f32_e64 vcc_lo, -v90, v90 0 0.00 | |
1466 v_cndmask_b32_e64 v87, 0, 0xc0490fdb, vcc_lo 0 0.00 | |
1467 v_cmp_ge_f32_e64 vcc_lo, v93, -v93 0 0.00 | |
1468 v_mac_f32_e32 v94, v83, v92 0 0.00 | |
1469 v_cmp_gt_f32_e64 s27, -v89, v89 0 0.00 | |
1470 v_add_f32_e32 v89, v94, v87 0 0.00 | |
1471 s_and_b32 vcc_lo, s27, vcc_lo 0 0.00 | |
1472 v_cndmask_b32_e64 v83, 0, 0x80000000, vcc_lo 0 0.00 | |
1473 v_cndmask_b32_e64 v66, 0, -1, vcc_lo 0 0.00 | |
1474 v_xor_b32_e32 v69, v89, v83 0 0.00 | |
1475 _L49: | |
1476 s_mov_b32 exec_lo, s26 0 0.00 | |
1477 v_mov_b32_e32 v50, v74 0 0.00 | |
1478 _L47: | |
1479 s_andn2_b32 exec_lo, s25, exec_lo 0 0.00 | |
1480 v_cndmask_b32_e64 v50, v89, 0, s23 0 0.00 | |
1481 v_cndmask_b32_e64 v69, v69, 0, s23 0 0.00 | |
1482 v_cndmask_b32_e64 v54, v54, 0x3eaaaaab, s23 0 0.00 | |
1483 v_cndmask_b32_e64 v49, v49, 0x3eaaaaab, s23 0 0.00 | |
1484 s_mov_b32 exec_lo, s25 0 0.00 | |
1485 v_mul_f32_e32 v87, 0.15915494, v50 0 0.00 | |
1486 v_mul_f32_e32 v89, 0.15915494, v69 0 0.00 | |
1487 v_cos_f32_e32 v92, v87 0 0.00 | |
1488 v_cos_f32_e32 v87, v89 0 0.00 | |
1489 v_mul_f32_e32 v89, v92, v87 0 0.00 | |
1490 v_cmp_lt_f32_e64 s23, v89, 0 0 0.00 | |
1491 s_andn2_b32 exec_lo, s25, s23 0 0.00 | |
1492 s_cbranch_execz _L50 0 0.00 | |
1493 BBF0_54: | |
1494 v_add_f32_e32 v74, 1.0, v92 0 0.00 | |
1495 v_add_f32_e32 v89, 1.0, v87 0 0.00 | |
1496 v_mul_f32_e32 v90, 0.15915494, v50 0 0.00 | |
1497 v_mul_f32_e32 v91, 0.15915494, v69 0 0.00 | |
1498 v_mul_f32_e32 v97, v49, v54 0 0.00 | |
1499 v_max_f32_e32 v74, 0x3089705f, v74 0 0.00 | |
1500 v_max_f32_e32 v89, 0x3089705f, v89 0 0.00 | |
1501 v_sin_f32_e32 v90, v90 0 0.00 | |
1502 v_add_f32_e32 v101, v69, v50 0 0.00 | |
1503 v_rcp_f32_e32 v74, v74 0 0.00 | |
1504 v_rcp_f32_e32 v96, v89 0 0.00 | |
1505 v_mul_f32_e64 v88, v90, v54 mul:2 0 0.00 | |
1506 v_mul_f32_e32 v87, v87, v90 0 0.00 | |
1507 v_sin_f32_e32 v94, v91 0 0.00 | |
1508 v_mul_f32_e32 v93, 0x3f2aaaab, v74 0 0.00 | |
1509 v_madmk_f32 v74, v74, 0xbf2aaaab, v54 0 0.00 | |
1510 v_madmk_f32 v89, v96, 0xbf2aaaab, v49 0 0.00 | |
1511 v_mul_f32_e32 v91, v93, v90 0 0.00 | |
1512 v_mul_f32_e32 v74, v74, v74 0 0.00 | |
1513 v_mul_f32_e32 v95, 0x3f2aaaab, v96 0 0.00 | |
1514 v_mac_f32_e32 v87, v92, v94 0 0.00 | |
1515 v_mac_f32_e32 v88, v94, v49 0 0.00 | |
1516 v_mac_f32_e32 v91, v90, v93 0 0.00 | |
1517 v_mac_f32_e32 v74, v89, v89 0 0.00 | |
1518 v_mul_f32_e32 v90, v101, v101 0 0.00 | |
1519 v_mac_f32_e32 v88, v94, v49 0 0.00 | |
1520 v_mac_f32_e32 v91, v94, v95 0 0.00 | |
1521 v_sqrt_f32_e32 v92, v74 0 0.00 | |
1522 v_mul_f32_e32 v74, v93, v95 0 0.00 | |
1523 v_mul_f32_e64 v89, |v101|, v90 0 0.00 | |
1524 v_mad_f32 v88, -v97, v87, v88 0 0.00 | |
1525 v_mac_f32_e32 v91, v94, v95 0 0.00 | |
1526 v_mad_f32 v91, -v74, v87, v91 0 0.00 | |
1527 v_sub_f32_e32 v74, v50, v69 0 0.00 | |
1528 v_mul_f32_e32 v87, 0x3e19999a, v91 0 0.00 | |
1529 v_mul_f32_e64 v94, |v74|, 0x3d8f5c29 0 0.00 | |
1530 v_mul_f32_e64 v74, |v74|, 0x3bf5c28f 0 0.00 | |
1531 v_mad_f32 v87, v88, 0x3e19999a, -v87 0 0.00 | |
1532 v_mad_f32 v94, |v101|, 0x3ba3d70a, v94 0 0.00 | |
1533 v_madmk_f32 v54, v89, 0x369b3073, v74 0 0.00 | |
1534 v_mul_f32_e64 v89, |v87|, 0x3fc66666 0 0.00 | |
1535 v_mac_f32_e32 v89, v54, v90 0 0.00 | |
1536 v_mad_f32 v87, v94, v92, v89 0 0.00 | |
1537 _L50: | |
1538 s_andn2_b32 exec_lo, s25, exec_lo 0 0.00 | |
1539 v_cndmask_b32_e64 v87, v66, 2.0, s23 0 0.00 | |
1540 s_mov_b32 exec_lo, s25 0 0.00 | |
1541 v_mul_f32_e32 v87, v81, v87 0 0.00 | |
1542 v_mul_f32_e32 v87, v52, v87 0 0.00 | |
1543 v_cmp_le_f32_e64 s23, v87, 0x3e800000 0 0.00 | |
1544 v_cmp_ge_f32_e32 vcc_lo, 0x37800000, v16 0 0.00 | |
1545 s_or_b32 vcc_lo, s23, vcc_lo 0 0.00 | |
1546 s_andn1_saveexec_b32 s25, vcc_lo 0 0.00 | |
1547 s_andn2_b32 exec_lo, s25, exec_lo 0 0.00 | |
1548 s_andn2_b32 s22, s22, exec_lo 0 0.00 | |
1549 s_cbranch_scc0 _L51 0 0.00 | |
1550 BBF0_55: | |
1551 s_and_b32 exec_lo, s25, s22 0 0.00 | |
1552 _L46: | |
1553 s_andn2_b32 exec_lo, s24, exec_lo 0 0.00 | |
1554 s_and_b32 exec_lo, s24, s22 0 0.00 | |
1555 s_ff1_i32_b32 s24, exec_lo 0 0.00 | |
1556 s_mov_b32 s23, exec_lo 0 0.00 | |
1557 s_lshl_b32 s25, 1, s24 0 0.00 | |
1558 v_lshlrev_b32_e32 v76, 1, v76 0 0.00 | |
1559 s_and_b32 s25, s25, exec_lo 0 0.00 | |
1560 v_ldexp_f32 v16, v16, -1 0 0.00 | |
1561 s_and_saveexec_b32 s25, s25 0 0.00 | |
1562 s_cbranch_execz _L52 0 0.00 | |
1563 BBF0_56: | |
1564 s_bcnt1_i32_b32 s26, s23 0 0.00 | |
1565 v_mov_b32_e32 v78, s26 0 0.00 | |
1566 s_waitcnt_depctr 0xffe3 0 0.00 | |
1567 buffer_atomic_add v78, off, s[8:11], 0 offset:32 glc 0 0.00 | |
1568 _L52: | |
1569 s_waitcnt_depctr 0xffe3 0 0.00 | |
1570 s_mov_b32 exec_lo, s25 0 0.00 | |
1571 v_mbcnt_lo_u32_b32 v79, s23, 0 0 0.00 | |
1572 s_waitcnt vmcnt(0) 0 0.00 | |
1573 v_readlane_b32 s23, v78, s24 0 0.00 | |
1574 v_cvt_f32_u32_e32 v80, v76 0 0.00 | |
1575 v_mov_b32_e32 v81, v16 0 0.00 | |
1576 v_add_nc_i32 v78, s23, v79 0 0.00 | |
1577 v_mov_b32_e32 v79, v61 0 0.00 | |
1578 v_mul_lo_u32 v78, v78, 12 0 0.00 | |
1579 s_waitcnt_depctr 0xffe3 0 0.00 | |
1580 buffer_store_dwordx3 v[79:81], v78, s[16:19], 0 offen glc 0 0.00 | |
1581 s_branch _L53 0 0.00 | |
1582 _L51: | |
1583 s_mov_b32 exec_lo, s21 0 0.00 | |
1584 v_add_nc_u32_e32 v49, 1, v76 0 0.00 | |
1585 s_mov_b32 s21, exec_lo 0 0.00 | |
1586 s_ff1_i32_b32 s22, exec_lo 0 0.00 | |
1587 v_ffbl_b32_e32 v64, v49 0 0.00 | |
1588 s_lshl_b32 s23, 1, s22 0 0.00 | |
1589 s_and_b32 s23, s23, exec_lo 0 0.00 | |
1590 v_min_u32_e32 v64, 32, v64 0 0.00 | |
1591 v_lshlrev_b32_e64 v66, v64, 1 0 0.00 | |
1592 v_cvt_f32_u32_e32 v66, v66 0 0.00 | |
1593 v_lshrrev_b32_e32 v49, v64, v49 0 0.00 | |
1594 v_mul_f32_e32 v16, v16, v66 0 0.00 | |
1595 s_and_saveexec_b32 s23, s23 0 0.00 | |
1596 s_cbranch_execz _L54 0 0.00 | |
1597 BBF0_57: | |
1598 s_bcnt1_i32_b32 s24, s21 0 0.00 | |
1599 v_mov_b32_e32 v64, s24 0 0.00 | |
1600 s_waitcnt_depctr 0xffe3 0 0.00 | |
1601 buffer_atomic_add v64, off, s[8:11], 0 offset:32 glc 0 0.00 | |
1602 _L54: | |
1603 s_waitcnt_depctr 0xffe3 0 0.00 | |
1604 s_mov_b32 exec_lo, s23 0 0.00 | |
1605 s_waitcnt vmcnt(0) 0 0.00 | |
1606 v_readlane_b32 s22, v64, s22 0 0.00 | |
1607 v_sub_f32_e32 v73, v69, v50 0 0.00 | |
1608 v_add_f32_e32 v75, v50, v69 0 0.00 | |
1609 v_mbcnt_lo_u32_b32 v64, s21, 0 0 0.00 | |
1610 v_cvt_f32_u32_e32 v92, v49 0 0.00 | |
1611 v_mov_b32_e32 v94, v16 0 0.00 | |
1612 v_mul_f32_e32 v77, v73, v73 0 0.00 | |
1613 v_mul_f32_e32 v76, v75, v75 0 0.00 | |
1614 v_mov_b32_e32 v93, v92 0 0.00 | |
1615 v_mov_b32_e32 v92, v62 0 0.00 | |
1616 v_mad_f32 v89, v77, 0xbccccccd, 1.0 0 0.00 | |
1617 s_mov_b32 s26, 0xb84c68e7 0 0.00 | |
1618 s_mov_b32 s21, 0xbc6a0ea1 0 0.00 | |
1619 s_mov_b32 s23, 0x3979a934 0 0.00 | |
1620 s_mov_b32 s24, 0x388fa325 0 0.00 | |
1621 s_mov_b32 s25, 0x3b21e3b8 0 0.00 | |
1622 v_madak_f32 v66, s26, v77, 0x3a088889 0 0.00 | |
1623 v_add_nc_i32 v64, s22, v64 0 0.00 | |
1624 v_madak_f32 v83, s21, v77, 0x40c00000 0 0.00 | |
1625 v_madak_f32 v87, s25, v77, 0xbd2aaaab 0 0.00 | |
1626 v_madak_f32 v74, s24, v77, 0xba3b3ee7 0 0.00 | |
1627 v_madak_f32 v88, s23, v77, 0xbdcccccd 0 0.00 | |
1628 v_madmk_f32 v91, v76, 0xb6500cec, v66 0 0.00 | |
1629 v_mul_f32_e32 v66, v77, v77 0 0.00 | |
1630 v_mul_lo_u32 v64, v64, 12 0 0.00 | |
1631 v_madmk_f32 v74, v76, 0xb70526e7, v74 0 0.00 | |
1632 v_madmk_f32 v83, v66, 0xb8c28a7f, v83 0 0.00 | |
1633 v_madmk_f32 v89, v66, 0x39b3719e, v89 0 0.00 | |
1634 v_mul_f32_e32 v90, v77, v66 0 0.00 | |
1635 v_madmk_f32 v77, v66, 0x378e44a1, v88 0 0.00 | |
1636 v_madmk_f32 v66, v66, 0xb81c6fca, v87 0 0.00 | |
1637 v_madmk_f32 v83, v90, 0x3494ab4c, v83 0 0.00 | |
1638 v_mac_f32_e32 v77, v74, v76 0 0.00 | |
1639 v_mac_f32_e32 v66, v91, v76 0 0.00 | |
1640 v_madmk_f32 v87, v90, 0xb601da25, v89 0 0.00 | |
1641 v_mac_f32_e32 v83, v77, v76 0 0.00 | |
1642 v_mac_f32_e32 v87, v66, v76 0 0.00 | |
1643 v_rcp_f32_e32 v66, v81 0 0.00 | |
1644 s_waitcnt_depctr 0xffe3 0 0.00 | |
1645 buffer_store_dwordx3 v[92:94], v64, s[16:19], 0 offen glc 0 0.00 | |
1646 v_mul_f32_e32 v64, v73, v83 0 0.00 | |
1647 v_mul_f32_e64 v73, v5, v87 div:2 0 0.00 | |
1648 v_cmp_gt_f32_e64 vcc_lo, 0x3a83126f, |v64| 0 0.00 | |
1649 v_mul_f32_e32 v66, v73, v66 0 0.00 | |
1650 s_andn1_saveexec_b32 s21, vcc_lo 0 0.00 | |
1651 s_cbranch_execz _L55 0 0.00 | |
1652 BBF0_58: | |
1653 v_mad_f32 v83, v64, -0.5, v75 0 0.00 | |
1654 v_cmp_gt_f32_e64 s22, 0x3a83126f, |v66| 0 0.00 | |
1655 s_andn1_saveexec_b32 s23, s22 0 0.00 | |
1656 s_cbranch_execz _L56 0 0.00 | |
1657 BBF0_59: | |
1658 v_mad_f32 v73, -v66, v83, -1.0 0 0.00 | |
1659 v_mul_f32_e32 v74, v64, v66 0 0.00 | |
1660 v_mad_f32 v73, -v83, v66, v73 0 0.00 | |
1661 v_ldexp_f32 v32, -v74, 1 0 0.00 | |
1662 v_cmp_gt_f32_e64 vcc_lo, 0x3f4ccccd, |v73| 0 0.00 | |
1663 s_andn1_saveexec_b32 s24, vcc_lo 0 0.00 | |
1664 s_cbranch_execz _L57 0 0.00 | |
1665 BBF0_60: | |
1666 v_add_f32_e64 v77, |v73|, -1.0 0 0.00 | |
1667 v_mov_b32_e32 v88, 0xbf4f5c29 0 0.00 | |
1668 v_cmp_gt_f32_e64 vcc_lo, 0x40066666, |v73| 0 0.00 | |
1669 v_cndmask_b32_e64 v91, 0.5, 0x3f23fe5d, vcc_lo 0 0.00 | |
1670 v_mov_b32_e32 v89, 0x3f6a311b 0 0.00 | |
1671 v_sqrt_f32_e64 v90, |v77| 0 0.00 | |
1672 v_cndmask_b32_e32 v88, 0xbe1fbe77, v88, vcc_lo 0 0.00 | |
1673 s_mov_b32 s25, 0x3f715bef 0 0.00 | |
1674 v_cndmask_b32_e32 v94, 0x3e255531, v89, vcc_lo 0 0.00 | |
1675 v_cmp_gt_f32_e64 vcc_lo, 0x3fa00000, |v73| 0 0.00 | |
1676 v_mad_f32 v88, v91, |v73|, v88 0 0.00 | |
1677 v_mul_f32_e32 v77, v77, v90 0 0.00 | |
1678 v_mad_f32 v94, v88, |v73|, v94 0 0.00 | |
1679 v_madak_f32 v77, s25, v77, 0x3f490fdb 0 0.00 | |
1680 v_cndmask_b32_e32 v77, v94, v77, vcc_lo 0 0.00 | |
1681 _L57: | |
1682 s_andn2_b32 exec_lo, s24, exec_lo 0 0.00 | |
1683 v_mul_f32_e64 v77, |v73|, 0x3e32e5ab 0 0.00 | |
1684 v_sin_f32_e32 v77, v77 0 0.00 | |
1685 v_mul_f32_e32 v77, 0x3f693710, v77 0 0.00 | |
1686 s_mov_b32 exec_lo, s24 0 0.00 | |
1687 v_cmp_gt_f32_e32 vcc_lo, 0, v73 0 0.00 | |
1688 v_cndmask_b32_e64 v88, 0, -1, vcc_lo 0 0.00 | |
1689 v_cmp_lt_f32_e32 vcc_lo, 0, v73 0 0.00 | |
1690 v_mad_f32 v91, v74, -2.0, v73 0 0.00 | |
1691 v_add_co_ci_u32_e64 v88, vcc_lo, v88, 0, vcc_lo 0 0.00 | |
1692 v_cmp_gt_f32_e64 vcc_lo, 0x3f4ccccd, |v91| 0 0.00 | |
1693 v_cvt_f32_i32_e32 v88, v88 0 0.00 | |
1694 v_mul_f32_e32 v53, v77, v88 0 0.00 | |
1695 s_andn2_b32 exec_lo, s24, vcc_lo 0 0.00 | |
1696 s_cbranch_execz _L58 0 0.00 | |
1697 BBF0_61: | |
1698 v_add_f32_e64 v88, |v91|, -1.0 0 0.00 | |
1699 v_mov_b32_e32 v74, 0xbf4f5c29 0 0.00 | |
1700 v_cmp_gt_f32_e64 vcc_lo, 0x40066666, |v91| 0 0.00 | |
1701 v_cndmask_b32_e64 v90, 0.5, 0x3f23fe5d, vcc_lo 0 0.00 | |
1702 s_mov_b32 s25, 0x3f715bef 0 0.00 | |
1703 v_sqrt_f32_e64 v89, |v88| 0 0.00 | |
1704 v_cndmask_b32_e32 v93, 0xbe1fbe77, v74, vcc_lo 0 0.00 | |
1705 v_mov_b32_e32 v74, 0x3f6a311b 0 0.00 | |
1706 v_mad_f32 v93, v90, |v91|, v93 0 0.00 | |
1707 v_cndmask_b32_e32 v74, 0x3e255531, v74, vcc_lo 0 0.00 | |
1708 v_cmp_gt_f32_e64 vcc_lo, 0x3fa00000, |v91| 0 0.00 | |
1709 v_mul_f32_e32 v88, v88, v89 0 0.00 | |
1710 v_mad_f32 v74, v93, |v91|, v74 0 0.00 | |
1711 v_madak_f32 v88, s25, v88, 0x3f490fdb 0 0.00 | |
1712 v_cndmask_b32_e32 v88, v74, v88, vcc_lo 0 0.00 | |
1713 _L58: | |
1714 s_andn2_b32 exec_lo, s24, exec_lo 0 0.00 | |
1715 v_mul_f32_e64 v74, |v91|, 0x3e32e5ab 0 0.00 | |
1716 v_sin_f32_e32 v74, v74 0 0.00 | |
1717 v_mul_f32_e32 v88, 0x3f693710, v74 0 0.00 | |
1718 s_mov_b32 exec_lo, s24 0 0.00 | |
1719 v_rcp_f32_e32 v89, v32 0 0.00 | |
1720 v_mul_f32_e32 v74, v64, v73 0 0.00 | |
1721 v_mad_f32 v83, -v74, v89, v83 0 0.00 | |
1722 v_cmp_gt_f32_e32 vcc_lo, 0, v91 0 0.00 | |
1723 v_cndmask_b32_e64 v74, 0, -1, vcc_lo 0 0.00 | |
1724 v_cmp_lt_f32_e32 vcc_lo, 0, v91 0 0.00 | |
1725 v_mad_f32 v66, v83, v66, 1.0 0 0.00 | |
1726 v_add_co_ci_u32_e64 v74, vcc_lo, v74, 0, vcc_lo 0 0.00 | |
1727 v_mul_f32_e32 v66, v83, v66 0 0.00 | |
1728 v_cvt_f32_i32_e32 v74, v74 0 0.00 | |
1729 v_sqrt_f32_e64 v66, |v66| 0 0.00 | |
1730 v_mad_f32 v88, v88, v74, -v53 0 0.00 | |
1731 v_mul_f32_e32 v66, v88, v66 0 0.00 | |
1732 v_mul_f32_e32 v54, v66, v89 0 0.00 | |
1733 _L56: | |
1734 s_andn2_b32 exec_lo, s23, exec_lo 0 0.00 | |
1735 s_cbranch_execz _L59 0 0.00 | |
1736 BBF0_62: | |
1737 v_sqrt_f32_e64 v66, |v83| 0 0.00 | |
1738 v_add_f32_e32 v73, v64, v83 0 0.00 | |
1739 v_mov_b32_e32 v32, v64 0 0.00 | |
1740 v_sqrt_f32_e64 v74, |v73| 0 0.00 | |
1741 v_mul_f32_e32 v53, v83, v66 0 0.00 | |
1742 v_mad_f32 v88, v73, v74, -v53 0 0.00 | |
1743 v_rcp_f32_e32 v73, v64 0 0.00 | |
1744 v_mul_f32_e32 v74, 0x3f2aaaab, v88 0 0.00 | |
1745 v_mul_f32_e32 v54, v74, v73 0 0.00 | |
1746 v_mov_b32_e32 v73, v83 0 0.00 | |
1747 _L59: | |
1748 s_mov_b32 exec_lo, s23 0 0.00 | |
1749 v_mov_b32_e32 v74, v73 0 0.00 | |
1750 v_mov_b32_e32 v83, v53 0 0.00 | |
1751 v_cndmask_b32_e64 v76, 0, 2, s22 0 0.00 | |
1752 _L55: | |
1753 s_andn2_b32 exec_lo, s21, exec_lo 0 0.00 | |
1754 s_cbranch_execz _L60 0 0.00 | |
1755 BBF0_63: | |
1756 v_mad_f32 v66, v75, v66, 1.0 0 0.00 | |
1757 v_mov_b32_e32 v32, 0 0 0.00 | |
1758 v_mov_b32_e32 v74, 0 0 0.00 | |
1759 v_mov_b32_e32 v83, 0 0 0.00 | |
1760 v_mov_b32_e32 v76, 1 0 0.00 | |
1761 v_mul_f32_e32 v66, v75, v66 0 0.00 | |
1762 v_mov_b32_e32 v88, 0 0 0.00 | |
1763 v_sqrt_f32_e64 v54, |v66| 0 0.00 | |
1764 _L60: | |
1765 s_mov_b32 exec_lo, s21 0 0.00 | |
1766 v_ldexp_f32 v77, v87, -2 0 0.00 | |
1767 v_mov_b32_e32 v69, 0 0 0.00 | |
1768 s_movk_i32 s23, 0xffff 0 0.00 | |
1769 s_mov_b32 s21, exec_lo 0 0.00 | |
1770 s_mov_b32 s22, exec_lo 0 0.00 | |
1771 v_rcp_f32_e32 v90, v77 0 0.00 | |
1772 v_mul_f32_e32 v77, v63, v81 0 0.00 | |
1773 v_mul_f32_e32 v77, v77, v90 0 0.00 | |
1774 v_sqrt_f32_e32 v90, v77 0 0.00 | |
1775 v_max_f32_e32 v77, 0x358637bd, v81 0 0.00 | |
1776 v_mul_f32_e32 v73, v90, v54 0 0.00 | |
1777 v_rcp_f32_e32 v90, v77 0 0.00 | |
1778 v_ldexp_f32 v77, v64, -1 0 0.00 | |
1779 v_ceil_f32_e32 v73, v73 0 0.00 | |
1780 v_max_f32_e32 v73, 1.0, v73 0 0.00 | |
1781 v_mul_f32_e64 v92, v5, v90 div:2 0 0.00 | |
1782 v_cvt_u32_f32_e32 v91, v73 0 0.00 | |
1783 s_nop 0 0 0.00 | |
1784 s_nop 0 0 0.00 | |
1785 s_nop 0 0 0.00 | |
1786 s_nop 0 0 0.00 | |
1787 _L71: | |
1788 v_cmp_eq_i32_e64 s23, s23, 0 0 0.00 | |
1789 v_add_co_ci_u32_e64 v81, vcc_lo, v69, 0, s23 0 0.00 | |
1790 v_cmp_gt_u32_e32 vcc_lo, v91, v81 0 0.00 | |
1791 s_and_saveexec_b32 s24, vcc_lo 0 0.00 | |
1792 s_andn2_b32 exec_lo, s24, exec_lo 0 0.00 | |
1793 s_andn2_b32 s22, s22, exec_lo 0 0.00 | |
1794 s_cbranch_scc0 _L61 0 0.00 | |
1795 BBF0_64: | |
1796 s_and_b32 exec_lo, s24, s22 0 0.00 | |
1797 v_add_co_ci_u32_e64 v69, vcc_lo, v69, 1, s23 0 0.00 | |
1798 v_cmp_eq_f32_e64 s23, v86, 1.0 0 0.00 | |
1799 v_cmp_eq_i32_e32 vcc_lo, v91, v69 0 0.00 | |
1800 s_and_b32 vcc_lo, vcc_lo, s23 0 0.00 | |
1801 s_andn1_saveexec_b32 s23, vcc_lo 0 0.00 | |
1802 s_cbranch_execz _L62 0 0.00 | |
1803 BBF0_65: | |
1804 v_rcp_f32_e32 v94, v73 0 0.00 | |
1805 v_cvt_f32_u32_e32 v69, v69 0 0.00 | |
1806 v_cmp_ne_i32_e32 vcc_lo, 1, v76 0 0.00 | |
1807 v_mul_f32_e32 v96, v69, v94 0 0.00 | |
1808 s_and_saveexec_b32 s24, vcc_lo 0 0.00 | |
1809 s_cbranch_execz _L63 0 0.00 | |
1810 BBF0_66: | |
1811 v_mad_f32 v69, v88, v96, v83 0 0.00 | |
1812 v_cmp_ne_i32_e32 vcc_lo, 2, v76 0 0.00 | |
1813 s_and_saveexec_b32 s25, vcc_lo 0 0.00 | |
1814 s_cbranch_execz _L64 0 0.00 | |
1815 BBF0_67: | |
1816 v_cmp_gt_f32_e64 vcc_lo, 0x3f337960, |v69| 0 0.00 | |
1817 s_andn1_saveexec_b32 s26, vcc_lo 0 0.00 | |
1818 s_cbranch_execz _L65 0 0.00 | |
1819 BBF0_68: | |
1820 v_cmp_gt_f32_e64 vcc_lo, 0x3f673b59, |v69| 0 0.00 | |
1821 s_andn1_saveexec_b32 s27, vcc_lo 0 0.00 | |
1822 s_cbranch_execz _L66 0 0.00 | |
1823 BBF0_69: | |
1824 v_mov_b32_e32 v89, 0xbf83a110 0 0.00 | |
1825 v_cmp_gt_f32_e64 vcc_lo, 0x40027ca5, |v69| 0 0.00 | |
1826 v_cndmask_b32_e64 v94, 2.0, 0x3fc7d00b, vcc_lo 0 0.00 | |
1827 v_cndmask_b32_e32 v95, 0xbe98df6c, v89, vcc_lo 0 0.00 | |
1828 v_mad_f32 v95, v94, |v69|, v95 0 0.00 | |
1829 v_mov_b32_e32 v89, 0x3f21d928 0 0.00 | |
1830 v_sqrt_f32_e32 v93, v95 0 0.00 | |
1831 v_cndmask_b32_e32 v94, 0x3e1fbe77, v89, vcc_lo 0 0.00 | |
1832 v_add_f32_e32 v94, v94, v93 0 0.00 | |
1833 _L66: | |
1834 s_andn2_b32 exec_lo, s27, exec_lo 0 0.00 | |
1835 s_cbranch_execz _L67 0 0.00 | |
1836 BBF0_70: | |
1837 v_add_f32_e64 v89, |v69|, 0xbf490fdb 0 0.00 | |
1838 v_log_f32_e64 v93, |v89| 0 0.00 | |
1839 v_cmp_gt_f32_e32 vcc_lo, 0, v89 0 0.00 | |
1840 v_cndmask_b32_e64 v94, 0, -1, vcc_lo 0 0.00 | |
1841 v_cmp_lt_f32_e32 vcc_lo, 0, v89 0 0.00 | |
1842 v_mul_f32_e32 v89, 0x3f2aaaab, v93 0 0.00 | |
1843 v_add_co_ci_u32_e64 v93, vcc_lo, v94, 0, vcc_lo 0 0.00 | |
1844 v_exp_f32_e32 v94, v89 0 0.00 | |
1845 v_cvt_f32_i32_e32 v89, v93 0 0.00 | |
1846 v_mul_f32_e32 v89, v94, v89 0 0.00 | |
1847 v_mad_f32 v94, v89, 0x3f852018, 1.0 0 0.00 | |
1848 _L67: | |
1849 s_mov_b32 exec_lo, s27 0 0.00 | |
1850 _L65: | |
1851 s_andn2_b32 exec_lo, s26, exec_lo 0 0.00 | |
1852 s_cbranch_execz _L68 0 0.00 | |
1853 BBF0_71: | |
1854 s_mov_b32 s27, 0xbca86ba3 0 0.00 | |
1855 v_mul_f32_e64 v89, |v69|, 0x3f8c8168 0 0.00 | |
1856 v_mad_f32 v93, |v69|, 0xbf8c8168, 1.0 0 0.00 | |
1857 v_mad_f32 v95, |v69|, s27, 0x3d981627 0 0.00 | |
1858 v_sqrt_f32_e32 v93, v93 0 0.00 | |
1859 v_madak_f32 v95, v95, v89, 0xbe593484 0 0.00 | |
1860 v_madak_f32 v94, v95, v89, 0x3fc90da4 0 0.00 | |
1861 v_mad_f32 v89, -v94, v93, 0x3fc90fdb 0 0.00 | |
1862 v_mul_f32_e32 v94, 0x3f693710, v89 0 0.00 | |
1863 _L68: | |
1864 s_mov_b32 exec_lo, s26 0 0.00 | |
1865 v_cmp_gt_f32_e32 vcc_lo, 0, v69 0 0.00 | |
1866 v_cndmask_b32_e64 v89, 0, -1, vcc_lo 0 0.00 | |
1867 v_cmp_lt_f32_e32 vcc_lo, 0, v69 0 0.00 | |
1868 v_add_co_ci_u32_e64 v69, vcc_lo, v89, 0, vcc_lo 0 0.00 | |
1869 v_cvt_f32_i32_e32 v69, v69 0 0.00 | |
1870 v_mul_f32_e32 v69, v94, v69 0 0.00 | |
1871 _L64: | |
1872 s_andn2_b32 exec_lo, s25, exec_lo 0 0.00 | |
1873 s_cbranch_execz _L69 0 0.00 | |
1874 BBF0_72: | |
1875 v_log_f32_e64 v89, |v69| 0 0.00 | |
1876 v_cmp_gt_f32_e32 vcc_lo, 0, v69 0 0.00 | |
1877 v_cndmask_b32_e64 v93, 0, -1, vcc_lo 0 0.00 | |
1878 v_cmp_lt_f32_e32 vcc_lo, 0, v69 0 0.00 | |
1879 v_mul_f32_e32 v69, 0x3f2aaaab, v89 0 0.00 | |
1880 v_add_co_ci_u32_e64 v89, vcc_lo, v93, 0, vcc_lo 0 0.00 | |
1881 v_exp_f32_e32 v69, v69 0 0.00 | |
1882 v_cvt_f32_i32_e32 v94, v89 0 0.00 | |
1883 v_mul_f32_e32 v69, v94, v69 0 0.00 | |
1884 _L69: | |
1885 s_mov_b32 exec_lo, s25 0 0.00 | |
1886 v_rcp_f32_e32 v94, v32 0 0.00 | |
1887 v_sub_f32_e32 v69, v69, v74 0 0.00 | |
1888 v_mul_f32_e32 v96, v69, v94 0 0.00 | |
1889 _L63: | |
1890 s_mov_b32 exec_lo, s24 0 0.00 | |
1891 v_add_f32_e64 v94, v96, -1.0 div:2 0 0.00 | |
1892 v_mul_f32_e32 v95, v96, v96 0 0.00 | |
1893 v_add_f32_e64 v97, v96, -2.0 div:2 0 0.00 | |
1894 v_mad_f32 v94, v64, v94, v75 0 0.00 | |
1895 v_mul_f32_e32 v69, v64, v95 0 0.00 | |
1896 v_mad_f32 v103, v77, v97, v75 0 0.00 | |
1897 v_ldexp_f32 v97, v96, -1 0 0.00 | |
1898 v_mul_f32_e32 v94, v96, v94 0 0.00 | |
1899 v_ldexp_f32 v100, v69, -1 0 0.00 | |
1900 v_mad_f32 v97, v103, v97, -v50 0 0.00 | |
1901 v_mul_f32_e32 v104, v94, v94 0 0.00 | |
1902 v_mul_f32_e64 v95, v69, v100 div:2 0 0.00 | |
1903 v_mul_f32_e64 v89, v69, v94 div:2 0 0.00 | |
1904 v_mul_f32_e32 v97, 0.15915494, v97 0 0.00 | |
1905 v_mul_f32_e32 v101, v104, v104 0 0.00 | |
1906 v_mul_f32_e32 v93, v104, v95 0 0.00 | |
1907 v_mac_f32_e32 v89, v94, v100 0 0.00 | |
1908 v_mul_f32_e32 v107, v95, v95 0 0.00 | |
1909 v_mad_f32 v103, v95, 0xbbcccccd, 1.0 0 0.00 | |
1910 v_mac_f32_e32 v93, v104, v95 0 0.00 | |
1911 v_mul_f32_e64 v99, v89, v104 mul:2 0 0.00 | |
1912 v_mul_f32_e32 v105, v89, v95 0 0.00 | |
1913 v_madmk_f32 v103, v104, 0xbd2aaaab, v103 0 0.00 | |
1914 v_mac_f32_e32 v93, v89, v89 0 0.00 | |
1915 v_mul_f32_e32 v106, v89, v99 0 0.00 | |
1916 v_mul_f32_e32 v102, v94, v105 0 0.00 | |
1917 v_mul_f32_e32 v98, 0x38c30c31, v93 0 0.00 | |
1918 v_mac_f32_e32 v106, v101, v95 0 0.00 | |
1919 v_mac_f32_e32 v102, v94, v105 0 0.00 | |
1920 v_mad_f32 v105, v100, v94, v89 0 0.00 | |
1921 v_madmk_f32 v98, v101, 0x3a088889, v98 0 0.00 | |
1922 v_mac_f32_e32 v106, v93, v104 0 0.00 | |
1923 v_mac_f32_e32 v102, v93, v100 0 0.00 | |
1924 v_mul_f32_e64 v93, v69, v95 div:2 0 0.00 | |
1925 v_madmk_f32 v98, v107, 0x3797b426, v98 0 0.00 | |
1926 v_mul_f32_e32 v93, 0x39c30c31, v93 0 0.00 | |
1927 v_add_f32_e32 v95, v103, v98 0 0.00 | |
1928 v_mul_f32_e32 v98, 0x3672b9d6, v102 0 0.00 | |
1929 v_mul_f32_e32 v102, v105, v94 0 0.00 | |
1930 v_mul_f32_e32 v103, v94, v99 0 0.00 | |
1931 v_mul_f32_e32 v105, v104, v101 0 0.00 | |
1932 v_madmk_f32 v95, v106, 0xb521d13a, v95 0 0.00 | |
1933 v_add_f32_e32 v106, -1.0, v96 0 0.00 | |
1934 v_madmk_f32 v102, v102, 0x3b088889, v93 0 0.00 | |
1935 v_mac_f32_e32 v103, v101, v100 0 0.00 | |
1936 v_mac_f32_e32 v99, v89, v104 0 0.00 | |
1937 v_madmk_f32 v89, v105, 0xb6500d01, v95 0 0.00 | |
1938 v_mad_f32 v95, v77, v106, v75 0 0.00 | |
1939 v_mad_f32 v102, v69, 0x3d2aaaab, -v102 0 0.00 | |
1940 v_madmk_f32 v93, v103, 0x379c09c1, v98 0 0.00 | |
1941 v_mul_f32_e32 v94, v94, v99 0 0.00 | |
1942 v_mul_f32_e32 v98, v101, v101 0 0.00 | |
1943 v_mad_f32 v69, v95, v96, -v50 0 0.00 | |
1944 v_rcp_f32_e32 v95, v87 0 0.00 | |
1945 v_add_f32_e32 v99, v102, v93 0 0.00 | |
1946 v_mac_f32_e32 v94, v100, v101 0 0.00 | |
1947 v_madmk_f32 v89, v98, 0x3238ef1d, v89 0 0.00 | |
1948 v_sin_f32_e32 v93, v97 0 0.00 | |
1949 v_mul_f32_e32 v69, 0.15915494, v69 0 0.00 | |
1950 v_mul_f32_e32 v94, v94, v104 0 0.00 | |
1951 v_mul_f32_e32 v96, v96, v95 0 0.00 | |
1952 v_cos_f32_e32 v95, v97 0 0.00 | |
1953 v_mul_f32_e32 v98, v96, v93 0 0.00 | |
1954 v_madmk_f32 v93, v94, 0xb3b8ef1d, v99 0 0.00 | |
1955 v_cos_f32_e32 v97, v69 0 0.00 | |
1956 v_mul_f32_e32 v94, v89, v98 0 0.00 | |
1957 v_mul_f32_e32 v96, v96, v95 0 0.00 | |
1958 v_sin_f32_e32 v69, v69 0 0.00 | |
1959 v_mul_f32_e32 v98, v93, v98 0 0.00 | |
1960 v_mad_f32 v94, -v93, v96, -v94 0 0.00 | |
1961 v_mad_f32 v98, v89, v96, -v98 0 0.00 | |
1962 v_mac_f32_e32 v94, v92, v97 0 0.00 | |
1963 v_mac_f32_e32 v98, v92, v69 0 0.00 | |
1964 v_mul_f32_e32 v95, v85, v94 0 0.00 | |
1965 v_mul_f32_e32 v96, v84, v94 0 0.00 | |
1966 v_mad_f32 v95, v84, v98, -v95 0 0.00 | |
1967 v_mac_f32_e32 v96, v85, v98 0 0.00 | |
1968 v_add_f32_e32 v94, v65, v95 0 0.00 | |
1969 v_add_f32_e32 v66, v70, v96 0 0.00 | |
1970 _L62: | |
1971 s_andn2_b32 exec_lo, s23, exec_lo 0 0.00 | |
1972 v_mov_b32_e32 v66, v38 0 0.00 | |
1973 v_mov_b32_e32 v94, v48 0 0.00 | |
1974 s_mov_b32 exec_lo, s23 0 0.00 | |
1975 s_ff1_i32_b32 s24, exec_lo 0 0.00 | |
1976 s_mov_b32 s23, exec_lo 0 0.00 | |
1977 s_lshl_b32 s25, 1, s24 0 0.00 | |
1978 s_and_b32 s25, s25, exec_lo 0 0.00 | |
1979 s_and_saveexec_b32 s25, s25 0 0.00 | |
1980 s_cbranch_execz _L70 0 0.00 | |
1981 BBF0_73: | |
1982 s_bcnt1_i32_b32 s26, s23 0 0.00 | |
1983 v_mov_b32_e32 v89, s26 0 0.00 | |
1984 s_waitcnt_depctr 0xffe3 0 0.00 | |
1985 buffer_atomic_add v89, off, s[8:11], 0 offset:28 glc 0 0.00 | |
1986 _L70: | |
1987 s_waitcnt_depctr 0xffe3 0 0.00 | |
1988 s_mov_b32 exec_lo, s25 0 0.00 | |
1989 s_waitcnt vmcnt(0) 0 0.00 | |
1990 v_readlane_b32 s24, v89, s24 0 0.00 | |
1991 v_cndmask_b32_e64 v93, v36, v66, s3 0 0.00 | |
1992 v_cndmask_b32_e64 v54, v66, v36, s3 0 0.00 | |
1993 v_cndmask_b32_e64 v95, v41, v94, s3 0 0.00 | |
1994 v_cndmask_b32_e64 v96, v94, v41, s3 0 0.00 | |
1995 v_mbcnt_lo_u32_b32 v89, s23, 0 0 0.00 | |
1996 v_mov_b32_e32 v36, v66 0 0.00 | |
1997 v_mul_f32_e32 v98, v44, v54 0 0.00 | |
1998 v_mul_f32_e32 v53, v40, v54 0 0.00 | |
1999 v_mul_f32_e32 v54, v44, v93 0 0.00 | |
2000 v_mul_f32_e32 v93, v40, v93 0 0.00 | |
2001 v_mov_b32_e32 v41, v94 0 0.00 | |
2002 v_mac_f32_e32 v98, v45, v96 0 0.00 | |
2003 v_mac_f32_e32 v53, v34, v96 0 0.00 | |
2004 v_mac_f32_e32 v54, v45, v95 0 0.00 | |
2005 v_mac_f32_e32 v93, v34, v95 0 0.00 | |
2006 v_add_nc_i32 v89, s24, v89 0 0.00 | |
2007 v_add_f32_e32 v97, v28, v98 0 0.00 | |
2008 v_add_f32_e32 v98, v24, v53 0 0.00 | |
2009 v_add_f32_e32 v99, v28, v54 0 0.00 | |
2010 v_add_f32_e32 v100, v24, v93 0 0.00 | |
2011 v_mul_lo_u32 v89, v89, 24 0 0.00 | |
2012 s_movk_i32 s23, 0x0 0 0.00 | |
2013 v_mov_b32_e32 v69, v81 0 0.00 | |
2014 s_waitcnt_depctr 0xffe3 0 0.00 | |
2015 s_clause 0x1 0 0.00 | |
2016 buffer_store_dword v7, v89, s[12:15], 0 offen glc 0 0.00 | |
2017 buffer_store_dwordx4 v[97:100], v89, s[12:15], 0 offen offset:8 glc 0 0.00 | |
2018 v_min3_f32 v72, v97, v99, v72 0 0.00 | |
2019 v_min3_f32 v71, v98, v100, v71 0 0.00 | |
2020 v_max3_f32 v68, v97, v99, v68 0 0.00 | |
2021 v_max3_f32 v67, v98, v100, v67 0 0.00 | |
2022 s_branch _L71 0 0.00 | |
2023 _L61: | |
2024 s_mov_b32 exec_lo, s21 0 0.00 | |
2025 v_mov_b32_e32 v64, v86 0 0.00 | |
2026 v_mov_b32_e32 v53, v80 0 0.00 | |
2027 v_mov_b32_e32 v32, v82 0 0.00 | |
2028 v_mov_b32_e32 v70, v79 0 0.00 | |
2029 v_mov_b32_e32 v65, v78 0 0.00 | |
2030 v_mov_b32_e32 v104, v76 0 0.00 | |
2031 v_mov_b32_e32 v69, v73 0 0.00 | |
2032 s_branch _L72 0 0.00 | |
2033 _L41: | |
2034 s_mov_b32 exec_lo, s2 0 0.00 | |
2035 v_add_f32_e32 v38, v25, v111 0 0.00 | |
2036 v_subrev_f32_e32 v43, v21, v15 0 0.00 | |
2037 v_cmp_eq_f32_e64 vcc_lo, -v20, 0 0 0.00 | |
2038 s_andn2_b32 exec_lo, s2, vcc_lo 0 0.00 | |
2039 s_cbranch_execz _L73 0 0.00 | |
2040 BBF0_74: | |
2041 v_add_f32_e32 v25, v11, v14 0 0.00 | |
2042 v_add_f32_e32 v40, v12, v13 0 0.00 | |
2043 v_sub_f32_e32 v41, v12, v13 0 0.00 | |
2044 v_sub_f32_e32 v42, v11, v14 0 0.00 | |
2045 v_mad_f32 v64, -v20, v29, v23 0 0.00 | |
2046 v_mul_f32_e32 v44, v25, v25 0 0.00 | |
2047 v_mul_f32_e32 v25, v40, v40 0 0.00 | |
2048 v_mad_f32 v63, -v20, v31, v22 0 0.00 | |
2049 v_mov_b32_e32 v102, v15 0 0.00 | |
2050 v_mov_b32_e32 v103, v111 0 0.00 | |
2051 v_mac_f32_e32 v44, v41, v41 0 0.00 | |
2052 v_mac_f32_e32 v25, v42, v42 0 0.00 | |
2053 v_mov_b32_e32 v8, v14 0 0.00 | |
2054 v_mov_b32_e32 v16, v19 0 0.00 | |
2055 v_mov_b32_e32 v29, v12 0 0.00 | |
2056 v_sqrt_f32_e32 v40, v44 0 0.00 | |
2057 v_mov_b32_e32 v31, v11 0 0.00 | |
2058 v_mov_b32_e32 v44, v43 0 0.00 | |
2059 v_mov_b32_e32 v45, v38 0 0.00 | |
2060 v_sqrt_f32_e32 v25, v25 0 0.00 | |
2061 v_mov_b32_e32 v49, v23 0 0.00 | |
2062 v_mov_b32_e32 v23, v18 0 0.00 | |
2063 v_mad_f32 v2, v40, 0.5, v25 0 0.00 | |
2064 v_mov_b32_e32 v25, v13 0 0.00 | |
2065 _L73: | |
2066 s_andn2_b32 exec_lo, s2, exec_lo 0 0.00 | |
2067 s_cbranch_execz _L74 0 0.00 | |
2068 BBF0_75: | |
2069 v_mul_f32_e32 v40, v13, v22 0 0.00 | |
2070 v_mul_f32_e32 v25, v14, v22 0 0.00 | |
2071 v_mul_f32_e32 v42, v13, v110 0 0.00 | |
2072 v_mul_f32_e32 v8, v13, v15 0 0.00 | |
2073 v_mul_f32_e32 v41, v14, v15 0 0.00 | |
2074 v_mul_f32_e32 v22, v13, v108 0 0.00 | |
2075 v_mac_f32_e32 v40, v11, v23 0 0.00 | |
2076 v_mac_f32_e32 v25, v12, v23 0 0.00 | |
2077 v_mac_f32_e32 v42, v11, v109 0 0.00 | |
2078 v_mul_f32_e32 v29, v14, v110 0 0.00 | |
2079 v_mac_f32_e32 v8, v11, v111 0 0.00 | |
2080 v_mac_f32_e32 v41, v12, v111 0 0.00 | |
2081 v_mul_f32_e32 v1, v14, v108 0 0.00 | |
2082 v_mac_f32_e32 v22, v11, v4 0 0.00 | |
2083 v_add_f32_e32 v49, v18, v40 0 0.00 | |
2084 v_add_f32_e32 v63, v19, v25 0 0.00 | |
2085 v_add_f32_e32 v25, v18, v42 0 0.00 | |
2086 v_mac_f32_e32 v29, v12, v109 0 0.00 | |
2087 v_add_f32_e32 v103, v18, v8 0 0.00 | |
2088 v_add_f32_e32 v102, v19, v41 0 0.00 | |
2089 v_mac_f32_e32 v1, v12, v4 0 0.00 | |
2090 v_add_f32_e32 v4, v18, v22 0 0.00 | |
2091 v_add_f32_e32 v110, v19, v29 0 0.00 | |
2092 v_mov_b32_e32 v2, 1.0 0 0.00 | |
2093 v_mov_b32_e32 v8, 1.0 0 0.00 | |
2094 v_mov_b32_e32 v16, 0 0 0.00 | |
2095 v_mov_b32_e32 v23, 0 0 0.00 | |
2096 v_mov_b32_e32 v29, 0 0 0.00 | |
2097 v_mov_b32_e32 v31, 1.0 0 0.00 | |
2098 v_mov_b32_e32 v109, v25 0 0.00 | |
2099 v_mov_b32_e32 v44, v102 0 0.00 | |
2100 v_mov_b32_e32 v45, v103 0 0.00 | |
2101 v_mov_b32_e32 v22, v63 0 0.00 | |
2102 v_mov_b32_e32 v64, v49 0 0.00 | |
2103 v_mov_b32_e32 v25, 0 0 0.00 | |
2104 v_add_f32_e32 v108, v19, v1 0 0.00 | |
2105 _L74: | |
2106 s_mov_b32 exec_lo, s2 0 0.00 | |
2107 v_cmp_eq_f32_sdwa s2, v4, v49 src0_sel:DWORD src1_sel:DWORD 0 0.00 | |
2108 v_cmp_eq_f32_sdwa s6, v108, v22 src0_sel:DWORD src1_sel:DWORD 0 0.00 | |
2109 v_cmp_eq_f32_sdwa s3, v109, v49 src0_sel:DWORD src1_sel:DWORD 0 0.00 | |
2110 v_cmp_eq_f32_e32 vcc_lo, v110, v22 0 0.00 | |
2111 s_and_b32 s2, s2, s6 0 0.00 | |
2112 s_and_b32 vcc_lo, s3, vcc_lo 0 0.00 | |
2113 s_and_b32 s2, s2, vcc_lo 0 0.00 | |
2114 v_cmp_eq_f32_sdwa s3, v103, v49 src0_sel:DWORD src1_sel:DWORD 0 0.00 | |
2115 v_cmp_eq_f32_e32 vcc_lo, v102, v22 0 0.00 | |
2116 s_and_b32 vcc_lo, s3, vcc_lo 0 0.00 | |
2117 s_and_b32 vcc_lo, s2, vcc_lo 0 0.00 | |
2118 s_andn1_saveexec_b32 s2, vcc_lo 0 0.00 | |
2119 s_cbranch_execz _L75 0 0.00 | |
2120 BBF0_76: | |
2121 v_sub_f32_e32 v54, v4, v49 0 0.00 | |
2122 v_sub_f32_e32 v58, v108, v22 0 0.00 | |
2123 v_mul_f32_e32 v51, v54, v54 0 0.00 | |
2124 v_mac_f32_e32 v51, v58, v58 0 0.00 | |
2125 v_cmp_gt_f32_e32 vcc_lo, 0x2b8cbccc, v51 0 0.00 | |
2126 s_and_saveexec_b32 s3, vcc_lo 0 0.00 | |
2127 s_cbranch_execz _L76 0 0.00 | |
2128 BBF0_77: | |
2129 v_sub_f32_e32 v51, v109, v4 0 0.00 | |
2130 v_sub_f32_e32 v52, v110, v108 0 0.00 | |
2131 v_sub_f32_e32 v55, v103, v109 0 0.00 | |
2132 v_mul_f32_e32 v53, 0x360637b4, v51 0 0.00 | |
2133 v_mul_f32_e32 v51, 0x360637b4, v52 0 0.00 | |
2134 v_madmk_f32 v52, v54, 0x3f7fffde, v53 0 0.00 | |
2135 v_madmk_f32 v51, v58, 0x3f7fffde, v51 0 0.00 | |
2136 v_sub_f32_e32 v53, v102, v110 0 0.00 | |
2137 v_madmk_f32 v32, v55, 0x2b8cbccc, v52 0 0.00 | |
2138 v_madmk_f32 v34, v53, 0x2b8cbccc, v51 0 0.00 | |
2139 _L76: | |
2140 s_andn2_b32 exec_lo, s3, exec_lo 0 0.00 | |
2141 v_mov_b32_e32 v34, v58 0 0.00 | |
2142 v_mov_b32_e32 v32, v54 0 0.00 | |
2143 s_mov_b32 exec_lo, s3 0 0.00 | |
2144 s_waitcnt lgkmcnt(0) 0 0.00 | |
2145 s_load_dwordx4 s[8:11], s[0:1], 0x80 0 0.00 | |
2146 s_load_dwordx4 s[12:15], s[0:1], 0xa0 0 0.00 | |
2147 s_load_dwordx4 s[16:19], s[0:1], 0xc0 0 0.00 | |
2148 v_mov_b32_e32 v28, v49 0 0.00 | |
2149 v_mov_b32_e32 v106, v67 0 0.00 | |
2150 v_mov_b32_e32 v36, v22 0 0.00 | |
2151 v_mov_b32_e32 v40, 1.0 0 0.00 | |
2152 v_mov_b32_e32 v41, 0 0 0.00 | |
2153 v_mov_b32_e32 v107, v68 0 0.00 | |
2154 v_mov_b32_e32 v42, v71 0 0.00 | |
2155 v_mov_b32_e32 v48, v72 0 0.00 | |
2156 v_mov_b32_e32 v65, 0 0 0.00 | |
2157 v_sub_f32_e32 v55, v109, v4 0 0.00 | |
2158 v_sub_f32_e32 v56, v110, v108 0 0.00 | |
2159 v_sub_f32_e32 v53, v103, v109 0 0.00 | |
2160 v_sub_f32_e32 v57, v102, v110 0 0.00 | |
2161 v_add_nc_u32_e32 v59, 0x2000, v3 0 0.00 | |
2162 v_add_nc_u32_e32 v61, 0x1000, v3 0 0.00 | |
2163 v_ldexp_f32 v60, v2, -3 0 0.00 | |
2164 v_cmp_ge_f32_e64 s3, -v20, 0 0 0.00 | |
2165 s_mov_b32 s6, exec_lo 0 0.00 | |
2166 s_mov_b32 s20, exec_lo 0 0.00 | |
2167 _L106: | |
2168 v_cvt_f32_u32_e32 v69, v41 0 0.00 | |
2169 v_mul_f32_e32 v66, v40, v69 0 0.00 | |
2170 v_readfirstlane_b32 s21, v66 0 0.00 | |
2171 v_cmp_eq_f32_e64 vcc_lo, s21, 1.0 0 0.00 | |
2172 s_andn1_saveexec_b32 s22, vcc_lo 0 0.00 | |
2173 s_andn2_b32 exec_lo, s22, exec_lo 0 0.00 | |
2174 s_andn2_b32 s20, s20, exec_lo 0 0.00 | |
2175 s_cbranch_scc0 _L77 0 0.00 | |
2176 BBF0_78: | |
2177 s_mov_b32 exec_lo, s22 0 0.00 | |
2178 s_and_b32 exec_lo, exec_lo, s20 0 0.00 | |
2179 s_mov_b32 s22, exec_lo 0 0.00 | |
2180 s_mov_b32 s23, exec_lo 0 0.00 | |
2181 v_mul_f32_e32 v68, v32, v32 0 0.00 | |
2182 v_mac_f32_e32 v68, v34, v34 0 0.00 | |
2183 _L87: | |
2184 v_add_f32_e32 v66, s21, v40 0 0.00 | |
2185 v_sub_f32_e32 v69, 1.0, v66 0 0.00 | |
2186 v_mul_f32_e32 v76, v66, v66 0 0.00 | |
2187 v_mul_f32_e32 v70, v66, v69 0 0.00 | |
2188 v_mul_f32_e32 v73, v69, v69 0 0.00 | |
2189 v_mul_f32_e32 v74, 0x40400000, v70 0 0.00 | |
2190 v_mul_f32_e64 v72, v70, v55 mul:2 0 0.00 | |
2191 v_mul_f32_e32 v71, 0x40400000, v73 0 0.00 | |
2192 v_mul_f32_e64 v75, v70, v56 mul:2 0 0.00 | |
2193 v_mul_f32_e32 v77, v109, v74 0 0.00 | |
2194 v_mac_f32_e32 v72, v54, v73 0 0.00 | |
2195 v_mul_f32_e32 v74, v110, v74 0 0.00 | |
2196 v_mac_f32_e32 v75, v58, v73 0 0.00 | |
2197 v_mac_f32_e32 v77, v4, v71 0 0.00 | |
2198 v_mad_f32 v105, v53, v76, v72 0 0.00 | |
2199 v_mac_f32_e32 v74, v108, v71 0 0.00 | |
2200 v_mad_f32 v47, v57, v76, v75 0 0.00 | |
2201 v_mul_f32_e32 v71, v69, v73 0 0.00 | |
2202 v_mac_f32_e32 v77, v103, v76 0 0.00 | |
2203 v_mul_f32_e32 v70, v105, v105 0 0.00 | |
2204 v_mac_f32_e32 v74, v102, v76 0 0.00 | |
2205 v_mul_f32_e32 v76, v66, v77 0 0.00 | |
2206 v_mac_f32_e32 v70, v47, v47 0 0.00 | |
2207 v_mul_f32_e32 v69, v66, v74 0 0.00 | |
2208 v_mad_f32 v1, v49, v71, v76 0 0.00 | |
2209 v_cmp_gt_f32_e32 vcc_lo, 0x2b8cbccc, v70 0 0.00 | |
2210 v_mac_f32_e32 v69, v22, v71 0 0.00 | |
2211 s_and_saveexec_b32 s24, vcc_lo 0 0.00 | |
2212 s_cbranch_execz _L78 0 0.00 | |
2213 BBF0_79: | |
2214 v_add_f32_e32 v70, 0xb58637bd, v66 0 0.00 | |
2215 v_cmp_gt_f32_e32 vcc_lo, 1.0, v66 0 0.00 | |
2216 v_sub_f32_e32 v72, 1.0, v70 0 0.00 | |
2217 v_mul_f32_e32 v75, v70, v70 0 0.00 | |
2218 v_mul_f32_e32 v73, v70, v72 0 0.00 | |
2219 v_mul_f32_e32 v77, v72, v72 0 0.00 | |
2220 v_mul_f32_e64 v80, v73, v55 mul:2 0 0.00 | |
2221 v_mul_f32_e64 v71, v73, v56 mul:2 0 0.00 | |
2222 v_mac_f32_e32 v80, v54, v77 0 0.00 | |
2223 v_mac_f32_e32 v71, v58, v77 0 0.00 | |
2224 v_mad_f32 v105, v53, v75, v80 0 0.00 | |
2225 v_mad_f32 v47, v57, v75, v71 0 0.00 | |
2226 s_and_saveexec_b32 s25, vcc_lo 0 0.00 | |
2227 s_cbranch_execz _L78 0 0.00 | |
2228 BBF0_80: | |
2229 v_mul_f32_e32 v66, 0x40400000, v73 0 0.00 | |
2230 v_mul_f32_e32 v79, 0x40400000, v77 0 0.00 | |
2231 v_mul_f32_e32 v74, v72, v77 0 0.00 | |
2232 v_mul_f32_e32 v69, v109, v66 0 0.00 | |
2233 v_mul_f32_e32 v66, v110, v66 0 0.00 | |
2234 v_mac_f32_e32 v69, v4, v79 0 0.00 | |
2235 v_mac_f32_e32 v66, v108, v79 0 0.00 | |
2236 v_mac_f32_e32 v69, v103, v75 0 0.00 | |
2237 v_mac_f32_e32 v66, v102, v75 0 0.00 | |
2238 v_mul_f32_e32 v72, v70, v69 0 0.00 | |
2239 v_mul_f32_e32 v69, v70, v66 0 0.00 | |
2240 v_mov_b32_e32 v66, v70 0 0.00 | |
2241 v_mad_f32 v1, v49, v74, v72 0 0.00 | |
2242 v_mac_f32_e32 v69, v22, v74 0 0.00 | |
2243 _L78: | |
2244 s_mov_b32 exec_lo, s24 0 0.00 | |
2245 s_ff1_i32_b32 s25, exec_lo 0 0.00 | |
2246 s_mov_b32 s24, exec_lo 0 0.00 | |
2247 s_lshl_b32 s26, 1, s25 0 0.00 | |
2248 s_and_b32 s26, s26, exec_lo 0 0.00 | |
2249 s_and_saveexec_b32 s26, s26 0 0.00 | |
2250 s_cbranch_execz _L79 0 0.00 | |
2251 BBF0_81: | |
2252 s_bcnt1_i32_b32 s27, s24 0 0.00 | |
2253 v_mov_b32_e32 v72, s27 0 0.00 | |
2254 s_waitcnt lgkmcnt(0) 0 0.00 | |
2255 s_waitcnt_depctr 0xffe3 0 0.00 | |
2256 buffer_atomic_add v72, off, s[8:11], 0 offset:32 glc 0 0.00 | |
2257 _L79: | |
2258 s_waitcnt_depctr 0xffe3 0 0.00 | |
2259 s_mov_b32 exec_lo, s26 0 0.00 | |
2260 s_waitcnt vmcnt(0) 0 0.00 | |
2261 v_readlane_b32 s25, v72, s25 0 0.00 | |
2262 v_mbcnt_lo_u32_b32 v72, s24, 0 0 0.00 | |
2263 v_mov_b32_e32 v81, v3 0 0.00 | |
2264 v_mov_b32_e32 v82, s21 0 0.00 | |
2265 v_mov_b32_e32 v83, v66 0 0.00 | |
2266 v_sub_f32_e32 v75, v66, v65 0 0.00 | |
2267 v_mul_f32_e32 v79, v105, v105 0 0.00 | |
2268 v_sub_f32_e32 v76, v1, v28 0 0.00 | |
2269 v_sub_f32_e32 v80, v69, v36 0 0.00 | |
2270 v_mul_f32_e32 v77, v75, v75 0 0.00 | |
2271 v_mac_f32_e32 v79, v47, v47 0 0.00 | |
2272 v_mul_f32_e32 v74, v76, v76 0 0.00 | |
2273 v_add_nc_i32 v72, s25, v72 0 0.00 | |
2274 v_mad_f32 v50, v80, v80, v74 0 0.00 | |
2275 v_mul_lo_u32 v72, v72, 12 0 0.00 | |
2276 s_waitcnt lgkmcnt(0) 0 0.00 | |
2277 s_waitcnt_depctr 0xffe3 0 0.00 | |
2278 buffer_store_dwordx3 v[81:83], v72, s[16:19], 0 offen glc 0 0.00 | |
2279 v_mul_f32_e32 v72, v68, v77 0 0.00 | |
2280 v_mul_f32_e32 v77, v77, v79 0 0.00 | |
2281 v_sqrt_f32_e32 v78, v50 0 0.00 | |
2282 v_cmp_lt_f32_e64 s24, v72, 0x2b8cbccc 0 0.00 | |
2283 v_cmp_lt_f32_e64 s26, v77, 0x2b8cbccc 0 0.00 | |
2284 v_cmp_le_f32_e32 vcc_lo, 0x358637bd, v78 0 0.00 | |
2285 s_and_b32 s24, s24, s26 0 0.00 | |
2286 s_or_b32 vcc_lo, vcc_lo, s24 0 0.00 | |
2287 s_and_saveexec_b32 s25, vcc_lo 0 0.00 | |
2288 s_cbranch_execz _L80 0 0.00 | |
2289 BBF0_82: | |
2290 v_cmp_lt_f32_e64 s27, v50, 0x358637bd 0 0.00 | |
2291 s_andn1_saveexec_b32 s28, s27 0 0.00 | |
2292 s_cbranch_execz _L81 0 0.00 | |
2293 BBF0_83: | |
2294 v_mul_f32_e32 v77, v32, v76 0 0.00 | |
2295 v_mul_f32_e32 v81, v32, v80 0 0.00 | |
2296 v_max_f32_e32 v72, 0x358637bd, v50 0 0.00 | |
2297 v_mac_f32_e32 v77, v34, v80 0 0.00 | |
2298 v_mad_f32 v81, v34, v76, -v81 0 0.00 | |
2299 v_rcp_f32_e32 v72, v72 0 0.00 | |
2300 v_mul_f32_e32 v24, v77, v77 0 0.00 | |
2301 v_mac_f32_e32 v24, v81, v81 0 0.00 | |
2302 v_sqrt_f32_e32 v74, v24 0 0.00 | |
2303 v_mul_f32_e32 v24, v75, v72 0 0.00 | |
2304 v_cmp_nlt_f32_e32 vcc_lo, 0x358637bd, v74 0 0.00 | |
2305 s_and_saveexec_b32 s29, vcc_lo 0 0.00 | |
2306 v_mov_b32_e32 v46, 0x3eaaaaab 0 0.00 | |
2307 v_mov_b32_e32 v71, 0 0 0.00 | |
2308 s_andn2_b32 exec_lo, s29, exec_lo 0 0.00 | |
2309 s_cbranch_execz _L82 0 0.00 | |
2310 BBF0_84: | |
2311 v_max_f32_e64 v72, |v77|, |v81| 0 0.00 | |
2312 v_min_f32_e64 v82, |v77|, |v81| 0 0.00 | |
2313 s_mov_b32 s30, 0x3caaae5f 0 0.00 | |
2314 v_min_f32_e32 v84, v77, v81 0 0.00 | |
2315 v_cmp_gt_f32_e64 vcc_lo, |v81|, |v77| 0 0.00 | |
2316 v_rcp_f32_e32 v72, v72 0 0.00 | |
2317 v_mul_f32_e32 v46, v24, v74 0 0.00 | |
2318 v_mul_f32_e32 v72, v82, v72 0 0.00 | |
2319 v_mul_f32_e32 v75, v72, v72 0 0.00 | |
2320 v_madak_f32 v82, s30, v75, 0xbdae5a36 0 0.00 | |
2321 v_cmp_gt_f32_e64 s30, -v84, v84 0 0.00 | |
2322 v_madak_f32 v82, v75, v82, 0x3e3876e2 0 0.00 | |
2323 v_madak_f32 v82, v75, v82, 0xbea91d04 0 0.00 | |
2324 v_madak_f32 v85, v75, v82, 0x3f7ff738 0 0.00 | |
2325 v_mul_f32_e32 v75, v72, v85 0 0.00 | |
2326 v_madak_f32 v75, -2.0, v75, 0x3fc90fdb 0 0.00 | |
2327 v_cndmask_b32_e32 v82, 0, v75, vcc_lo 0 0.00 | |
2328 v_max_f32_e32 v75, v77, v81 0 0.00 | |
2329 v_cmp_gt_f32_e64 vcc_lo, -v77, v77 0 0.00 | |
2330 v_cndmask_b32_e64 v77, 0, 0xc0490fdb, vcc_lo 0 0.00 | |
2331 v_mac_f32_e32 v82, v72, v85 0 0.00 | |
2332 v_cmp_ge_f32_e64 vcc_lo, v75, -v75 0 0.00 | |
2333 v_add_f32_e32 v72, v82, v77 0 0.00 | |
2334 s_and_b32 vcc_lo, s30, vcc_lo 0 0.00 | |
2335 v_cndmask_b32_e64 v75, 0, 0x80000000, vcc_lo 0 0.00 | |
2336 v_xor_b32_e32 v71, v72, v75 0 0.00 | |
2337 _L82: | |
2338 s_mov_b32 exec_lo, s29 0 0.00 | |
2339 v_mul_f32_e32 v75, v105, v76 0 0.00 | |
2340 v_mul_f32_e32 v77, v47, v76 0 0.00 | |
2341 v_mac_f32_e32 v75, v47, v80 0 0.00 | |
2342 v_mad_f32 v77, v105, v80, -v77 0 0.00 | |
2343 v_mul_f32_e32 v79, v75, v75 0 0.00 | |
2344 v_mac_f32_e32 v79, v77, v77 0 0.00 | |
2345 v_sqrt_f32_e32 v50, v79 0 0.00 | |
2346 v_cmp_nlt_f32_e32 vcc_lo, 0x358637bd, v50 0 0.00 | |
2347 s_and_b32 exec_lo, s29, vcc_lo 0 0.00 | |
2348 v_mov_b32_e32 v104, 0x3eaaaaab 0 0.00 | |
2349 v_mov_b32_e32 v70, 0 0 0.00 | |
2350 s_andn2_b32 exec_lo, s29, exec_lo 0 0.00 | |
2351 s_cbranch_execz _L83 0 0.00 | |
2352 BBF0_85: | |
2353 v_max_f32_e64 v81, |v75|, |v77| 0 0.00 | |
2354 s_mov_b32 s30, 0x3caaae5f 0 0.00 | |
2355 v_min_f32_e32 v85, v75, v77 0 0.00 | |
2356 v_cmp_gt_f32_e64 vcc_lo, |v77|, |v75| 0 0.00 | |
2357 v_mul_f32_e32 v104, v24, v50 0 0.00 | |
2358 v_rcp_f32_e32 v82, v81 0 0.00 | |
2359 v_min_f32_e64 v81, |v75|, |v77| 0 0.00 | |
2360 v_mul_f32_e32 v82, v81, v82 0 0.00 | |
2361 v_mul_f32_e32 v81, v82, v82 0 0.00 | |
2362 v_madak_f32 v83, s30, v81, 0xbdae5a36 0 0.00 | |
2363 v_cmp_gt_f32_e64 s30, -v85, v85 0 0.00 | |
2364 v_madak_f32 v83, v81, v83, 0x3e3876e2 0 0.00 | |
2365 v_madak_f32 v83, v81, v83, 0xbea91d04 0 0.00 | |
2366 v_madak_f32 v81, v81, v83, 0x3f7ff738 0 0.00 | |
2367 v_mul_f32_e32 v83, v82, v81 0 0.00 | |
2368 v_madak_f32 v83, -2.0, v83, 0x3fc90fdb 0 0.00 | |
2369 v_cndmask_b32_e32 v84, 0, v83, vcc_lo 0 0.00 | |
2370 v_max_f32_e32 v83, v75, v77 0 0.00 | |
2371 v_cmp_gt_f32_e64 vcc_lo, -v75, v75 0 0.00 | |
2372 v_cndmask_b32_e64 v75, 0, 0xc0490fdb, vcc_lo 0 0.00 | |
2373 v_mac_f32_e32 v84, v82, v81 0 0.00 | |
2374 v_cmp_ge_f32_e64 vcc_lo, v83, -v83 0 0.00 | |
2375 v_add_f32_e32 v75, v84, v75 0 0.00 | |
2376 s_and_b32 vcc_lo, s30, vcc_lo 0 0.00 | |
2377 v_cndmask_b32_e64 v77, 0, 0x80000000, vcc_lo 0 0.00 | |
2378 v_xor_b32_e32 v70, v75, v77 0 0.00 | |
2379 _L83: | |
2380 s_mov_b32 exec_lo, s29 0 0.00 | |
2381 _L81: | |
2382 s_andn2_b32 exec_lo, s28, exec_lo 0 0.00 | |
2383 v_cndmask_b32_e64 v72, 0, -1, s24 0 0.00 | |
2384 v_cndmask_b32_e64 v75, 0, -1, s26 0 0.00 | |
2385 v_cndmask_b32_e64 v46, v79, 0x3eaaaaab, s27 0 0.00 | |
2386 v_cndmask_b32_e64 v104, v104, 0x3eaaaaab, s27 0 0.00 | |
2387 v_cndmask_b32_e64 v71, v72, 0, s27 0 0.00 | |
2388 v_cndmask_b32_e64 v70, v75, 0, s27 0 0.00 | |
2389 s_mov_b32 exec_lo, s28 0 0.00 | |
2390 v_mul_f32_e32 v79, 0.15915494, v71 0 0.00 | |
2391 v_mul_f32_e32 v81, 0.15915494, v70 0 0.00 | |
2392 v_cos_f32_e32 v82, v79 0 0.00 | |
2393 v_cos_f32_e32 v79, v81 0 0.00 | |
2394 v_mul_f32_e32 v81, v82, v79 0 0.00 | |
2395 v_cmp_lt_f32_e64 s24, v81, 0 0 0.00 | |
2396 s_andn2_b32 exec_lo, s28, s24 0 0.00 | |
2397 s_cbranch_execz _L84 0 0.00 | |
2398 BBF0_86: | |
2399 v_add_f32_e32 v77, 1.0, v82 0 0.00 | |
2400 v_add_f32_e32 v81, 1.0, v79 0 0.00 | |
2401 v_mul_f32_e32 v83, 0.15915494, v71 0 0.00 | |
2402 v_mul_f32_e32 v84, 0.15915494, v70 0 0.00 | |
2403 v_mul_f32_e32 v86, v104, v46 0 0.00 | |
2404 v_max_f32_e32 v77, 0x3089705f, v77 0 0.00 | |
2405 v_max_f32_e32 v81, 0x3089705f, v81 0 0.00 | |
2406 v_sin_f32_e32 v85, v83 0 0.00 | |
2407 v_add_f32_e32 v88, v70, v71 0 0.00 | |
2408 v_rcp_f32_e32 v77, v77 0 0.00 | |
2409 v_sin_f32_e32 v84, v84 0 0.00 | |
2410 v_mul_f32_e32 v79, v79, v85 0 0.00 | |
2411 v_mul_f32_e32 v83, 0x3f2aaaab, v77 0 0.00 | |
2412 v_madmk_f32 v87, v77, 0xbf2aaaab, v46 0 0.00 | |
2413 v_rcp_f32_e32 v77, v81 0 0.00 | |
2414 v_mul_f32_e64 v81, v85, v46 mul:2 0 0.00 | |
2415 v_mac_f32_e32 v79, v82, v84 0 0.00 | |
2416 v_mul_f32_e32 v90, v83, v85 0 0.00 | |
2417 v_mul_f32_e32 v24, v87, v87 0 0.00 | |
2418 v_mac_f32_e32 v81, v84, v104 0 0.00 | |
2419 v_mac_f32_e32 v90, v85, v83 0 0.00 | |
2420 v_madmk_f32 v85, v77, 0xbf2aaaab, v104 0 0.00 | |
2421 v_mul_f32_e32 v77, 0x3f2aaaab, v77 0 0.00 | |
2422 v_mac_f32_e32 v81, v84, v104 0 0.00 | |
2423 v_mul_f32_e32 v104, v88, v88 0 0.00 | |
2424 v_mac_f32_e32 v24, v85, v85 0 0.00 | |
2425 v_mac_f32_e32 v90, v84, v77 0 0.00 | |
2426 v_mul_f32_e32 v74, v83, v77 0 0.00 | |
2427 v_mad_f32 v81, -v86, v79, v81 0 0.00 | |
2428 v_mul_f32_e64 v83, |v88|, v104 0 0.00 | |
2429 v_sqrt_f32_e32 v24, v24 0 0.00 | |
2430 v_mac_f32_e32 v90, v84, v77 0 0.00 | |
2431 v_mad_f32 v90, -v74, v79, v90 0 0.00 | |
2432 v_sub_f32_e32 v74, v71, v70 0 0.00 | |
2433 v_mul_f32_e32 v79, 0x3e19999a, v90 0 0.00 | |
2434 v_mul_f32_e64 v82, |v74|, 0x3d8f5c29 0 0.00 | |
2435 v_mul_f32_e64 v74, |v74|, 0x3bf5c28f 0 0.00 | |
2436 v_mad_f32 v79, v81, 0x3e19999a, -v79 0 0.00 | |
2437 v_mad_f32 v82, |v88|, 0x3ba3d70a, v82 0 0.00 | |
2438 v_madmk_f32 v83, v83, 0x369b3073, v74 0 0.00 | |
2439 v_mul_f32_e64 v74, |v79|, 0x3fc66666 0 0.00 | |
2440 v_mac_f32_e32 v74, v83, v104 0 0.00 | |
2441 v_mad_f32 v77, v82, v24, v74 0 0.00 | |
2442 _L84: | |
2443 s_andn2_b32 exec_lo, s28, exec_lo 0 0.00 | |
2444 v_cndmask_b32_e64 v77, v50, 2.0, s24 0 0.00 | |
2445 s_mov_b32 exec_lo, s28 0 0.00 | |
2446 v_mul_f32_e32 v74, v78, v77 0 0.00 | |
2447 v_mul_f32_e32 v74, v2, v74 0 0.00 | |
2448 v_cmp_le_f32_e64 s24, v74, 0x3e800000 0 0.00 | |
2449 v_cmp_ge_f32_e32 vcc_lo, 0x37800000, v40 0 0.00 | |
2450 s_or_b32 vcc_lo, s24, vcc_lo 0 0.00 | |
2451 s_andn1_saveexec_b32 s24, vcc_lo 0 0.00 | |
2452 s_andn2_b32 exec_lo, s24, exec_lo 0 0.00 | |
2453 s_andn2_b32 s23, s23, exec_lo 0 0.00 | |
2454 s_cbranch_scc0 _L85 0 0.00 | |
2455 BBF0_87: | |
2456 s_and_b32 exec_lo, s24, s23 0 0.00 | |
2457 _L80: | |
2458 s_andn2_b32 exec_lo, s25, exec_lo 0 0.00 | |
2459 s_and_b32 exec_lo, s25, s23 0 0.00 | |
2460 s_ff1_i32_b32 s25, exec_lo 0 0.00 | |
2461 s_mov_b32 s24, exec_lo 0 0.00 | |
2462 s_lshl_b32 s26, 1, s25 0 0.00 | |
2463 v_lshlrev_b32_e32 v41, 1, v41 0 0.00 | |
2464 s_and_b32 s26, s26, exec_lo 0 0.00 | |
2465 v_ldexp_f32 v40, v40, -1 0 0.00 | |
2466 s_and_saveexec_b32 s26, s26 0 0.00 | |
2467 s_cbranch_execz _L86 0 0.00 | |
2468 BBF0_88: | |
2469 s_bcnt1_i32_b32 s27, s24 0 0.00 | |
2470 v_mov_b32_e32 v66, s27 0 0.00 | |
2471 s_waitcnt_depctr 0xffe3 0 0.00 | |
2472 buffer_atomic_add v66, off, s[8:11], 0 offset:32 glc 0 0.00 | |
2473 _L86: | |
2474 s_waitcnt_depctr 0xffe3 0 0.00 | |
2475 s_mov_b32 exec_lo, s26 0 0.00 | |
2476 v_mbcnt_lo_u32_b32 v69, s24, 0 0 0.00 | |
2477 s_waitcnt vmcnt(0) 0 0.00 | |
2478 v_readlane_b32 s24, v66, s25 0 0.00 | |
2479 v_cvt_f32_u32_e32 v72, v41 0 0.00 | |
2480 v_mov_b32_e32 v71, v40 0 0.00 | |
2481 v_mov_b32_e32 v70, v72 0 0.00 | |
2482 v_add_nc_i32 v66, s24, v69 0 0.00 | |
2483 v_mov_b32_e32 v69, v59 0 0.00 | |
2484 v_mul_lo_u32 v66, v66, 12 0 0.00 | |
2485 s_waitcnt_depctr 0xffe3 0 0.00 | |
2486 buffer_store_dwordx3 v[69:71], v66, s[16:19], 0 offen glc 0 0.00 | |
2487 s_branch _L87 0 0.00 | |
2488 _L85: | |
2489 s_mov_b32 exec_lo, s22 0 0.00 | |
2490 v_add_nc_u32_e32 v34, 1, v41 0 0.00 | |
2491 s_mov_b32 s21, exec_lo 0 0.00 | |
2492 s_ff1_i32_b32 s22, exec_lo 0 0.00 | |
2493 v_ffbl_b32_e32 v24, v34 0 0.00 | |
2494 s_lshl_b32 s23, 1, s22 0 0.00 | |
2495 s_and_b32 s23, s23, exec_lo 0 0.00 | |
2496 v_min_u32_e32 v24, 32, v24 0 0.00 | |
2497 v_lshlrev_b32_e64 v50, v24, 1 0 0.00 | |
2498 v_cvt_f32_u32_e32 v67, v50 0 0.00 | |
2499 v_lshrrev_b32_e32 v41, v24, v34 0 0.00 | |
2500 v_mul_f32_e32 v40, v40, v67 0 0.00 | |
2501 s_and_saveexec_b32 s23, s23 0 0.00 | |
2502 s_cbranch_execz _L88 0 0.00 | |
2503 BBF0_89: | |
2504 s_bcnt1_i32_b32 s24, s21 0 0.00 | |
2505 v_mov_b32_e32 v46, s24 0 0.00 | |
2506 s_waitcnt_depctr 0xffe3 0 0.00 | |
2507 buffer_atomic_add v46, off, s[8:11], 0 offset:32 glc 0 0.00 | |
2508 _L88: | |
2509 s_waitcnt_depctr 0xffe3 0 0.00 | |
2510 s_mov_b32 exec_lo, s23 0 0.00 | |
2511 v_sub_f32_e32 v65, v70, v71 0 0.00 | |
2512 s_waitcnt vmcnt(0) 0 0.00 | |
2513 v_readlane_b32 s22, v46, s22 0 0.00 | |
2514 v_mbcnt_lo_u32_b32 v46, s21, 0 0 0.00 | |
2515 v_mov_b32_e32 v84, v61 0 0.00 | |
2516 v_mov_b32_e32 v86, v40 0 0.00 | |
2517 v_mul_f32_e32 v67, v65, v65 0 0.00 | |
2518 v_add_f32_e32 v77, v71, v70 0 0.00 | |
2519 v_mul_f32_e32 v82, v67, v67 0 0.00 | |
2520 v_mad_f32 v83, v67, 0xbccccccd, 1.0 0 0.00 | |
2521 v_mul_f32_e32 v81, v77, v77 0 0.00 | |
2522 s_mov_b32 s21, 0xbc6a0ea1 0 0.00 | |
2523 s_mov_b32 s23, 0x3979a934 0 0.00 | |
2524 s_mov_b32 s24, 0x388fa325 0 0.00 | |
2525 s_mov_b32 s25, 0x3b21e3b8 0 0.00 | |
2526 s_mov_b32 s26, 0xb84c68e7 0 0.00 | |
2527 v_madmk_f32 v83, v82, 0x39b3719e, v83 0 0.00 | |
2528 v_madak_f32 v68, s21, v67, 0x40c00000 0 0.00 | |
2529 v_madak_f32 v50, s26, v67, 0x3a088889 0 0.00 | |
2530 v_madak_f32 v72, s25, v67, 0xbd2aaaab 0 0.00 | |
2531 v_madak_f32 v74, s24, v67, 0xba3b3ee7 0 0.00 | |
2532 v_madak_f32 v79, s23, v67, 0xbdcccccd 0 0.00 | |
2533 v_madmk_f32 v85, v82, 0xb8c28a7f, v68 0 0.00 | |
2534 v_mul_f32_e32 v68, v67, v82 0 0.00 | |
2535 v_cvt_f32_u32_e32 v67, v41 0 0.00 | |
2536 v_add_nc_i32 v46, s22, v46 0 0.00 | |
2537 v_madmk_f32 v79, v82, 0x378e44a1, v79 0 0.00 | |
2538 v_madmk_f32 v72, v82, 0xb81c6fca, v72 0 0.00 | |
2539 v_madmk_f32 v82, v68, 0x3494ab4c, v85 0 0.00 | |
2540 v_mov_b32_e32 v85, v67 0 0.00 | |
2541 v_mul_lo_u32 v46, v46, 12 0 0.00 | |
2542 v_madmk_f32 v50, v81, 0xb6500cec, v50 0 0.00 | |
2543 v_madmk_f32 v74, v81, 0xb70526e7, v74 0 0.00 | |
2544 s_waitcnt_depctr 0xffe3 0 0.00 | |
2545 buffer_store_dwordx3 v[84:86], v46, s[16:19], 0 offen glc 0 0.00 | |
2546 v_mac_f32_e32 v79, v74, v81 0 0.00 | |
2547 v_mac_f32_e32 v72, v50, v81 0 0.00 | |
2548 v_madmk_f32 v46, v68, 0xb601da25, v83 0 0.00 | |
2549 v_rcp_f32_e32 v50, v78 0 0.00 | |
2550 v_mac_f32_e32 v82, v79, v81 0 0.00 | |
2551 v_mac_f32_e32 v46, v72, v81 0 0.00 | |
2552 v_mul_f32_e32 v74, v65, v82 0 0.00 | |
2553 v_mul_f32_e64 v65, -v20, v46 0 0.00 | |
2554 v_cmp_gt_f32_e64 vcc_lo, 0x3a83126f, |v74| 0 0.00 | |
2555 v_mul_f32_e32 v67, v65, v50 0 0.00 | |
2556 s_andn1_saveexec_b32 s21, vcc_lo 0 0.00 | |
2557 s_cbranch_execz _L89 0 0.00 | |
2558 BBF0_90: | |
2559 v_mad_f32 v65, v74, -0.5, v77 0 0.00 | |
2560 v_cmp_gt_f32_e64 s22, 0x3a83126f, |v67| 0 0.00 | |
2561 s_andn1_saveexec_b32 s23, s22 0 0.00 | |
2562 s_cbranch_execz _L90 0 0.00 | |
2563 BBF0_91: | |
2564 v_mad_f32 v50, -v67, v65, -1.0 0 0.00 | |
2565 v_mul_f32_e32 v68, v74, v67 0 0.00 | |
2566 v_mad_f32 v50, -v65, v67, v50 0 0.00 | |
2567 v_ldexp_f32 v24, -v68, 1 0 0.00 | |
2568 v_cmp_gt_f32_e64 vcc_lo, 0x3f4ccccd, |v50| 0 0.00 | |
2569 s_andn1_saveexec_b32 s24, vcc_lo 0 0.00 | |
2570 s_cbranch_execz _L91 0 0.00 | |
2571 BBF0_92: | |
2572 v_add_f32_e64 v79, |v50|, -1.0 0 0.00 | |
2573 v_mov_b32_e32 v81, 0xbf4f5c29 0 0.00 | |
2574 v_cmp_gt_f32_e64 vcc_lo, 0x40066666, |v50| 0 0.00 | |
2575 v_cndmask_b32_e64 v83, 0.5, 0x3f23fe5d, vcc_lo 0 0.00 | |
2576 v_mov_b32_e32 v82, 0x3f6a311b 0 0.00 | |
2577 v_sqrt_f32_e64 v84, |v79| 0 0.00 | |
2578 v_cndmask_b32_e32 v81, 0xbe1fbe77, v81, vcc_lo 0 0.00 | |
2579 s_mov_b32 s25, 0x3f715bef 0 0.00 | |
2580 v_cndmask_b32_e32 v85, 0x3e255531, v82, vcc_lo 0 0.00 | |
2581 v_cmp_gt_f32_e64 vcc_lo, 0x3fa00000, |v50| 0 0.00 | |
2582 v_mad_f32 v81, v83, |v50|, v81 0 0.00 | |
2583 v_mul_f32_e32 v79, v79, v84 0 0.00 | |
2584 v_mad_f32 v85, v81, |v50|, v85 0 0.00 | |
2585 v_madak_f32 v82, s25, v79, 0x3f490fdb 0 0.00 | |
2586 v_cndmask_b32_e32 v81, v85, v82, vcc_lo 0 0.00 | |
2587 _L91: | |
2588 s_andn2_b32 exec_lo, s24, exec_lo 0 0.00 | |
2589 v_mul_f32_e64 v79, |v50|, 0x3e32e5ab 0 0.00 | |
2590 v_sin_f32_e32 v79, v79 0 0.00 | |
2591 v_mul_f32_e32 v81, 0x3f693710, v79 0 0.00 | |
2592 s_mov_b32 exec_lo, s24 0 0.00 | |
2593 v_cmp_gt_f32_e32 vcc_lo, 0, v50 0 0.00 | |
2594 v_cndmask_b32_e64 v79, 0, -1, vcc_lo 0 0.00 | |
2595 v_cmp_lt_f32_e32 vcc_lo, 0, v50 0 0.00 | |
2596 v_mad_f32 v68, v68, -2.0, v50 0 0.00 | |
2597 v_add_co_ci_u32_e64 v79, vcc_lo, v79, 0, vcc_lo 0 0.00 | |
2598 v_cmp_gt_f32_e64 vcc_lo, 0x3f4ccccd, |v68| 0 0.00 | |
2599 v_cvt_f32_i32_e32 v79, v79 0 0.00 | |
2600 v_mul_f32_e32 v34, v81, v79 0 0.00 | |
2601 s_andn2_b32 exec_lo, s24, vcc_lo 0 0.00 | |
2602 s_cbranch_execz _L92 0 0.00 | |
2603 BBF0_93: | |
2604 v_add_f32_e64 v79, |v68|, -1.0 0 0.00 | |
2605 v_mov_b32_e32 v81, 0xbf4f5c29 0 0.00 | |
2606 v_cmp_gt_f32_e64 vcc_lo, 0x40066666, |v68| 0 0.00 | |
2607 v_cndmask_b32_e64 v83, 0.5, 0x3f23fe5d, vcc_lo 0 0.00 | |
2608 s_mov_b32 s25, 0x3f715bef 0 0.00 | |
2609 v_sqrt_f32_e64 v84, |v79| 0 0.00 | |
2610 v_cndmask_b32_e32 v86, 0xbe1fbe77, v81, vcc_lo 0 0.00 | |
2611 v_mov_b32_e32 v81, 0x3f6a311b 0 0.00 | |
2612 v_mad_f32 v86, v83, |v68|, v86 0 0.00 | |
2613 v_cndmask_b32_e32 v87, 0x3e255531, v81, vcc_lo 0 0.00 | |
2614 v_cmp_gt_f32_e64 vcc_lo, 0x3fa00000, |v68| 0 0.00 | |
2615 v_mul_f32_e32 v79, v79, v84 0 0.00 | |
2616 v_mad_f32 v87, v86, |v68|, v87 0 0.00 | |
2617 v_madak_f32 v81, s25, v79, 0x3f490fdb 0 0.00 | |
2618 v_cndmask_b32_e32 v79, v87, v81, vcc_lo 0 0.00 | |
2619 _L92: | |
2620 s_andn2_b32 exec_lo, s24, exec_lo 0 0.00 | |
2621 v_mul_f32_e64 v79, |v68|, 0x3e32e5ab 0 0.00 | |
2622 v_sin_f32_e32 v79, v79 0 0.00 | |
2623 v_mul_f32_e32 v79, 0x3f693710, v79 0 0.00 | |
2624 s_mov_b32 exec_lo, s24 0 0.00 | |
2625 v_rcp_f32_e32 v83, v24 0 0.00 | |
2626 v_mul_f32_e32 v84, v74, v50 0 0.00 | |
2627 v_mad_f32 v65, -v84, v83, v65 0 0.00 | |
2628 v_cmp_gt_f32_e32 vcc_lo, 0, v68 0 0.00 | |
2629 v_cndmask_b32_e64 v81, 0, -1, vcc_lo 0 0.00 | |
2630 v_cmp_lt_f32_e32 vcc_lo, 0, v68 0 0.00 | |
2631 v_mad_f32 v67, v65, v67, 1.0 0 0.00 | |
2632 v_add_co_ci_u32_e64 v68, vcc_lo, v81, 0, vcc_lo 0 0.00 | |
2633 v_mul_f32_e32 v65, v65, v67 0 0.00 | |
2634 v_cvt_f32_i32_e32 v68, v68 0 0.00 | |
2635 v_sqrt_f32_e64 v67, |v65| 0 0.00 | |
2636 v_mad_f32 v70, v79, v68, -v34 0 0.00 | |
2637 v_mul_f32_e32 v68, v70, v67 0 0.00 | |
2638 v_mul_f32_e32 v67, v68, v83 0 0.00 | |
2639 _L90: | |
2640 s_andn2_b32 exec_lo, s23, exec_lo 0 0.00 | |
2641 s_cbranch_execz _L93 0 0.00 | |
2642 BBF0_94: | |
2643 v_sqrt_f32_e64 v50, |v65| 0 0.00 | |
2644 v_add_f32_e32 v68, v74, v65 0 0.00 | |
2645 v_mov_b32_e32 v24, v74 0 0.00 | |
2646 v_sqrt_f32_e64 v82, |v68| 0 0.00 | |
2647 v_mul_f32_e32 v34, v65, v50 0 0.00 | |
2648 v_mov_b32_e32 v50, v65 0 0.00 | |
2649 v_mad_f32 v70, v68, v82, -v34 0 0.00 | |
2650 v_rcp_f32_e32 v68, v74 0 0.00 | |
2651 v_mul_f32_e32 v79, 0x3f2aaaab, v70 0 0.00 | |
2652 v_mul_f32_e32 v67, v79, v68 0 0.00 | |
2653 _L93: | |
2654 s_mov_b32 exec_lo, s23 0 0.00 | |
2655 v_cndmask_b32_e64 v68, 0, 2, s22 0 0.00 | |
2656 _L89: | |
2657 s_andn2_b32 exec_lo, s21, exec_lo 0 0.00 | |
2658 s_cbranch_execz _L94 0 0.00 | |
2659 BBF0_95: | |
2660 v_mad_f32 v50, v77, v67, 1.0 0 0.00 | |
2661 v_mov_b32_e32 v24, 0 0 0.00 | |
2662 v_mov_b32_e32 v34, 0 0 0.00 | |
2663 v_mov_b32_e32 v68, 1 0 0.00 | |
2664 v_mov_b32_e32 v70, 0 0 0.00 | |
2665 v_mul_f32_e32 v50, v77, v50 0 0.00 | |
2666 v_sqrt_f32_e64 v67, |v50| 0 0.00 | |
2667 v_mov_b32_e32 v50, 0 0 0.00 | |
2668 _L94: | |
2669 s_mov_b32 exec_lo, s21 0 0.00 | |
2670 v_ldexp_f32 v72, v46, -2 0 0.00 | |
2671 v_mul_f32_e32 v83, v60, v78 0 0.00 | |
2672 v_mov_b32_e32 v32, 0 0 0.00 | |
2673 s_movk_i32 s26, 0xffff 0 0.00 | |
2674 v_ldexp_f32 v82, v74, -1 0 0.00 | |
2675 v_rcp_f32_e32 v72, v72 0 0.00 | |
2676 s_mov_b32 s24, exec_lo 0 0.00 | |
2677 s_mov_b32 s25, exec_lo 0 0.00 | |
2678 v_rcp_f32_e32 v65, v24 0 0.00 | |
2679 v_mul_f32_e32 v72, v83, v72 0 0.00 | |
2680 v_cmp_eq_f32_e64 s21, v66, 1.0 0 0.00 | |
2681 v_cmp_ne_i32_e64 s22, v68, 1 0 0.00 | |
2682 v_cmp_ne_i32_e64 s23, v68, 2 0 0.00 | |
2683 v_rcp_f32_e32 v68, v46 0 0.00 | |
2684 v_sqrt_f32_e32 v72, v72 0 0.00 | |
2685 v_mul_f32_e32 v67, v72, v67 0 0.00 | |
2686 v_max_f32_e32 v72, 0x358637bd, v78 0 0.00 | |
2687 v_ceil_f32_e32 v67, v67 0 0.00 | |
2688 v_rcp_f32_e32 v78, v72 0 0.00 | |
2689 v_max_f32_e32 v104, 1.0, v67 0 0.00 | |
2690 v_cvt_u32_f32_e32 v83, v104 0 0.00 | |
2691 v_mul_f32_e64 v78, -v20, v78 0 0.00 | |
2692 v_rcp_f32_e32 v72, v104 0 0.00 | |
2693 s_nop 0 0 0.00 | |
2694 s_nop 0 0 0.00 | |
2695 s_nop 0 0 0.00 | |
2696 s_nop 0 0 0.00 | |
2697 s_nop 0 0 0.00 | |
2698 s_nop 0 0 0.00 | |
2699 s_nop 0 0 0.00 | |
2700 _L105: | |
2701 v_cmp_eq_i32_e64 s26, s26, 0 0 0.00 | |
2702 v_add_co_ci_u32_e64 v84, vcc_lo, v32, 0, s26 0 0.00 | |
2703 v_cmp_gt_u32_e32 vcc_lo, v83, v84 0 0.00 | |
2704 s_and_saveexec_b32 s27, vcc_lo 0 0.00 | |
2705 s_andn2_b32 exec_lo, s27, exec_lo 0 0.00 | |
2706 s_andn2_b32 s25, s25, exec_lo 0 0.00 | |
2707 s_cbranch_scc0 _L95 0 0.00 | |
2708 BBF0_96: | |
2709 s_and_b32 exec_lo, s27, s25 0 0.00 | |
2710 v_add_co_ci_u32_e64 v32, vcc_lo, v32, 1, s26 0 0.00 | |
2711 v_cmp_eq_i32_e32 vcc_lo, v83, v32 0 0.00 | |
2712 s_and_b32 vcc_lo, vcc_lo, s21 0 0.00 | |
2713 s_andn1_saveexec_b32 s26, vcc_lo 0 0.00 | |
2714 s_cbranch_execz _L96 0 0.00 | |
2715 BBF0_97: | |
2716 v_cvt_f32_u32_e32 v85, v32 0 0.00 | |
2717 v_mul_f32_e32 v85, v85, v72 0 0.00 | |
2718 s_and_saveexec_b32 s27, s22 0 0.00 | |
2719 s_cbranch_execz _L97 0 0.00 | |
2720 BBF0_98: | |
2721 v_mad_f32 v85, v70, v85, v34 0 0.00 | |
2722 s_and_saveexec_b32 s28, s23 0 0.00 | |
2723 s_cbranch_execz _L98 0 0.00 | |
2724 BBF0_99: | |
2725 v_cmp_gt_f32_e64 vcc_lo, 0x3f337960, |v85| 0 0.00 | |
2726 s_andn1_saveexec_b32 s29, vcc_lo 0 0.00 | |
2727 s_cbranch_execz _L99 0 0.00 | |
2728 BBF0_100: | |
2729 v_cmp_gt_f32_e64 vcc_lo, 0x3f673b59, |v85| 0 0.00 | |
2730 s_andn1_saveexec_b32 s30, vcc_lo 0 0.00 | |
2731 s_cbranch_execz _L100 0 0.00 | |
2732 BBF0_101: | |
2733 v_mov_b32_e32 v32, 0xbf83a110 0 0.00 | |
2734 v_cmp_gt_f32_e64 vcc_lo, 0x40027ca5, |v85| 0 0.00 | |
2735 v_cndmask_b32_e64 v86, 2.0, 0x3fc7d00b, vcc_lo 0 0.00 | |
2736 v_cndmask_b32_e32 v32, 0xbe98df6c, v32, vcc_lo 0 0.00 | |
2737 v_mad_f32 v32, v86, |v85|, v32 0 0.00 | |
2738 v_mov_b32_e32 v86, 0x3f21d928 0 0.00 | |
2739 v_sqrt_f32_e32 v32, v32 0 0.00 | |
2740 v_cndmask_b32_e32 v87, 0x3e1fbe77, v86, vcc_lo 0 0.00 | |
2741 v_add_f32_e32 v32, v87, v32 0 0.00 | |
2742 _L100: | |
2743 s_andn2_b32 exec_lo, s30, exec_lo 0 0.00 | |
2744 s_cbranch_execz _L101 0 0.00 | |
2745 BBF0_102: | |
2746 v_add_f32_e64 v32, |v85|, 0xbf490fdb 0 0.00 | |
2747 v_log_f32_e64 v86, |v32| 0 0.00 | |
2748 v_cmp_gt_f32_e32 vcc_lo, 0, v32 0 0.00 | |
2749 v_cndmask_b32_e64 v87, 0, -1, vcc_lo 0 0.00 | |
2750 v_cmp_lt_f32_e32 vcc_lo, 0, v32 0 0.00 | |
2751 v_mul_f32_e32 v32, 0x3f2aaaab, v86 0 0.00 | |
2752 v_add_co_ci_u32_e64 v86, vcc_lo, v87, 0, vcc_lo 0 0.00 | |
2753 v_exp_f32_e32 v32, v32 0 0.00 | |
2754 v_cvt_f32_i32_e32 v87, v86 0 0.00 | |
2755 v_mul_f32_e32 v32, v32, v87 0 0.00 | |
2756 v_mad_f32 v32, v32, 0x3f852018, 1.0 0 0.00 | |
2757 _L101: | |
2758 s_mov_b32 exec_lo, s30 0 0.00 | |
2759 _L99: | |
2760 s_andn2_b32 exec_lo, s29, exec_lo 0 0.00 | |
2761 s_cbranch_execz _L102 0 0.00 | |
2762 BBF0_103: | |
2763 s_mov_b32 s30, 0xbca86ba3 0 0.00 | |
2764 v_mul_f32_e64 v32, |v85|, 0x3f8c8168 0 0.00 | |
2765 v_mad_f32 v86, |v85|, 0xbf8c8168, 1.0 0 0.00 | |
2766 v_mad_f32 v87, |v85|, s30, 0x3d981627 0 0.00 | |
2767 v_sqrt_f32_e32 v86, v86 0 0.00 | |
2768 v_madak_f32 v87, v87, v32, 0xbe593484 0 0.00 | |
2769 v_madak_f32 v32, v87, v32, 0x3fc90da4 0 0.00 | |
2770 v_mad_f32 v32, -v32, v86, 0x3fc90fdb 0 0.00 | |
2771 v_mul_f32_e32 v32, 0x3f693710, v32 0 0.00 | |
2772 _L102: | |
2773 s_mov_b32 exec_lo, s29 0 0.00 | |
2774 v_cmp_gt_f32_e32 vcc_lo, 0, v85 0 0.00 | |
2775 v_cndmask_b32_e64 v86, 0, -1, vcc_lo 0 0.00 | |
2776 v_cmp_lt_f32_e32 vcc_lo, 0, v85 0 0.00 | |
2777 v_add_co_ci_u32_e64 v85, vcc_lo, v86, 0, vcc_lo 0 0.00 | |
2778 v_cvt_f32_i32_e32 v85, v85 0 0.00 | |
2779 v_mul_f32_e32 v32, v32, v85 0 0.00 | |
2780 _L98: | |
2781 s_andn2_b32 exec_lo, s28, exec_lo 0 0.00 | |
2782 s_cbranch_execz _L103 0 0.00 | |
2783 BBF0_104: | |
2784 v_log_f32_e64 v32, |v85| 0 0.00 | |
2785 v_cmp_gt_f32_e32 vcc_lo, 0, v85 0 0.00 | |
2786 v_cndmask_b32_e64 v86, 0, -1, vcc_lo 0 0.00 | |
2787 v_cmp_lt_f32_e32 vcc_lo, 0, v85 0 0.00 | |
2788 v_mul_f32_e32 v32, 0x3f2aaaab, v32 0 0.00 | |
2789 v_add_co_ci_u32_e64 v85, vcc_lo, v86, 0, vcc_lo 0 0.00 | |
2790 v_exp_f32_e32 v32, v32 0 0.00 | |
2791 v_cvt_f32_i32_e32 v85, v85 0 0.00 | |
2792 v_mul_f32_e32 v32, v85, v32 0 0.00 | |
2793 _L103: | |
2794 s_mov_b32 exec_lo, s28 0 0.00 | |
2795 v_sub_f32_e32 v32, v32, v50 0 0.00 | |
2796 v_mul_f32_e32 v85, v32, v65 0 0.00 | |
2797 _L97: | |
2798 s_mov_b32 exec_lo, s27 0 0.00 | |
2799 v_add_f32_e64 v87, v85, -1.0 div:2 0 0.00 | |
2800 v_mul_f32_e32 v32, v85, v85 0 0.00 | |
2801 v_add_f32_e64 v91, v85, -2.0 div:2 0 0.00 | |
2802 v_ldexp_f32 v98, v85, -1 0 0.00 | |
2803 v_mad_f32 v87, v74, v87, v77 0 0.00 | |
2804 v_mul_f32_e32 v86, v74, v32 0 0.00 | |
2805 v_mad_f32 v96, v82, v91, v77 0 0.00 | |
2806 v_mul_f32_e32 v87, v85, v87 0 0.00 | |
2807 v_ldexp_f32 v32, v86, -1 0 0.00 | |
2808 v_mad_f32 v96, v96, v98, -v71 0 0.00 | |
2809 v_mul_f32_e32 v88, v87, v87 0 0.00 | |
2810 v_mul_f32_e64 v89, v86, v32 div:2 0 0.00 | |
2811 v_mul_f32_e64 v90, v86, v87 div:2 0 0.00 | |
2812 v_mul_f32_e32 v93, v88, v88 0 0.00 | |
2813 v_mul_f32_e32 v94, v88, v89 0 0.00 | |
2814 v_mac_f32_e32 v90, v87, v32 0 0.00 | |
2815 v_mul_f32_e32 v91, v89, v89 0 0.00 | |
2816 v_mad_f32 v98, v89, 0xbbcccccd, 1.0 0 0.00 | |
2817 v_mac_f32_e32 v94, v88, v89 0 0.00 | |
2818 v_mul_f32_e64 v97, v90, v88 mul:2 0 0.00 | |
2819 v_mul_f32_e32 v92, v90, v89 0 0.00 | |
2820 v_mac_f32_e32 v94, v90, v90 0 0.00 | |
2821 v_mul_f32_e32 v99, v87, v97 0 0.00 | |
2822 v_mul_f32_e32 v101, v87, v92 0 0.00 | |
2823 v_mul_f32_e32 v95, 0x38c30c31, v94 0 0.00 | |
2824 v_mac_f32_e32 v99, v93, v32 0 0.00 | |
2825 v_mac_f32_e32 v101, v87, v92 0 0.00 | |
2826 v_mul_f32_e32 v92, 0.15915494, v96 0 0.00 | |
2827 v_madmk_f32 v96, v88, 0xbd2aaaab, v98 0 0.00 | |
2828 v_madmk_f32 v100, v93, 0x3a088889, v95 0 0.00 | |
2829 v_mul_f32_e32 v95, v90, v97 0 0.00 | |
2830 v_mac_f32_e32 v101, v94, v32 0 0.00 | |
2831 v_mad_f32 v98, v32, v87, v90 0 0.00 | |
2832 v_mac_f32_e32 v97, v90, v88 0 0.00 | |
2833 v_madmk_f32 v91, v91, 0x3797b426, v100 0 0.00 | |
2834 v_mac_f32_e32 v95, v93, v89 0 0.00 | |
2835 v_mul_f32_e64 v89, v86, v89 div:2 0 0.00 | |
2836 v_mac_f32_e32 v95, v94, v88 0 0.00 | |
2837 v_add_f32_e32 v94, v96, v91 0 0.00 | |
2838 v_mul_f32_e32 v91, v98, v87 0 0.00 | |
2839 v_mul_f32_e32 v98, 0x39c30c31, v89 0 0.00 | |
2840 v_mul_f32_e32 v96, 0x3672b9d6, v101 0 0.00 | |
2841 v_mul_f32_e32 v87, v87, v97 0 0.00 | |
2842 v_madmk_f32 v89, v95, 0xb521d13a, v94 0 0.00 | |
2843 v_add_f32_e32 v95, -1.0, v85 0 0.00 | |
2844 v_mul_f32_e32 v94, v88, v93 0 0.00 | |
2845 v_madmk_f32 v91, v91, 0x3b088889, v98 0 0.00 | |
2846 v_madmk_f32 v90, v99, 0x379c09c1, v96 0 0.00 | |
2847 v_mac_f32_e32 v87, v32, v93 0 0.00 | |
2848 v_mad_f32 v100, v82, v95, v77 0 0.00 | |
2849 v_madmk_f32 v89, v94, 0xb6500d01, v89 0 0.00 | |
2850 v_sin_f32_e32 v95, v92 0 0.00 | |
2851 v_mad_f32 v91, v86, 0x3d2aaaab, -v91 0 0.00 | |
2852 v_mul_f32_e32 v86, v93, v93 0 0.00 | |
2853 v_mad_f32 v94, v100, v85, -v71 0 0.00 | |
2854 v_mul_f32_e32 v87, v87, v88 0 0.00 | |
2855 v_add_f32_e32 v90, v91, v90 0 0.00 | |
2856 v_madmk_f32 v86, v86, 0x3238ef1d, v89 0 0.00 | |
2857 v_mul_f32_e32 v32, 0.15915494, v94 0 0.00 | |
2858 v_mul_f32_e32 v94, v85, v68 0 0.00 | |
2859 v_cos_f32_e32 v85, v92 0 0.00 | |
2860 v_madmk_f32 v90, v87, 0xb3b8ef1d, v90 0 0.00 | |
2861 v_mul_f32_e32 v88, v94, v95 0 0.00 | |
2862 v_cos_f32_e32 v91, v32 0 0.00 | |
2863 v_sin_f32_e32 v89, v32 0 0.00 | |
2864 v_mul_f32_e32 v87, v94, v85 0 0.00 | |
2865 v_mul_f32_e32 v85, v86, v88 0 0.00 | |
2866 v_mul_f32_e32 v32, v90, v88 0 0.00 | |
2867 v_mad_f32 v85, -v90, v87, -v85 0 0.00 | |
2868 v_mad_f32 v32, v86, v87, -v32 0 0.00 | |
2869 v_mac_f32_e32 v85, v78, v91 0 0.00 | |
2870 v_mac_f32_e32 v32, v78, v89 0 0.00 | |
2871 v_mul_f32_e32 v86, v80, v85 0 0.00 | |
2872 v_mul_f32_e32 v85, v76, v85 0 0.00 | |
2873 v_mad_f32 v86, v76, v32, -v86 0 0.00 | |
2874 v_mac_f32_e32 v85, v80, v32 0 0.00 | |
2875 v_add_f32_e32 v32, v28, v86 0 0.00 | |
2876 v_add_f32_e32 v85, v36, v85 0 0.00 | |
2877 _L96: | |
2878 s_andn2_b32 exec_lo, s26, exec_lo 0 0.00 | |
2879 v_mov_b32_e32 v85, v44 0 0.00 | |
2880 v_mov_b32_e32 v32, v45 0 0.00 | |
2881 s_mov_b32 exec_lo, s26 0 0.00 | |
2882 s_ff1_i32_b32 s27, exec_lo 0 0.00 | |
2883 s_mov_b32 s26, exec_lo 0 0.00 | |
2884 s_lshl_b32 s28, 1, s27 0 0.00 | |
2885 s_and_b32 s28, s28, exec_lo 0 0.00 | |
2886 s_and_saveexec_b32 s28, s28 0 0.00 | |
2887 s_cbranch_execz _L104 0 0.00 | |
2888 BBF0_105: | |
2889 s_bcnt1_i32_b32 s29, s26 0 0.00 | |
2890 v_mov_b32_e32 v86, s29 0 0.00 | |
2891 s_waitcnt_depctr 0xffe3 0 0.00 | |
2892 buffer_atomic_add v86, off, s[8:11], 0 offset:28 glc 0 0.00 | |
2893 _L104: | |
2894 s_waitcnt_depctr 0xffe3 0 0.00 | |
2895 s_mov_b32 exec_lo, s28 0 0.00 | |
2896 s_waitcnt vmcnt(0) 0 0.00 | |
2897 v_readlane_b32 s27, v86, s27 0 0.00 | |
2898 v_cndmask_b32_e64 v87, v63, v85, s3 0 0.00 | |
2899 v_cndmask_b32_e64 v51, v85, v63, s3 0 0.00 | |
2900 v_cndmask_b32_e64 v88, v64, v32, s3 0 0.00 | |
2901 v_cndmask_b32_e64 v46, v32, v64, s3 0 0.00 | |
2902 v_mbcnt_lo_u32_b32 v86, s26, 0 0 0.00 | |
2903 v_mul_f32_e32 v90, v25, v87 0 0.00 | |
2904 v_mul_f32_e32 v89, v25, v51 0 0.00 | |
2905 v_mul_f32_e32 v51, v8, v51 0 0.00 | |
2906 v_mul_f32_e32 v87, v8, v87 0 0.00 | |
2907 v_mov_b32_e32 v63, v85 0 0.00 | |
2908 v_mac_f32_e32 v90, v31, v88 0 0.00 | |
2909 v_mac_f32_e32 v89, v31, v46 0 0.00 | |
2910 v_mac_f32_e32 v51, v29, v46 0 0.00 | |
2911 v_mac_f32_e32 v87, v29, v88 0 0.00 | |
2912 v_mov_b32_e32 v64, v32 0 0.00 | |
2913 v_add_nc_i32 v86, s27, v86 0 0.00 | |
2914 v_add_f32_e32 v88, v23, v89 0 0.00 | |
2915 v_add_f32_e32 v89, v16, v51 0 0.00 | |
2916 v_add_f32_e32 v90, v23, v90 0 0.00 | |
2917 v_add_f32_e32 v91, v16, v87 0 0.00 | |
2918 v_mul_lo_u32 v86, v86, 24 0 0.00 | |
2919 s_movk_i32 s26, 0x0 0 0.00 | |
2920 v_mov_b32_e32 v32, v84 0 0.00 | |
2921 s_waitcnt_depctr 0xffe3 0 0.00 | |
2922 s_clause 0x1 0 0.00 | |
2923 buffer_store_dword v7, v86, s[12:15], 0 offen glc 0 0.00 | |
2924 buffer_store_dwordx4 v[88:91], v86, s[12:15], 0 offen offset:8 glc 0 0.00 | |
2925 v_min3_f32 v48, v88, v90, v48 0 0.00 | |
2926 v_min3_f32 v42, v89, v91, v42 0 0.00 | |
2927 v_max3_f32 v107, v88, v90, v107 0 0.00 | |
2928 v_max3_f32 v106, v89, v91, v106 0 0.00 | |
2929 s_branch _L105 0 0.00 | |
2930 _L95: | |
2931 s_mov_b32 exec_lo, s24 0 0.00 | |
2932 v_mov_b32_e32 v28, v1 0 0.00 | |
2933 v_mov_b32_e32 v34, v47 0 0.00 | |
2934 v_mov_b32_e32 v36, v69 0 0.00 | |
2935 v_mov_b32_e32 v65, v66 0 0.00 | |
2936 v_mov_b32_e32 v32, v105 0 0.00 | |
2937 s_branch _L106 0 0.00 | |
2938 _L77: | |
2939 s_mov_b32 exec_lo, s6 0 0.00 | |
2940 _L75: | |
2941 s_andn2_b32 exec_lo, s2, exec_lo 0 0.00 | |
2942 v_mov_b32_e32 v106, v67 0 0.00 | |
2943 v_mov_b32_e32 v107, v68 0 0.00 | |
2944 v_mov_b32_e32 v42, v71 0 0.00 | |
2945 v_mov_b32_e32 v48, v72 0 0.00 | |
2946 s_mov_b32 exec_lo, s2 0 0.00 | |
2947 v_cmp_eq_i32_e64 s2, v17, 1 0 0.00 | |
2948 v_cmp_eq_i32_e32 vcc_lo, 0, v10 0 0.00 | |
2949 s_or_b32 vcc_lo, s2, vcc_lo 0 0.00 | |
2950 s_andn1_saveexec_b32 s2, vcc_lo 0 0.00 | |
2951 s_cbranch_execz _L107 0 0.00 | |
2952 BBF0_106: | |
2953 v_and_b32_e32 v5, 0x3000000, v6 0 0.00 | |
2954 v_cmp_eq_i32_e64 s3, v5, 0x2000000 0 0.00 | |
2955 s_and_saveexec_b32 s6, s3 0 0.00 | |
2956 s_cbranch_execz _L108 0 0.00 | |
2957 BBF0_107: | |
2958 s_waitcnt lgkmcnt(0) 0 0.00 | |
2959 s_load_dwordx4 s[8:11], s[0:1], 0x80 0 0.00 | |
2960 s_load_dwordx4 s[12:15], s[0:1], 0xa0 0 0.00 | |
2961 v_mul_f32_e32 v6, v13, v37 0 0.00 | |
2962 v_mul_f32_e32 v16, v13, v15 0 0.00 | |
2963 v_mul_f32_e32 v9, v14, v37 0 0.00 | |
2964 v_mul_f32_e32 v17, v14, v15 0 0.00 | |
2965 s_mov_b32 s16, 0xbc996e30 0 0.00 | |
2966 v_mac_f32_e32 v6, v11, v39 0 0.00 | |
2967 v_mac_f32_e32 v16, v11, v111 0 0.00 | |
2968 v_mac_f32_e32 v9, v12, v39 0 0.00 | |
2969 v_mac_f32_e32 v17, v12, v111 0 0.00 | |
2970 v_add_f32_e32 v44, v18, v6 0 0.00 | |
2971 v_add_f32_e32 v16, v18, v16 0 0.00 | |
2972 v_add_f32_e32 v45, v19, v9 0 0.00 | |
2973 v_add_f32_e32 v10, v19, v17 0 0.00 | |
2974 v_sub_f32_e32 v17, v44, v16 0 0.00 | |
2975 v_sub_f32_e32 v23, v45, v10 0 0.00 | |
2976 v_mul_f32_e32 v16, v17, v17 0 0.00 | |
2977 v_mac_f32_e32 v16, v23, v23 0 0.00 | |
2978 v_sqrt_f32_e32 v10, v16 0 0.00 | |
2979 v_max_f32_e32 v10, 0x3e800000, v10 0 0.00 | |
2980 v_rcp_f32_e32 v10, v10 0 0.00 | |
2981 v_mad_f32 v10, 0xbe800000, v10, 1.0 0 0.00 | |
2982 v_sub_f32_e32 v16, 1.0, v10 0 0.00 | |
2983 v_madak_f32 v17, s16, v10, 0x3d981627 0 0.00 | |
2984 v_sqrt_f32_e32 v22, v16 0 0.00 | |
2985 v_madak_f32 v17, v17, v10, 0xbe593484 0 0.00 | |
2986 v_madak_f32 v16, v17, v10, 0x3fc90da4 0 0.00 | |
2987 v_mul_f32_e64 v10, v16, v22 mul:2 0 0.00 | |
2988 v_max_f32_e32 v10, 0x38d1b717, v10 0 0.00 | |
2989 v_rcp_f32_e32 v16, v10 0 0.00 | |
2990 v_mul_f32_e32 v10, 0.15915494, v10 0 0.00 | |
2991 v_sin_f32_e32 v23, v10 0 0.00 | |
2992 v_mul_f32_e32 v16, 0x40490fdb, v16 0 0.00 | |
2993 v_cos_f32_e32 v24, v10 0 0.00 | |
2994 v_ceil_f32_e32 v16, v16 0 0.00 | |
2995 v_cvt_u32_f32_e32 v16, v16 0 0.00 | |
2996 v_mov_b32_e32 v10, v16 0 0.00 | |
2997 s_waitcnt lgkmcnt(0) 0 0.00 | |
2998 s_waitcnt_depctr 0xffe3 0 0.00 | |
2999 buffer_atomic_add v10, off, s[8:11], 0 offset:28 glc 0 0.00 | |
3000 s_waitcnt_depctr 0xffe3 0 0.00 | |
3001 s_movk_i32 s10, 0xffff 0 0.00 | |
3002 s_mov_b32 s8, exec_lo 0 0.00 | |
3003 s_mov_b32 s9, exec_lo 0 0.00 | |
3004 v_mov_b32_e32 v1, 0 0 0.00 | |
3005 v_add_nc_u32_e32 v17, -1, v16 0 0.00 | |
3006 s_nop 0 0 0.00 | |
3007 s_nop 0 0 0.00 | |
3008 s_nop 0 0 0.00 | |
3009 _L110: | |
3010 v_cmp_eq_i32_e64 s10, s10, 0 0 0.00 | |
3011 v_add_co_ci_u32_e64 v28, vcc_lo, v1, 0, s10 0 0.00 | |
3012 v_cmp_gt_u32_e32 vcc_lo, v17, v28 0 0.00 | |
3013 s_and_saveexec_b32 s11, vcc_lo 0 0.00 | |
3014 s_andn2_b32 exec_lo, s11, exec_lo 0 0.00 | |
3015 s_andn2_b32 s9, s9, exec_lo 0 0.00 | |
3016 s_cbranch_scc0 _L109 0 0.00 | |
3017 BBF0_108: | |
3018 s_and_b32 exec_lo, s11, s9 0 0.00 | |
3019 v_mul_f32_e64 v31, -v23, v0 0 0.00 | |
3020 v_mul_f32_e32 v6, v24, v0 0 0.00 | |
3021 s_waitcnt vmcnt(0) 0 0.00 | |
3022 v_add_co_ci_u32_e64 v1, vcc_lo, v1, v10, s10 0 0.00 | |
3023 s_movk_i32 s10, 0x0 0 0.00 | |
3024 v_mac_f32_e32 v31, v21, v24 0 0.00 | |
3025 v_mad_f32 v0, v21, v23, v6 0 0.00 | |
3026 v_mul_lo_u32 v1, v1, 24 0 0.00 | |
3027 v_add_f32_e32 v9, v15, v31 0 0.00 | |
3028 v_add_f32_e32 v32, v111, v0 0 0.00 | |
3029 v_mov_b32_e32 v21, v31 0 0.00 | |
3030 v_mul_f32_e32 v34, v13, v9 0 0.00 | |
3031 v_mul_f32_e32 v9, v14, v9 0 0.00 | |
3032 v_mac_f32_e32 v34, v11, v32 0 0.00 | |
3033 v_mac_f32_e32 v9, v12, v32 0 0.00 | |
3034 v_add_f32_e32 v46, v18, v34 0 0.00 | |
3035 v_add_f32_e32 v47, v19, v9 0 0.00 | |
3036 s_waitcnt_depctr 0xffe3 0 0.00 | |
3037 s_clause 0x1 0 0.00 | |
3038 buffer_store_dword v7, v1, s[12:15], 0 offen glc 0 0.00 | |
3039 buffer_store_dwordx4 v[44:47], v1, s[12:15], 0 offen offset:8 glc 0 0.00 | |
3040 v_mov_b32_e32 v1, v28 0 0.00 | |
3041 v_min3_f32 v48, v44, v46, v48 0 0.00 | |
3042 v_min3_f32 v42, v45, v47, v42 0 0.00 | |
3043 v_max3_f32 v107, v44, v46, v107 0 0.00 | |
3044 v_max3_f32 v106, v45, v47, v106 0 0.00 | |
3045 v_mov_b32_e32 v45, v47 0 0.00 | |
3046 v_mov_b32_e32 v44, v46 0 0.00 | |
3047 s_branch _L110 0 0.00 | |
3048 _L109: | |
3049 s_mov_b32 exec_lo, s8 0 0.00 | |
3050 v_mul_f32_e32 v1, v13, v43 0 0.00 | |
3051 v_mul_f32_e32 v9, v14, v43 0 0.00 | |
3052 s_waitcnt vmcnt(0) 0 0.00 | |
3053 v_add_nc_u32_e32 v6, v16, v10 0 0.00 | |
3054 v_mac_f32_e32 v1, v11, v38 0 0.00 | |
3055 v_mac_f32_e32 v9, v12, v38 0 0.00 | |
3056 v_mul_lo_u32 v6, v6, 24 0 0.00 | |
3057 v_add_f32_e32 v46, v18, v1 0 0.00 | |
3058 v_add_f32_e32 v47, v19, v9 0 0.00 | |
3059 v_add_nc_u32_e32 v16, 0xffffffe8, v6 0 0.00 | |
3060 v_min3_f32 v48, v44, v46, v48 0 0.00 | |
3061 v_min3_f32 v42, v45, v47, v42 0 0.00 | |
3062 v_max3_f32 v107, v44, v46, v107 0 0.00 | |
3063 v_max3_f32 v106, v45, v47, v106 0 0.00 | |
3064 v_add_nc_u32_e32 v6, -16, v6 0 0.00 | |
3065 s_waitcnt_depctr 0xffe3 0 0.00 | |
3066 buffer_store_dword v7, v16, s[12:15], 0 offen glc 0 0.00 | |
3067 buffer_store_dwordx4 v[44:47], v6, s[12:15], 0 offen glc 0 0.00 | |
3068 _L108: | |
3069 s_waitcnt_depctr 0xffe3 0 0.00 | |
3070 s_andn2_b32 exec_lo, s6, s3 0 0.00 | |
3071 s_cbranch_execz _L111 0 0.00 | |
3072 BBF0_109: | |
3073 s_waitcnt lgkmcnt(0) 0 0.00 | |
3074 s_load_dwordx4 s[8:11], s[0:1], 0x80 0 0.00 | |
3075 v_cmp_eq_i32_e32 vcc_lo, 0x1000000, v5 0 0.00 | |
3076 v_cndmask_b32_e64 v4, 1, 3, vcc_lo 0 0.00 | |
3077 s_waitcnt lgkmcnt(0) 0 0.00 | |
3078 s_waitcnt_depctr 0xffe3 0 0.00 | |
3079 buffer_atomic_add v4, off, s[8:11], 0 offset:28 glc 0 0.00 | |
3080 s_waitcnt_depctr 0xffe3 0 0.00 | |
3081 s_and_saveexec_b32 s3, vcc_lo 0 0.00 | |
3082 s_cbranch_execz _L112 0 0.00 | |
3083 BBF0_110: | |
3084 s_load_dwordx4 s[8:11], s[0:1], 0xa0 0 0.00 | |
3085 v_mul_f32_e32 v10, v13, v37 0 0.00 | |
3086 v_mad_f32 v36, v20, v27, v37 0 0.00 | |
3087 v_mad_f32 v25, v20, v27, v43 0 0.00 | |
3088 v_mad_f32 v40, v20, v30, v39 0 0.00 | |
3089 v_mul_f32_e32 v9, v14, v37 0 0.00 | |
3090 v_mac_f32_e32 v10, v11, v39 0 0.00 | |
3091 v_mul_f32_e32 v22, v13, v36 0 0.00 | |
3092 v_mul_f32_e32 v15, v14, v36 0 0.00 | |
3093 v_mad_f32 v17, v20, v30, v38 0 0.00 | |
3094 v_mul_f32_e32 v20, v13, v25 0 0.00 | |
3095 v_add_f32_e32 v28, v18, v10 0 0.00 | |
3096 v_mul_f32_e32 v10, v14, v25 0 0.00 | |
3097 v_mul_f32_e32 v21, v13, v43 0 0.00 | |
3098 v_mul_f32_e32 v23, v14, v43 0 0.00 | |
3099 v_mac_f32_e32 v15, v12, v40 0 0.00 | |
3100 v_mac_f32_e32 v22, v11, v40 0 0.00 | |
3101 v_mac_f32_e32 v9, v12, v39 0 0.00 | |
3102 v_mac_f32_e32 v20, v11, v17 0 0.00 | |
3103 v_mac_f32_e32 v10, v12, v17 0 0.00 | |
3104 v_mac_f32_e32 v21, v11, v38 0 0.00 | |
3105 v_mac_f32_e32 v23, v12, v38 0 0.00 | |
3106 v_add_f32_e32 v31, v19, v15 0 0.00 | |
3107 v_add_f32_e32 v30, v18, v22 0 0.00 | |
3108 v_add_f32_e32 v29, v19, v9 0 0.00 | |
3109 s_waitcnt vmcnt(0) 0 0.00 | |
3110 v_mul_lo_u32 v16, v4, 24 0 0.00 | |
3111 v_add_f32_e32 v32, v18, v20 0 0.00 | |
3112 v_add_f32_e32 v33, v19, v10 0 0.00 | |
3113 v_add_f32_e32 v34, v18, v21 0 0.00 | |
3114 v_add_f32_e32 v35, v19, v23 0 0.00 | |
3115 v_min3_f32 v3, v28, v30, v48 0 0.00 | |
3116 v_min3_f32 v2, v29, v31, v42 0 0.00 | |
3117 v_max3_f32 v27, v28, v30, v107 0 0.00 | |
3118 v_max3_f32 v1, v29, v31, v106 0 0.00 | |
3119 s_waitcnt lgkmcnt(0) 0 0.00 | |
3120 s_clause 0x3 0 0.00 | |
3121 buffer_store_dword v7, v16, s[8:11], 0 offen offset:24 glc 0 0.00 | |
3122 buffer_store_dwordx4 v[28:31], v16, s[8:11], 0 offen offset:32 glc 0 0.00 | |
3123 buffer_store_dword v7, v16, s[8:11], 0 offen offset:48 glc 0 0.00 | |
3124 buffer_store_dwordx4 v[32:35], v16, s[8:11], 0 offen offset:56 glc 0 0.00 | |
3125 v_min3_f32 v48, v34, v32, v3 0 0.00 | |
3126 v_max3_f32 v106, v35, v33, v1 0 0.00 | |
3127 v_min3_f32 v42, v35, v33, v2 0 0.00 | |
3128 v_max3_f32 v107, v34, v32, v27 0 0.00 | |
3129 s_nop 0 0 0.00 | |
3130 _L112: | |
3131 s_andn2_b32 exec_lo, s3, exec_lo 0 0.00 | |
3132 v_mov_b32_e32 v25, v43 0 0.00 | |
3133 v_mov_b32_e32 v36, v37 0 0.00 | |
3134 v_mov_b32_e32 v17, v38 0 0.00 | |
3135 v_mov_b32_e32 v40, v39 0 0.00 | |
3136 s_mov_b32 exec_lo, s3 0 0.00 | |
3137 s_load_dwordx4 s[8:11], s[0:1], 0xa0 0 0.00 | |
3138 s_waitcnt vmcnt(0) 0 0.00 | |
3139 v_mul_lo_u32 v10, v4, 24 0 0.00 | |
3140 v_mul_f32_e32 v4, v13, v36 0 0.00 | |
3141 v_mul_f32_e32 v1, v14, v36 0 0.00 | |
3142 v_mul_f32_e32 v13, v13, v25 0 0.00 | |
3143 v_mul_f32_e32 v15, v14, v25 0 0.00 | |
3144 v_mac_f32_e32 v4, v11, v40 0 0.00 | |
3145 v_mac_f32_e32 v1, v12, v40 0 0.00 | |
3146 v_mac_f32_e32 v13, v11, v17 0 0.00 | |
3147 v_mac_f32_e32 v15, v12, v17 0 0.00 | |
3148 v_add_f32_e32 v11, v18, v4 0 0.00 | |
3149 v_add_f32_e32 v12, v19, v1 0 0.00 | |
3150 v_add_f32_e32 v13, v18, v13 0 0.00 | |
3151 v_add_f32_e32 v14, v19, v15 0 0.00 | |
3152 s_waitcnt lgkmcnt(0) 0 0.00 | |
3153 s_waitcnt_depctr 0xffe3 0 0.00 | |
3154 s_clause 0x1 0 0.00 | |
3155 buffer_store_dword v7, v10, s[8:11], 0 offen glc 0 0.00 | |
3156 buffer_store_dwordx4 v[11:14], v10, s[8:11], 0 offen offset:8 glc 0 0.00 | |
3157 v_min3_f32 v48, v11, v13, v48 0 0.00 | |
3158 v_min3_f32 v42, v12, v14, v42 0 0.00 | |
3159 v_max3_f32 v107, v11, v13, v107 0 0.00 | |
3160 v_max3_f32 v106, v12, v14, v106 0 0.00 | |
3161 _L111: | |
3162 s_mov_b32 exec_lo, s6 0 0.00 | |
3163 _L107: | |
3164 s_andn2_b32 exec_lo, s2, exec_lo 0 0.00 | |
3165 s_cbranch_execz _L113 0 0.00 | |
3166 BBF0_111: | |
3167 v_mul_f32_e32 v10, v26, v26 0 0.00 | |
3168 v_mac_f32_e32 v10, v9, v9 0 0.00 | |
3169 v_rsq_f32_e32 v0, v10 0 0.00 | |
3170 v_and_b32_e32 v10, 0x30000000, v6 0 0.00 | |
3171 v_cmp_ne_i32_e32 vcc_lo, 0, v10 0 0.00 | |
3172 v_mul_f32_e32 v23, v0, v5 0 0.00 | |
3173 v_mul_f32_e64 v0, v23, v9 div:2 0 0.00 | |
3174 v_mul_f32_e64 v5, v23, v26 div:2 0 0.00 | |
3175 v_subrev_f32_e32 v23, v0, v111 0 0.00 | |
3176 v_add_f32_e32 v22, v5, v15 0 0.00 | |
3177 v_add_f32_e32 v0, v0, v111 0 0.00 | |
3178 v_subrev_f32_e32 v8, v5, v15 0 0.00 | |
3179 s_and_saveexec_b32 s3, vcc_lo 0 0.00 | |
3180 s_cbranch_execz _L114 0 0.00 | |
3181 BBF0_112: | |
3182 v_mul_f32_e32 v5, v33, v26 0 0.00 | |
3183 v_mul_f32_e32 v30, v35, v26 0 0.00 | |
3184 v_cmp_ne_i32_e32 vcc_lo, 0x10000000, v10 0 0.00 | |
3185 v_mad_f32 v5, v35, v9, -v5 0 0.00 | |
3186 v_mac_f32_e32 v30, v33, v9 0 0.00 | |
3187 s_and_saveexec_b32 s6, vcc_lo 0 0.00 | |
3188 s_cbranch_execz _L115 0 0.00 | |
3189 BBF0_113: | |
3190 v_cmp_eq_i32_e32 vcc_lo, 0x20000000, v10 0 0.00 | |
3191 s_waitcnt lgkmcnt(0) 0 0.00 | |
3192 s_and_saveexec_b32 s8, vcc_lo 0 0.00 | |
3193 s_cbranch_execz _L116 0 0.00 | |
3194 BBF0_114: | |
3195 s_load_dwordx4 s[12:15], s[0:1], 0x80 0 0.00 | |
3196 s_load_dwordx4 s[16:19], s[0:1], 0xa0 0 0.00 | |
3197 v_cmp_lt_f32_e32 vcc_lo, 0, v5 0 0.00 | |
3198 v_cndmask_b32_e32 v9, v37, v8, vcc_lo 0 0.00 | |
3199 v_cndmask_b32_e32 v20, v39, v0, vcc_lo 0 0.00 | |
3200 v_cndmask_b32_e32 v10, v22, v43, vcc_lo 0 0.00 | |
3201 v_cndmask_b32_e32 v21, v43, v22, vcc_lo 0 0.00 | |
3202 v_mul_f32_e32 v24, v13, v15 0 0.00 | |
3203 v_mul_f32_e32 v22, v13, v9 0 0.00 | |
3204 v_cndmask_b32_e32 v16, v23, v38, vcc_lo 0 0.00 | |
3205 v_cndmask_b32_e32 v17, v38, v23, vcc_lo 0 0.00 | |
3206 v_mul_f32_e32 v23, v14, v9 0 0.00 | |
3207 v_mac_f32_e32 v24, v11, v111 0 0.00 | |
3208 v_mac_f32_e32 v22, v11, v20 0 0.00 | |
3209 v_mul_f32_e32 v25, v14, v15 0 0.00 | |
3210 v_min_f32_e64 v27, |v30|, |v5| 0 0.00 | |
3211 v_mac_f32_e32 v23, v12, v20 0 0.00 | |
3212 v_add_f32_e32 v24, v18, v24 0 0.00 | |
3213 v_add_f32_e32 v33, v18, v22 0 0.00 | |
3214 v_mac_f32_e32 v25, v12, v111 0 0.00 | |
3215 s_mov_b32 s9, 0x3caaae5f 0 0.00 | |
3216 v_add_f32_e32 v34, v19, v23 0 0.00 | |
3217 v_cndmask_b32_e32 v6, v8, v37, vcc_lo 0 0.00 | |
3218 v_cndmask_b32_e32 v0, v0, v39, vcc_lo 0 0.00 | |
3219 v_add_f32_e32 v26, v19, v25 0 0.00 | |
3220 v_subrev_f32_e32 v25, v24, v33 0 0.00 | |
3221 v_max_f32_e64 v24, |v30|, |v5| 0 0.00 | |
3222 v_cmp_gt_f32_e64 vcc_lo, |v5|, |v30| 0 0.00 | |
3223 v_subrev_f32_e32 v26, v26, v34 0 0.00 | |
3224 v_mul_f32_e32 v25, v25, v25 0 0.00 | |
3225 v_rcp_f32_e32 v24, v24 0 0.00 | |
3226 v_mac_f32_e32 v25, v26, v26 0 0.00 | |
3227 v_sqrt_f32_e32 v25, v25 0 0.00 | |
3228 v_mul_f32_e32 v29, v27, v24 0 0.00 | |
3229 v_mul_f32_e32 v26, v29, v29 0 0.00 | |
3230 v_max_f32_e32 v24, 0x3e800000, v25 0 0.00 | |
3231 v_madak_f32 v25, s9, v26, 0xbdae5a36 0 0.00 | |
3232 s_mov_b32 s9, 0xbc996e30 0 0.00 | |
3233 v_rcp_f32_e32 v24, v24 0 0.00 | |
3234 v_madak_f32 v25, v26, v25, 0x3e3876e2 0 0.00 | |
3235 v_madak_f32 v25, v26, v25, 0xbea91d04 0 0.00 | |
3236 v_mad_f32 v24, 0xbe800000, v24, 1.0 0 0.00 | |
3237 v_madak_f32 v26, v26, v25, 0x3f7ff738 0 0.00 | |
3238 v_sub_f32_e32 v25, 1.0, v24 0 0.00 | |
3239 v_madak_f32 v27, s9, v24, 0x3d981627 0 0.00 | |
3240 v_sqrt_f32_e32 v28, v25 0 0.00 | |
3241 v_mul_f32_e32 v25, v29, v26 0 0.00 | |
3242 v_madak_f32 v27, v27, v24, 0xbe593484 0 0.00 | |
3243 v_madak_f32 v25, -2.0, v25, 0x3fc90fdb 0 0.00 | |
3244 v_madak_f32 v27, v27, v24, 0x3fc90da4 0 0.00 | |
3245 v_cndmask_b32_e32 v31, 0, v25, vcc_lo 0 0.00 | |
3246 v_min_f32_e32 v25, v5, v30 0 0.00 | |
3247 v_mul_f32_e64 v24, v27, v28 mul:2 0 0.00 | |
3248 v_max_f32_e32 v5, v5, v30 0 0.00 | |
3249 v_cmp_gt_f32_e64 vcc_lo, -v30, v30 0 0.00 | |
3250 v_mac_f32_e32 v31, v29, v26 0 0.00 | |
3251 v_cmp_gt_f32_e64 s9, -v25, v25 0 0.00 | |
3252 v_cndmask_b32_e64 v26, 0, 0xc0490fdb, vcc_lo 0 0.00 | |
3253 v_max_f32_e32 v24, 0x38d1b717, v24 0 0.00 | |
3254 v_cmp_ge_f32_e64 vcc_lo, v5, -v5 0 0.00 | |
3255 v_add_f32_e32 v5, v31, v26 0 0.00 | |
3256 v_rcp_f32_e32 v25, v24 0 0.00 | |
3257 s_and_b32 vcc_lo, s9, vcc_lo 0 0.00 | |
3258 v_cndmask_b32_e64 v26, 0, 0x80000000, vcc_lo 0 0.00 | |
3259 v_xor_b32_e32 v26, v5, v26 0 0.00 | |
3260 v_mul_f32_e64 v5, |v26|, v25 0 0.00 | |
3261 v_ceil_f32_e32 v5, v5 0 0.00 | |
3262 v_cvt_u32_f32_e32 v5, v5 0 0.00 | |
3263 v_max_u32_e32 v26, 1, v5 0 0.00 | |
3264 v_mul_f32_e32 v5, 0.15915494, v24 0 0.00 | |
3265 v_sin_f32_e32 v24, v5 0 0.00 | |
3266 v_cos_f32_e32 v28, v5 0 0.00 | |
3267 v_mov_b32_e32 v5, v26 0 0.00 | |
3268 s_waitcnt lgkmcnt(0) 0 0.00 | |
3269 s_waitcnt_depctr 0xffe3 0 0.00 | |
3270 buffer_atomic_add v5, off, s[12:15], 0 offset:28 glc 0 0.00 | |
3271 s_movk_i32 s11, 0xffff 0 0.00 | |
3272 s_mov_b32 s9, exec_lo 0 0.00 | |
3273 s_mov_b32 s10, exec_lo 0 0.00 | |
3274 v_sub_f32_e32 v4, v20, v111 0 0.00 | |
3275 v_mov_b32_e32 v20, 0 0 0.00 | |
3276 v_sub_f32_e32 v2, v9, v15 0 0.00 | |
3277 v_add_nc_u32_e32 v25, -1, v26 0 0.00 | |
3278 s_nop 0 0 0.00 | |
3279 s_nop 0 0 0.00 | |
3280 s_nop 0 0 0.00 | |
3281 s_nop 0 0 0.00 | |
3282 _L118: | |
3283 v_cmp_eq_i32_e64 s11, s11, 0 0 0.00 | |
3284 v_add_co_ci_u32_e64 v31, vcc_lo, v20, 0, s11 0 0.00 | |
3285 v_cmp_gt_u32_e32 vcc_lo, v25, v31 0 0.00 | |
3286 s_and_saveexec_b32 s20, vcc_lo 0 0.00 | |
3287 s_andn2_b32 exec_lo, s20, exec_lo 0 0.00 | |
3288 s_andn2_b32 s10, s10, exec_lo 0 0.00 | |
3289 s_cbranch_scc0 _L117 0 0.00 | |
3290 BBF0_115: | |
3291 s_and_b32 exec_lo, s20, s10 0 0.00 | |
3292 v_mul_f32_e64 v29, -v24, v4 0 0.00 | |
3293 v_mul_f32_e32 v9, v28, v4 0 0.00 | |
3294 s_waitcnt vmcnt(0) 0 0.00 | |
3295 v_add_co_ci_u32_e64 v20, vcc_lo, v20, v5, s11 0 0.00 | |
3296 s_movk_i32 s11, 0x0 0 0.00 | |
3297 v_mac_f32_e32 v29, v2, v28 0 0.00 | |
3298 v_mad_f32 v4, v2, v24, v9 0 0.00 | |
3299 v_mul_lo_u32 v20, v20, 24 0 0.00 | |
3300 v_add_f32_e32 v23, v15, v29 0 0.00 | |
3301 v_add_f32_e32 v30, v111, v4 0 0.00 | |
3302 v_mov_b32_e32 v2, v29 0 0.00 | |
3303 v_mul_f32_e32 v32, v13, v23 0 0.00 | |
3304 v_mul_f32_e32 v23, v14, v23 0 0.00 | |
3305 v_mac_f32_e32 v32, v11, v30 0 0.00 | |
3306 v_mac_f32_e32 v23, v12, v30 0 0.00 | |
3307 v_add_f32_e32 v35, v18, v32 0 0.00 | |
3308 v_add_f32_e32 v36, v19, v23 0 0.00 | |
3309 s_waitcnt_depctr 0xffe3 0 0.00 | |
3310 s_clause 0x1 0 0.00 | |
3311 buffer_store_dword v7, v20, s[16:19], 0 offen glc 0 0.00 | |
3312 buffer_store_dwordx4 v[33:36], v20, s[16:19], 0 offen offset:8 glc 0 0.00 | |
3313 v_mov_b32_e32 v20, v31 0 0.00 | |
3314 v_min3_f32 v48, v33, v35, v48 0 0.00 | |
3315 v_min3_f32 v42, v34, v36, v42 0 0.00 | |
3316 v_max3_f32 v107, v33, v35, v107 0 0.00 | |
3317 v_max3_f32 v106, v34, v36, v106 0 0.00 | |
3318 v_mov_b32_e32 v34, v36 0 0.00 | |
3319 v_mov_b32_e32 v33, v35 0 0.00 | |
3320 s_branch _L118 0 0.00 | |
3321 _L117: | |
3322 s_mov_b32 exec_lo, s9 0 0.00 | |
3323 s_waitcnt vmcnt(0) 0 0.00 | |
3324 v_add_nc_u32_e32 v2, v26, v5 0 0.00 | |
3325 v_mul_f32_e32 v26, v13, v10 0 0.00 | |
3326 v_mul_f32_e32 v5, v14, v10 0 0.00 | |
3327 s_ff1_i32_b32 s10, exec_lo 0 0.00 | |
3328 s_mov_b32 s9, exec_lo 0 0.00 | |
3329 v_mul_lo_u32 v2, v2, 24 0 0.00 | |
3330 s_lshl_b32 s11, 1, s10 0 0.00 | |
3331 s_and_b32 s11, s11, exec_lo 0 0.00 | |
3332 v_mac_f32_e32 v26, v11, v16 0 0.00 | |
3333 v_mac_f32_e32 v5, v12, v16 0 0.00 | |
3334 v_add_nc_u32_e32 v9, 0xffffffe8, v2 0 0.00 | |
3335 v_add_f32_e32 v35, v18, v26 0 0.00 | |
3336 v_add_f32_e32 v36, v19, v5 0 0.00 | |
3337 v_add_nc_u32_e32 v2, -16, v2 0 0.00 | |
3338 s_waitcnt_depctr 0xffe3 0 0.00 | |
3339 s_clause 0x1 0 0.00 | |
3340 buffer_store_dword v7, v9, s[16:19], 0 offen glc 0 0.00 | |
3341 buffer_store_dwordx4 v[33:36], v2, s[16:19], 0 offen glc 0 0.00 | |
3342 v_min3_f32 v5, v33, v35, v48 0 0.00 | |
3343 v_min3_f32 v8, v34, v36, v42 0 0.00 | |
3344 v_max3_f32 v1, v33, v35, v107 0 0.00 | |
3345 v_max3_f32 v4, v34, v36, v106 0 0.00 | |
3346 v_mbcnt_lo_u32_b32 v2, s9, 0 0 0.00 | |
3347 s_and_saveexec_b32 s11, s11 0 0.00 | |
3348 s_cbranch_execz _L119 0 0.00 | |
3349 BBF0_116: | |
3350 s_bcnt1_i32_b32 s9, s9 0 0.00 | |
3351 v_mov_b32_e32 v3, s9 0 0.00 | |
3352 buffer_atomic_add v3, off, s[12:15], 0 offset:28 glc 0 0.00 | |
3353 _L119: | |
3354 s_waitcnt_depctr 0xffe3 0 0.00 | |
3355 s_mov_b32 exec_lo, s11 0 0.00 | |
3356 s_waitcnt vmcnt(0) 0 0.00 | |
3357 v_readlane_b32 s9, v3, s10 0 0.00 | |
3358 v_mul_f32_e32 v10, v13, v6 0 0.00 | |
3359 v_mul_f32_e32 v3, v14, v6 0 0.00 | |
3360 v_mul_f32_e32 v6, v13, v21 0 0.00 | |
3361 v_mac_f32_e32 v10, v11, v0 0 0.00 | |
3362 v_mac_f32_e32 v3, v12, v0 0 0.00 | |
3363 v_mac_f32_e32 v6, v11, v17 0 0.00 | |
3364 v_add_f32_e32 v9, v18, v10 0 0.00 | |
3365 v_add_f32_e32 v10, v19, v3 0 0.00 | |
3366 v_mul_f32_e32 v3, v14, v21 0 0.00 | |
3367 v_add_f32_e32 v11, v18, v6 0 0.00 | |
3368 v_add_nc_i32 v2, s9, v2 0 0.00 | |
3369 v_mac_f32_e32 v3, v12, v17 0 0.00 | |
3370 v_min3_f32 v48, v9, v11, v5 0 0.00 | |
3371 v_max3_f32 v107, v9, v11, v1 0 0.00 | |
3372 v_mul_lo_u32 v2, v2, 24 0 0.00 | |
3373 v_add_f32_e32 v12, v19, v3 0 0.00 | |
3374 v_min3_f32 v42, v10, v12, v8 0 0.00 | |
3375 v_max3_f32 v106, v10, v12, v4 0 0.00 | |
3376 s_waitcnt_depctr 0xffe3 0 0.00 | |
3377 buffer_store_dword v7, v2, s[16:19], 0 offen glc 0 0.00 | |
3378 buffer_store_dwordx4 v[9:12], v2, s[16:19], 0 offen offset:8 glc 0 0.00 | |
3379 _L116: | |
3380 s_waitcnt_depctr 0xffe3 0 0.00 | |
3381 s_mov_b32 exec_lo, s8 0 0.00 | |
3382 _L115: | |
3383 s_andn2_b32 exec_lo, s6, exec_lo 0 0.00 | |
3384 s_cbranch_execz _L120 0 0.00 | |
3385 BBF0_117: | |
3386 v_mul_f32_e32 v15, v5, v5 0 0.00 | |
3387 s_waitcnt lgkmcnt(0) 0 0.00 | |
3388 v_cmp_neq_f32_e64 s8, v5, 0 0 0.00 | |
3389 v_fma_mix_f32 v2, v6, v6, 0 op_sel_hi:[1, 1, 0] 0 0.00 | |
3390 v_mac_f32_e32 v15, v30, v30 0 0.00 | |
3391 v_sqrt_f32_e32 v15, v15 0 0.00 | |
3392 v_add_f32_e32 v17, v30, v15 0 0.00 | |
3393 v_ldexp_f32 v6, v15, 1 0 0.00 | |
3394 v_mul_f32_e32 v15, v17, v2 0 0.00 | |
3395 v_cmp_gt_f32_e32 vcc_lo, v15, v6 0 0.00 | |
3396 s_and_b32 vcc_lo, vcc_lo, s8 0 0.00 | |
3397 s_load_dwordx4 s[8:11], s[0:1], 0x80 0 0.00 | |
3398 s_andn1_saveexec_b32 s12, vcc_lo 0 0.00 | |
3399 s_cbranch_execz _L121 0 0.00 | |
3400 BBF0_118: | |
3401 s_ff1_i32_b32 s14, exec_lo 0 0.00 | |
3402 s_mov_b32 s13, exec_lo 0 0.00 | |
3403 s_lshl_b32 s15, 1, s14 0 0.00 | |
3404 s_and_b32 s15, s15, exec_lo 0 0.00 | |
3405 s_and_saveexec_b32 s15, s15 0 0.00 | |
3406 s_cbranch_execz _L122 0 0.00 | |
3407 BBF0_119: | |
3408 s_bcnt1_i32_b32 s16, s13 0 0.00 | |
3409 s_mulk_i32 s16, 0x2 0 0.00 | |
3410 v_mov_b32_e32 v2, s16 0 0.00 | |
3411 s_waitcnt lgkmcnt(0) 0 0.00 | |
3412 s_waitcnt_depctr 0xffe3 0 0.00 | |
3413 buffer_atomic_add v2, off, s[8:11], 0 offset:28 glc 0 0.00 | |
3414 _L122: | |
3415 s_waitcnt_depctr 0xffe3 0 0.00 | |
3416 s_mov_b32 exec_lo, s15 0 0.00 | |
3417 s_waitcnt vmcnt(0) 0 0.00 | |
3418 v_readlane_b32 s14, v2, s14 0 0.00 | |
3419 v_mbcnt_lo_u32_b32 v2, s13, 0 0 0.00 | |
3420 v_mul_lo_u32 v2, v2, 2 0 0.00 | |
3421 v_add_nc_i32 v6, s14, v2 0 0.00 | |
3422 _L121: | |
3423 s_andn2_b32 exec_lo, s12, exec_lo 0 0.00 | |
3424 s_cbranch_execz _L123 0 0.00 | |
3425 BBF0_120: | |
3426 s_ff1_i32_b32 s14, exec_lo 0 0.00 | |
3427 s_mov_b32 s13, exec_lo 0 0.00 | |
3428 s_lshl_b32 s15, 1, s14 0 0.00 | |
3429 s_and_b32 s15, s15, exec_lo 0 0.00 | |
3430 s_and_saveexec_b32 s15, s15 0 0.00 | |
3431 s_cbranch_execz _L124 0 0.00 | |
3432 BBF0_121: | |
3433 s_bcnt1_i32_b32 s16, s13 0 0.00 | |
3434 s_mulk_i32 s16, 0x3 0 0.00 | |
3435 v_mov_b32_e32 v2, s16 0 0.00 | |
3436 s_waitcnt lgkmcnt(0) 0 0.00 | |
3437 s_waitcnt_depctr 0xffe3 0 0.00 | |
3438 buffer_atomic_add v2, off, s[8:11], 0 offset:28 glc 0 0.00 | |
3439 _L124: | |
3440 s_waitcnt_depctr 0xffe3 0 0.00 | |
3441 s_mov_b32 exec_lo, s15 0 0.00 | |
3442 s_waitcnt lgkmcnt(0) 0 0.00 | |
3443 s_load_dwordx4 s[8:11], s[0:1], 0xa0 0 0.00 | |
3444 v_cmp_lt_f32_e32 vcc_lo, 0, v5 0 0.00 | |
3445 v_cndmask_b32_e32 v6, v39, v38, vcc_lo 0 0.00 | |
3446 v_cndmask_b32_e32 v17, v23, v0, vcc_lo 0 0.00 | |
3447 v_cndmask_b32_e32 v15, v37, v43, vcc_lo 0 0.00 | |
3448 v_cndmask_b32_e32 v10, v22, v8, vcc_lo 0 0.00 | |
3449 v_rcp_f32_e32 v5, v5 0 0.00 | |
3450 v_cndmask_b32_e32 v21, v37, v8, vcc_lo 0 0.00 | |
3451 v_sub_f32_e32 v6, v17, v6 0 0.00 | |
3452 v_cndmask_b32_e32 v20, v39, v0, vcc_lo 0 0.00 | |
3453 v_sub_f32_e32 v25, v10, v15 0 0.00 | |
3454 s_waitcnt vmcnt(0) 0 0.00 | |
3455 v_readlane_b32 s14, v2, s14 0 0.00 | |
3456 v_mul_f32_e32 v15, v14, v21 0 0.00 | |
3457 v_mul_f32_e32 v24, v33, v6 0 0.00 | |
3458 v_mbcnt_lo_u32_b32 v2, s13, 0 0 0.00 | |
3459 v_mul_f32_e32 v6, v13, v21 0 0.00 | |
3460 v_mac_f32_e32 v15, v12, v20 0 0.00 | |
3461 v_mad_f32 v24, v35, v25, -v24 0 0.00 | |
3462 v_mul_lo_u32 v2, v2, 3 0 0.00 | |
3463 v_mul_f32_e32 v27, v24, v5 0 0.00 | |
3464 v_add_f32_e32 v29, v19, v15 0 0.00 | |
3465 v_mac_f32_e32 v6, v11, v20 0 0.00 | |
3466 v_add_nc_i32 v2, s14, v2 0 0.00 | |
3467 v_mad_f32 v10, -v9, v27, v10 0 0.00 | |
3468 v_mad_f32 v17, -v26, v27, v17 0 0.00 | |
3469 v_add_f32_e32 v28, v18, v6 0 0.00 | |
3470 v_mul_lo_u32 v6, v2, 24 0 0.00 | |
3471 v_mul_f32_e32 v24, v13, v10 0 0.00 | |
3472 v_mul_f32_e32 v15, v14, v10 0 0.00 | |
3473 v_cndmask_b32_e32 v8, v8, v10, vcc_lo 0 0.00 | |
3474 v_cndmask_b32_e32 v39, v17, v39, vcc_lo 0 0.00 | |
3475 v_cndmask_b32_e32 v37, v10, v37, vcc_lo 0 0.00 | |
3476 v_mac_f32_e32 v24, v11, v17 0 0.00 | |
3477 v_mac_f32_e32 v15, v12, v17 0 0.00 | |
3478 v_cndmask_b32_e32 v0, v0, v17, vcc_lo 0 0.00 | |
3479 v_add_f32_e32 v30, v18, v24 0 0.00 | |
3480 v_add_f32_e32 v31, v19, v15 0 0.00 | |
3481 s_waitcnt lgkmcnt(0) 0 0.00 | |
3482 s_waitcnt_depctr 0xffe3 0 0.00 | |
3483 s_clause 0x1 0 0.00 | |
3484 buffer_store_dword v7, v6, s[8:11], 0 offen glc 0 0.00 | |
3485 buffer_store_dwordx4 v[28:31], v6, s[8:11], 0 offen offset:8 glc 0 0.00 | |
3486 v_min3_f32 v48, v28, v30, v48 0 0.00 | |
3487 v_min3_f32 v42, v29, v31, v42 0 0.00 | |
3488 v_max3_f32 v107, v28, v30, v107 0 0.00 | |
3489 v_max3_f32 v106, v29, v31, v106 0 0.00 | |
3490 v_add_nc_u32_e32 v6, 1, v2 0 0.00 | |
3491 _L123: | |
3492 s_mov_b32 exec_lo, s12 0 0.00 | |
3493 s_waitcnt lgkmcnt(0) 0 0.00 | |
3494 s_load_dwordx4 s[8:11], s[0:1], 0xa0 0 0.00 | |
3495 v_mul_f32_e32 v17, v14, v22 0 0.00 | |
3496 v_mul_f32_e32 v10, v13, v37 0 0.00 | |
3497 v_mul_f32_e32 v1, v14, v37 0 0.00 | |
3498 v_mul_f32_e32 v16, v13, v22 0 0.00 | |
3499 v_mul_f32_e32 v22, v13, v8 0 0.00 | |
3500 v_mac_f32_e32 v17, v12, v23 0 0.00 | |
3501 v_mac_f32_e32 v10, v11, v39 0 0.00 | |
3502 v_mac_f32_e32 v1, v12, v39 0 0.00 | |
3503 v_mac_f32_e32 v16, v11, v23 0 0.00 | |
3504 v_mac_f32_e32 v22, v11, v0 0 0.00 | |
3505 v_add_f32_e32 v26, v19, v17 0 0.00 | |
3506 v_mul_f32_e32 v17, v14, v8 0 0.00 | |
3507 v_mul_f32_e32 v8, v13, v43 0 0.00 | |
3508 v_mul_f32_e32 v13, v14, v43 0 0.00 | |
3509 v_add_f32_e32 v25, v18, v16 0 0.00 | |
3510 v_add_f32_e32 v24, v19, v1 0 0.00 | |
3511 v_mac_f32_e32 v17, v12, v0 0 0.00 | |
3512 v_mac_f32_e32 v8, v11, v38 0 0.00 | |
3513 v_mac_f32_e32 v13, v12, v38 0 0.00 | |
3514 v_add_f32_e32 v23, v18, v10 0 0.00 | |
3515 v_mul_lo_u32 v6, v6, 24 0 0.00 | |
3516 v_add_f32_e32 v10, v18, v22 0 0.00 | |
3517 v_add_f32_e32 v11, v19, v17 0 0.00 | |
3518 v_add_f32_e32 v12, v18, v8 0 0.00 | |
3519 v_add_f32_e32 v13, v19, v13 0 0.00 | |
3520 v_min3_f32 v9, v23, v25, v48 0 0.00 | |
3521 v_min3_f32 v4, v24, v26, v42 0 0.00 | |
3522 v_max3_f32 v5, v23, v25, v107 0 0.00 | |
3523 v_max3_f32 v8, v24, v26, v106 0 0.00 | |
3524 s_waitcnt lgkmcnt(0) 0 0.00 | |
3525 s_waitcnt_depctr 0xffe3 0 0.00 | |
3526 s_clause 0x3 0 0.00 | |
3527 buffer_store_dword v7, v6, s[8:11], 0 offen glc 0 0.00 | |
3528 buffer_store_dwordx4 v[23:26], v6, s[8:11], 0 offen offset:8 glc 0 0.00 | |
3529 buffer_store_dword v7, v6, s[8:11], 0 offen offset:24 glc 0 0.00 | |
3530 buffer_store_dwordx4 v[10:13], v6, s[8:11], 0 offen offset:32 glc 0 0.00 | |
3531 v_min3_f32 v48, v12, v10, v9 0 0.00 | |
3532 v_max3_f32 v106, v13, v11, v8 0 0.00 | |
3533 v_min3_f32 v42, v13, v11, v4 0 0.00 | |
3534 v_max3_f32 v107, v12, v10, v5 0 0.00 | |
3535 _L120: | |
3536 s_mov_b32 exec_lo, s6 0 0.00 | |
3537 _L114: | |
3538 s_andn2_b32 exec_lo, s3, exec_lo 0 0.00 | |
3539 s_cbranch_execz _L113 0 0.00 | |
3540 BBF0_122: | |
3541 s_waitcnt lgkmcnt(0) 0 0.00 | |
3542 s_ff1_i32_b32 s8, exec_lo 0 0.00 | |
3543 s_mov_b32 s6, exec_lo 0 0.00 | |
3544 s_lshl_b32 s9, 1, s8 0 0.00 | |
3545 s_and_b32 s9, s9, exec_lo 0 0.00 | |
3546 s_and_saveexec_b32 s9, s9 0 0.00 | |
3547 s_cbranch_execz _L125 0 0.00 | |
3548 BBF0_123: | |
3549 s_load_dwordx4 s[12:15], s[0:1], 0x80 0 0.00 | |
3550 s_bcnt1_i32_b32 s10, s6 0 0.00 | |
3551 s_mulk_i32 s10, 0x2 0 0.00 | |
3552 v_mov_b32_e32 v2, s10 0 0.00 | |
3553 s_waitcnt lgkmcnt(0) 0 0.00 | |
3554 s_waitcnt_depctr 0xffe3 0 0.00 | |
3555 buffer_atomic_add v2, off, s[12:15], 0 offset:28 glc 0 0.00 | |
3556 _L125: | |
3557 s_waitcnt_depctr 0xffe3 0 0.00 | |
3558 s_mov_b32 exec_lo, s9 0 0.00 | |
3559 s_load_dwordx4 s[12:15], s[0:1], 0xa0 0 0.00 | |
3560 s_waitcnt vmcnt(0) 0 0.00 | |
3561 v_readlane_b32 s8, v2, s8 0 0.00 | |
3562 v_mul_f32_e32 v6, v13, v37 0 0.00 | |
3563 v_mbcnt_lo_u32_b32 v2, s6, 0 0 0.00 | |
3564 v_mul_f32_e32 v10, v13, v22 0 0.00 | |
3565 v_mul_f32_e32 v9, v14, v22 0 0.00 | |
3566 v_mul_f32_e32 v5, v14, v37 0 0.00 | |
3567 v_mac_f32_e32 v6, v11, v39 0 0.00 | |
3568 v_mul_lo_u32 v2, v2, 2 0 0.00 | |
3569 v_mul_f32_e32 v22, v13, v8 0 0.00 | |
3570 v_mac_f32_e32 v9, v12, v23 0 0.00 | |
3571 v_mac_f32_e32 v10, v11, v23 0 0.00 | |
3572 v_add_f32_e32 v23, v18, v6 0 0.00 | |
3573 v_mul_f32_e32 v17, v14, v8 0 0.00 | |
3574 v_mul_f32_e32 v6, v13, v43 0 0.00 | |
3575 v_mul_f32_e32 v13, v14, v43 0 0.00 | |
3576 v_mac_f32_e32 v5, v12, v39 0 0.00 | |
3577 v_add_nc_i32 v2, s8, v2 0 0.00 | |
3578 v_mac_f32_e32 v22, v11, v0 0 0.00 | |
3579 v_mac_f32_e32 v17, v12, v0 0 0.00 | |
3580 v_mac_f32_e32 v6, v11, v38 0 0.00 | |
3581 v_mac_f32_e32 v13, v12, v38 0 0.00 | |
3582 v_add_f32_e32 v26, v19, v9 0 0.00 | |
3583 v_add_f32_e32 v25, v18, v10 0 0.00 | |
3584 v_add_f32_e32 v24, v19, v5 0 0.00 | |
3585 v_mul_lo_u32 v2, v2, 24 0 0.00 | |
3586 v_add_f32_e32 v9, v18, v22 0 0.00 | |
3587 v_add_f32_e32 v10, v19, v17 0 0.00 | |
3588 v_add_f32_e32 v11, v18, v6 0 0.00 | |
3589 v_add_f32_e32 v12, v19, v13 0 0.00 | |
3590 v_min3_f32 v3, v23, v25, v48 0 0.00 | |
3591 v_min3_f32 v13, v24, v26, v42 0 0.00 | |
3592 v_max3_f32 v8, v23, v25, v107 0 0.00 | |
3593 v_max3_f32 v1, v24, v26, v106 0 0.00 | |
3594 s_waitcnt lgkmcnt(0) 0 0.00 | |
3595 s_waitcnt_depctr 0xffe3 0 0.00 | |
3596 s_clause 0x3 0 0.00 | |
3597 buffer_store_dword v7, v2, s[12:15], 0 offen glc 0 0.00 | |
3598 buffer_store_dwordx4 v[23:26], v2, s[12:15], 0 offen offset:8 glc 0 0.00 | |
3599 buffer_store_dword v7, v2, s[12:15], 0 offen offset:24 glc 0 0.00 | |
3600 buffer_store_dwordx4 v[9:12], v2, s[12:15], 0 offen offset:32 glc 0 0.00 | |
3601 v_min3_f32 v48, v11, v9, v3 0 0.00 | |
3602 v_max3_f32 v106, v12, v10, v1 0 0.00 | |
3603 v_min3_f32 v42, v12, v10, v13 0 0.00 | |
3604 v_max3_f32 v107, v11, v9, v8 0 0.00 | |
3605 _L113: | |
3606 s_mov_b32 exec_lo, s2 0 0.00 | |
3607 _L32: | |
3608 s_andn2_b32 exec_lo, s4, exec_lo 0 0.00 | |
3609 s_cbranch_execz _L31 0 0.00 | |
3610 BBF0_124: | |
3611 v_cmp_eq_i32_e32 vcc_lo, 1, v9 0 0.00 | |
3612 s_and_saveexec_b32 s2, vcc_lo 0 0.00 | |
3613 v_mov_b32_e32 v106, 0xf2fc6f7c 0 0.00 | |
3614 v_mov_b32_e32 v107, 0xf2fc6f7c 0 0.00 | |
3615 v_mov_b32_e32 v42, 0x72fc6f7c 0 0.00 | |
3616 v_mov_b32_e32 v48, 0x72fc6f7c 0 0.00 | |
3617 s_andn2_b32 exec_lo, s2, exec_lo 0 0.00 | |
3618 s_cbranch_execz _L31 0 0.00 | |
3619 BBF0_125: | |
3620 v_subrev_f32_e32 v9, v23, v109 0 0.00 | |
3621 v_subrev_f32_e32 v3, v22, v110 0 0.00 | |
3622 v_subrev_f32_e32 v10, v23, v4 0 0.00 | |
3623 v_subrev_f32_e32 v1, v22, v108 0 0.00 | |
3624 v_subrev_f32_e32 v2, v23, v111 0 0.00 | |
3625 v_mul_f32_e32 v17, v9, v9 0 0.00 | |
3626 v_mul_f32_e32 v0, v10, v10 0 0.00 | |
3627 v_mac_f32_e32 v17, v3, v3 0 0.00 | |
3628 v_mac_f32_e32 v0, v1, v1 0 0.00 | |
3629 v_cmp_gt_f32_e64 s3, v17, 0x2b8cbccc 0 0.00 | |
3630 v_cmp_lt_f32_e32 vcc_lo, 0x2b8cbccc, v0 0 0.00 | |
3631 v_cndmask_b32_e64 v9, v2, v9, s3 0 0.00 | |
3632 v_subrev_f32_e32 v2, v22, v15 0 0.00 | |
3633 v_cndmask_b32_e64 v0, v2, v3, s3 0 0.00 | |
3634 v_cndmask_b32_e32 v2, v9, v10, vcc_lo 0 0.00 | |
3635 v_cndmask_b32_e32 v10, v0, v1, vcc_lo 0 0.00 | |
3636 v_mul_f32_e32 v0, v2, v2 0 0.00 | |
3637 v_mac_f32_e32 v0, v10, v10 0 0.00 | |
3638 v_rsq_f32_e32 v1, v0 0 0.00 | |
3639 v_lshrrev_b32_e32 v0, 2, v6 0 0.00 | |
3640 v_and_b32_e32 v0, 0x3000000, v0 0 0.00 | |
3641 v_mul_f32_e32 v2, v2, v1 0 0.00 | |
3642 v_mul_f32_e32 v3, v10, v1 0 0.00 | |
3643 v_cmp_eq_i32_e32 vcc_lo, 0x2000000, v0 0 0.00 | |
3644 v_mul_f32_e64 v1, v5, v2 div:2 0 0.00 | |
3645 v_mul_f32_e64 v32, v5, v3 div:2 0 0.00 | |
3646 v_subrev_f32_e32 v5, v1, v22 0 0.00 | |
3647 v_add_f32_e32 v15, v32, v23 0 0.00 | |
3648 v_subrev_f32_e32 v27, v32, v23 0 0.00 | |
3649 v_add_f32_e32 v6, v1, v22 0 0.00 | |
3650 s_andn1_saveexec_b32 s3, vcc_lo 0 0.00 | |
3651 s_cbranch_execz _L126 0 0.00 | |
3652 BBF0_126: | |
3653 s_waitcnt lgkmcnt(0) 0 0.00 | |
3654 s_load_dwordx4 s[8:11], s[0:1], 0x80 0 0.00 | |
3655 v_cmp_eq_i32_e32 vcc_lo, 0x1000000, v0 0 0.00 | |
3656 v_cndmask_b32_e64 v0, 1, 3, vcc_lo 0 0.00 | |
3657 s_waitcnt lgkmcnt(0) 0 0.00 | |
3658 s_waitcnt_depctr 0xffe3 0 0.00 | |
3659 buffer_atomic_add v0, off, s[8:11], 0 offset:28 glc 0 0.00 | |
3660 s_waitcnt_depctr 0xffe3 0 0.00 | |
3661 s_and_saveexec_b32 s6, vcc_lo 0 0.00 | |
3662 s_cbranch_execz _L127 0 0.00 | |
3663 BBF0_127: | |
3664 s_load_dwordx4 s[8:11], s[0:1], 0xa0 0 0.00 | |
3665 v_mad_f32 v1, -v20, v3, v5 0 0.00 | |
3666 v_mad_f32 v4, -v20, v2, v15 0 0.00 | |
3667 v_mul_f32_e32 v8, v13, v5 0 0.00 | |
3668 v_mad_f32 v3, -v20, v3, v6 0 0.00 | |
3669 v_mul_f32_e32 v5, v14, v5 0 0.00 | |
3670 v_mul_f32_e32 v9, v14, v1 0 0.00 | |
3671 v_mul_f32_e32 v10, v13, v1 0 0.00 | |
3672 v_mac_f32_e32 v8, v11, v15 0 0.00 | |
3673 v_mad_f32 v17, -v20, v2, v27 0 0.00 | |
3674 v_mul_f32_e32 v2, v13, v3 0 0.00 | |
3675 v_mac_f32_e32 v9, v12, v4 0 0.00 | |
3676 v_mul_f32_e32 v24, v13, v6 0 0.00 | |
3677 v_add_f32_e32 v33, v18, v8 0 0.00 | |
3678 v_mul_f32_e32 v8, v14, v3 0 0.00 | |
3679 v_mac_f32_e32 v10, v11, v4 0 0.00 | |
3680 v_add_f32_e32 v36, v19, v9 0 0.00 | |
3681 v_mul_f32_e32 v9, v14, v6 0 0.00 | |
3682 v_mac_f32_e32 v5, v12, v15 0 0.00 | |
3683 v_mac_f32_e32 v2, v11, v17 0 0.00 | |
3684 v_mac_f32_e32 v8, v12, v17 0 0.00 | |
3685 v_mac_f32_e32 v24, v11, v27 0 0.00 | |
3686 v_mac_f32_e32 v9, v12, v27 0 0.00 | |
3687 v_add_f32_e32 v35, v18, v10 0 0.00 | |
3688 v_add_f32_e32 v34, v19, v5 0 0.00 | |
3689 s_waitcnt vmcnt(0) 0 0.00 | |
3690 v_mul_lo_u32 v16, v0, 24 0 0.00 | |
3691 v_add_f32_e32 v29, v18, v2 0 0.00 | |
3692 v_add_f32_e32 v30, v19, v8 0 0.00 | |
3693 v_add_f32_e32 v31, v18, v24 0 0.00 | |
3694 v_add_f32_e32 v32, v19, v9 0 0.00 | |
3695 v_min3_f32 v9, v33, v35, 0x72fc6f7c 0 0.00 | |
3696 v_min3_f32 v6, v34, v36, 0x72fc6f7c 0 0.00 | |
3697 v_max3_f32 v24, v33, v35, 0xf2fc6f7c 0 0.00 | |
3698 v_max3_f32 v23, v34, v36, 0xf2fc6f7c 0 0.00 | |
3699 s_waitcnt lgkmcnt(0) 0 0.00 | |
3700 s_clause 0x3 0 0.00 | |
3701 buffer_store_dword v7, v16, s[8:11], 0 offen offset:24 glc 0 0.00 | |
3702 buffer_store_dwordx4 v[33:36], v16, s[8:11], 0 offen offset:32 glc 0 0.00 | |
3703 buffer_store_dword v7, v16, s[8:11], 0 offen offset:48 glc 0 0.00 | |
3704 buffer_store_dwordx4 v[29:32], v16, s[8:11], 0 offen offset:56 glc 0 0.00 | |
3705 v_min3_f32 v22, v31, v29, v9 0 0.00 | |
3706 v_max3_f32 v20, v32, v30, v23 0 0.00 | |
3707 v_min3_f32 v21, v32, v30, v6 0 0.00 | |
3708 v_max3_f32 v2, v31, v29, v24 0 0.00 | |
3709 _L127: | |
3710 s_andn2_b32 exec_lo, s6, exec_lo 0 0.00 | |
3711 s_cbranch_execz _L128 0 0.00 | |
3712 BBF0_128: | |
3713 v_mov_b32_e32 v20, 0xf2fc6f7c 0 0.00 | |
3714 v_mov_b32_e32 v2, 0xf2fc6f7c 0 0.00 | |
3715 v_mov_b32_e32 v21, 0x72fc6f7c 0 0.00 | |
3716 v_mov_b32_e32 v22, 0x72fc6f7c 0 0.00 | |
3717 v_mov_b32_e32 v3, v6 0 0.00 | |
3718 v_mov_b32_e32 v17, v27 0 0.00 | |
3719 v_mov_b32_e32 v1, v5 0 0.00 | |
3720 v_mov_b32_e32 v4, v15 0 0.00 | |
3721 _L128: | |
3722 s_mov_b32 exec_lo, s6 0 0.00 | |
3723 s_load_dwordx4 s[8:11], s[0:1], 0xa0 0 0.00 | |
3724 s_waitcnt vmcnt(0) 0 0.00 | |
3725 v_mul_lo_u32 v9, v0, 24 0 0.00 | |
3726 v_mul_f32_e32 v0, v13, v1 0 0.00 | |
3727 v_mul_f32_e32 v15, v14, v1 0 0.00 | |
3728 v_mul_f32_e32 v8, v13, v3 0 0.00 | |
3729 v_mul_f32_e32 v5, v14, v3 0 0.00 | |
3730 v_mac_f32_e32 v0, v11, v4 0 0.00 | |
3731 v_mac_f32_e32 v15, v12, v4 0 0.00 | |
3732 v_mac_f32_e32 v8, v11, v17 0 0.00 | |
3733 v_mac_f32_e32 v5, v12, v17 0 0.00 | |
3734 v_add_f32_e32 v14, v18, v0 0 0.00 | |
3735 v_add_f32_e32 v15, v19, v15 0 0.00 | |
3736 v_add_f32_e32 v16, v18, v8 0 0.00 | |
3737 v_add_f32_e32 v17, v19, v5 0 0.00 | |
3738 s_waitcnt lgkmcnt(0) 0 0.00 | |
3739 s_waitcnt_depctr 0xffe3 0 0.00 | |
3740 s_clause 0x1 0 0.00 | |
3741 buffer_store_dword v7, v9, s[8:11], 0 offen glc 0 0.00 | |
3742 buffer_store_dwordx4 v[14:17], v9, s[8:11], 0 offen offset:8 glc 0 0.00 | |
3743 v_min3_f32 v48, v14, v16, v22 0 0.00 | |
3744 v_min3_f32 v42, v15, v17, v21 0 0.00 | |
3745 v_max3_f32 v107, v14, v16, v2 0 0.00 | |
3746 v_max3_f32 v106, v15, v17, v20 0 0.00 | |
3747 _L126: | |
3748 s_andn2_b32 exec_lo, s3, exec_lo 0 0.00 | |
3749 s_cbranch_execz _L31 0 0.00 | |
3750 BBF0_129: | |
3751 s_waitcnt lgkmcnt(0) 0 0.00 | |
3752 s_load_dwordx4 s[8:11], s[0:1], 0x80 0 0.00 | |
3753 s_load_dwordx4 s[12:15], s[0:1], 0xa0 0 0.00 | |
3754 v_mul_f32_e32 v0, v13, v5 0 0.00 | |
3755 v_mul_f32_e32 v2, v13, v22 0 0.00 | |
3756 v_mul_f32_e32 v5, v14, v5 0 0.00 | |
3757 v_mul_f32_e32 v9, v14, v22 0 0.00 | |
3758 s_mov_b32 s6, 0xbc996e30 0 0.00 | |
3759 v_mac_f32_e32 v0, v11, v15 0 0.00 | |
3760 v_mac_f32_e32 v2, v11, v23 0 0.00 | |
3761 v_mac_f32_e32 v5, v12, v15 0 0.00 | |
3762 v_mac_f32_e32 v9, v12, v23 0 0.00 | |
3763 v_add_f32_e32 v33, v18, v0 0 0.00 | |
3764 v_add_f32_e32 v2, v18, v2 0 0.00 | |
3765 v_add_f32_e32 v34, v19, v5 0 0.00 | |
3766 v_add_f32_e32 v3, v19, v9 0 0.00 | |
3767 v_subrev_f32_e32 v2, v2, v33 0 0.00 | |
3768 v_subrev_f32_e32 v9, v3, v34 0 0.00 | |
3769 v_mul_f32_e32 v2, v2, v2 0 0.00 | |
3770 v_mac_f32_e32 v2, v9, v9 0 0.00 | |
3771 v_sqrt_f32_e32 v2, v2 0 0.00 | |
3772 v_max_f32_e32 v2, 0x3e800000, v2 0 0.00 | |
3773 v_rcp_f32_e32 v2, v2 0 0.00 | |
3774 v_mad_f32 v2, 0xbe800000, v2, 1.0 0 0.00 | |
3775 v_sub_f32_e32 v3, 1.0, v2 0 0.00 | |
3776 v_madak_f32 v8, s6, v2, 0x3d981627 0 0.00 | |
3777 v_sqrt_f32_e32 v3, v3 0 0.00 | |
3778 v_madak_f32 v8, v8, v2, 0xbe593484 0 0.00 | |
3779 v_madak_f32 v2, v8, v2, 0x3fc90da4 0 0.00 | |
3780 v_mul_f32_e64 v2, v2, v3 mul:2 0 0.00 | |
3781 v_max_f32_e32 v2, 0x38d1b717, v2 0 0.00 | |
3782 v_rcp_f32_e32 v3, v2 0 0.00 | |
3783 v_mul_f32_e32 v2, 0.15915494, v2 0 0.00 | |
3784 v_sin_f32_e32 v8, v2 0 0.00 | |
3785 v_mul_f32_e32 v3, 0x40490fdb, v3 0 0.00 | |
3786 v_cos_f32_e32 v16, v2 0 0.00 | |
3787 v_ceil_f32_e32 v3, v3 0 0.00 | |
3788 v_cvt_u32_f32_e32 v3, v3 0 0.00 | |
3789 v_mov_b32_e32 v2, v3 0 0.00 | |
3790 s_waitcnt lgkmcnt(0) 0 0.00 | |
3791 s_waitcnt_depctr 0xffe3 0 0.00 | |
3792 buffer_atomic_add v2, off, s[8:11], 0 offset:28 glc 0 0.00 | |
3793 s_waitcnt_depctr 0xffe3 0 0.00 | |
3794 s_movk_i32 s9, 0xffff 0 0.00 | |
3795 s_mov_b32 s6, exec_lo 0 0.00 | |
3796 s_mov_b32 s8, exec_lo 0 0.00 | |
3797 v_mov_b32_e32 v0, 0 0 0.00 | |
3798 v_mov_b32_e32 v17, 0xf2fc6f7c 0 0.00 | |
3799 v_mov_b32_e32 v20, 0xf2fc6f7c 0 0.00 | |
3800 v_mov_b32_e32 v21, 0x72fc6f7c 0 0.00 | |
3801 v_mov_b32_e32 v24, 0x72fc6f7c 0 0.00 | |
3802 v_add_nc_u32_e32 v9, -1, v3 0 0.00 | |
3803 v_xor_b32_e32 v15, 0x80000000, v1 0 0.00 | |
3804 s_nop 0 0 0.00 | |
3805 s_nop 0 0 0.00 | |
3806 s_nop 0 0 0.00 | |
3807 s_nop 0 0 0.00 | |
3808 s_nop 0 0 0.00 | |
3809 s_nop 0 0 0.00 | |
3810 s_nop 0 0 0.00 | |
3811 _L130: | |
3812 v_cmp_eq_i32_e64 s9, s9, 0 0 0.00 | |
3813 v_add_co_ci_u32_e64 v10, vcc_lo, v0, 0, s9 0 0.00 | |
3814 v_cmp_gt_u32_e32 vcc_lo, v9, v10 0 0.00 | |
3815 s_and_saveexec_b32 s10, vcc_lo 0 0.00 | |
3816 s_andn2_b32 exec_lo, s10, exec_lo 0 0.00 | |
3817 s_andn2_b32 s8, s8, exec_lo 0 0.00 | |
3818 s_cbranch_scc0 _L129 0 0.00 | |
3819 BBF0_130: | |
3820 s_and_b32 exec_lo, s10, s8 0 0.00 | |
3821 v_mul_f32_e64 v25, -v8, v32 0 0.00 | |
3822 v_mul_f32_e32 v1, v16, v32 0 0.00 | |
3823 s_waitcnt vmcnt(0) 0 0.00 | |
3824 v_add_co_ci_u32_e64 v0, vcc_lo, v0, v2, s9 0 0.00 | |
3825 s_movk_i32 s9, 0x0 0 0.00 | |
3826 v_mac_f32_e32 v25, v15, v16 0 0.00 | |
3827 v_mad_f32 v32, v15, v8, v1 0 0.00 | |
3828 v_mul_lo_u32 v0, v0, 24 0 0.00 | |
3829 v_add_f32_e32 v15, v22, v25 0 0.00 | |
3830 v_add_f32_e32 v26, v23, v32 0 0.00 | |
3831 v_mul_f32_e32 v28, v13, v15 0 0.00 | |
3832 v_mul_f32_e32 v29, v14, v15 0 0.00 | |
3833 v_mov_b32_e32 v15, v25 0 0.00 | |
3834 v_mac_f32_e32 v28, v11, v26 0 0.00 | |
3835 v_mac_f32_e32 v29, v12, v26 0 0.00 | |
3836 v_add_f32_e32 v30, v18, v28 0 0.00 | |
3837 v_add_f32_e32 v31, v19, v29 0 0.00 | |
3838 v_mov_b32_e32 v28, v33 0 0.00 | |
3839 v_mov_b32_e32 v29, v34 0 0.00 | |
3840 s_waitcnt_depctr 0xffe3 0 0.00 | |
3841 s_clause 0x1 0 0.00 | |
3842 buffer_store_dword v7, v0, s[12:15], 0 offen glc 0 0.00 | |
3843 buffer_store_dwordx4 v[28:31], v0, s[12:15], 0 offen offset:8 glc 0 0.00 | |
3844 v_mov_b32_e32 v0, v10 0 0.00 | |
3845 v_min3_f32 v24, v33, v30, v24 0 0.00 | |
3846 v_min3_f32 v21, v34, v31, v21 0 0.00 | |
3847 v_max3_f32 v20, v33, v30, v20 0 0.00 | |
3848 v_max3_f32 v17, v34, v31, v17 0 0.00 | |
3849 v_mov_b32_e32 v34, v31 0 0.00 | |
3850 v_mov_b32_e32 v33, v30 0 0.00 | |
3851 s_branch _L130 0 0.00 | |
3852 _L129: | |
3853 s_mov_b32 exec_lo, s6 0 0.00 | |
3854 v_mul_f32_e32 v0, v13, v6 0 0.00 | |
3855 v_mul_f32_e32 v1, v14, v6 0 0.00 | |
3856 v_mac_f32_e32 v0, v11, v27 0 0.00 | |
3857 v_mac_f32_e32 v1, v12, v27 0 0.00 | |
3858 v_add_f32_e32 v35, v18, v0 0 0.00 | |
3859 s_waitcnt vmcnt(0) 0 0.00 | |
3860 v_add_nc_u32_e32 v0, v3, v2 0 0.00 | |
3861 v_add_f32_e32 v36, v19, v1 0 0.00 | |
3862 v_min3_f32 v48, v33, v35, v24 0 0.00 | |
3863 v_mul_lo_u32 v0, v0, 24 0 0.00 | |
3864 v_min3_f32 v42, v34, v36, v21 0 0.00 | |
3865 v_max3_f32 v107, v33, v35, v20 0 0.00 | |
3866 v_max3_f32 v106, v34, v36, v17 0 0.00 | |
3867 v_add_nc_u32_e32 v9, 0xffffffe8, v0 0 0.00 | |
3868 v_add_nc_u32_e32 v10, -16, v0 0 0.00 | |
3869 s_waitcnt_depctr 0xffe3 0 0.00 | |
3870 buffer_store_dword v7, v9, s[12:15], 0 offen glc 0 0.00 | |
3871 buffer_store_dwordx4 v[33:36], v10, s[12:15], 0 offen glc 0 0.00 | |
3872 _L31: | |
3873 s_waitcnt_depctr 0xffe3 4 0.75 132 | |
3874 s_mov_b32 exec_lo, s7 4 0.05 9 | |
3875 v_cmp_gt_f32_sdwa s2, v107, v48 src0_sel:DWORD src1_sel:DWORD 4 0.01 1 | |
3876 v_cmp_gt_f32_e32 vcc_lo, v106, v42 4 0.09 16 | |
3877 s_or_b32 vcc_lo, s2, vcc_lo 4 0.01 2 | |
3878 s_and_saveexec_b32 s2, vcc_lo 4 0.05 9 | |
3879 s_cbranch_execz _L1 4 0.01 1 | |
3880 BBF0_131: | |
3881 s_waitcnt lgkmcnt(0) 4 0.02 4 | |
3882 s_load_dwordx4 s[8:11], s[0:1], 0x60 4 0.01 1 | |
3883 v_floor_f32_e32 v3, v48 4 0.01 1 | |
3884 v_floor_f32_e32 v2, v42 4 0.01 1 | |
3885 v_mul_lo_u32 v4, v7, 24 4 0.02 4 | |
3886 v_ceil_f32_e32 v1, v107 4 0.02 3 | |
3887 v_cvt_i32_f32_e32 v2, v2 4 0.01 1 | |
3888 v_cvt_i32_f32_e32 v3, v3 4 0.01 1 | |
3889 v_ceil_f32_e32 v0, v106 4 0.01 1 | |
3890 v_cvt_i32_f32_e32 v1, v1 4 0.01 1 | |
3891 s_waitcnt lgkmcnt(0) 4 0.10 18 | |
3892 s_waitcnt_depctr 0xffe3 4 0.01 2 | |
3893 buffer_atomic_smin v3, v4, s[8:11], 0 offen 4 0.01 1 | |
3894 buffer_atomic_smin v2, v4, s[8:11], 0 offen offset:4 4 0.01 1 | |
3895 buffer_atomic_smax v1, v4, s[8:11], 0 offen offset:8 4 0.10 18 | |
3896 v_cvt_i32_f32_e32 v1, v0 4 0.08 15 | |
3897 buffer_atomic_smax v1, v4, s[8:11], 0 offen offset:12 4 0.01 1 | |
3898 _L1: | |
3899 s_endpgm 4 38.24 6772 | |
3900 s_code_end 0 0.00 | |
3901 s_code_end 0 0.00 | |
3902 s_code_end 0 0.00 | |
3903 s_code_end 0 0.00 | |
3904 s_code_end 0 0.00 | |
3905 s_code_end 0 0.00 | |
3906 s_code_end 0 0.00 | |
3907 s_code_end 0 0.00 | |
3908 s_code_end 0 0.00 | |
3909 s_code_end 0 0.00 | |
3910 s_code_end 0 0.00 | |
3911 s_code_end 0 0.00 | |
3912 s_code_end 0 0.00 | |
3913 s_code_end 0 0.00 | |
3914 s_code_end 0 0.00 | |
3915 s_code_end 0 0.00 | |
3916 s_code_end 0 0.00 | |
3917 s_code_end 0 0.00 | |
3918 s_code_end 0 0.00 | |
3919 s_code_end 0 0.00 | |
3920 s_code_end 0 0.00 | |
3921 s_code_end 0 0.00 | |
3922 s_code_end 0 0.00 | |
3923 s_code_end 0 0.00 | |
3924 s_code_end 0 0.00 | |
3925 s_code_end 0 0.00 | |
3926 s_code_end 0 0.00 | |
3927 s_code_end 0 0.00 | |
3928 s_code_end 0 0.00 | |
3929 s_code_end 0 0.00 | |
3930 s_code_end 0 0.00 | |
3931 s_code_end 0 0.00 | |
3932 s_code_end 0 0.00 | |
3933 s_code_end 0 0.00 | |
3934 s_code_end 0 0.00 | |
3935 s_code_end 0 0.00 | |
3936 s_code_end 0 0.00 | |
3937 s_code_end 0 0.00 | |
3938 s_code_end 0 0.00 | |
3939 s_code_end 0 0.00 | |
3940 s_code_end 0 0.00 | |
3941 s_code_end 0 0.00 | |
3942 s_code_end 0 0.00 | |
3943 s_code_end 0 0.00 | |
3944 s_code_end 0 0.00 | |
3945 s_code_end 0 0.00 | |
3946 s_code_end 0 0.00 | |
3947 s_code_end 0 0.00 | |
3948 s_code_end 0 0.00 | |
3949 s_code_end 0 0.00 | |
3950 s_code_end 0 0.00 | |
3951 s_code_end 0 0.00 | |
3952 s_code_end 0 0.00 | |
3953 s_code_end 0 0.00 | |
3954 s_code_end 0 0.00 | |
3955 s_code_end 0 0.00 | |
3956 s_code_end 0 0.00 | |
3957 s_code_end 0 0.00 | |
3958 s_code_end 0 0.00 | |
3959 s_code_end 0 0.00 | |
3960 s_code_end 0 0.00 | |
3961 s_code_end 0 0.00 | |
3962 s_code_end 0 0.00 | |
3963 s_code_end 0 0.00 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment