Created
February 14, 2018 17:04
-
-
Save Groovounet/01aa31a61ea81f9f3009a9e5ec7cddb1 to your computer and use it in GitHub Desktop.
Independent texture fetches with ASM
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#version 430 core

// Number of textures sampled and averaged for each fragment.
#define FETCH_COUNT 16
// Fragment output location that receives the averaged color.
#define FRAG_COLOR 0
// Factor that maps window-space pixel coordinates to texture coordinates (1/2048).
#define NORMALIZE_COORD float(1.0 / 2048.0)

// Sampler array declared at binding 0; one sampler per fetch.
layout(binding = 0) uniform sampler2D Texture[FETCH_COUNT];

in vec4 gl_FragCoord;

layout(location = FRAG_COLOR, index = 0) out vec4 Color;

// Samples every texture in Texture[] at the same coordinate and writes the
// average of the .xy components to Color.xy; Color.zw is fixed to (0.0, 1.0).
void main()
{
    // All fetches share one coordinate derived from the fragment position,
    // so no fetch depends on the result of another.
    vec2 TexCoord = gl_FragCoord.xy * NORMALIZE_COORD;

    vec2 Sum = vec2(0);
    for(int Index = 0; Index < FETCH_COUNT; ++Index)
    {
        Sum = texture(Texture[Index], TexCoord).xy + Sum;
    }

    // Average by multiplying with the reciprocal of the fetch count.
    Color = vec4(Sum * (1.0 / float(FETCH_COUNT)), 0.0, 1.0);
}
1 | 3 | :: : | label_basic_block_1: s_mov_b64 s[56:57], exec | |
2 | 3 | :: : | s_wqm_b64 exec, exec | |
3 | 3 | :: : | s_mov_b32 s0, s1 | |
4 | 3 | :: : | s_movk_i32 s1, 0x0000 | |
5 | 3 | :: : | s_movk_i32 s3, 0x0000 | |
6 | 3 | :: : | s_load_dwordx8 s[8:15], s[0:1], 0x00 | |
7 | 3 | :: : | s_load_dwordx8 s[16:23], s[2:3], 0x00 | |
8 | 3 | :: : | s_load_dwordx8 s[24:31], s[0:1], 0x20 | |
9 | 3 | :: : | s_load_dwordx8 s[32:39], s[0:1], 0x40 | |
10 | 3 | :: : | s_load_dwordx8 s[40:47], s[2:3], 0x20 | |
11 | 3 | :: : | s_load_dwordx8 s[48:55], s[0:1], 0x60 | |
12 | 3 | :: : | s_andn2_b32 s5, s5, 0x3fff0000 | |
13 | 4 | ^ :: : | v_mov_b32 v0, 0 | |
14 | 5 | :^:: : | v_mov_b32 v1, 1.0 | |
15 | 5 | :::: : | s_buffer_load_dwordx4 s[4:7], s[4:7], 0x10 | |
16 | 5 | :::: : | s_waitcnt lgkmcnt(0) | |
17 | 5 | ::x: : | v_add_f32 v2, s4, v2 | |
18 | 6 | ::::^: | v_mov_b32 v4, s5 | |
19 | 6 | :::xv: | v_mad_legacy_f32 v3, v3, s6, v4 | |
20 | 6 | ::x::: | v_mul_f32 v2, 0x3a000000, v2 | |
21 | 6 | :::x:: | v_mul_f32 v3, 0x3a000000, v3 | |
22 | 6 | ::vvxx | image_sample v[4:5], v[2:5], s[8:15], s[16:19] | |
23 | 6 | :::::: | s_nop 0x0000 | |
24 | 8 | ::vvvv^^ | image_sample v[6:7], v[2:5], s[24:31], s[20:23] | |
25 | 8 | :::::::: | s_nop 0x0000 | |
26 | 10 | ::vvvv::^^ | image_sample v[8:9], v[2:5], s[32:39], s[40:43] | |
27 | 10 | :::::::::: | s_nop 0x0000 | |
28 | 12 | ::vvvv::::^^ | image_sample v[10:11], v[2:5], s[48:55], s[44:47] | |
29 | 12 | :::::::::::: | s_load_dwordx8 s[4:11], s[0:1], 0x80 | |
30 | 12 | :::::::::::: | s_load_dwordx8 s[12:19], s[2:3], 0x40 | |
31 | 12 | :::::::::::: | s_load_dwordx8 s[20:27], s[0:1], 0xa0 | |
32 | 12 | :::::::::::: | s_load_dwordx8 s[28:35], s[0:1], 0xc0 | |
33 | 12 | :::::::::::: | s_load_dwordx8 s[36:43], s[2:3], 0x60 | |
34 | 12 | :::::::::::: | s_load_dwordx8 s[44:51], s[0:1], 0xe0 | |
35 | 12 | :::::::::::: | s_waitcnt lgkmcnt(0) | |
36 | 14 | ::vvvv::::::^^ | image_sample v[12:13], v[2:5], s[4:11], s[12:15] | |
37 | 14 | :::::::::::::: | s_nop 0x0000 | |
38 | 16 | ::vvvv::::::::^^ | image_sample v[14:15], v[2:5], s[20:27], s[16:19] | |
39 | 16 | :::::::::::::::: | s_nop 0x0000 | |
40 | 18 | ::vvvv::::::::::^^ | image_sample v[16:17], v[2:5], s[28:35], s[36:39] | |
41 | 18 | :::::::::::::::::: | s_nop 0x0000 | |
42 | 20 | ::vvvv::::::::::::^^ | image_sample v[18:19], v[2:5], s[44:51], s[40:43] | |
43 | 20 | :::::::::::::::::::: | s_load_dwordx8 s[4:11], s[0:1], 0x100 | |
44 | 20 | :::::::::::::::::::: | s_load_dwordx8 s[12:19], s[2:3], 0x80 | |
45 | 20 | :::::::::::::::::::: | s_load_dwordx8 s[20:27], s[0:1], 0x120 | |
46 | 20 | :::::::::::::::::::: | s_load_dwordx8 s[28:35], s[0:1], 0x140 | |
47 | 20 | :::::::::::::::::::: | s_load_dwordx8 s[36:43], s[2:3], 0xa0 | |
48 | 20 | :::::::::::::::::::: | s_load_dwordx8 s[44:51], s[0:1], 0x160 | |
49 | 20 | :::::::::::::::::::: | s_waitcnt lgkmcnt(0) | |
50 | 22 | ::vvvv::::::::::::::^^ | image_sample v[20:21], v[2:5], s[4:11], s[12:15] | |
51 | 22 | :::::::::::::::::::::: | s_nop 0x0000 | |
52 | 24 | ::vvvv::::::::::::::::^^ | image_sample v[22:23], v[2:5], s[20:27], s[16:19] | |
53 | 24 | :::::::::::::::::::::::: | s_nop 0x0000 | |
54 | 26 | ::vvvv::::::::::::::::::^^ | image_sample v[24:25], v[2:5], s[28:35], s[36:39] | |
55 | 26 | :::::::::::::::::::::::::: | s_nop 0x0000 | |
56 | 28 | ::vvvv::::::::::::::::::::^^ | image_sample v[26:27], v[2:5], s[44:51], s[40:43] | |
57 | 28 | :::::::::::::::::::::::::::: | s_load_dwordx8 s[4:11], s[0:1], 0x180 | |
58 | 28 | :::::::::::::::::::::::::::: | s_load_dwordx8 s[12:19], s[2:3], 0xc0 | |
59 | 28 | :::::::::::::::::::::::::::: | s_load_dwordx8 s[20:27], s[0:1], 0x1a0 | |
60 | 28 | :::::::::::::::::::::::::::: | s_load_dwordx8 s[28:35], s[0:1], 0x1c0 | |
61 | 28 | :::::::::::::::::::::::::::: | s_load_dwordx8 s[36:43], s[2:3], 0xe0 | |
62 | 28 | :::::::::::::::::::::::::::: | s_load_dwordx8 s[44:51], s[0:1], 0x1e0 | |
63 | 28 | :::::::::::::::::::::::::::: | s_waitcnt lgkmcnt(0) | |
64 | 30 | ::vvvv::::::::::::::::::::::^^ | image_sample v[28:29], v[2:5], s[4:11], s[12:15] | |
65 | 30 | :::::::::::::::::::::::::::::: | s_nop 0x0000 | |
66 | 32 | ::vvvv::::::::::::::::::::::::^^ | image_sample v[30:31], v[2:5], s[20:27], s[16:19] | |
67 | 32 | :::::::::::::::::::::::::::::::: | s_nop 0x0000 | |
68 | 34 | ::vvvv::::::::::::::::::::::::::^^ | image_sample v[32:33], v[2:5], s[28:35], s[36:39] | |
69 | 34 | :::::::::::::::::::::::::::::::::: | s_nop 0x0000 | |
70 | 34 | ::xxvv:::::::::::::::::::::::::::: | image_sample v[2:3], v[2:5], s[44:51], s[40:43] | |
71 | 34 | :::::::::::::::::::::::::::::::::: | s_waitcnt vmcnt(14) | |
72 | 34 | ::::x:v::::::::::::::::::::::::::: | v_add_f32 v4, v4, v6 | |
73 | 33 | :::::x v:::::::::::::::::::::::::: | v_add_f32 v5, v5, v7 | |
74 | 32 | :::::: :::::::::::::::::::::::::: | s_waitcnt vmcnt(13) | |
75 | 32 | ::::x: v::::::::::::::::::::::::: | v_add_f32 v4, v4, v8 | |
76 | 31 | :::::x v:::::::::::::::::::::::: | v_add_f32 v5, v5, v9 | |
77 | 30 | :::::: :::::::::::::::::::::::: | s_waitcnt vmcnt(12) | |
78 | 30 | ::::x: v::::::::::::::::::::::: | v_add_f32 v4, v4, v10 | |
79 | 29 | :::::x v:::::::::::::::::::::: | v_add_f32 v5, v5, v11 | |
80 | 28 | :::::: :::::::::::::::::::::: | s_waitcnt vmcnt(11) | |
81 | 28 | ::::x: v::::::::::::::::::::: | v_add_f32 v4, v4, v12 | |
82 | 27 | :::::x v:::::::::::::::::::: | v_add_f32 v5, v5, v13 | |
83 | 26 | :::::: :::::::::::::::::::: | s_waitcnt vmcnt(10) | |
84 | 26 | ::::x: v::::::::::::::::::: | v_add_f32 v4, v4, v14 | |
85 | 25 | :::::x v:::::::::::::::::: | v_add_f32 v5, v5, v15 | |
86 | 24 | :::::: :::::::::::::::::: | s_waitcnt vmcnt(9) | |
87 | 24 | ::::x: v::::::::::::::::: | v_add_f32 v4, v4, v16 | |
88 | 23 | :::::x v:::::::::::::::: | v_add_f32 v5, v5, v17 | |
89 | 22 | :::::: :::::::::::::::: | s_waitcnt vmcnt(8) | |
90 | 22 | ::::x: v::::::::::::::: | v_add_f32 v4, v4, v18 | |
91 | 21 | :::::x v:::::::::::::: | v_add_f32 v5, v5, v19 | |
92 | 20 | :::::: :::::::::::::: | s_waitcnt vmcnt(7) | |
93 | 20 | ::::x: v::::::::::::: | v_add_f32 v4, v4, v20 | |
94 | 19 | :::::x v:::::::::::: | v_add_f32 v5, v5, v21 | |
95 | 18 | :::::: :::::::::::: | s_waitcnt vmcnt(6) | |
96 | 18 | ::::x: v::::::::::: | v_add_f32 v4, v4, v22 | |
97 | 17 | :::::x v:::::::::: | v_add_f32 v5, v5, v23 | |
98 | 16 | :::::: :::::::::: | s_waitcnt vmcnt(5) | |
99 | 16 | ::::x: v::::::::: | v_add_f32 v4, v4, v24 | |
100 | 15 | :::::x v:::::::: | v_add_f32 v5, v5, v25 | |
101 | 14 | :::::: :::::::: | s_waitcnt vmcnt(4) | |
102 | 14 | ::::x: v::::::: | v_add_f32 v4, v4, v26 | |
103 | 13 | :::::x v:::::: | v_add_f32 v5, v5, v27 | |
104 | 12 | :::::: :::::: | s_waitcnt vmcnt(3) | |
105 | 12 | ::::x: v::::: | v_add_f32 v4, v4, v28 | |
106 | 11 | :::::x v:::: | v_add_f32 v5, v5, v29 | |
107 | 10 | :::::: :::: | s_waitcnt vmcnt(2) | |
108 | 10 | ::::x: v::: | v_add_f32 v4, v4, v30 | |
109 | 9 | :::::x v:: | v_add_f32 v5, v5, v31 | |
110 | 8 | :::::: :: | s_waitcnt vmcnt(1) | |
111 | 8 | ::::x: v: | v_add_f32 v4, v4, v32 | |
112 | 7 | :::::x v | v_add_f32 v5, v5, v33 | |
113 | 6 | :::::: | s_waitcnt vmcnt(0) | |
114 | 6 | ::x:v: | v_add_f32 v2, v4, v2 | |
115 | 5 | :::x v | v_add_f32 v3, v5, v3 | |
116 | 4 | ::x: | v_mul_f32 v2, 0x3d800000, v2 | |
117 | 4 | :::x | v_mul_f32 v3, 0x3d800000, v3 | |
118 | 4 | :::: | s_mov_b64 exec, s[56:57] | |
119 | 4 | ::xv | v_cvt_pkrtz_f16_f32 v2, v2, v3 | |
120 | 3 | xv: | v_cvt_pkrtz_f16_f32 v0, v0, v1 | |
121 | 2 | v v | exp mrt0, v2, v2, v0, v0 | |
122 | 0 | | s_endpgm | |
Maximum # VGPR used 34, # VGPR allocated: 34 | |
// Disassembly of a compiled pixel shader (asic(VI), type(PS)). It appears to be
// the compiler output for the GLSL fragment shader listed earlier in this file:
// it issues 16 image_sample instructions and scales by 1/2048 and 1/16.
// The trailing comment on each instruction is its byte offset followed by its
// encoding word(s), as printed by the disassembler.
shader main | |
asic(VI) | |
type(PS) | |
// s_ps_state in s0 | |
// Save the incoming exec mask in s[56:57]; it is restored right before the
// final pack/export. s_wqm_b64 then rewrites exec (whole-quad mode per the
// GCN3 ISA -- presumably so every pixel of a quad runs the texture fetches).
s_mov_b64 s[56:57], exec // 000000000000: BEB8017E | |
s_wqm_b64 exec, exec // 000000000004: BEFE077E | |
// Form the 64-bit bases s[0:1] and s[2:3] used by the s_load_dwordx8
// instructions: s0 takes the incoming s1, and the high dwords s1/s3 are zeroed.
s_mov_b32 s0, s1 // 000000000008: BE800001 | |
s_movk_i32 s1, 0x0000 // 00000000000C: B0010000 | |
s_movk_i32 s3, 0x0000 // 000000000010: B0030000 | |
// Descriptor batch 1. The 8-dword loads from s[0:1] (0x00/0x20/0x40/0x60)
// become the third operand of samples 1-4; the two 8-dword loads from s[2:3]
// are consumed four dwords at a time as the fourth operand.
// NOTE(review): presumably image resource vs. sampler descriptors -- confirm
// against the ISA operand layout for image_sample.
s_load_dwordx8 s[8:15], s[0:1], 0x00 // 000000000014: C00E0200 00000000 | |
s_load_dwordx8 s[16:23], s[2:3], 0x00 // 00000000001C: C00E0401 00000000 | |
s_load_dwordx8 s[24:31], s[0:1], 0x20 // 000000000024: C00E0600 00000020 | |
s_load_dwordx8 s[32:39], s[0:1], 0x40 // 00000000002C: C00E0800 00000040 | |
s_load_dwordx8 s[40:47], s[2:3], 0x20 // 000000000034: C00E0A01 00000020 | |
s_load_dwordx8 s[48:55], s[0:1], 0x60 // 00000000003C: C00E0C00 00000060 | |
// Clear bits of s5, then load four dwords into s[4:7] through the buffer
// descriptor held in s[4:7]; s4, s5 and s6 feed the coordinate setup below.
s_andn2_b32 s5, s5, 0x3fff0000 // 000000000044: 8905FF05 3FFF0000 | |
// v0 = 0 and v1 = 1.0 are only read by the last v_cvt_pkrtz_f16_f32; they
// correspond to the constant 0.0 / 1.0 components of the GLSL Color output.
v_mov_b32 v0, 0 // 00000000004C: 7E000280 | |
v_mov_b32 v1, 1.0 // 000000000050: 7E0202F2 | |
s_buffer_load_dwordx4 s[4:7], s[4:7], 0x10 // 000000000054: C02A0102 00000010 | |
// Wait for the scalar loads issued so far before their results are read.
s_waitcnt lgkmcnt(0) // 00000000005C: BF8C007F | |
// Coordinate setup: v2 = s4 + v2 and v3 = v3 * s6 + s5, then both are
// multiplied by 0x3a000000, the IEEE-754 bit pattern of 2^-11 = 1/2048
// (NORMALIZE_COORD in the GLSL source).
v_add_f32 v2, s4, v2 // 000000000060: 02040404 | |
v_mov_b32 v4, s5 // 000000000064: 7E080205 | |
v_mad_legacy_f32 v3, v3, s6, v4 // 000000000068: D1C00003 04100D03 | |
v_mul_f32 v2, 0x3a000000, v2 // 000000000070: 0A0404FF 3A000000 | |
v_mul_f32 v3, 0x3a000000, v3 // 000000000078: 0A0606FF 3A000000 | |
// Samples 1-4 -> v[4:11]. Every image_sample in this listing takes its
// address from v[2:5] and, with dmask:0x3, writes a two-register result.
// The first result lands in v[4:5], which later hold the running sum.
image_sample v[4:5], v[2:5], s[8:15], s[16:19] dmask:0x3 // 000000000080: F0800300 00820402 | |
s_nop 0x0000 // 000000000088: BF800000 | |
image_sample v[6:7], v[2:5], s[24:31], s[20:23] dmask:0x3 // 00000000008C: F0800300 00A60602 | |
s_nop 0x0000 // 000000000094: BF800000 | |
image_sample v[8:9], v[2:5], s[32:39], s[40:43] dmask:0x3 // 000000000098: F0800300 01480802 | |
s_nop 0x0000 // 0000000000A0: BF800000 | |
image_sample v[10:11], v[2:5], s[48:55], s[44:47] dmask:0x3 // 0000000000A4: F0800300 016C0A02 | |
// Descriptor batch 2 into s[4:51]: offsets 0x80-0xe0 from s[0:1] and
// 0x40/0x60 from s[2:3], followed by a wait for those scalar loads.
s_load_dwordx8 s[4:11], s[0:1], 0x80 // 0000000000AC: C00E0100 00000080 | |
s_load_dwordx8 s[12:19], s[2:3], 0x40 // 0000000000B4: C00E0301 00000040 | |
s_load_dwordx8 s[20:27], s[0:1], 0xa0 // 0000000000BC: C00E0500 000000A0 | |
s_load_dwordx8 s[28:35], s[0:1], 0xc0 // 0000000000C4: C00E0700 000000C0 | |
s_load_dwordx8 s[36:43], s[2:3], 0x60 // 0000000000CC: C00E0901 00000060 | |
s_load_dwordx8 s[44:51], s[0:1], 0xe0 // 0000000000D4: C00E0B00 000000E0 | |
s_waitcnt lgkmcnt(0) // 0000000000DC: BF8C007F | |
// Samples 5-8 -> v[12:19].
image_sample v[12:13], v[2:5], s[4:11], s[12:15] dmask:0x3 // 0000000000E0: F0800300 00610C02 | |
s_nop 0x0000 // 0000000000E8: BF800000 | |
image_sample v[14:15], v[2:5], s[20:27], s[16:19] dmask:0x3 // 0000000000EC: F0800300 00850E02 | |
s_nop 0x0000 // 0000000000F4: BF800000 | |
image_sample v[16:17], v[2:5], s[28:35], s[36:39] dmask:0x3 // 0000000000F8: F0800300 01271002 | |
s_nop 0x0000 // 000000000100: BF800000 | |
image_sample v[18:19], v[2:5], s[44:51], s[40:43] dmask:0x3 // 000000000104: F0800300 014B1202 | |
// Descriptor batch 3 into s[4:51]: offsets 0x100-0x160 from s[0:1] and
// 0x80/0xa0 from s[2:3].
s_load_dwordx8 s[4:11], s[0:1], 0x100 // 00000000010C: C00E0100 00000100 | |
s_load_dwordx8 s[12:19], s[2:3], 0x80 // 000000000114: C00E0301 00000080 | |
s_load_dwordx8 s[20:27], s[0:1], 0x120 // 00000000011C: C00E0500 00000120 | |
s_load_dwordx8 s[28:35], s[0:1], 0x140 // 000000000124: C00E0700 00000140 | |
s_load_dwordx8 s[36:43], s[2:3], 0xa0 // 00000000012C: C00E0901 000000A0 | |
s_load_dwordx8 s[44:51], s[0:1], 0x160 // 000000000134: C00E0B00 00000160 | |
s_waitcnt lgkmcnt(0) // 00000000013C: BF8C007F | |
// Samples 9-12 -> v[20:27].
image_sample v[20:21], v[2:5], s[4:11], s[12:15] dmask:0x3 // 000000000140: F0800300 00611402 | |
s_nop 0x0000 // 000000000148: BF800000 | |
image_sample v[22:23], v[2:5], s[20:27], s[16:19] dmask:0x3 // 00000000014C: F0800300 00851602 | |
s_nop 0x0000 // 000000000154: BF800000 | |
image_sample v[24:25], v[2:5], s[28:35], s[36:39] dmask:0x3 // 000000000158: F0800300 01271802 | |
s_nop 0x0000 // 000000000160: BF800000 | |
image_sample v[26:27], v[2:5], s[44:51], s[40:43] dmask:0x3 // 000000000164: F0800300 014B1A02 | |
// Descriptor batch 4 into s[4:51]: offsets 0x180-0x1e0 from s[0:1] and
// 0xc0/0xe0 from s[2:3].
s_load_dwordx8 s[4:11], s[0:1], 0x180 // 00000000016C: C00E0100 00000180 | |
s_load_dwordx8 s[12:19], s[2:3], 0xc0 // 000000000174: C00E0301 000000C0 | |
s_load_dwordx8 s[20:27], s[0:1], 0x1a0 // 00000000017C: C00E0500 000001A0 | |
s_load_dwordx8 s[28:35], s[0:1], 0x1c0 // 000000000184: C00E0700 000001C0 | |
s_load_dwordx8 s[36:43], s[2:3], 0xe0 // 00000000018C: C00E0901 000000E0 | |
s_load_dwordx8 s[44:51], s[0:1], 0x1e0 // 000000000194: C00E0B00 000001E0 | |
s_waitcnt lgkmcnt(0) // 00000000019C: BF8C007F | |
// Samples 13-16 -> v[28:33] and v[2:3].
image_sample v[28:29], v[2:5], s[4:11], s[12:15] dmask:0x3 // 0000000001A0: F0800300 00611C02 | |
s_nop 0x0000 // 0000000001A8: BF800000 | |
image_sample v[30:31], v[2:5], s[20:27], s[16:19] dmask:0x3 // 0000000001AC: F0800300 00851E02 | |
s_nop 0x0000 // 0000000001B4: BF800000 | |
image_sample v[32:33], v[2:5], s[28:35], s[36:39] dmask:0x3 // 0000000001B8: F0800300 01272002 | |
s_nop 0x0000 // 0000000001C0: BF800000 | |
// The 16th sample writes its result to v[2:3], i.e. over the coordinate
// registers it reads; no later sample needs them.
image_sample v[2:3], v[2:5], s[44:51], s[40:43] dmask:0x3 // 0000000001C4: F0800300 014B0202 | |
// Reduction. All 16 samples are issued before the first result is consumed.
// Each s_waitcnt vmcnt(N) steps N down by one (14 ... 0); per the GCN ISA it
// blocks until at most N vector-memory operations are still outstanding.
// After each wait the next sample's two result registers are added into the
// running sum kept in v4 (first component) and v5 (second component).
s_waitcnt vmcnt(14) // 0000000001CC: BF8C0F7E | |
v_add_f32 v4, v4, v6 // 0000000001D0: 02080D04 | |
v_add_f32 v5, v5, v7 // 0000000001D4: 020A0F05 | |
s_waitcnt vmcnt(13) // 0000000001D8: BF8C0F7D | |
v_add_f32 v4, v4, v8 // 0000000001DC: 02081104 | |
v_add_f32 v5, v5, v9 // 0000000001E0: 020A1305 | |
s_waitcnt vmcnt(12) // 0000000001E4: BF8C0F7C | |
v_add_f32 v4, v4, v10 // 0000000001E8: 02081504 | |
v_add_f32 v5, v5, v11 // 0000000001EC: 020A1705 | |
s_waitcnt vmcnt(11) // 0000000001F0: BF8C0F7B | |
v_add_f32 v4, v4, v12 // 0000000001F4: 02081904 | |
v_add_f32 v5, v5, v13 // 0000000001F8: 020A1B05 | |
s_waitcnt vmcnt(10) // 0000000001FC: BF8C0F7A | |
v_add_f32 v4, v4, v14 // 000000000200: 02081D04 | |
v_add_f32 v5, v5, v15 // 000000000204: 020A1F05 | |
s_waitcnt vmcnt(9) // 000000000208: BF8C0F79 | |
v_add_f32 v4, v4, v16 // 00000000020C: 02082104 | |
v_add_f32 v5, v5, v17 // 000000000210: 020A2305 | |
s_waitcnt vmcnt(8) // 000000000214: BF8C0F78 | |
v_add_f32 v4, v4, v18 // 000000000218: 02082504 | |
v_add_f32 v5, v5, v19 // 00000000021C: 020A2705 | |
s_waitcnt vmcnt(7) // 000000000220: BF8C0F77 | |
v_add_f32 v4, v4, v20 // 000000000224: 02082904 | |
v_add_f32 v5, v5, v21 // 000000000228: 020A2B05 | |
s_waitcnt vmcnt(6) // 00000000022C: BF8C0F76 | |
v_add_f32 v4, v4, v22 // 000000000230: 02082D04 | |
v_add_f32 v5, v5, v23 // 000000000234: 020A2F05 | |
s_waitcnt vmcnt(5) // 000000000238: BF8C0F75 | |
v_add_f32 v4, v4, v24 // 00000000023C: 02083104 | |
v_add_f32 v5, v5, v25 // 000000000240: 020A3305 | |
s_waitcnt vmcnt(4) // 000000000244: BF8C0F74 | |
v_add_f32 v4, v4, v26 // 000000000248: 02083504 | |
v_add_f32 v5, v5, v27 // 00000000024C: 020A3705 | |
s_waitcnt vmcnt(3) // 000000000250: BF8C0F73 | |
v_add_f32 v4, v4, v28 // 000000000254: 02083904 | |
v_add_f32 v5, v5, v29 // 000000000258: 020A3B05 | |
s_waitcnt vmcnt(2) // 00000000025C: BF8C0F72 | |
v_add_f32 v4, v4, v30 // 000000000260: 02083D04 | |
v_add_f32 v5, v5, v31 // 000000000264: 020A3F05 | |
s_waitcnt vmcnt(1) // 000000000268: BF8C0F71 | |
v_add_f32 v4, v4, v32 // 00000000026C: 02084104 | |
v_add_f32 v5, v5, v33 // 000000000270: 020A4305 | |
// vmcnt(0): the last sample (in v[2:3]) has returned; fold it into the sum,
// leaving the totals in v2/v3.
s_waitcnt vmcnt(0) // 000000000274: BF8C0F70 | |
v_add_f32 v2, v4, v2 // 000000000278: 02040504 | |
v_add_f32 v3, v5, v3 // 00000000027C: 02060705 | |
// Scale by 0x3d800000, the IEEE-754 bit pattern of 2^-4 = 1/16, which is
// 1.0 / float(FETCH_COUNT) in the GLSL source.
v_mul_f32 v2, 0x3d800000, v2 // 000000000280: 0A0404FF 3D800000 | |
v_mul_f32 v3, 0x3d800000, v3 // 000000000288: 0A0606FF 3D800000 | |
// Restore the exec mask saved in s[56:57] at entry.
s_mov_b64 exec, s[56:57] // 000000000290: BEFE0138 | |
// Pack (v2, v3) and (v0 = 0, v1 = 1.0) into two registers of half-float
// pairs and export them to mrt0 as a compressed export ("compr").
v_cvt_pkrtz_f16_f32 v2, v2, v3 // 000000000294: D2960002 00020702 | |
v_cvt_pkrtz_f16_f32 v0, v0, v1 // 00000000029C: D2960000 00020300 | |
exp mrt0, v2, v2, v0, v0 done compr vm // 0000000002A4: C4001C0F 00000002 | |
s_endpgm // 0000000002AC: BF810000 | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment