Last active
February 14, 2018 17:05
-
-
Save Groovounet/eee06640917a027cb7e8a041ce919cd2 to your computer and use it in GitHub Desktop.
Dependent texture fetches with Polaris 10 ASM
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#version 430 core | |
// Fragment shader micro-benchmark: a chain of FETCH_COUNT texture lookups in
// which the coordinate of every lookup is the (slightly biased) .xy result of
// the previous one, so no lookup can start before the one before it returns.
// NOTE(review): the trailing "| |" on each line looks like table residue from
// the page this listing was extracted from, not GLSL - confirm before compiling.
#define FETCH_COUNT 16 | |
// One sampler per step of the chain; step i reads TextureDiffuse[i].
uniform sampler2D TextureDiffuse[FETCH_COUNT]; | |
in vec4 gl_FragCoord; | |
layout(location = 0, index = 0) out vec4 Color; | |
void main() | |
{ | |
// Starting coordinate: window position scaled by 1/2048
// (presumably the render target is 2048 pixels wide/high - TODO confirm).
vec2 Coord = gl_FragCoord.xy * (1.0 / 2048.0); | |
// Each iteration overwrites Coord with the fetched .xy plus a 0.000001 bias,
// which then becomes the coordinate of the next iteration.
for(int i = 0; i < FETCH_COUNT; ++i) | |
Coord = texture(TextureDiffuse[i], Coord).xy + 0.000001; | |
// Output the end of the chain (scaled by 1.000001) in the first two
// components, with constant 0.0 and 1.0 in the last two.
Color = vec4(Coord * 1.000001, 0.0, 1.0); | |
} | |
1 | 3 | :: : | label_basic_block_1: s_mov_b64 s[36:37], exec | |
2 | 3 | :: : | s_wqm_b64 exec, exec | |
3 | 3 | :: : | s_mov_b32 s0, s1 | |
4 | 3 | :: : | s_movk_i32 s1, 0x0000 | |
5 | 3 | :: : | s_movk_i32 s3, 0x0000 | |
6 | 3 | :: : | s_load_dwordx8 s[8:15], s[0:1], 0x00 | |
7 | 3 | :: : | s_load_dwordx8 s[16:23], s[2:3], 0x00 | |
8 | 3 | :: : | s_load_dwordx8 s[24:31], s[0:1], 0x20 | |
9 | 3 | :: : | s_andn2_b32 s5, s5, 0x3fff0000 | |
10 | 4 | ^ :: : | v_mov_b32 v0, 0 | |
11 | 5 | :^:: : | v_mov_b32 v1, 1.0 | |
12 | 5 | :::: : | s_buffer_load_dwordx4 s[4:7], s[4:7], 0x10 | |
13 | 5 | :::: : | s_waitcnt lgkmcnt(0) | |
14 | 5 | ::x: : | v_add_f32 v2, s4, v2 | |
15 | 6 | ::::^: | v_mov_b32 v4, s5 | |
16 | 6 | :::xv: | v_mad_legacy_f32 v3, v3, s6, v4 | |
17 | 6 | ::x::: | v_mul_f32 v2, 0x3a000000, v2 | |
18 | 6 | :::x:: | v_mul_f32 v3, 0x3a000000, v3 | |
19 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[8:15], s[16:19] | |
20 | 6 | :::::: | s_load_dwordx8 s[4:11], s[0:1], 0x40 | |
21 | 6 | :::::: | s_load_dwordx8 s[12:19], s[2:3], 0x20 | |
22 | 6 | :::::: | s_waitcnt vmcnt(0) | |
23 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2 | |
24 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3 | |
25 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[24:31], s[20:23] | |
26 | 6 | :::::: | s_load_dwordx8 s[20:27], s[0:1], 0x60 | |
27 | 6 | :::::: | s_waitcnt vmcnt(0) | |
28 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2 | |
29 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3 | |
30 | 6 | :::::: | s_waitcnt lgkmcnt(0) | |
31 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[4:11], s[12:15] | |
32 | 6 | :::::: | s_load_dwordx8 s[4:11], s[0:1], 0x80 | |
33 | 6 | :::::: | s_load_dwordx8 s[28:35], s[2:3], 0x40 | |
34 | 6 | :::::: | s_waitcnt vmcnt(0) | |
35 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2 | |
36 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3 | |
37 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[20:27], s[16:19] | |
38 | 6 | :::::: | s_load_dwordx8 s[12:19], s[0:1], 0xa0 | |
39 | 6 | :::::: | s_waitcnt vmcnt(0) | |
40 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2 | |
41 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3 | |
42 | 6 | :::::: | s_waitcnt lgkmcnt(0) | |
43 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[4:11], s[28:31] | |
44 | 6 | :::::: | s_load_dwordx8 s[4:11], s[0:1], 0xc0 | |
45 | 6 | :::::: | s_load_dwordx8 s[20:27], s[2:3], 0x60 | |
46 | 6 | :::::: | s_waitcnt vmcnt(0) | |
47 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2 | |
48 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3 | |
49 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[12:19], s[32:35] | |
50 | 6 | :::::: | s_load_dwordx8 s[12:19], s[0:1], 0xe0 | |
51 | 6 | :::::: | s_waitcnt vmcnt(0) | |
52 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2 | |
53 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3 | |
54 | 6 | :::::: | s_waitcnt lgkmcnt(0) | |
55 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[4:11], s[20:23] | |
56 | 6 | :::::: | s_load_dwordx8 s[4:11], s[0:1], 0x100 | |
57 | 6 | :::::: | s_load_dwordx8 s[28:35], s[2:3], 0x80 | |
58 | 6 | :::::: | s_waitcnt vmcnt(0) | |
59 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2 | |
60 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3 | |
61 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[12:19], s[24:27] | |
62 | 6 | :::::: | s_load_dwordx8 s[12:19], s[0:1], 0x120 | |
63 | 6 | :::::: | s_waitcnt vmcnt(0) | |
64 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2 | |
65 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3 | |
66 | 6 | :::::: | s_waitcnt lgkmcnt(0) | |
67 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[4:11], s[28:31] | |
68 | 6 | :::::: | s_load_dwordx8 s[4:11], s[0:1], 0x140 | |
69 | 6 | :::::: | s_load_dwordx8 s[20:27], s[2:3], 0xa0 | |
70 | 6 | :::::: | s_waitcnt vmcnt(0) | |
71 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2 | |
72 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3 | |
73 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[12:19], s[32:35] | |
74 | 6 | :::::: | s_load_dwordx8 s[12:19], s[0:1], 0x160 | |
75 | 6 | :::::: | s_waitcnt vmcnt(0) | |
76 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2 | |
77 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3 | |
78 | 6 | :::::: | s_waitcnt lgkmcnt(0) | |
79 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[4:11], s[20:23] | |
80 | 6 | :::::: | s_load_dwordx8 s[4:11], s[0:1], 0x180 | |
81 | 6 | :::::: | s_load_dwordx8 s[28:35], s[2:3], 0xc0 | |
82 | 6 | :::::: | s_waitcnt vmcnt(0) | |
83 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2 | |
84 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3 | |
85 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[12:19], s[24:27] | |
86 | 6 | :::::: | s_load_dwordx8 s[12:19], s[0:1], 0x1a0 | |
87 | 6 | :::::: | s_waitcnt vmcnt(0) | |
88 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2 | |
89 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3 | |
90 | 6 | :::::: | s_waitcnt lgkmcnt(0) | |
91 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[4:11], s[28:31] | |
92 | 6 | :::::: | s_load_dwordx8 s[4:11], s[0:1], 0x1c0 | |
93 | 6 | :::::: | s_load_dwordx8 s[20:27], s[2:3], 0xe0 | |
94 | 6 | :::::: | s_waitcnt vmcnt(0) | |
95 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2 | |
96 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3 | |
97 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[12:19], s[32:35] | |
98 | 6 | :::::: | s_load_dwordx8 s[12:19], s[0:1], 0x1e0 | |
99 | 6 | :::::: | s_waitcnt vmcnt(0) | |
100 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2 | |
101 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3 | |
102 | 6 | :::::: | s_waitcnt lgkmcnt(0) | |
103 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[4:11], s[20:23] | |
104 | 6 | :::::: | s_waitcnt vmcnt(0) | |
105 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2 | |
106 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3 | |
107 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[12:19], s[24:27] | |
108 | 4 | :::: | s_waitcnt vmcnt(0) | |
109 | 4 | ::x: | v_add_f32 v2, 0x358637bd, v2 | |
110 | 4 | :::x | v_add_f32 v3, 0x358637bd, v3 | |
111 | 4 | ::x: | v_mul_f32 v2, 0x3f800008, v2 | |
112 | 4 | :::x | v_mul_f32 v3, 0x3f800008, v3 | |
113 | 4 | :::: | s_mov_b64 exec, s[36:37] | |
114 | 4 | ::xv | v_cvt_pkrtz_f16_f32 v2, v2, v3 | |
115 | 3 | xv: | v_cvt_pkrtz_f16_f32 v0, v0, v1 | |
116 | 2 | v v | exp mrt0, v2, v2, v0, v0 | |
117 | 0 | | s_endpgm | |
Maximum # VGPR used 6, # VGPR allocated: 6 | |
shader main
asic(VI)
type(PS)
// s_ps_state in s0
// Disassembly of the FETCH_COUNT = 16 dependent-fetch fragment shader. It
// contains 16 image_sample instructions; each one after the first takes as its
// address the v[2:3] result of the previous sample (plus a bias), so each is
// preceded by an s_waitcnt vmcnt(0) on the previous sample.
//
// Prologue: the incoming exec mask is kept in s[36:37] and the body runs with
// the mask produced by s_wqm_b64; exec is restored just before the export.
s_mov_b64 s[36:37], exec // 000000000000: BEA4017E | |
s_wqm_b64 exec, exec // 000000000004: BEFE077E | |
// s[0:1] = {old s1, 0} and s[2:3] = {s2, 0} become the two base addresses for
// all s_load_dwordx8 below. Blocks read through s[0:1] (one per fetch, offsets
// 0x00..0x1e0 in steps of 0x20) are used as the 8-dword third operand of
// image_sample; blocks read through s[2:3] (offsets 0x00..0xe0) are consumed
// as two 4-dword halves, the fourth operand of two consecutive fetches.
// Presumably these are the image-resource and sampler descriptor tables -
// confirm against the GCN3 ISA description of MIMG operands.
s_mov_b32 s0, s1 // 000000000008: BE800001 | |
s_movk_i32 s1, 0x0000 // 00000000000C: B0010000 | |
s_movk_i32 s3, 0x0000 // 000000000010: B0030000 | |
s_load_dwordx8 s[8:15], s[0:1], 0x00 // 000000000014: C00E0200 00000000 | |
s_load_dwordx8 s[16:23], s[2:3], 0x00 // 00000000001C: C00E0401 00000000 | |
s_load_dwordx8 s[24:31], s[0:1], 0x20 // 000000000024: C00E0600 00000020 | |
// Clears bits 16-29 of s5 before s[4:7] is used as the buffer operand of the
// s_buffer_load_dwordx4 below.
s_andn2_b32 s5, s5, 0x3fff0000 // 00000000002C: 8905FF05 3FFF0000 | |
// v0 = 0 and v1 = 1.0 are the constant 0.0 / 1.0 output components.
v_mov_b32 v0, 0 // 000000000034: 7E000280 | |
v_mov_b32 v1, 1.0 // 000000000038: 7E0202F2 | |
// Initial coordinate: v2/v3 (presumably the incoming fragment position -
// TODO confirm) are adjusted with the loaded constants (x += s4,
// y = y * s6 + s5) and then scaled by 0x3a000000, the float bit pattern of
// 2^-11 = 1/2048 (the "* (1.0 / 2048.0)" of the GLSL source).
s_buffer_load_dwordx4 s[4:7], s[4:7], 0x10 // 00000000003C: C02A0102 00000010 | |
s_waitcnt lgkmcnt(0) // 000000000044: BF8C007F | |
v_add_f32 v2, s4, v2 // 000000000048: 02040404 | |
v_mov_b32 v4, s5 // 00000000004C: 7E080205 | |
v_mad_legacy_f32 v3, v3, s6, v4 // 000000000050: D1C00003 04100D03 | |
v_mul_f32 v2, 0x3a000000, v2 // 000000000058: 0A0404FF 3A000000 | |
v_mul_f32 v3, 0x3a000000, v3 // 000000000060: 0A0606FF 3A000000 | |
// Fetch 1. dmask:0x3 selects two result components, written back to v[2:3].
image_sample v[2:3], v[2:5], s[8:15], s[16:19] dmask:0x3 // 000000000068: F0800300 00820202 | |
// The s_load_dwordx8 for upcoming fetches are issued right after a sample and
// before the s_waitcnt vmcnt(0) that waits for that sample's result.
s_load_dwordx8 s[4:11], s[0:1], 0x40 // 000000000070: C00E0100 00000040 | |
s_load_dwordx8 s[12:19], s[2:3], 0x20 // 000000000078: C00E0301 00000020 | |
// Fetches 2-16 all repeat this pattern: wait for the previous sample, add
// 0x358637bd (float bit pattern of ~0.000001, the "+ 0.000001" of the GLSL
// source) to both components, wait on lgkmcnt(0) when the operands were loaded
// since the last such wait, then sample again with v[2:3] as the address.
s_waitcnt vmcnt(0) // 000000000080: BF8C0F70 | |
v_add_f32 v2, 0x358637bd, v2 // 000000000084: 020404FF 358637BD | |
v_add_f32 v3, 0x358637bd, v3 // 00000000008C: 020606FF 358637BD | |
image_sample v[2:3], v[2:5], s[24:31], s[20:23] dmask:0x3 // 000000000094: F0800300 00A60202 | |
s_load_dwordx8 s[20:27], s[0:1], 0x60 // 00000000009C: C00E0500 00000060 | |
s_waitcnt vmcnt(0) // 0000000000A4: BF8C0F70 | |
v_add_f32 v2, 0x358637bd, v2 // 0000000000A8: 020404FF 358637BD | |
v_add_f32 v3, 0x358637bd, v3 // 0000000000B0: 020606FF 358637BD | |
s_waitcnt lgkmcnt(0) // 0000000000B8: BF8C007F | |
image_sample v[2:3], v[2:5], s[4:11], s[12:15] dmask:0x3 // 0000000000BC: F0800300 00610202 | |
s_load_dwordx8 s[4:11], s[0:1], 0x80 // 0000000000C4: C00E0100 00000080 | |
s_load_dwordx8 s[28:35], s[2:3], 0x40 // 0000000000CC: C00E0701 00000040 | |
s_waitcnt vmcnt(0) // 0000000000D4: BF8C0F70 | |
v_add_f32 v2, 0x358637bd, v2 // 0000000000D8: 020404FF 358637BD | |
v_add_f32 v3, 0x358637bd, v3 // 0000000000E0: 020606FF 358637BD | |
image_sample v[2:3], v[2:5], s[20:27], s[16:19] dmask:0x3 // 0000000000E8: F0800300 00850202 | |
s_load_dwordx8 s[12:19], s[0:1], 0xa0 // 0000000000F0: C00E0300 000000A0 | |
s_waitcnt vmcnt(0) // 0000000000F8: BF8C0F70 | |
v_add_f32 v2, 0x358637bd, v2 // 0000000000FC: 020404FF 358637BD | |
v_add_f32 v3, 0x358637bd, v3 // 000000000104: 020606FF 358637BD | |
s_waitcnt lgkmcnt(0) // 00000000010C: BF8C007F | |
image_sample v[2:3], v[2:5], s[4:11], s[28:31] dmask:0x3 // 000000000110: F0800300 00E10202 | |
s_load_dwordx8 s[4:11], s[0:1], 0xc0 // 000000000118: C00E0100 000000C0 | |
s_load_dwordx8 s[20:27], s[2:3], 0x60 // 000000000120: C00E0501 00000060 | |
s_waitcnt vmcnt(0) // 000000000128: BF8C0F70 | |
v_add_f32 v2, 0x358637bd, v2 // 00000000012C: 020404FF 358637BD | |
v_add_f32 v3, 0x358637bd, v3 // 000000000134: 020606FF 358637BD | |
image_sample v[2:3], v[2:5], s[12:19], s[32:35] dmask:0x3 // 00000000013C: F0800300 01030202 | |
s_load_dwordx8 s[12:19], s[0:1], 0xe0 // 000000000144: C00E0300 000000E0 | |
s_waitcnt vmcnt(0) // 00000000014C: BF8C0F70 | |
v_add_f32 v2, 0x358637bd, v2 // 000000000150: 020404FF 358637BD | |
v_add_f32 v3, 0x358637bd, v3 // 000000000158: 020606FF 358637BD | |
s_waitcnt lgkmcnt(0) // 000000000160: BF8C007F | |
image_sample v[2:3], v[2:5], s[4:11], s[20:23] dmask:0x3 // 000000000164: F0800300 00A10202 | |
s_load_dwordx8 s[4:11], s[0:1], 0x100 // 00000000016C: C00E0100 00000100 | |
s_load_dwordx8 s[28:35], s[2:3], 0x80 // 000000000174: C00E0701 00000080 | |
s_waitcnt vmcnt(0) // 00000000017C: BF8C0F70 | |
v_add_f32 v2, 0x358637bd, v2 // 000000000180: 020404FF 358637BD | |
v_add_f32 v3, 0x358637bd, v3 // 000000000188: 020606FF 358637BD | |
image_sample v[2:3], v[2:5], s[12:19], s[24:27] dmask:0x3 // 000000000190: F0800300 00C30202 | |
s_load_dwordx8 s[12:19], s[0:1], 0x120 // 000000000198: C00E0300 00000120 | |
s_waitcnt vmcnt(0) // 0000000001A0: BF8C0F70 | |
v_add_f32 v2, 0x358637bd, v2 // 0000000001A4: 020404FF 358637BD | |
v_add_f32 v3, 0x358637bd, v3 // 0000000001AC: 020606FF 358637BD | |
s_waitcnt lgkmcnt(0) // 0000000001B4: BF8C007F | |
image_sample v[2:3], v[2:5], s[4:11], s[28:31] dmask:0x3 // 0000000001B8: F0800300 00E10202 | |
s_load_dwordx8 s[4:11], s[0:1], 0x140 // 0000000001C0: C00E0100 00000140 | |
s_load_dwordx8 s[20:27], s[2:3], 0xa0 // 0000000001C8: C00E0501 000000A0 | |
s_waitcnt vmcnt(0) // 0000000001D0: BF8C0F70 | |
v_add_f32 v2, 0x358637bd, v2 // 0000000001D4: 020404FF 358637BD | |
v_add_f32 v3, 0x358637bd, v3 // 0000000001DC: 020606FF 358637BD | |
image_sample v[2:3], v[2:5], s[12:19], s[32:35] dmask:0x3 // 0000000001E4: F0800300 01030202 | |
s_load_dwordx8 s[12:19], s[0:1], 0x160 // 0000000001EC: C00E0300 00000160 | |
s_waitcnt vmcnt(0) // 0000000001F4: BF8C0F70 | |
v_add_f32 v2, 0x358637bd, v2 // 0000000001F8: 020404FF 358637BD | |
v_add_f32 v3, 0x358637bd, v3 // 000000000200: 020606FF 358637BD | |
s_waitcnt lgkmcnt(0) // 000000000208: BF8C007F | |
image_sample v[2:3], v[2:5], s[4:11], s[20:23] dmask:0x3 // 00000000020C: F0800300 00A10202 | |
s_load_dwordx8 s[4:11], s[0:1], 0x180 // 000000000214: C00E0100 00000180 | |
s_load_dwordx8 s[28:35], s[2:3], 0xc0 // 00000000021C: C00E0701 000000C0 | |
s_waitcnt vmcnt(0) // 000000000224: BF8C0F70 | |
v_add_f32 v2, 0x358637bd, v2 // 000000000228: 020404FF 358637BD | |
v_add_f32 v3, 0x358637bd, v3 // 000000000230: 020606FF 358637BD | |
image_sample v[2:3], v[2:5], s[12:19], s[24:27] dmask:0x3 // 000000000238: F0800300 00C30202 | |
s_load_dwordx8 s[12:19], s[0:1], 0x1a0 // 000000000240: C00E0300 000001A0 | |
s_waitcnt vmcnt(0) // 000000000248: BF8C0F70 | |
v_add_f32 v2, 0x358637bd, v2 // 00000000024C: 020404FF 358637BD | |
v_add_f32 v3, 0x358637bd, v3 // 000000000254: 020606FF 358637BD | |
s_waitcnt lgkmcnt(0) // 00000000025C: BF8C007F | |
image_sample v[2:3], v[2:5], s[4:11], s[28:31] dmask:0x3 // 000000000260: F0800300 00E10202 | |
s_load_dwordx8 s[4:11], s[0:1], 0x1c0 // 000000000268: C00E0100 000001C0 | |
s_load_dwordx8 s[20:27], s[2:3], 0xe0 // 000000000270: C00E0501 000000E0 | |
s_waitcnt vmcnt(0) // 000000000278: BF8C0F70 | |
v_add_f32 v2, 0x358637bd, v2 // 00000000027C: 020404FF 358637BD | |
v_add_f32 v3, 0x358637bd, v3 // 000000000284: 020606FF 358637BD | |
image_sample v[2:3], v[2:5], s[12:19], s[32:35] dmask:0x3 // 00000000028C: F0800300 01030202 | |
s_load_dwordx8 s[12:19], s[0:1], 0x1e0 // 000000000294: C00E0300 000001E0 | |
s_waitcnt vmcnt(0) // 00000000029C: BF8C0F70 | |
v_add_f32 v2, 0x358637bd, v2 // 0000000002A0: 020404FF 358637BD | |
v_add_f32 v3, 0x358637bd, v3 // 0000000002A8: 020606FF 358637BD | |
s_waitcnt lgkmcnt(0) // 0000000002B0: BF8C007F | |
image_sample v[2:3], v[2:5], s[4:11], s[20:23] dmask:0x3 // 0000000002B4: F0800300 00A10202 | |
s_waitcnt vmcnt(0) // 0000000002BC: BF8C0F70 | |
v_add_f32 v2, 0x358637bd, v2 // 0000000002C0: 020404FF 358637BD | |
v_add_f32 v3, 0x358637bd, v3 // 0000000002C8: 020606FF 358637BD | |
image_sample v[2:3], v[2:5], s[12:19], s[24:27] dmask:0x3 // 0000000002D0: F0800300 00C30202 | |
// Epilogue: bias the result of the 16th fetch, scale it by 0x3f800008 (the
// float nearest to 1.000001), restore the exec mask saved in s[36:37], pack
// (v2, v3) and (v0, v1) into f16 pairs and export them to mrt0 in compressed
// form.
s_waitcnt vmcnt(0) // 0000000002D8: BF8C0F70 | |
v_add_f32 v2, 0x358637bd, v2 // 0000000002DC: 020404FF 358637BD | |
v_add_f32 v3, 0x358637bd, v3 // 0000000002E4: 020606FF 358637BD | |
v_mul_f32 v2, 0x3f800008, v2 // 0000000002EC: 0A0404FF 3F800008 | |
v_mul_f32 v3, 0x3f800008, v3 // 0000000002F4: 0A0606FF 3F800008 | |
s_mov_b64 exec, s[36:37] // 0000000002FC: BEFE0124 | |
v_cvt_pkrtz_f16_f32 v2, v2, v3 // 000000000300: D2960002 00020702 | |
v_cvt_pkrtz_f16_f32 v0, v0, v1 // 000000000308: D2960000 00020300 | |
exp mrt0, v2, v2, v0, v0 done compr vm // 000000000310: C4001C0F 00000002 | |
s_endpgm // 000000000318: BF810000 | |
end | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment