Created
April 19, 2024 20:42
-
-
Save pashu123/d012c905e616495af0b52312635b0a48 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.text | |
.intel_syntax noprefix | |
.file "mmt3d_kernel_linked_llvm_cpu" | |
# ---------------------------------------------------------------------------
# turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32
#
# Compiler-generated AVX-512 kernel (GAS, Intel syntax). It walks a five-level
# loop nest and, per innermost step, loads sixteen rows of sixteen 32-bit
# elements (row pitch 12800 bytes), runs them through an unpack/shuffle
# network and stores sixteen 64-byte vectors contiguously. Rows past the end
# of the data are zero-filled through mask registers.
#
# Leaf function: it contains no call instructions.
#
# Inputs as actually used by the code (the struct definitions are not in this
# file; NOTE(review): the layout looks like the IREE HAL executable dispatch
# ABI - confirm against the runtime headers):
#   rdi : never read (overwritten by the first instruction after the prologue)
#   rsi : record pointer; fields read:
#           dword [rsi+12] = n_x, dword [rsi+16] = n_y, word [rsi+20] = n_z
#           qword [rsi+24] = P, pointer to packed parameters
#           qword [rsi+32] = B, pointer to a table of buffer pointers
#   rdx : record pointer; fields read:
#           dword [rdx] = id_x, dword [rdx+4] = id_y, word [rdx+8] = id_z
#   P   : D0 = 64-bit value built from dwords [P+0] (low) and [P+4] (high)
#         D1 = 64-bit value built from dwords [P+8] (low) and [P+12] (high)
#         D2 = qword [P+16]
#   B   : B[0] = base of the buffer that is loaded from
#         B[1] = base of the buffer that is stored to
#   (id_*/n_*/D*/P/B are names chosen for this commentary only.)
#
# Output : eax = 0 on every path.
# Saves  : rbp, rbx, r12-r15 (restored in the epilogue).
# Stack  : rsp ends up at rbp-224 (5 pushes + 184 bytes), but spill slots are
#          used down to [rbp-352], i.e. up to 128 bytes below rsp.
#          NOTE(review): this relies on a red zone below rsp being safe for
#          the target ABI - confirm before reusing in kernel/signal context.
# ---------------------------------------------------------------------------
.section .text.turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32,"ax",@progbits | |
.p2align 4, 0x90 | |
.type turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32,@function | |
turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32: | |
.Lfunc_begin0: | |
.file 1 "-" | |
.loc 1 1 0 | |
.cfi_startproc | |
# Prologue: frame pointer plus the five other saved registers, then 184 bytes
# of locals.
push rbp | |
.cfi_def_cfa_offset 16 | |
.cfi_offset rbp, -16 | |
mov rbp, rsp | |
.cfi_def_cfa_register rbp | |
.Ltmp0: | |
push r15 | |
push r14 | |
push r13 | |
push r12 | |
push rbx | |
sub rsp, 184 | |
.cfi_offset rbx, -56 | |
.cfi_offset r12, -48 | |
.cfi_offset r13, -40 | |
.cfi_offset r14, -32 | |
.cfi_offset r15, -24 | |
.loc 1 4 3 prologue_end | |
# rdi = P. ecx/eax = high/low dword of D0, r12d = high dword of D1.
mov rdi, qword ptr [rsi + 24] | |
mov ecx, dword ptr [rdi + 4] | |
mov eax, dword ptr [rdi] | |
mov r12d, dword ptr [rdi + 12] | |
# r10 = D0 = (high << 32) + low; saved to [rbp-248].
# The cmov sequence computes r15 = ceil(D0 / 16) as a signed ceiling division:
#   D0 >  0 : ((D0 - 1) >> 4) + 1
#   D0 <= 0 : -((-D0) / 16)
mov r8, rcx | |
shl r8, 32 | |
lea r10, [r8 + rax] | |
lea r8, [r8 + rax - 1] | |
mov r9, r10 | |
neg r9 | |
test r10, r10 | |
mov qword ptr [rbp - 248], r10 | |
cmovle r8, r9 | |
lea r15, [r8 + 15] | |
test r8, r8 | |
cmovns r15, r8 | |
sar r15, 4 | |
mov r8, r15 | |
neg r8 | |
inc r15 | |
test r10, r10 | |
# r10 is reused for the low dword of D1; the mov leaves the flags from the
# test above intact for the cmovle that follows.
mov r10d, dword ptr [rdi + 8] | |
cmovle r15, r8 | |
# r8 = id_z, r9 = D1 (saved to [rbp-144]), r11 = id_z * 64 = first index of
# the outermost loop (saved to [rbp-64]).
movzx r8d, word ptr [rdx + 8] | |
shl r12, 32 | |
lea r9, [r12 + r10] | |
mov r11d, r8d | |
shl r11d, 6 | |
mov qword ptr [rbp - 144], r9 | |
mov qword ptr [rbp - 64], r11 | |
# Signed compare: nothing to do if the starting index is already >= D1.
cmp r11, r9 | |
jge .LBB0_17 | |
.loc 1 0 3 is_stmt 0 | |
# ---------------------------------------------------------------------------
# Loop-invariant setup. Spill slots (offsets from rbp) and their contents:
#    -64 : outermost-loop index (starts at id_z*64)
#    -72 : load pointer for the outermost loop
#          = B[0] + id_x*512 + id_y*819200 + id_z*D0*3200*256 + 192000
#   -136 : store pointer for the outermost loop
#          = B[1] + id_x*8192 + id_y*819200 + id_z*D2*13107200 + 960
#   -144 : D1            -248 : D0            -240 : ceil(D0/16)
#   -112 : high dword of D0 (consumed by the imul below)
#   -120 : B             (consumed by the reload below)
#   -128 : 4*id_y        -168 : ceil(D0/16) - 4*id_y
#   -184 : n_z*64        -192 : D0*3200*n_z*256    -176 : D2*n_z*13107200
#   -208 : id_x*128      -232 : id_x
#   -216 : n_y*819200    -224 : 4*n_y
#   -272 : n_x*8192      -280 : n_x*128            -288 : n_x*512
#   -312 : D0*12800      -320 : D2*204800
# The +192000 and +960 biases cancel against the negative displacements used
# by the loads and stores in .LBB0_11.
# [rbp-120] and [rbp-112] are later reused as mask spill slots in .LBB0_10;
# both original values have been consumed by then.
# ---------------------------------------------------------------------------
mov r9, qword ptr [rsi + 32] | |
mov rdi, qword ptr [rdi + 16] | |
mov qword ptr [rbp - 112], rcx | |
.loc 1 4 3 | |
# r12 = D1 - id_z*64 = elements remaining along the outermost loop.
or r12, r10 | |
mov r13d, dword ptr [rsi + 12] | |
sub r12, qword ptr [rbp - 64] | |
mov qword ptr [rbp - 240], r15 | |
mov qword ptr [rbp - 120], r9 | |
mov r11, qword ptr [r9 + 8] | |
mov r9, rcx | |
mov ecx, dword ptr [rdx] | |
mov edx, dword ptr [rdx + 4] | |
mov r10, rdi | |
imul r10, r8 | |
imul r10, r10, 13107200 | |
imul rbx, rdx, 819200 | |
mov r14, rcx | |
shl r14, 13 | |
mov qword ptr [rbp - 232], rcx | |
add r14, rbx | |
add r14, r10 | |
lea r10, [r11 + r14 + 960] | |
# 13743895347200 = 3200 << 32: multiplied by the high dword and added to
# 3200 * low dword this yields 3200 * D0 (mod 2^64) in r14.
movabs r11, 13743895347200 | |
imul r14, rax, 3200 | |
imul rax, rax, 12800 | |
imul r11, r9 | |
mov qword ptr [rbp - 136], r10 | |
mov r10d, dword ptr [rsi + 16] | |
movzx esi, word ptr [rsi + 20] | |
add r14, r11 | |
mov r11, rcx | |
shl r11, 9 | |
shl rcx, 7 | |
add r11, rbx | |
mov rbx, qword ptr [rbp - 120] | |
imul r8, r14 | |
mov qword ptr [rbp - 208], rcx | |
mov rcx, r15 | |
imul r14, rsi | |
shl r8, 8 | |
add r11, r8 | |
imul r8, rdi, 204800 | |
imul rdi, rsi | |
shl esi, 6 | |
mov r9, qword ptr [rbx] | |
mov qword ptr [rbp - 184], rsi | |
lea rsi, [4*rdx] | |
shl r14, 8 | |
mov qword ptr [rbp - 192], r14 | |
mov qword ptr [rbp - 320], r8 | |
# 54975581388800 = 12800 << 32: same trick as above, giving 12800 * D0 in rax.
movabs r8, 54975581388800 | |
imul r8, qword ptr [rbp - 112] | |
sub rcx, rsi | |
mov qword ptr [rbp - 128], rsi | |
mov qword ptr [rbp - 168], rcx | |
lea rcx, [4*r10] | |
lea rdx, [r9 + r11 + 192000] | |
mov qword ptr [rbp - 224], rcx | |
mov rcx, r13 | |
shl rcx, 13 | |
mov qword ptr [rbp - 272], rcx | |
mov qword ptr [rbp - 72], rdx | |
mov rdx, r13 | |
shl rdx, 7 | |
shl r13, 9 | |
mov qword ptr [rbp - 280], rdx | |
imul rdx, rdi, 13107200 | |
mov qword ptr [rbp - 288], r13 | |
add rax, r8 | |
mov qword ptr [rbp - 312], rax | |
mov qword ptr [rbp - 176], rdx | |
imul rdx, r10, 819200 | |
mov qword ptr [rbp - 216], rdx | |
jmp .LBB0_2 | |
.p2align 4, 0x90 | |
# ---------------------------------------------------------------------------
# Loop 1 latch: advance load pointer by [rbp-192], store pointer by
# [rbp-176], index by n_z*64, shrink the remaining count (r12) by the same
# step; continue while index < D1 (signed).
# ---------------------------------------------------------------------------
.LBB0_16: | |
.loc 1 0 3 | |
mov rsi, qword ptr [rbp - 176] | |
mov rdx, qword ptr [rbp - 72] | |
mov rax, qword ptr [rbp - 64] | |
mov rcx, qword ptr [rbp - 184] | |
mov r12, qword ptr [rbp - 200] | |
.loc 1 4 3 | |
add rdx, qword ptr [rbp - 192] | |
add qword ptr [rbp - 136], rsi | |
add rax, rcx | |
sub r12, rcx | |
mov qword ptr [rbp - 72], rdx | |
mov qword ptr [rbp - 64], rax | |
cmp rax, qword ptr [rbp - 144] | |
jge .LBB0_17 | |
# Loop 1 header. [rbp-328] = clamp(remaining, 1, 64) = trip count of loop 4.
# The whole body is skipped when ceil(D0/16) <= 4*id_y.
.LBB0_2: | |
cmp r12, 64 | |
mov ecx, 64 | |
mov eax, 1 | |
mov qword ptr [rbp - 200], r12 | |
cmovl rcx, r12 | |
cmp rcx, 2 | |
cmovl rcx, rax | |
mov qword ptr [rbp - 328], rcx | |
cmp r15, qword ptr [rbp - 128] | |
jle .LBB0_16 | |
.loc 1 0 3 | |
# Loop 2 preheader: working copies of both pointers ([rbp-160] load,
# [rbp-152] store); [rbp-296] = D1 - loop-1 index; rdx = 4*id_y (start index),
# rcx = ceil(D0/16) - 4*id_y (remaining 16-row groups).
mov rdx, qword ptr [rbp - 72] | |
mov rcx, qword ptr [rbp - 136] | |
mov rax, qword ptr [rbp - 144] | |
.loc 1 4 3 | |
sub rax, qword ptr [rbp - 64] | |
mov qword ptr [rbp - 160], rdx | |
mov qword ptr [rbp - 152], rcx | |
mov rcx, qword ptr [rbp - 168] | |
mov rdx, qword ptr [rbp - 128] | |
mov qword ptr [rbp - 296], rax | |
jmp .LBB0_4 | |
.p2align 4, 0x90 | |
# ---------------------------------------------------------------------------
# Loop 2 latch: both pointers advance by n_y*819200, the group index by 4*n_y
# and the remaining-group count drops by 4*n_y. r15 is reloaded with
# ceil(D0/16) because the inner loops clobber it. Continue while index < r15.
# ---------------------------------------------------------------------------
.LBB0_15: | |
.loc 1 0 3 | |
mov rsi, qword ptr [rbp - 216] | |
mov rax, qword ptr [rbp - 224] | |
mov rdx, qword ptr [rbp - 256] | |
mov rcx, qword ptr [rbp - 264] | |
mov r15, qword ptr [rbp - 240] | |
.loc 1 4 3 | |
add qword ptr [rbp - 152], rsi | |
add qword ptr [rbp - 160], rsi | |
add rdx, rax | |
sub rcx, rax | |
cmp rdx, r15 | |
jge .LBB0_16 | |
# Loop 2 header.
#   [rbp-352] = clamp(remaining groups, 1, 4) = trip count of loop 5
#   [rbp-336] = ceil(D0/16) - group index
#   rdi       = min(D0 - 16*index, 16*min(4, [rbp-336]))
#             = number of rows that may be loaded in this pass (at most 64)
# The body is skipped when id_x > 24 (unsigned 32-bit compare), i.e. when
# id_x*128 would already be >= 3200.
.LBB0_4: | |
cmp rcx, 4 | |
mov esi, 4 | |
mov rdi, qword ptr [rbp - 248] | |
mov eax, 1 | |
mov qword ptr [rbp - 256], rdx | |
mov qword ptr [rbp - 264], rcx | |
cmovl rsi, rcx | |
cmp rsi, 2 | |
cmovl rsi, rax | |
sub r15, rdx | |
mov eax, 4 | |
cmp r15, 4 | |
mov qword ptr [rbp - 352], rsi | |
mov qword ptr [rbp - 336], r15 | |
cmovl rax, r15 | |
shl rdx, 4 | |
shl rax, 4 | |
sub rdi, rdx | |
cmp rax, rdi | |
cmovl rdi, rax | |
cmp dword ptr [rbp - 232], 24 | |
ja .LBB0_15 | |
.loc 1 0 3 | |
# Loop 3 preheader: working copies ([rbp-88] load, [rbp-80] store) and
# rcx = id_x*128 (start index).
mov rcx, qword ptr [rbp - 152] | |
mov rax, qword ptr [rbp - 160] | |
mov qword ptr [rbp - 80], rcx | |
mov rcx, qword ptr [rbp - 208] | |
mov qword ptr [rbp - 88], rax | |
jmp .LBB0_6 | |
.p2align 4, 0x90 | |
# ---------------------------------------------------------------------------
# Loop 3 latch: store pointer += n_x*8192, load pointer += n_x*512,
# index += n_x*128; continue while index < 3200 (signed).
# ---------------------------------------------------------------------------
.LBB0_14: | |
mov rcx, qword ptr [rbp - 304] | |
mov rax, qword ptr [rbp - 80] | |
mov rdx, qword ptr [rbp - 88] | |
.loc 1 4 3 | |
add rax, qword ptr [rbp - 272] | |
add rdx, qword ptr [rbp - 288] | |
add rcx, qword ptr [rbp - 280] | |
mov qword ptr [rbp - 80], rax | |
mov qword ptr [rbp - 88], rdx | |
cmp rcx, 3200 | |
jge .LBB0_15 | |
# Loop 3 header: skip the body when D1 - loop-1 index <= 0.
.LBB0_6: | |
.loc 1 0 3 | |
cmp qword ptr [rbp - 296], 0 | |
mov qword ptr [rbp - 304], rcx | |
.loc 1 4 3 | |
jle .LBB0_14 | |
.loc 1 0 3 | |
# Loop 4 preheader: working copies ([rbp-104] load, [rbp-96] store), counter 0.
mov rax, qword ptr [rbp - 88] | |
mov rdx, qword ptr [rbp - 80] | |
xor ecx, ecx | |
mov qword ptr [rbp - 104], rax | |
mov qword ptr [rbp - 96], rdx | |
jmp .LBB0_8 | |
.p2align 4, 0x90 | |
# ---------------------------------------------------------------------------
# Loop 4 latch: store pointer += D2*204800, load pointer += D0*12800,
# counter += 1; exit when the counter equals [rbp-328].
# ---------------------------------------------------------------------------
.LBB0_13: | |
mov rcx, qword ptr [rbp - 344] | |
mov rax, qword ptr [rbp - 96] | |
mov rdx, qword ptr [rbp - 104] | |
.loc 1 4 3 | |
add rax, qword ptr [rbp - 320] | |
add rdx, qword ptr [rbp - 312] | |
inc rcx | |
mov qword ptr [rbp - 96], rax | |
mov qword ptr [rbp - 104], rdx | |
cmp rcx, qword ptr [rbp - 328] | |
je .LBB0_14 | |
# Loop 4 header: skip the body when no 16-row groups remain ([rbp-336] <= 0).
.LBB0_8: | |
.loc 1 0 3 | |
cmp qword ptr [rbp - 336], 0 | |
mov qword ptr [rbp - 344], rcx | |
.loc 1 4 3 | |
jle .LBB0_13 | |
.loc 1 0 3 | |
# Loop 5 preheader: r13 = load cursor, r12 = store cursor, r8 = group counter.
mov r13, qword ptr [rbp - 104] | |
mov r12, qword ptr [rbp - 96] | |
xor r8d, r8d | |
.p2align 4, 0x90 | |
# ---------------------------------------------------------------------------
# Loop 5 header (one 16-row group per iteration).
#   r14 = rdi - 16*r8 = rows still valid in this group
#   rdx = store cursor for the innermost loop
# Sixteen masks are built, one per row i (0..15): all ones when r14 >= i+1,
# otherwise zero (setcc gives 0/1, neg turns 1 into 0xFFFFFFFF).
# Rows 0-9 are spilled as 16-bit masks to
#   [rbp-120], [rbp-112], [rbp-56], [rbp-54], [rbp-52], [rbp-50],
#   [rbp-48], [rbp-46], [rbp-44], [rbp-42]
# Rows 10-13 stay in k4-k7, row 14 in k1, row 15 in k2.
# ---------------------------------------------------------------------------
.LBB0_10: | |
.loc 1 4 3 | |
mov rax, r8 | |
shl rax, 4 | |
mov r14, rdi | |
mov rdx, r12 | |
sub r14, rax | |
xor eax, eax | |
test r14, r14 | |
setg al | |
neg eax | |
kmovd k1, eax | |
xor eax, eax | |
cmp r14, 2 | |
setge al | |
kmovw word ptr [rbp - 120], k1 | |
neg eax | |
kmovd k1, eax | |
xor eax, eax | |
cmp r14, 3 | |
setge al | |
kmovw word ptr [rbp - 112], k1 | |
neg eax | |
kmovd k1, eax | |
xor eax, eax | |
cmp r14, 4 | |
setge al | |
kmovw word ptr [rbp - 56], k1 | |
neg eax | |
kmovd k1, eax | |
xor eax, eax | |
cmp r14, 5 | |
setge al | |
kmovw word ptr [rbp - 54], k1 | |
neg eax | |
kmovd k1, eax | |
xor eax, eax | |
cmp r14, 6 | |
setge al | |
kmovw word ptr [rbp - 52], k1 | |
neg eax | |
kmovd k1, eax | |
xor eax, eax | |
cmp r14, 7 | |
setge al | |
kmovw word ptr [rbp - 50], k1 | |
neg eax | |
kmovd k1, eax | |
xor eax, eax | |
cmp r14, 8 | |
setge al | |
xor esi, esi | |
kmovw word ptr [rbp - 48], k1 | |
neg eax | |
cmp r14, 9 | |
setge sil | |
xor ebx, ebx | |
kmovd k1, eax | |
neg esi | |
cmp r14, 10 | |
kmovw word ptr [rbp - 46], k1 | |
setge bl | |
xor r9d, r9d | |
kmovd k1, esi | |
neg ebx | |
cmp r14, 11 | |
kmovw word ptr [rbp - 44], k1 | |
setge r9b | |
xor r15d, r15d | |
kmovd k1, ebx | |
neg r9d | |
cmp r14, 12 | |
kmovw word ptr [rbp - 42], k1 | |
setge r15b | |
xor r11d, r11d | |
kmovd k4, r9d | |
neg r15d | |
cmp r14, 13 | |
setge r11b | |
xor eax, eax | |
kmovd k5, r15d | |
neg r11d | |
cmp r14, 14 | |
setge al | |
xor r10d, r10d | |
kmovd k6, r11d | |
neg eax | |
cmp r14, 15 | |
setge r10b | |
xor ecx, ecx | |
kmovd k7, eax | |
neg r10d | |
cmp r14, 16 | |
# r14 now becomes the element offset of the innermost loop. The mov does not
# touch the flags, so the setge below still sees the compare against 16.
mov r14, -16 | |
setge cl | |
kmovd k1, r10d | |
neg ecx | |
kmovd k2, ecx | |
.p2align 4, 0x90 | |
# ---------------------------------------------------------------------------
# Innermost loop: r14 runs -16, 0, 16, ... 96 (8 iterations; exit once
# r14 + 16 reaches 112, unsigned compare).
# Each iteration:
#   * loads rows 0..15 into zmm0..zmm15 with zero-masking; row i is read from
#     r13 + 4*r14 + (12800*i - 191936). Rows 0-9 reload their mask from the
#     stack into k3 just before the load.
#   * runs an unpck{l,h}ps / unpck{l,h}pd / vshuff64x2 network over the rows.
#     NOTE(review): the pattern is consistent with a 16x16 transpose of
#     32-bit elements; not verified lane by lane.
#   * stores sixteen zmm registers to [rdx-960] .. [rdx] with vmovapd (the
#     aligned form, so the store address is assumed 64-byte aligned), then
#     advances rdx by 1024.
# ---------------------------------------------------------------------------
.LBB0_11: | |
.loc 1 0 3 | |
kmovw k3, word ptr [rbp - 120] | |
.loc 1 4 3 | |
vmovups zmm10 {k4} {z}, zmmword ptr [r13 + 4*r14 - 63936] | |
vmovups zmm11 {k5} {z}, zmmword ptr [r13 + 4*r14 - 51136] | |
vmovups zmm12 {k6} {z}, zmmword ptr [r13 + 4*r14 - 38336] | |
vmovups zmm13 {k7} {z}, zmmword ptr [r13 + 4*r14 - 25536] | |
vmovups zmm14 {k1} {z}, zmmword ptr [r13 + 4*r14 - 12736] | |
vmovups zmm15 {k2} {z}, zmmword ptr [r13 + 4*r14 + 64] | |
vmovups zmm0 {k3} {z}, zmmword ptr [r13 + 4*r14 - 191936] | |
kmovw k3, word ptr [rbp - 112] | |
vunpcklps zmm17, zmm10, zmm11 | |
vunpckhps zmm10, zmm10, zmm11 | |
vunpcklps zmm11, zmm12, zmm13 | |
vunpckhps zmm12, zmm12, zmm13 | |
vunpcklps zmm13, zmm14, zmm15 | |
vunpckhps zmm14, zmm14, zmm15 | |
vunpcklpd zmm20, zmm11, zmm13 | |
vunpckhpd zmm11, zmm11, zmm13 | |
vunpcklpd zmm13, zmm12, zmm14 | |
vunpckhpd zmm12, zmm12, zmm14 | |
vmovups zmm1 {k3} {z}, zmmword ptr [r13 + 4*r14 - 179136] | |
kmovw k3, word ptr [rbp - 56] | |
vmovups zmm2 {k3} {z}, zmmword ptr [r13 + 4*r14 - 166336] | |
kmovw k3, word ptr [rbp - 54] | |
vunpcklps zmm16, zmm0, zmm1 | |
vunpckhps zmm0, zmm0, zmm1 | |
vmovups zmm3 {k3} {z}, zmmword ptr [r13 + 4*r14 - 153536] | |
kmovw k3, word ptr [rbp - 52] | |
vmovups zmm4 {k3} {z}, zmmword ptr [r13 + 4*r14 - 140736] | |
kmovw k3, word ptr [rbp - 50] | |
vunpcklps zmm1, zmm2, zmm3 | |
vunpckhps zmm2, zmm2, zmm3 | |
vunpcklpd zmm15, zmm16, zmm1 | |
vunpckhpd zmm1, zmm16, zmm1 | |
vunpcklpd zmm16, zmm0, zmm2 | |
vunpckhpd zmm0, zmm0, zmm2 | |
vmovups zmm5 {k3} {z}, zmmword ptr [r13 + 4*r14 - 127936] | |
kmovw k3, word ptr [rbp - 48] | |
vmovups zmm6 {k3} {z}, zmmword ptr [r13 + 4*r14 - 115136] | |
kmovw k3, word ptr [rbp - 46] | |
vunpcklps zmm3, zmm4, zmm5 | |
vunpckhps zmm4, zmm4, zmm5 | |
vmovups zmm7 {k3} {z}, zmmword ptr [r13 + 4*r14 - 102336] | |
kmovw k3, word ptr [rbp - 44] | |
vmovups zmm8 {k3} {z}, zmmword ptr [r13 + 4*r14 - 89536] | |
kmovw k3, word ptr [rbp - 42] | |
vunpcklps zmm5, zmm6, zmm7 | |
vunpckhps zmm6, zmm6, zmm7 | |
vunpcklpd zmm2, zmm3, zmm5 | |
vunpckhpd zmm3, zmm3, zmm5 | |
vunpcklpd zmm5, zmm4, zmm6 | |
vunpckhpd zmm4, zmm4, zmm6 | |
vmovups zmm9 {k3} {z}, zmmword ptr [r13 + 4*r14 - 76736] | |
vshuff64x2 zmm14, zmm15, zmm2, 136 | |
vshuff64x2 zmm19, zmm0, zmm4, 136 | |
vshuff64x2 zmm18, zmm16, zmm5, 136 | |
vshuff64x2 zmm2, zmm15, zmm2, 221 | |
vshuff64x2 zmm0, zmm0, zmm4, 221 | |
# All sixteen loads have been issued; the element offset can advance now.
add r14, 16 | |
vunpcklps zmm7, zmm8, zmm9 | |
vunpckhps zmm8, zmm8, zmm9 | |
vunpcklpd zmm21, zmm8, zmm10 | |
vunpcklpd zmm6, zmm7, zmm17 | |
vunpckhpd zmm7, zmm7, zmm17 | |
vunpckhpd zmm8, zmm8, zmm10 | |
vshuff64x2 zmm17, zmm1, zmm3, 136 | |
vshuff64x2 zmm1, zmm1, zmm3, 221 | |
vshuff64x2 zmm3, zmm16, zmm5, 221 | |
vshuff64x2 zmm4, zmm6, zmm20, 136 | |
vshuff64x2 zmm6, zmm6, zmm20, 221 | |
vshuff64x2 zmm5, zmm7, zmm11, 136 | |
vshuff64x2 zmm16, zmm8, zmm12, 136 | |
vshuff64x2 zmm7, zmm7, zmm11, 221 | |
vshuff64x2 zmm9, zmm21, zmm13, 221 | |
vshuff64x2 zmm15, zmm21, zmm13, 136 | |
vshuff64x2 zmm8, zmm8, zmm12, 221 | |
vshuff64x2 zmm10, zmm14, zmm4, 136 | |
vshuff64x2 zmm11, zmm17, zmm5, 136 | |
vshuff64x2 zmm20, zmm2, zmm6, 136 | |
vshuff64x2 zmm21, zmm1, zmm7, 136 | |
vshuff64x2 zmm22, zmm3, zmm9, 136 | |
vshuff64x2 zmm23, zmm0, zmm8, 136 | |
vshuff64x2 zmm4, zmm14, zmm4, 221 | |
vshuff64x2 zmm12, zmm18, zmm15, 136 | |
vshuff64x2 zmm14, zmm18, zmm15, 221 | |
vshuff64x2 zmm13, zmm19, zmm16, 136 | |
vshuff64x2 zmm5, zmm17, zmm5, 221 | |
vshuff64x2 zmm15, zmm19, zmm16, 221 | |
vshuff64x2 zmm2, zmm2, zmm6, 221 | |
vshuff64x2 zmm1, zmm1, zmm7, 221 | |
vshuff64x2 zmm3, zmm3, zmm9, 221 | |
vshuff64x2 zmm0, zmm0, zmm8, 221 | |
# Store the sixteen result vectors back to back (16 * 64 = 1024 bytes).
vmovapd zmmword ptr [rdx - 960], zmm10 | |
vmovapd zmmword ptr [rdx - 896], zmm11 | |
vmovapd zmmword ptr [rdx - 832], zmm12 | |
vmovapd zmmword ptr [rdx - 768], zmm13 | |
vmovapd zmmword ptr [rdx - 704], zmm20 | |
vmovapd zmmword ptr [rdx - 640], zmm21 | |
vmovapd zmmword ptr [rdx - 576], zmm22 | |
vmovapd zmmword ptr [rdx - 512], zmm23 | |
vmovapd zmmword ptr [rdx - 448], zmm4 | |
vmovapd zmmword ptr [rdx - 384], zmm5 | |
vmovapd zmmword ptr [rdx - 320], zmm14 | |
vmovapd zmmword ptr [rdx - 256], zmm15 | |
vmovapd zmmword ptr [rdx - 192], zmm2 | |
vmovapd zmmword ptr [rdx - 128], zmm1 | |
vmovapd zmmword ptr [rdx - 64], zmm3 | |
vmovapd zmmword ptr [rdx], zmm0 | |
add rdx, 1024 | |
cmp r14, 112 | |
jb .LBB0_11 | |
# Loop 5 latch: next 16-row group. Both cursors advance by 204800 bytes
# (for the load side that is 16 rows * 12800 bytes); exit when the group
# counter equals [rbp-352].
inc r8 | |
add r12, 204800 | |
add r13, 204800 | |
cmp r8, qword ptr [rbp - 352] | |
jne .LBB0_10 | |
jmp .LBB0_13 | |
# ---------------------------------------------------------------------------
# Common exit: return 0, restore saved registers, and clear the upper vector
# state with vzeroupper before returning.
# ---------------------------------------------------------------------------
.LBB0_17: | |
xor eax, eax | |
.loc 1 4 3 epilogue_begin | |
add rsp, 184 | |
pop rbx | |
pop r12 | |
pop r13 | |
pop r14 | |
pop r15 | |
pop rbp | |
.cfi_def_cfa rsp, 8 | |
vzeroupper | |
ret | |
.Ltmp1: | |
.Lfunc_end0: | |
.size turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32, .Lfunc_end0-turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32 | |
.cfi_endproc | |
# ---------------------------------------------------------------------------
# Read-only constant pool: allocatable, mergeable section with 16-byte
# entries, aligned to 16 bytes.
# NOTE(review): none of the .LCPI1_* labels is referenced by the pack_f32
# function earlier in this file; the "1" in the label suggests they belong to
# a different function that is not visible here - confirm.
# The four entries below each hold eight 16-bit values (16 bytes).
# NOTE(review): presumably lane-index vectors for shuffle/permute
# instructions - verify against the instructions that load them.
# ---------------------------------------------------------------------------
.section .rodata.cst16,"aM",@progbits,16 | |
.p2align 4, 0x0 | |
# Eight 16-bit values: 0, 1, 2, 26, 2, 26, 3, 27.
.LCPI1_0: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 26 | |
.short 2 | |
.short 26 | |
.short 3 | |
.short 27 | |
# Eight 16-bit values: 0, 1, 2, 25, 2, 0, 0, 24.
.LCPI1_1: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 25 | |
.short 2 | |
.short 0 | |
.short 0 | |
.short 24 | |
# Eight 16-bit values: 0, 1, 2, 9, 2, 0, 0, 8.
.LCPI1_6: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 9 | |
.short 2 | |
.short 0 | |
.short 0 | |
.short 8 | |
# Eight 16-bit values: 0, 1, 2, 10, 2, 10, 3, 11.
.LCPI1_15: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 10 | |
.short 2 | |
.short 10 | |
.short 3 | |
.short 11 | |
.LCPI1_95: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 16 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_96: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 16 | |
.byte 0 | |
.byte 0 | |
.LCPI1_97: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 17 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_98: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 17 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_99: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 16 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_101: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 17 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_103: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 20 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_104: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 17 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_105: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 20 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_107: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 18 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_108: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 18 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_109: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 18 | |
.byte 0 | |
.byte 0 | |
.LCPI1_111: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 20 | |
.byte 0 | |
.byte 0 | |
.LCPI1_112: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 22 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_113: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 22 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_114: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 22 | |
.byte 0 | |
.byte 0 | |
.LCPI1_115: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 24 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_116: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 24 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_117: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 24 | |
.byte 0 | |
.byte 0 | |
.LCPI1_118: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 17 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_119: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 26 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_120: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 26 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_121: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 26 | |
.byte 0 | |
.byte 0 | |
.LCPI1_122: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 16 | |
.LCPI1_123: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 19 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_124: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 21 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_125: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 23 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_126: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 29 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_127: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 31 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_128: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 27 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_129: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 28 | |
.byte 0 | |
.byte 0 | |
.LCPI1_130: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 28 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_131: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 28 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_132: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 29 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_133: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 29 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_134: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 29 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_135: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 29 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_136: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 30 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_137: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 30 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_138: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 19 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_139: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 21 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_140: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 23 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_141: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 31 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_142: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 19 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_143: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 21 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_144: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 23 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_145: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 25 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_146: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 31 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_147: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 19 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_148: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 31 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_149: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 21 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_150: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 23 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_151: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 25 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_152: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 19 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_153: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 21 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_154: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 23 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_155: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 25 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_156: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 27 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_157: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 31 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_158: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 17 | |
.byte 0 | |
.byte 0 | |
.LCPI1_159: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 17 | |
.byte 0 | |
.LCPI1_160: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 19 | |
.byte 0 | |
.byte 0 | |
.LCPI1_161: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 21 | |
.byte 0 | |
.byte 0 | |
.LCPI1_162: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 23 | |
.byte 0 | |
.byte 0 | |
.LCPI1_163: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 25 | |
.byte 0 | |
.byte 0 | |
.LCPI1_164: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 27 | |
.byte 0 | |
.byte 0 | |
.LCPI1_165: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 30 | |
.byte 0 | |
.byte 0 | |
.LCPI1_166: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 31 | |
.byte 0 | |
.byte 0 | |
.LCPI1_167: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 19 | |
.byte 0 | |
.LCPI1_168: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 31 | |
.byte 0 | |
.LCPI1_169: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 21 | |
.byte 0 | |
.LCPI1_170: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 23 | |
.byte 0 | |
.LCPI1_171: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 25 | |
.byte 0 | |
.LCPI1_172: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 27 | |
.byte 0 | |
.LCPI1_173: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 29 | |
.byte 0 | |
# .LCPI1_174 .. .LCPI1_187: fourteen 16-byte constant tables.
# Every table holds the identity sequence 0..14 in bytes 0-14; only byte 15
# differs, stepping from 17 (.LCPI1_174) up to 30 (.LCPI1_187).
# NOTE(review): no reference to these labels is visible in this part of the
# file. Other 16-byte tables (.LCPI1_95 and friends) are loaded with vpmovsxbw
# and used next to vpermt2w in the function below; presumably these are used
# the same way -- confirm at the use site.
# byte 15 = 17
.LCPI1_174: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 17 | |
# byte 15 = 18
.LCPI1_175: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 18 | |
# byte 15 = 19
.LCPI1_176: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 19 | |
# byte 15 = 20
.LCPI1_177: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 20 | |
# byte 15 = 21
.LCPI1_178: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 21 | |
# byte 15 = 22
.LCPI1_179: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 22 | |
# byte 15 = 23
.LCPI1_180: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 23 | |
# byte 15 = 24
.LCPI1_181: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 24 | |
# byte 15 = 25
.LCPI1_182: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 25 | |
# byte 15 = 26
.LCPI1_183: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 26 | |
# byte 15 = 27
.LCPI1_184: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 27 | |
# byte 15 = 28
.LCPI1_185: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 28 | |
# byte 15 = 29
.LCPI1_186: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 29 | |
# byte 15 = 30
.LCPI1_187: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 30 | |
# Read-only constant section for 32-byte entities: flags "aM" mark it
# allocatable and mergeable, the trailing 32 is the entity size, and
# .p2align 5 aligns it to 32 bytes.
# .LCPI1_2 .. .LCPI1_5: 32-byte tables of sixteen 16-bit entries. Each is an
# identity run 0..k-1, then the value 16 in word k, then zero fill (.zero 2).
# NOTE(review): no reference to these labels is visible in this part of the
# file; the layout looks like a 16-lane word-permute index vector -- confirm
# at the use site.
.section .rodata.cst32,"aM",@progbits,32 | |
.p2align 5, 0x0 | |
# words 0-8 = 0..8, word 9 = 16, words 10-15 zero-filled
.LCPI1_2: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 16 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-10 = 0..10, word 11 = 16, words 12-15 zero-filled
.LCPI1_3: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 16 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-12 = 0..12, word 13 = 16, words 14-15 zero-filled
.LCPI1_4: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 16 | |
.zero 2 | |
.zero 2 | |
# words 0-14 = 0..14, word 15 = 16
.LCPI1_5: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 16 | |
# .LCPI1_7 .. .LCPI1_14: 32-byte tables of sixteen 16-bit entries. Each is an
# identity run 0..k-1 (k = 8..15), then the value 17 in word k, then zero
# fill (.zero 2) for any remaining words.
# NOTE(review): the consumer of these labels is not visible in this part of
# the file; presumably word-permute index vectors -- confirm at the use site.
# words 0-7 = 0..7, word 8 = 17, words 9-15 zero-filled
.LCPI1_7: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 17 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-8 = 0..8, word 9 = 17, words 10-15 zero-filled
.LCPI1_8: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 17 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-9 = 0..9, word 10 = 17, words 11-15 zero-filled
.LCPI1_9: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 17 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-10 = 0..10, word 11 = 17, words 12-15 zero-filled
.LCPI1_10: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 17 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-11 = 0..11, word 12 = 17, words 13-15 zero-filled
.LCPI1_11: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 17 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-12 = 0..12, word 13 = 17, words 14-15 zero-filled
.LCPI1_12: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 17 | |
.zero 2 | |
.zero 2 | |
# words 0-13 = 0..13, word 14 = 17, word 15 zero-filled
.LCPI1_13: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 17 | |
.zero 2 | |
# words 0-14 = 0..14, word 15 = 17
.LCPI1_14: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 17 | |
# .LCPI1_16 .. .LCPI1_19: 32-byte tables of sixteen 16-bit entries. Each is an
# identity run 0..k-1 (k = 9, 11, 13, 15), then the value 18 in word k, then
# zero fill (.zero 2) for any remaining words.
# NOTE(review): the consumer of these labels is not visible in this part of
# the file; presumably word-permute index vectors -- confirm at the use site.
# words 0-8 = 0..8, word 9 = 18, words 10-15 zero-filled
.LCPI1_16: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 18 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-10 = 0..10, word 11 = 18, words 12-15 zero-filled
.LCPI1_17: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 18 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-12 = 0..12, word 13 = 18, words 14-15 zero-filled
.LCPI1_18: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 18 | |
.zero 2 | |
.zero 2 | |
# words 0-14 = 0..14, word 15 = 18
.LCPI1_19: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 18 | |
# .LCPI1_20 .. .LCPI1_27: 32-byte tables of sixteen 16-bit entries. Each is an
# identity run 0..k-1 (k = 8..15), then the value 19 in word k, then zero
# fill (.zero 2) for any remaining words.
# NOTE(review): the consumer of these labels is not visible in this part of
# the file; presumably word-permute index vectors -- confirm at the use site.
# words 0-7 = 0..7, word 8 = 19, words 9-15 zero-filled
.LCPI1_20: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 19 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-8 = 0..8, word 9 = 19, words 10-15 zero-filled
.LCPI1_21: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 19 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-9 = 0..9, word 10 = 19, words 11-15 zero-filled
.LCPI1_22: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 19 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-10 = 0..10, word 11 = 19, words 12-15 zero-filled
.LCPI1_23: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 19 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-11 = 0..11, word 12 = 19, words 13-15 zero-filled
.LCPI1_24: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 19 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-12 = 0..12, word 13 = 19, words 14-15 zero-filled
.LCPI1_25: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 19 | |
.zero 2 | |
.zero 2 | |
# words 0-13 = 0..13, word 14 = 19, word 15 zero-filled
.LCPI1_26: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 19 | |
.zero 2 | |
# words 0-14 = 0..14, word 15 = 19
.LCPI1_27: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 19 | |
# .LCPI1_28 .. .LCPI1_31: 32-byte tables of sixteen 16-bit entries. Each is an
# identity run 0..k-1 (k = 9, 11, 13, 15), then the value 20 in word k, then
# zero fill (.zero 2) for any remaining words.
# NOTE(review): the consumer of these labels is not visible in this part of
# the file; presumably word-permute index vectors -- confirm at the use site.
# words 0-8 = 0..8, word 9 = 20, words 10-15 zero-filled
.LCPI1_28: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 20 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-10 = 0..10, word 11 = 20, words 12-15 zero-filled
.LCPI1_29: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 20 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-12 = 0..12, word 13 = 20, words 14-15 zero-filled
.LCPI1_30: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 20 | |
.zero 2 | |
.zero 2 | |
# words 0-14 = 0..14, word 15 = 20
.LCPI1_31: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 20 | |
# .LCPI1_32 .. .LCPI1_39: 32-byte tables of sixteen 16-bit entries. Each is an
# identity run 0..k-1 (k = 8..15), then the value 21 in word k, then zero
# fill (.zero 2) for any remaining words.
# NOTE(review): the consumer of these labels is not visible in this part of
# the file; presumably word-permute index vectors -- confirm at the use site.
# words 0-7 = 0..7, word 8 = 21, words 9-15 zero-filled
.LCPI1_32: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 21 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-8 = 0..8, word 9 = 21, words 10-15 zero-filled
.LCPI1_33: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 21 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-9 = 0..9, word 10 = 21, words 11-15 zero-filled
.LCPI1_34: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 21 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-10 = 0..10, word 11 = 21, words 12-15 zero-filled
.LCPI1_35: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 21 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-11 = 0..11, word 12 = 21, words 13-15 zero-filled
.LCPI1_36: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 21 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-12 = 0..12, word 13 = 21, words 14-15 zero-filled
.LCPI1_37: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 21 | |
.zero 2 | |
.zero 2 | |
# words 0-13 = 0..13, word 14 = 21, word 15 zero-filled
.LCPI1_38: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 21 | |
.zero 2 | |
# words 0-14 = 0..14, word 15 = 21
.LCPI1_39: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 21 | |
# .LCPI1_40 .. .LCPI1_43: 32-byte tables of sixteen 16-bit entries. Each is an
# identity run 0..k-1 (k = 9, 11, 13, 15), then the value 22 in word k, then
# zero fill (.zero 2) for any remaining words.
# NOTE(review): the consumer of these labels is not visible in this part of
# the file; presumably word-permute index vectors -- confirm at the use site.
# words 0-8 = 0..8, word 9 = 22, words 10-15 zero-filled
.LCPI1_40: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 22 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-10 = 0..10, word 11 = 22, words 12-15 zero-filled
.LCPI1_41: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 22 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-12 = 0..12, word 13 = 22, words 14-15 zero-filled
.LCPI1_42: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 22 | |
.zero 2 | |
.zero 2 | |
# words 0-14 = 0..14, word 15 = 22
.LCPI1_43: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 22 | |
# .LCPI1_44 .. .LCPI1_51: 32-byte tables of sixteen 16-bit entries. Each is an
# identity run 0..k-1 (k = 8..15), then the value 23 in word k, then zero
# fill (.zero 2) for any remaining words.
# NOTE(review): the consumer of these labels is not visible in this part of
# the file; presumably word-permute index vectors -- confirm at the use site.
# words 0-7 = 0..7, word 8 = 23, words 9-15 zero-filled
.LCPI1_44: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 23 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-8 = 0..8, word 9 = 23, words 10-15 zero-filled
.LCPI1_45: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 23 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-9 = 0..9, word 10 = 23, words 11-15 zero-filled
.LCPI1_46: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 23 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-10 = 0..10, word 11 = 23, words 12-15 zero-filled
.LCPI1_47: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 23 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-11 = 0..11, word 12 = 23, words 13-15 zero-filled
.LCPI1_48: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 23 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-12 = 0..12, word 13 = 23, words 14-15 zero-filled
.LCPI1_49: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 23 | |
.zero 2 | |
.zero 2 | |
# words 0-13 = 0..13, word 14 = 23, word 15 zero-filled
.LCPI1_50: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 23 | |
.zero 2 | |
# words 0-14 = 0..14, word 15 = 23
.LCPI1_51: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 23 | |
# .LCPI1_52 .. .LCPI1_55: 32-byte tables of sixteen 16-bit entries. Each is an
# identity run 0..k-1 (k = 9, 11, 13, 15), then the value 24 in word k, then
# zero fill (.zero 2) for any remaining words.
# NOTE(review): the consumer of these labels is not visible in this part of
# the file; presumably word-permute index vectors -- confirm at the use site.
# words 0-8 = 0..8, word 9 = 24, words 10-15 zero-filled
.LCPI1_52: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 24 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-10 = 0..10, word 11 = 24, words 12-15 zero-filled
.LCPI1_53: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 24 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-12 = 0..12, word 13 = 24, words 14-15 zero-filled
.LCPI1_54: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 24 | |
.zero 2 | |
.zero 2 | |
# words 0-14 = 0..14, word 15 = 24
.LCPI1_55: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 24 | |
# .LCPI1_56 .. .LCPI1_61: 32-byte tables of sixteen 16-bit entries. Each is an
# identity run 0..k-1 (k = 10..15), then the value 25 in word k, then zero
# fill (.zero 2) for any remaining words.
# NOTE(review): the consumer of these labels is not visible in this part of
# the file; presumably word-permute index vectors -- confirm at the use site.
# words 0-9 = 0..9, word 10 = 25, words 11-15 zero-filled
.LCPI1_56: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 25 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-10 = 0..10, word 11 = 25, words 12-15 zero-filled
.LCPI1_57: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 25 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-11 = 0..11, word 12 = 25, words 13-15 zero-filled
.LCPI1_58: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 25 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-12 = 0..12, word 13 = 25, words 14-15 zero-filled
.LCPI1_59: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 25 | |
.zero 2 | |
.zero 2 | |
# words 0-13 = 0..13, word 14 = 25, word 15 zero-filled
.LCPI1_60: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 25 | |
.zero 2 | |
# words 0-14 = 0..14, word 15 = 25
.LCPI1_61: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 25 | |
# .LCPI1_62 .. .LCPI1_65: 32-byte tables of sixteen 16-bit entries. Each is an
# identity run 0..k-1 (k = 9, 11, 13, 15), then the value 26 in word k, then
# zero fill (.zero 2) for any remaining words.
# NOTE(review): the consumer of these labels is not visible in this part of
# the file; presumably word-permute index vectors -- confirm at the use site.
# words 0-8 = 0..8, word 9 = 26, words 10-15 zero-filled
.LCPI1_62: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 26 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-10 = 0..10, word 11 = 26, words 12-15 zero-filled
.LCPI1_63: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 26 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-12 = 0..12, word 13 = 26, words 14-15 zero-filled
.LCPI1_64: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 26 | |
.zero 2 | |
.zero 2 | |
# words 0-14 = 0..14, word 15 = 26
.LCPI1_65: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 26 | |
# .LCPI1_67 .. .LCPI1_71: 32-byte tables of sixteen 16-bit entries. Each is an
# identity run 0..k-1 (k = 10, 12, 13, 14, 15), then the value 27 in word k,
# then zero fill (.zero 2) for any remaining words.
# NOTE(review): the consumer of these labels is not visible in this part of
# the file; presumably word-permute index vectors -- confirm at the use site.
# words 0-9 = 0..9, word 10 = 27, words 11-15 zero-filled
.LCPI1_67: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 27 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-11 = 0..11, word 12 = 27, words 13-15 zero-filled
.LCPI1_68: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 27 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-12 = 0..12, word 13 = 27, words 14-15 zero-filled
.LCPI1_69: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 27 | |
.zero 2 | |
.zero 2 | |
# words 0-13 = 0..13, word 14 = 27, word 15 zero-filled
.LCPI1_70: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 27 | |
.zero 2 | |
# words 0-14 = 0..14, word 15 = 27
.LCPI1_71: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 27 | |
# .LCPI1_72 .. .LCPI1_75: 32-byte tables of sixteen 16-bit entries. Each is an
# identity run 0..k-1 (k = 9, 11, 13, 15), then the value 28 in word k, then
# zero fill (.zero 2) for any remaining words.
# NOTE(review): the consumer of these labels is not visible in this part of
# the file; presumably word-permute index vectors -- confirm at the use site.
# words 0-8 = 0..8, word 9 = 28, words 10-15 zero-filled
.LCPI1_72: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 28 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-10 = 0..10, word 11 = 28, words 12-15 zero-filled
.LCPI1_73: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 28 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-12 = 0..12, word 13 = 28, words 14-15 zero-filled
.LCPI1_74: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 28 | |
.zero 2 | |
.zero 2 | |
# words 0-14 = 0..14, word 15 = 28
.LCPI1_75: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 28 | |
# .LCPI1_76 .. .LCPI1_82: 32-byte tables of sixteen 16-bit entries. Each is an
# identity run 0..k-1 (k = 8, 9, 10, 11, 12, 14, 15), then the value 29 in
# word k, then zero fill (.zero 2) for any remaining words.
# NOTE(review): the consumer of these labels is not visible in this part of
# the file; presumably word-permute index vectors -- confirm at the use site.
# words 0-7 = 0..7, word 8 = 29, words 9-15 zero-filled
.LCPI1_76: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 29 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-8 = 0..8, word 9 = 29, words 10-15 zero-filled
.LCPI1_77: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 29 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-9 = 0..9, word 10 = 29, words 11-15 zero-filled
.LCPI1_78: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 29 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-10 = 0..10, word 11 = 29, words 12-15 zero-filled
.LCPI1_79: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 29 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-11 = 0..11, word 12 = 29, words 13-15 zero-filled
.LCPI1_80: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 29 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-13 = 0..13, word 14 = 29, word 15 zero-filled
.LCPI1_81: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 29 | |
.zero 2 | |
# words 0-14 = 0..14, word 15 = 29
.LCPI1_82: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 29 | |
# .LCPI1_83 .. .LCPI1_86: 32-byte tables of sixteen 16-bit entries. Each is an
# identity run 0..k-1 (k = 9, 11, 13, 15), then the value 30 in word k, then
# zero fill (.zero 2) for any remaining words.
# NOTE(review): the consumer of these labels is not visible in this part of
# the file; presumably word-permute index vectors -- confirm at the use site.
# words 0-8 = 0..8, word 9 = 30, words 10-15 zero-filled
.LCPI1_83: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 30 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-10 = 0..10, word 11 = 30, words 12-15 zero-filled
.LCPI1_84: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 30 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-12 = 0..12, word 13 = 30, words 14-15 zero-filled
.LCPI1_85: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 30 | |
.zero 2 | |
.zero 2 | |
# words 0-14 = 0..14, word 15 = 30
.LCPI1_86: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 30 | |
# .LCPI1_87 .. .LCPI1_93: 32-byte tables of sixteen 16-bit entries. Each is an
# identity run 0..k-1 (k = 8..14), then the value 31 in word k, then zero
# fill (.zero 2) for the remaining words.
# NOTE(review): the consumer of these labels is not visible in this part of
# the file; presumably word-permute index vectors -- confirm at the use site.
# words 0-7 = 0..7, word 8 = 31, words 9-15 zero-filled
.LCPI1_87: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 31 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-8 = 0..8, word 9 = 31, words 10-15 zero-filled
.LCPI1_88: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 31 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-9 = 0..9, word 10 = 31, words 11-15 zero-filled
.LCPI1_89: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 31 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-10 = 0..10, word 11 = 31, words 12-15 zero-filled
.LCPI1_90: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 31 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-11 = 0..11, word 12 = 31, words 13-15 zero-filled
.LCPI1_91: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 31 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# words 0-12 = 0..12, word 13 = 31, words 14-15 zero-filled
.LCPI1_92: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 31 | |
.zero 2 | |
.zero 2 | |
# words 0-13 = 0..13, word 14 = 31, word 15 zero-filled
.LCPI1_93: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 31 | |
.zero 2 | |
# .LCPI1_94: 32-byte table of sixteen 16-bit entries. Only words 8 and 9 hold
# explicit values (11 and 27); words 0-7 and 10-15 are zero-filled (.zero 2).
# NOTE(review): the consumer of this label is not visible in this part of the
# file. The zero-filled words may be don't-care lanes rather than meaningful
# zeros -- confirm at the use site.
.LCPI1_94: | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.short 11 | |
.short 27 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# Read-only constant section for 4-byte entities: flags "aM" mark it
# allocatable and mergeable, the trailing 4 is the entity size, and
# .p2align 2 aligns it to 4 bytes.
# .LCPI1_66: one 4-byte constant made of the two 16-bit values 11 and 27.
# NOTE(review): the consumer of this label is not visible in this part of the
# file; a 4-byte constant like this is presumably loaded as a dword
# broadcast -- confirm at the use site.
.section .rodata.cst4,"aM",@progbits,4 | |
.p2align 2, 0x0 | |
.LCPI1_66: | |
.short 11 | |
.short 27 | |
# Read-only constant section for 8-byte entities: flags "aM" mark it
# allocatable and mergeable, the trailing 8 is the entity size.
# Each table below is eight bytes. The dispatch_1 function loads them with
# "vpmovsxbw xmmN, qword ptr [rip + .LCPI1_x]", which sign-extends the eight
# bytes into eight 16-bit words.
.section .rodata.cst8,"aM",@progbits,8 | |
# Loaded into xmm25 in the .LBB1_9 loop body; the instruction that consumes
# xmm25 is not visible in this part of the file.
.LCPI1_100: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 26 | |
.byte 2 | |
.byte 26 | |
.byte 3 | |
.byte 27 | |
# Loaded into xmm1 and used as the index operand of
# "vpermt2w xmm0, xmm1, xmm6". For the 128-bit form, index values 0-7 pick
# words of xmm0 and 8-15 pick words of xmm6, so 9 and 8 here name words 1 and
# 0 of xmm6.
.LCPI1_102: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 9 | |
.byte 2 | |
.byte 0 | |
.byte 0 | |
.byte 8 | |
# Loaded into xmm1 and used as the index operand of a second
# "vpermt2w xmm0, xmm1, xmm6"; 10 and 11 name words 2 and 3 of xmm6.
.LCPI1_106: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 10 | |
.byte 2 | |
.byte 10 | |
.byte 3 | |
.byte 11 | |
# Loaded into xmm8 and then copied to ymm26; the instruction that consumes it
# is not visible in this part of the file.
.LCPI1_110: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 25 | |
.byte 2 | |
.byte 0 | |
.byte 0 | |
.byte 24 | |
.section .text.turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack,"ax",@progbits | |
.p2align 4, 0x90 | |
.type turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack,@function | |
turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack: | |
.Lfunc_begin1: | |
.loc 1 1 0 is_stmt 1 | |
.cfi_startproc | |
push rbp | |
.cfi_def_cfa_offset 16 | |
.cfi_offset rbp, -16 | |
mov rbp, rsp | |
.cfi_def_cfa_register rbp | |
.Ltmp2: | |
push r15 | |
push r14 | |
push r13 | |
push r12 | |
push rbx | |
and rsp, -32 | |
sub rsp, 1792 | |
.cfi_offset rbx, -56 | |
.cfi_offset r12, -48 | |
.cfi_offset r13, -40 | |
.cfi_offset r14, -32 | |
.cfi_offset r15, -24 | |
.loc 1 4 3 prologue_end | |
mov r8, qword ptr [rsi + 24] | |
movzx edi, word ptr [rdx + 8] | |
mov r14d, dword ptr [r8 + 12] | |
mov ecx, dword ptr [r8 + 8] | |
mov r9d, edi | |
shl r9d, 6 | |
mov qword ptr [rsp + 16], r9 | |
shl r14, 32 | |
lea rax, [r14 + rcx] | |
mov qword ptr [rsp + 32], rax | |
cmp r9, rax | |
jge .LBB1_14 | |
.loc 1 0 3 is_stmt 0 | |
mov rbx, qword ptr [rsi + 32] | |
mov r9d, 15361 | |
mov r11d, dword ptr [rsi + 12] | |
mov eax, dword ptr [rsi + 16] | |
movzx r10d, word ptr [rsi + 20] | |
mov r15d, 3538944000 | |
.loc 1 4 3 | |
or r14, rcx | |
vpmovsxbw ymm28, xmmword ptr [rip + .LCPI1_95] | |
vpmovsxbw ymm11, xmmword ptr [rip + .LCPI1_96] | |
vpmovsxbw ymm26, xmmword ptr [rip + .LCPI1_97] | |
vpmovsxbw ymm22, xmmword ptr [rip + .LCPI1_98] | |
sub r14, qword ptr [rsp + 16] | |
bextr r9, qword ptr [r8], r9 | |
imul rdi, r15 | |
mov rsi, qword ptr [rbx] | |
mov r8, qword ptr [rbx + 8] | |
mov ebx, dword ptr [rdx] | |
mov edx, dword ptr [rdx + 4] | |
imul r15, r10 | |
shl r10d, 6 | |
mov r12, r11 | |
shl r12, 6 | |
mov qword ptr [rsp + 64], r10 | |
mov qword ptr [rsp + 72], r15 | |
imul rcx, rdx, 409600 | |
lea r10, [4*rdx] | |
mov qword ptr [rsp + 80], rdx | |
mov rdx, rbx | |
shl rdx, 11 | |
mov qword ptr [rsp + 120], rbx | |
mov qword ptr [rsp + 56], r10 | |
add rdi, rcx | |
add rdx, rdi | |
lea rdi, [4*rax] | |
imul rax, rax, 409600 | |
lea rdx, [rdx + 2*r9] | |
mov qword ptr [rsp + 112], rdi | |
mov rdi, rbx | |
shl rdi, 6 | |
lea rdx, [r8 + rdx + 307680] | |
mov qword ptr [rsp + 104], rax | |
mov r8, r11 | |
shl r8, 11 | |
shl r11, 7 | |
mov qword ptr [rsp + 96], rdi | |
mov qword ptr [rsp + 24], rdx | |
mov rdx, rbx | |
shl rdx, 7 | |
add rdx, rcx | |
lea rax, [rsi + rdx + 403200] | |
mov qword ptr [rsp + 48], rax | |
jmp .LBB1_2 | |
.p2align 4, 0x90 | |
.LBB1_13: | |
.loc 1 0 3 | |
mov rdx, qword ptr [rsp + 24] | |
mov rax, qword ptr [rsp + 16] | |
mov rcx, qword ptr [rsp + 64] | |
mov r14, qword ptr [rsp + 88] | |
.loc 1 4 3 | |
add rdx, qword ptr [rsp + 72] | |
add rax, rcx | |
sub r14, rcx | |
mov qword ptr [rsp + 24], rdx | |
mov qword ptr [rsp + 16], rax | |
cmp rax, qword ptr [rsp + 32] | |
jge .LBB1_14 | |
.LBB1_2: | |
cmp r14, 64 | |
mov edx, 64 | |
mov eax, 1 | |
mov qword ptr [rsp + 88], r14 | |
cmovl rdx, r14 | |
cmp rdx, 2 | |
cmovl rdx, rax | |
cmp dword ptr [rsp + 80], 134 | |
ja .LBB1_13 | |
.loc 1 0 3 | |
mov rcx, qword ptr [rsp + 32] | |
mov rax, qword ptr [rsp + 48] | |
mov rdi, qword ptr [rsp + 24] | |
mov r10, qword ptr [rsp + 56] | |
.loc 1 4 3 | |
sub rcx, qword ptr [rsp + 16] | |
mov qword ptr [rsp + 40], rax | |
jmp .LBB1_4 | |
.p2align 4, 0x90 | |
.LBB1_12: | |
.loc 1 0 3 | |
mov rax, qword ptr [rsp + 104] | |
.loc 1 4 3 | |
add r10, qword ptr [rsp + 112] | |
add qword ptr [rsp + 40], rax | |
add rdi, rax | |
cmp r10, 540 | |
jge .LBB1_13 | |
.LBB1_4: | |
.loc 1 0 3 | |
cmp dword ptr [rsp + 120], 49 | |
.loc 1 4 3 | |
ja .LBB1_12 | |
.loc 1 0 3 | |
mov r9, qword ptr [rsp + 40] | |
mov rax, qword ptr [rsp + 96] | |
mov rbx, rdi | |
jmp .LBB1_6 | |
.p2align 4, 0x90 | |
.LBB1_11: | |
.loc 1 4 3 | |
add rax, r12 | |
add rbx, r8 | |
add r9, r11 | |
cmp rax, 3200 | |
jge .LBB1_12 | |
.LBB1_6: | |
.loc 1 0 3 | |
test rcx, rcx | |
.loc 1 4 3 | |
jle .LBB1_11 | |
.loc 1 0 3 | |
mov r14, rbx | |
xor esi, esi | |
.p2align 4, 0x90 | |
.LBB1_8: | |
mov r13, -16 | |
mov r15, r14 | |
.p2align 4, 0x90 | |
.LBB1_9: | |
.loc 1 4 3 | |
vpbroadcastw ymm0, word ptr [r9 + 2*r13 - 403148] | |
vpbroadcastw xmm1, word ptr [r9 + 2*r13 - 396748] | |
vmovdqa64 xmm23, xmmword ptr [r9 + 2*r13 - 403152] | |
vmovdqa64 xmm16, xmmword ptr [r9 + 2*r13 - 390352] | |
vpbroadcastw xmm2, word ptr [r9 + 2*r13 - 191948] | |
vmovdqa xmm13, xmmword ptr [r9 + 2*r13 - 403168] | |
vmovdqa xmm12, xmmword ptr [r9 + 2*r13 - 396768] | |
vmovdqa xmm5, xmmword ptr [r9 + 2*r13 - 390368] | |
vmovdqa xmm6, xmmword ptr [r9 + 2*r13 - 383968] | |
vmovdqa xmm14, xmmword ptr [r9 + 2*r13 - 377568] | |
vpmovsxbw ymm27, xmmword ptr [rip + .LCPI1_99] | |
vmovdqa ymm10, ymmword ptr [r9 + 2*r13 - 345568] | |
vmovdqa64 ymm7, ymm22 | |
vmovdqa64 xmm22, xmmword ptr [r9 + 2*r13 - 326368] | |
vmovdqa ymm15, ymmword ptr [r9 + 2*r13 - 319968] | |
vpmovsxbw xmm25, qword ptr [rip + .LCPI1_100] | |
vmovdqa64 ymm17, ymm26 | |
vpmovsxbw ymm29, xmmword ptr [rip + .LCPI1_101] | |
vpunpcklwd xmm0, xmm0, xmm1 | |
vpbroadcastd ymm1, dword ptr [r9 + 2*r13 - 390348] | |
vmovdqa64 ymm21, ymm27 | |
vpblendd xmm0, xmm0, xmm1, 2 | |
vmovdqa xmm1, xmmword ptr [r9 + 2*r13 - 396752] | |
vmovdqa ymmword ptr [rsp + 320], ymm0 | |
vpsrld xmm0, xmm23, 16 | |
vpblendw xmm0, xmm1, xmm0, 1 | |
vmovdqa64 xmm31, xmm1 | |
vpsrld xmm1, xmm16, 16 | |
vpunpckldq xmm24, xmm0, xmm1 | |
vpbroadcastw ymm0, word ptr [r9 + 2*r13 - 300748] | |
vpbroadcastw xmm1, word ptr [r9 + 2*r13 - 294348] | |
vpunpcklwd xmm0, xmm0, xmm1 | |
vpbroadcastd ymm1, dword ptr [r9 + 2*r13 - 287948] | |
vpblendd xmm0, xmm0, xmm1, 2 | |
vmovdqa xmm1, xmmword ptr [r9 + 2*r13 - 294352] | |
vmovdqa ymmword ptr [rsp + 288], ymm0 | |
vmovdqa xmm0, xmmword ptr [r9 + 2*r13 - 300752] | |
vmovdqa xmmword ptr [rsp + 192], xmm1 | |
vmovdqa xmmword ptr [rsp + 224], xmm0 | |
vpsrld xmm0, xmm0, 16 | |
vpblendw xmm0, xmm1, xmm0, 1 | |
vmovdqa xmm1, xmmword ptr [r9 + 2*r13 - 287952] | |
vmovdqa xmmword ptr [rsp + 160], xmm1 | |
vpsrld xmm1, xmm1, 16 | |
vpunpckldq xmm0, xmm0, xmm1 | |
vpbroadcastd ymm1, dword ptr [r9 + 2*r13 - 185548] | |
vmovdqa ymmword ptr [rsp + 256], ymm0 | |
vpbroadcastw ymm0, word ptr [r9 + 2*r13 - 198348] | |
vpunpcklwd xmm0, xmm0, xmm2 | |
vpunpcklwd xmm2, xmm13, xmm12 | |
vpblendd xmm0, xmm0, xmm1, 2 | |
vmovdqa xmm1, xmmword ptr [r9 + 2*r13 - 191952] | |
vpunpckldq xmm4, xmm2, xmm5 | |
vmovaps xmm2, xmmword ptr [r9 + 2*r13 - 364768] | |
vmovdqa ymmword ptr [rsp + 128], ymm0 | |
vmovdqa xmm0, xmmword ptr [r9 + 2*r13 - 198352] | |
insertq xmm4, xmm6, 16, 48 | |
vmovdqa xmmword ptr [rsp + 384], xmm1 | |
vmovdqa xmmword ptr [rsp + 448], xmm0 | |
vpsrld xmm0, xmm0, 16 | |
vpblendw xmm0, xmm1, xmm0, 1 | |
vmovdqa xmm1, xmmword ptr [r9 + 2*r13 - 185552] | |
vmovdqa xmmword ptr [rsp + 352], xmm1 | |
vpsrld xmm1, xmm1, 16 | |
vpunpckldq xmm0, xmm0, xmm1 | |
vpbroadcastw xmm1, word ptr [r9 + 2*r13 - 89548] | |
vmovdqa ymmword ptr [rsp + 416], ymm0 | |
vpbroadcastw ymm0, word ptr [r9 + 2*r13 - 95948] | |
vpunpcklwd xmm0, xmm0, xmm1 | |
vpbroadcastd ymm1, dword ptr [r9 + 2*r13 - 83148] | |
vpblendd xmm0, xmm0, xmm1, 2 | |
vmovdqa xmm1, xmmword ptr [r9 + 2*r13 - 89552] | |
vmovdqa ymmword ptr [rsp + 1728], ymm0 | |
vmovdqa xmm0, xmmword ptr [r9 + 2*r13 - 95952] | |
vmovdqa xmmword ptr [rsp + 768], xmm1 | |
vmovdqa xmmword ptr [rsp + 784], xmm0 | |
vpsrld xmm0, xmm0, 16 | |
vpblendw xmm0, xmm1, xmm0, 1 | |
vmovdqa xmm1, xmmword ptr [r9 + 2*r13 - 83152] | |
vpsrld xmm3, xmm1, 16 | |
vmovdqa xmmword ptr [rsp + 752], xmm1 | |
vpmovsxbw xmm1, qword ptr [rip + .LCPI1_102] | |
vpunpckldq xmm0, xmm0, xmm3 | |
vmovdqa xmm3, xmmword ptr [r9 + 2*r13 - 371168] | |
vmovdqa ymmword ptr [rsp + 1696], ymm0 | |
vpunpcklqdq xmm0, xmm4, xmm14 | |
vpbroadcastw xmm4, xmm3 | |
vpblendw xmm4, xmm0, xmm4, 32 | |
vpbroadcastd ymm0, dword ptr [r9 + 2*r13 - 339168] | |
vinsertps xmm8, xmm4, xmm2, 48 | |
vmovdqa xmm4, xmmword ptr [r9 + 2*r13 - 358368] | |
vpbroadcastw xmm9, xmm4 | |
vpblendw xmm8, xmm8, xmm9, 128 | |
vinserti128 ymm9, ymm8, xmmword ptr [r9 + 2*r13 - 351968], 1 | |
vinserti32x4 ymm8, ymm8, xmm22, 1 | |
vpermt2w ymm9, ymm27, ymm10 | |
vmovdqa64 ymm27, ymm7 | |
vpmovsxbw ymm7, xmmword ptr [rip + .LCPI1_103] | |
vpblendd ymm0, ymm9, ymm0, 32 | |
vmovdqa ymm9, ymmword ptr [r9 + 2*r13 - 332768] | |
vpermt2w ymm0, ymm28, ymm9 | |
vpmovsxbw ymm28, xmmword ptr [rip + .LCPI1_104] | |
vshufpd ymm0, ymm0, ymm8, 2 | |
vpmovsxbw ymm8, xmmword ptr [rip + .LCPI1_105] | |
vpermt2w ymm0, ymm11, ymm15 | |
vpbroadcastd ymm11, dword ptr [r9 + 2*r13 - 313568] | |
vpblendd ymm0, ymm0, ymm11, 128 | |
vpsrld xmm11, xmm5, 16 | |
vmovdqa ymmword ptr [rsp + 704], ymm0 | |
vpsrld xmm0, xmm13, 16 | |
vpblendw xmm0, xmm0, xmm12, 2 | |
vpunpckldq xmm0, xmm0, xmm11 | |
vpsrld xmm11, xmm14, 16 | |
vpermt2w xmm0, xmm1, xmm6 | |
vpmovsxbw xmm1, qword ptr [rip + .LCPI1_106] | |
vpunpcklqdq xmm0, xmm0, xmm11 | |
vpbroadcastw xmm11, word ptr [r9 + 2*r13 - 371166] | |
vpblendw xmm0, xmm0, xmm11, 32 | |
vpslldq xmm11, xmm2, 10 | |
vpblendd xmm0, xmm0, xmm11, 8 | |
vpbroadcastw xmm11, word ptr [r9 + 2*r13 - 358366] | |
vpblendw xmm0, xmm0, xmm11, 128 | |
vpbroadcastw xmm11, word ptr [r9 + 2*r13 - 403164] | |
vmovdqa64 ymm30, ymm0 | |
vpbroadcastw xmm0, word ptr [r9 + 2*r13 - 396764] | |
vpunpcklwd xmm0, xmm11, xmm0 | |
vpslldq xmm11, xmm3, 6 | |
vpblendd xmm0, xmm0, xmm5, 2 | |
vpermt2w xmm0, xmm1, xmm6 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_107] | |
vshufps xmm0, xmm0, xmm14, 212 | |
vpblendw xmm0, xmm0, xmm11, 32 | |
vpslldq xmm11, xmm4, 10 | |
vinsertps xmm0, xmm0, xmm2, 112 | |
vpblendw xmm0, xmm0, xmm11, 128 | |
vpbroadcastd ymm11, dword ptr [r9 + 2*r13 - 351964] | |
vpblendd ymm11, ymm0, ymm11, 240 | |
vpermt2w ymm11, ymm1, ymm10 | |
vinserti128 ymm1, ymm0, xmmword ptr [r9 + 2*r13 - 339168], 1 | |
vpbroadcastd ymm0, dword ptr [r9 + 2*r13 - 326364] | |
vpblendd ymm1, ymm11, ymm1, 34 | |
vpmovsxbw ymm11, xmmword ptr [rip + .LCPI1_108] | |
vpermt2w ymm1, ymm11, ymm9 | |
vpmovsxbw ymm11, xmmword ptr [rip + .LCPI1_109] | |
vpblendd ymm0, ymm1, ymm0, 192 | |
vpbroadcastd ymm1, dword ptr [r9 + 2*r13 - 313564] | |
vpermt2w ymm0, ymm11, ymm15 | |
vpbroadcastq ymm11, qword ptr [r9 + 2*r13 - 351960] | |
vpblendd ymm0, ymm0, ymm1, 128 | |
vpsrlq xmm1, xmm13, 48 | |
vmovdqa ymmword ptr [rsp + 672], ymm0 | |
vpsrlq xmm0, xmm12, 48 | |
vpunpcklwd xmm0, xmm1, xmm0 | |
vpsrlq xmm1, xmm5, 48 | |
vpunpckldq xmm0, xmm0, xmm1 | |
vpsrlq xmm1, xmm14, 48 | |
vpblendw xmm0, xmm0, xmm6, 8 | |
vpunpcklqdq xmm0, xmm0, xmm1 | |
vpbroadcastw xmm1, word ptr [r9 + 2*r13 - 371162] | |
vpblendw xmm0, xmm0, xmm1, 32 | |
vpmovzxwd xmm1, xmm2 | |
vpblendd xmm0, xmm0, xmm1, 8 | |
vpbroadcastw xmm1, word ptr [r9 + 2*r13 - 358362] | |
vpblendw xmm0, xmm0, xmm1, 128 | |
vpbroadcastw xmm1, word ptr [r9 + 2*r13 - 403160] | |
vmovdqa64 ymm20, ymm0 | |
vpbroadcastw xmm0, word ptr [r9 + 2*r13 - 396760] | |
vpunpcklwd xmm0, xmm1, xmm0 | |
vpsrldq xmm1, xmm6, 2 | |
vinsertps xmm0, xmm0, xmm5, 156 | |
vpblendw xmm0, xmm0, xmm1, 8 | |
vpslld xmm1, xmm3, 16 | |
vpblendd xmm0, xmm14, xmm0, 3 | |
vpblendw xmm0, xmm0, xmm1, 32 | |
vpsllq xmm1, xmm4, 48 | |
vinsertps xmm0, xmm0, xmm2, 176 | |
vpblendw xmm0, xmm0, xmm1, 128 | |
vpblendd ymm1, ymm0, ymm11, 240 | |
vpbroadcastd ymm11, dword ptr [r9 + 2*r13 - 339160] | |
vinserti32x4 ymm0, ymm0, xmm22, 1 | |
vpermt2w ymm1, ymm8, ymm10 | |
vpmovsxbw xmm8, qword ptr [rip + .LCPI1_110] | |
vpblendd ymm1, ymm1, ymm11, 32 | |
vpbroadcastd ymm11, dword ptr [r9 + 2*r13 - 313560] | |
vmovdqa64 ymm26, ymm8 | |
vpermt2w ymm1, ymm7, ymm9 | |
vmovdqa xmm7, xmmword ptr [r9 + 2*r13 - 371152] | |
vpblendd ymm0, ymm1, ymm0, 204 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_111] | |
vpermt2w ymm0, ymm1, ymm15 | |
vpsrldq xmm1, xmm13, 10 | |
vpblendd ymm0, ymm0, ymm11, 128 | |
vmovdqa64 ymm22, ymm0 | |
vpsrldq xmm0, xmm12, 10 | |
vpunpcklwd xmm0, xmm1, xmm0 | |
vpsrldq xmm1, xmm5, 10 | |
vpunpckldq xmm0, xmm0, xmm1 | |
vpbroadcastw xmm1, word ptr [r9 + 2*r13 - 383958] | |
vpblendw xmm0, xmm0, xmm1, 8 | |
vpsrldq xmm1, xmm14, 10 | |
vpunpcklqdq xmm0, xmm0, xmm1 | |
vpsllq xmm1, xmm2, 16 | |
vpblendw xmm0, xmm0, xmm3, 32 | |
vpblendd xmm0, xmm0, xmm1, 8 | |
vpbroadcastw xmm1, word ptr [r9 + 2*r13 - 358358] | |
vpblendw xmm0, xmm0, xmm1, 128 | |
vpbroadcastw xmm1, word ptr [r9 + 2*r13 - 403156] | |
vmovdqa64 ymm18, ymm0 | |
vpbroadcastw xmm0, word ptr [r9 + 2*r13 - 396756] | |
vpunpcklwd xmm0, xmm1, xmm0 | |
vpsrldq xmm1, xmm6, 6 | |
vpmovsxbw ymm6, xmmword ptr [rip + .LCPI1_112] | |
vinsertps xmm0, xmm0, xmm5, 220 | |
vpblendw xmm0, xmm0, xmm1, 8 | |
vpsrlq xmm1, xmm3, 16 | |
vpslld xmm3, xmm4, 16 | |
vshufps xmm0, xmm0, xmm14, 244 | |
vpblendw xmm0, xmm0, xmm1, 32 | |
vpbroadcastd ymm1, dword ptr [r9 + 2*r13 - 351956] | |
vpblendd xmm0, xmm0, xmm2, 8 | |
vpblendw xmm0, xmm0, xmm3, 128 | |
vpbroadcastd ymm3, dword ptr [r9 + 2*r13 - 339156] | |
vpblendd ymm1, ymm0, ymm1, 240 | |
vinserti128 ymm0, ymm0, xmmword ptr [r9 + 2*r13 - 313568], 1 | |
vpermt2w ymm1, ymm6, ymm10 | |
vpsrldq xmm6, xmm31, 10 | |
vpblendd ymm1, ymm1, ymm3, 32 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_113] | |
vpermt2w ymm1, ymm3, ymm9 | |
vpbroadcastd ymm3, dword ptr [r9 + 2*r13 - 326356] | |
vpblendd ymm1, ymm1, ymm3, 192 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_114] | |
vpermt2w ymm1, ymm3, ymm15 | |
vpsrldq xmm3, xmm13, 14 | |
vpblendd ymm0, ymm1, ymm0, 136 | |
vpsrldq xmm1, xmm5, 14 | |
vmovdqa ymmword ptr [rsp + 640], ymm0 | |
vpsrldq xmm0, xmm12, 14 | |
vmovdqa xmm12, xmmword ptr [r9 + 2*r13 - 383952] | |
vpunpcklwd xmm0, xmm3, xmm0 | |
vpbroadcastd ymm3, dword ptr [r9 + 2*r13 - 339152] | |
vpunpckldq xmm0, xmm0, xmm1 | |
vpbroadcastw xmm1, word ptr [r9 + 2*r13 - 383954] | |
vpblendw xmm0, xmm0, xmm1, 8 | |
vpsrldq xmm1, xmm14, 14 | |
vmovdqa xmm14, xmmword ptr [r9 + 2*r13 - 377552] | |
vpunpcklqdq xmm0, xmm0, xmm1 | |
vpbroadcastw xmm1, word ptr [r9 + 2*r13 - 371154] | |
vpblendw xmm0, xmm0, xmm1, 32 | |
vpsrld xmm1, xmm2, 16 | |
vpblendd xmm0, xmm0, xmm1, 8 | |
vpbroadcastw xmm1, xmm7 | |
vpblendw xmm13, xmm0, xmm4, 128 | |
vpbroadcastw ymm0, word ptr [r9 + 2*r13 - 403152] | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_115] | |
vpunpcklwd xmm0, xmm0, xmm31 | |
vpunpckldq xmm0, xmm0, xmm16 | |
insertq xmm0, xmm12, 16, 48 | |
vpunpcklqdq xmm0, xmm0, xmm14 | |
vpblendw xmm0, xmm0, xmm1, 32 | |
vpbroadcastd xmm1, dword ptr [r9 + 2*r13 - 364752] | |
vpblendd xmm1, xmm0, xmm1, 8 | |
vmovdqa xmm0, xmmword ptr [r9 + 2*r13 - 358352] | |
vpbroadcastw xmm2, xmm0 | |
vpblendw xmm1, xmm1, xmm2, 128 | |
vmovdqa ymm2, ymmword ptr [r9 + 2*r13 - 351968] | |
vpblendd ymm1, ymm1, ymm2, 240 | |
vpermt2w ymm30, ymm17, ymm2 | |
vmovdqa64 ymm17, ymm18 | |
vmovdqa64 ymm18, ymm13 | |
vpermt2w ymm1, ymm4, ymm10 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_116] | |
vpermt2w ymm30, ymm27, ymm10 | |
vpblendd ymm1, ymm1, ymm3, 32 | |
vpbroadcastq ymm3, qword ptr [r9 + 2*r13 - 326352] | |
vpermt2w ymm1, ymm4, ymm9 | |
vpbroadcastw xmm4, word ptr [r9 + 2*r13 - 371150] | |
vpblendd ymm1, ymm1, ymm3, 192 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_117] | |
vpermt2w ymm1, ymm3, ymm15 | |
vpbroadcastd ymm3, dword ptr [r9 + 2*r13 - 313552] | |
vpblendd ymm1, ymm1, ymm3, 128 | |
vpsrld xmm3, xmm14, 16 | |
vmovdqa ymmword ptr [rsp + 608], ymm1 | |
vmovdqa ymm1, ymmword ptr [r9 + 2*r13 - 383968] | |
vpermt2w ymm24, ymm8, ymm1 | |
vpunpcklqdq xmm3, xmm24, xmm3 | |
vpmovsxbw ymm24, xmmword ptr [rip + .LCPI1_118] | |
vpblendw xmm4, xmm3, xmm4, 32 | |
vmovapd xmm3, xmmword ptr [r9 + 2*r13 - 364752] | |
vpslldq xmm5, xmm3, 10 | |
vpblendd xmm4, xmm4, xmm5, 8 | |
vpbroadcastw xmm5, word ptr [r9 + 2*r13 - 358350] | |
vpblendw xmm4, xmm4, xmm5, 128 | |
vpshuflw ymm5, ymm2, 85 | |
vpblendw ymm5, ymm5, ymm10, 2 | |
vpblendd ymm4, ymm4, ymm5, 240 | |
vpmovsxbw ymm5, xmmword ptr [rip + .LCPI1_119] | |
vmovdqa64 ymm19, ymm4 | |
vmovdqa ymm4, ymmword ptr [rsp + 320] | |
vpermt2w ymm4, ymm25, ymm1 | |
vmovdqa64 ymm25, ymm21 | |
vshufps xmm1, xmm4, xmm14, 212 | |
vpslldq xmm4, xmm7, 6 | |
vpblendw xmm1, xmm1, xmm4, 32 | |
vpbroadcastd ymm4, dword ptr [r9 + 2*r13 - 364748] | |
vpblendd xmm1, xmm1, xmm4, 8 | |
vpslldq xmm4, xmm0, 10 | |
vpblendw xmm1, xmm1, xmm4, 128 | |
vpbroadcastd ymm4, dword ptr [r9 + 2*r13 - 351948] | |
vpblendd ymm4, ymm1, ymm4, 240 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_120] | |
vpermt2w ymm4, ymm1, ymm10 | |
vmovdqa ymm1, ymmword ptr [r9 + 2*r13 - 339168] | |
vpblendd ymm4, ymm4, ymm1, 32 | |
vpermt2w ymm30, ymm28, ymm1 | |
vpmovsxbw xmm28, qword ptr [rip + .LCPI1_100] | |
vpermt2w ymm4, ymm5, ymm9 | |
vpbroadcastd ymm5, dword ptr [r9 + 2*r13 - 326348] | |
vpermt2w ymm30, ymm29, ymm9 | |
vpblendd ymm4, ymm4, ymm5, 192 | |
vpmovsxbw ymm5, xmmword ptr [rip + .LCPI1_121] | |
vpermt2w ymm4, ymm5, ymm15 | |
vpbroadcastd ymm5, dword ptr [r9 + 2*r13 - 313548] | |
vpblendd ymm4, ymm4, ymm5, 128 | |
vpsrlq xmm5, xmm31, 48 | |
vmovdqa ymmword ptr [rsp + 320], ymm4 | |
vpsrlq xmm4, xmm23, 48 | |
vpunpcklwd xmm4, xmm4, xmm5 | |
vpsrlq xmm5, xmm16, 48 | |
vpunpckldq xmm4, xmm4, xmm5 | |
vpsrlq xmm5, xmm14, 48 | |
vpblendw xmm4, xmm4, xmm12, 8 | |
vpunpcklqdq xmm4, xmm4, xmm5 | |
vpbroadcastw xmm5, word ptr [r9 + 2*r13 - 371146] | |
vpblendw xmm4, xmm4, xmm5, 32 | |
vpmovzxwd xmm5, xmm3 | |
vpblendd xmm4, xmm4, xmm5, 8 | |
vpbroadcastw xmm5, word ptr [r9 + 2*r13 - 358346] | |
vpblendw xmm4, xmm4, xmm5, 128 | |
vpsrldq xmm5, xmm23, 10 | |
vpunpcklwd xmm5, xmm5, xmm6 | |
vpsrldq xmm6, xmm16, 10 | |
vpunpckldq xmm5, xmm5, xmm6 | |
vpbroadcastw xmm6, word ptr [r9 + 2*r13 - 383942] | |
vpblendw xmm5, xmm5, xmm6, 8 | |
vpsrldq xmm6, xmm14, 10 | |
vpunpcklqdq xmm5, xmm5, xmm6 | |
vpsllq xmm6, xmm3, 16 | |
vpsrld xmm3, xmm3, 16 | |
vpblendw xmm5, xmm5, xmm7, 32 | |
vpblendd xmm5, xmm5, xmm6, 8 | |
vpbroadcastw xmm6, word ptr [r9 + 2*r13 - 358342] | |
vpblendw xmm11, xmm5, xmm6, 128 | |
vpsrldq xmm6, xmm23, 14 | |
vpsrldq xmm5, xmm31, 14 | |
vpmovsxbw ymm23, xmmword ptr [rip + .LCPI1_122] | |
vpunpcklwd xmm5, xmm6, xmm5 | |
vpsrldq xmm6, xmm16, 14 | |
vpunpckldq xmm5, xmm5, xmm6 | |
vpbroadcastw xmm6, word ptr [r9 + 2*r13 - 383938] | |
vpblendw xmm5, xmm5, xmm6, 8 | |
vpsrldq xmm6, xmm14, 14 | |
vpunpcklqdq xmm5, xmm5, xmm6 | |
vpbroadcastw xmm6, word ptr [r9 + 2*r13 - 371138] | |
vpblendw xmm5, xmm5, xmm6, 32 | |
vpbroadcastw xmm6, word ptr [r9 + 2*r13 - 396740] | |
vpblendd xmm3, xmm5, xmm3, 8 | |
vpmovsxbw ymm5, xmmword ptr [rip + .LCPI1_123] | |
vpermt2w ymm20, ymm5, ymm2 | |
vpmovsxbw ymm5, xmmword ptr [rip + .LCPI1_124] | |
vpermt2w ymm17, ymm5, ymm2 | |
vpmovsxbw ymm5, xmmword ptr [rip + .LCPI1_125] | |
vpermt2w ymm18, ymm5, ymm2 | |
vpmovsxbw ymm5, xmmword ptr [rip + .LCPI1_126] | |
vpermt2w ymm11, ymm5, ymm2 | |
vpblendw xmm5, xmm3, xmm0, 128 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_127] | |
vpermt2w ymm5, ymm3, ymm2 | |
vpbroadcastd ymm3, dword ptr [rip + .LCPI1_66] | |
vpermt2w ymm2, ymm3, ymm10 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_128] | |
vpermt2w ymm2, ymm3, ymm1 | |
vpbroadcastw xmm3, word ptr [r9 + 2*r13 - 396744] | |
vpblendw ymm2, ymm2, ymm9, 8 | |
vpblendd ymm13, ymm4, ymm2, 240 | |
vpbroadcastw ymm2, word ptr [r9 + 2*r13 - 403144] | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_129] | |
vpunpcklwd xmm2, xmm2, xmm3 | |
vunpcklps xmm2, xmm2, dword ptr [r9 + 2*r13 - 390344]{1to4} | |
vpsrldq xmm3, xmm12, 2 | |
vpblendw xmm2, xmm2, xmm3, 8 | |
vpslld xmm3, xmm7, 16 | |
vpblendd xmm2, xmm14, xmm2, 3 | |
vpblendw xmm2, xmm2, xmm3, 32 | |
vpbroadcastd xmm3, dword ptr [r9 + 2*r13 - 364744] | |
vpblendd xmm2, xmm2, xmm3, 8 | |
vpsllq xmm3, xmm0, 48 | |
vpslld xmm0, xmm0, 16 | |
vpblendw xmm2, xmm2, xmm3, 128 | |
vpbroadcastq ymm3, qword ptr [r9 + 2*r13 - 351944] | |
vpblendd ymm2, ymm2, ymm3, 240 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_130] | |
vpermt2w ymm2, ymm3, ymm10 | |
vpbroadcastd ymm3, dword ptr [r9 + 2*r13 - 339144] | |
vpblendd ymm3, ymm2, ymm3, 32 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_131] | |
vpermt2w ymm3, ymm2, ymm9 | |
vmovdqa ymm2, ymmword ptr [r9 + 2*r13 - 326368] | |
vpblendd ymm3, ymm3, ymm2, 192 | |
vpermt2w ymm30, ymm24, ymm2 | |
vpmovsxbw ymm24, xmmword ptr [rip + .LCPI1_105] | |
vpermt2w ymm3, ymm4, ymm15 | |
vpbroadcastd ymm4, dword ptr [r9 + 2*r13 - 313544] | |
vpblendd ymm8, ymm3, ymm4, 128 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_132] | |
vmovdqa ymm3, ymm11 | |
vpermt2w ymm3, ymm4, ymm10 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_133] | |
vpermt2w ymm3, ymm4, ymm1 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_134] | |
vpermt2w ymm3, ymm4, ymm9 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_135] | |
vpermt2w ymm3, ymm4, ymm2 | |
vpbroadcastd ymm4, dword ptr [r9 + 2*r13 - 390340] | |
vpblendw ymm3, ymm3, ymm15, 32 | |
vpblendd ymm11, ymm11, ymm3, 240 | |
vpbroadcastw ymm3, word ptr [r9 + 2*r13 - 403140] | |
vpunpcklwd xmm3, xmm3, xmm6 | |
vpblendd xmm3, xmm3, xmm4, 2 | |
vpsrldq xmm4, xmm12, 6 | |
vmovdqa64 ymm12, ymm18 | |
vmovdqa64 xmm18, xmmword ptr [rsp + 160] | |
vpblendw xmm3, xmm3, xmm4, 8 | |
vpsrlq xmm4, xmm7, 16 | |
vmovdqa64 ymm7, ymm17 | |
vshufps xmm3, xmm3, xmm14, 244 | |
vpmovsxbw ymm14, xmmword ptr [rip + .LCPI1_107] | |
vpblendw xmm3, xmm3, xmm4, 32 | |
vpbroadcastd ymm4, dword ptr [r9 + 2*r13 - 364740] | |
vpblendd xmm3, xmm3, xmm4, 8 | |
vpblendw xmm0, xmm3, xmm0, 128 | |
vpbroadcastd ymm3, dword ptr [r9 + 2*r13 - 351940] | |
vpblendd ymm0, ymm0, ymm3, 240 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_136] | |
vpermt2w ymm0, ymm3, ymm10 | |
vpbroadcastd ymm3, dword ptr [r9 + 2*r13 - 339140] | |
vpblendd ymm0, ymm0, ymm3, 32 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_137] | |
vpermt2w ymm0, ymm3, ymm9 | |
vpbroadcastd ymm3, dword ptr [r9 + 2*r13 - 326340] | |
vpblendd ymm0, ymm0, ymm3, 192 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_138] | |
vpermt2w ymm20, ymm3, ymm10 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_139] | |
vpermt2w ymm7, ymm3, ymm10 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_140] | |
vpermt2w ymm12, ymm3, ymm10 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_141] | |
vpermt2w ymm5, ymm3, ymm10 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_142] | |
vmovdqa ymm10, ymmword ptr [r9 + 2*r13 - 217568] | |
vpermt2w ymm20, ymm3, ymm1 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_143] | |
vpermt2w ymm7, ymm3, ymm1 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_144] | |
vpermt2w ymm12, ymm3, ymm1 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_145] | |
vpermt2w ymm19, ymm3, ymm1 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_146] | |
vpermt2w ymm5, ymm3, ymm1 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_147] | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_148] | |
vpermt2w ymm20, ymm1, ymm9 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_149] | |
vpermt2w ymm7, ymm1, ymm9 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_150] | |
vpermt2w ymm12, ymm1, ymm9 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_151] | |
vpermt2w ymm19, ymm1, ymm9 | |
vmovdqa ymm1, ymm5 | |
vpermt2w ymm1, ymm3, ymm9 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_152] | |
vpermt2w ymm20, ymm3, ymm2 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_153] | |
vpermt2w ymm7, ymm3, ymm2 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_154] | |
vpermt2w ymm12, ymm3, ymm2 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_155] | |
vpermt2w ymm19, ymm3, ymm2 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_156] | |
vpermt2w ymm13, ymm3, ymm2 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_157] | |
vpermt2w ymm1, ymm3, ymm2 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_158] | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_159] | |
vpermt2w ymm30, ymm2, ymm15 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_160] | |
vpermt2w ymm20, ymm2, ymm15 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_161] | |
vpermt2w ymm7, ymm2, ymm15 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_162] | |
vpermt2w ymm12, ymm2, ymm15 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_163] | |
vpermt2w ymm19, ymm2, ymm15 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_164] | |
vpermt2w ymm13, ymm2, ymm15 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_165] | |
vpermt2w ymm0, ymm2, ymm15 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_166] | |
vpermt2w ymm1, ymm2, ymm15 | |
vmovdqa ymm2, ymmword ptr [r9 + 2*r13 - 313568] | |
vmovdqa xmm15, xmmword ptr [r9 + 2*r13 - 255968] | |
vpermt2w ymm30, ymm3, ymm2 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_167] | |
vpblendd ymm9, ymm0, ymm2, 128 | |
vpmovsxbw ymm0, xmmword ptr [rip + .LCPI1_168] | |
vpbroadcastw xmm6, xmm15 | |
vpermt2w ymm20, ymm3, ymm2 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_169] | |
vpermt2w ymm1, ymm0, ymm2 | |
vmovdqa ymm0, ymmword ptr [r9 + 2*r13 - 307168] | |
vpermt2w ymm7, ymm3, ymm2 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_170] | |
vpermt2w ymm12, ymm3, ymm2 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_171] | |
vpermt2w ymm19, ymm3, ymm2 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_172] | |
vpermt2w ymm13, ymm3, ymm2 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_173] | |
vpermt2w ymm11, ymm3, ymm2 | |
vmovdqa ymm2, ymmword ptr [rsp + 704] | |
vmovdqa ymm3, ymmword ptr [rsp + 672] | |
vpermt2w ymm2, ymm23, ymm0 | |
vpbroadcastd ymm23, dword ptr [rip + .LCPI1_66] | |
vmovdqa ymmword ptr [rsp + 704], ymm2 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_174] | |
vpermt2w ymm30, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_175] | |
vmovdqa64 ymmword ptr [rsp + 1344], ymm30 | |
vpermt2w ymm3, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_176] | |
vmovdqa ymmword ptr [rsp + 672], ymm3 | |
vmovdqa ymm3, ymmword ptr [rsp + 640] | |
vpermt2w ymm20, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_177] | |
vmovdqa64 ymmword ptr [rsp + 1664], ymm20 | |
vpmovsxbw ymm20, xmmword ptr [rip + .LCPI1_126] | |
vpermt2w ymm22, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_178] | |
vmovdqa64 ymmword ptr [rsp + 1632], ymm22 | |
vpermt2w ymm7, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_179] | |
vmovdqa ymmword ptr [rsp + 1600], ymm7 | |
vmovdqa ymm7, ymmword ptr [r9 + 2*r13 - 243168] | |
vpermt2w ymm3, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_180] | |
vmovdqa ymmword ptr [rsp + 640], ymm3 | |
vmovdqa ymm3, ymmword ptr [rsp + 608] | |
vpermt2w ymm12, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_181] | |
vmovdqa ymmword ptr [rsp + 1568], ymm12 | |
vpermt2w ymm3, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_182] | |
vmovdqa ymmword ptr [rsp + 608], ymm3 | |
vmovdqa ymm3, ymmword ptr [rsp + 320] | |
vpermt2w ymm19, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_183] | |
vmovdqa64 ymmword ptr [rsp + 1536], ymm19 | |
vpmovsxbw ymm19, xmmword ptr [rip + .LCPI1_97] | |
vpermt2w ymm3, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_184] | |
vmovdqa ymmword ptr [rsp + 320], ymm3 | |
vpermt2w ymm13, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_185] | |
vmovdqa ymmword ptr [rsp + 1504], ymm13 | |
vpmovsxbw ymm13, xmmword ptr [rip + .LCPI1_95] | |
vpermt2w ymm8, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_186] | |
vmovdqa ymmword ptr [rsp + 1472], ymm8 | |
vmovdqa xmm8, xmmword ptr [r9 + 2*r13 - 287968] | |
vpermt2w ymm11, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_187] | |
vmovdqa ymmword ptr [rsp + 1440], ymm11 | |
vpbroadcastd ymm11, dword ptr [r9 + 2*r13 - 236768] | |
vpermt2w ymm9, ymm2, ymm0 | |
vpblendw ymm2, ymm1, ymm0, 128 | |
vmovdqa xmm0, xmmword ptr [r9 + 2*r13 - 300768] | |
vmovdqa xmm1, xmmword ptr [r9 + 2*r13 - 294368] | |
vpblendd ymm2, ymm5, ymm2, 240 | |
vmovdqa ymmword ptr [rsp + 1376], ymm2 | |
vmovdqa ymmword ptr [rsp + 1408], ymm9 | |
vmovdqa xmm9, xmmword ptr [r9 + 2*r13 - 275168] | |
vpunpcklwd xmm3, xmm0, xmm1 | |
vpunpckldq xmm4, xmm3, xmm8 | |
vmovdqa xmm3, xmmword ptr [r9 + 2*r13 - 281568] | |
insertq xmm4, xmm3, 16, 48 | |
vpunpcklqdq xmm2, xmm4, xmm9 | |
vmovdqa xmm4, xmmword ptr [r9 + 2*r13 - 268768] | |
vpbroadcastw xmm5, xmm4 | |
vpblendw xmm5, xmm2, xmm5, 32 | |
vmovaps xmm2, xmmword ptr [r9 + 2*r13 - 262368] | |
vinsertps xmm5, xmm5, xmm2, 48 | |
vpblendw xmm5, xmm5, xmm6, 128 | |
vinserti128 ymm6, ymm5, xmmword ptr [r9 + 2*r13 - 249568], 1 | |
vpermt2w ymm6, ymm21, ymm7 | |
vpblendd ymm12, ymm6, ymm11, 32 | |
vmovdqa ymm6, ymmword ptr [r9 + 2*r13 - 230368] | |
vmovdqa xmm11, xmmword ptr [r9 + 2*r13 - 223968] | |
vpermt2w ymm12, ymm13, ymm6 | |
vpmovsxbw ymm13, xmmword ptr [rip + .LCPI1_96] | |
vinserti128 ymm5, ymm5, xmm11, 1 | |
vshufpd ymm12, ymm12, ymm5, 2 | |
vpermt2w ymm12, ymm13, ymm10 | |
vpbroadcastd ymm13, dword ptr [r9 + 2*r13 - 211168] | |
vpblendd ymm12, ymm12, ymm13, 128 | |
vpsrld xmm13, xmm8, 16 | |
vmovdqa64 ymm17, ymm12 | |
vpsrld xmm12, xmm0, 16 | |
vpblendw xmm12, xmm12, xmm1, 2 | |
vpunpckldq xmm12, xmm12, xmm13 | |
vpmovsxbw xmm13, qword ptr [rip + .LCPI1_102] | |
vpermt2w xmm12, xmm13, xmm3 | |
vpsrld xmm13, xmm9, 16 | |
vpunpcklqdq xmm12, xmm12, xmm13 | |
vpbroadcastw xmm13, word ptr [r9 + 2*r13 - 268766] | |
vpblendw xmm12, xmm12, xmm13, 32 | |
vpslldq xmm13, xmm2, 10 | |
vpblendd xmm12, xmm12, xmm13, 8 | |
vpbroadcastw xmm13, word ptr [r9 + 2*r13 - 255966] | |
vpblendw xmm5, xmm12, xmm13, 128 | |
vpbroadcastw xmm12, word ptr [r9 + 2*r13 - 294364] | |
vpbroadcastw xmm13, word ptr [r9 + 2*r13 - 300764] | |
vmovdqa64 ymm22, ymm5 | |
vpunpcklwd xmm12, xmm13, xmm12 | |
vpmovsxbw xmm13, qword ptr [rip + .LCPI1_106] | |
vpblendd xmm12, xmm12, xmm8, 2 | |
vpermt2w xmm12, xmm13, xmm3 | |
vpslldq xmm13, xmm4, 6 | |
vshufps xmm12, xmm12, xmm9, 212 | |
vpblendw xmm12, xmm12, xmm13, 32 | |
vpslldq xmm13, xmm15, 10 | |
vinsertps xmm12, xmm12, xmm2, 112 | |
vpblendw xmm12, xmm12, xmm13, 128 | |
vpbroadcastd ymm13, dword ptr [r9 + 2*r13 - 249564] | |
vpblendd ymm13, ymm12, ymm13, 240 | |
vinserti128 ymm12, ymm12, xmmword ptr [r9 + 2*r13 - 236768], 1 | |
vpermt2w ymm13, ymm14, ymm7 | |
vpbroadcastd ymm14, dword ptr [r9 + 2*r13 - 223964] | |
vpblendd ymm12, ymm13, ymm12, 34 | |
vpmovsxbw ymm13, xmmword ptr [rip + .LCPI1_108] | |
vpermt2w ymm12, ymm13, ymm6 | |
vpbroadcastd ymm13, dword ptr [r9 + 2*r13 - 211164] | |
vpblendd ymm12, ymm12, ymm14, 192 | |
vpmovsxbw ymm14, xmmword ptr [rip + .LCPI1_109] | |
vpermt2w ymm12, ymm14, ymm10 | |
vpbroadcastq ymm14, qword ptr [r9 + 2*r13 - 249560] | |
vpblendd ymm12, ymm12, ymm13, 128 | |
vpsrlq xmm13, xmm0, 48 | |
vmovdqa ymmword ptr [rsp + 576], ymm12 | |
vpsrlq xmm12, xmm1, 48 | |
vpunpcklwd xmm12, xmm13, xmm12 | |
vpsrlq xmm13, xmm8, 48 | |
vpunpckldq xmm12, xmm12, xmm13 | |
vpsrlq xmm13, xmm9, 48 | |
vpblendw xmm12, xmm12, xmm3, 8 | |
vpunpcklqdq xmm12, xmm12, xmm13 | |
vpbroadcastw xmm13, word ptr [r9 + 2*r13 - 268762] | |
vpblendw xmm12, xmm12, xmm13, 32 | |
vpmovzxwd xmm13, xmm2 | |
vpblendd xmm12, xmm12, xmm13, 8 | |
vpbroadcastw xmm13, word ptr [r9 + 2*r13 - 255962] | |
vpblendw xmm5, xmm12, xmm13, 128 | |
vpbroadcastw xmm12, word ptr [r9 + 2*r13 - 294360] | |
vpbroadcastw xmm13, word ptr [r9 + 2*r13 - 300760] | |
vmovdqa64 ymm29, ymm5 | |
vpmovsxbw ymm5, xmmword ptr [rip + .LCPI1_105] | |
vpunpcklwd xmm12, xmm13, xmm12 | |
vpsrldq xmm13, xmm3, 2 | |
vpsrldq xmm3, xmm3, 6 | |
vinsertps xmm12, xmm12, xmm8, 156 | |
vpblendw xmm12, xmm12, xmm13, 8 | |
vpslld xmm13, xmm4, 16 | |
vpblendd xmm12, xmm9, xmm12, 3 | |
vpblendw xmm12, xmm12, xmm13, 32 | |
vpsllq xmm13, xmm15, 48 | |
vinsertps xmm12, xmm12, xmm2, 176 | |
vpblendw xmm12, xmm12, xmm13, 128 | |
vpblendd ymm13, ymm12, ymm14, 240 | |
vpbroadcastd ymm14, dword ptr [r9 + 2*r13 - 236760] | |
vinserti128 ymm11, ymm12, xmm11, 1 | |
vpmovsxbw ymm12, xmmword ptr [rip + .LCPI1_103] | |
vpermt2w ymm13, ymm5, ymm7 | |
vpblendd ymm13, ymm13, ymm14, 32 | |
vmovdqa xmm14, xmmword ptr [r9 + 2*r13 - 281552] | |
vpermt2w ymm13, ymm12, ymm6 | |
vpbroadcastd ymm12, dword ptr [r9 + 2*r13 - 211160] | |
vpblendd ymm11, ymm13, ymm11, 204 | |
vpmovsxbw ymm13, xmmword ptr [rip + .LCPI1_111] | |
vpermt2w ymm11, ymm13, ymm10 | |
vmovapd xmm13, xmmword ptr [rsp + 192] | |
vpblendd ymm11, ymm11, ymm12, 128 | |
vpsrldq xmm12, xmm0, 10 | |
vpsrldq xmm0, xmm0, 14 | |
vmovdqa64 ymm31, ymm11 | |
vpsrldq xmm11, xmm1, 10 | |
vpsrldq xmm1, xmm1, 14 | |
vpunpcklwd xmm11, xmm12, xmm11 | |
vpsrldq xmm12, xmm8, 10 | |
vpunpcklwd xmm0, xmm0, xmm1 | |
vpbroadcastw xmm1, word ptr [r9 + 2*r13 - 281554] | |
vpunpckldq xmm11, xmm11, xmm12 | |
vpbroadcastw xmm12, word ptr [r9 + 2*r13 - 281558] | |
vpblendw xmm11, xmm11, xmm12, 8 | |
vpsrldq xmm12, xmm9, 10 | |
vpunpcklqdq xmm11, xmm11, xmm12 | |
vpsllq xmm12, xmm2, 16 | |
vpblendw xmm11, xmm11, xmm4, 32 | |
vpsrlq xmm4, xmm4, 16 | |
vpblendd xmm11, xmm11, xmm12, 8 | |
vpbroadcastw xmm12, word ptr [r9 + 2*r13 - 255958] | |
vpblendw xmm5, xmm11, xmm12, 128 | |
vpbroadcastw xmm11, word ptr [r9 + 2*r13 - 294356] | |
vpbroadcastw xmm12, word ptr [r9 + 2*r13 - 300756] | |
vmovdqa64 ymm30, ymm5 | |
vmovdqa64 ymm21, ymm30 | |
vpunpcklwd xmm11, xmm12, xmm11 | |
vpmovsxbw ymm12, xmmword ptr [rip + .LCPI1_112] | |
vinsertps xmm11, xmm11, xmm8, 220 | |
vpblendw xmm3, xmm11, xmm3, 8 | |
vpslld xmm11, xmm15, 16 | |
vshufps xmm3, xmm3, xmm9, 244 | |
vpblendw xmm3, xmm3, xmm4, 32 | |
vpbroadcastd ymm4, dword ptr [r9 + 2*r13 - 249556] | |
vpblendd xmm3, xmm3, xmm2, 8 | |
vpblendw xmm3, xmm3, xmm11, 128 | |
vpbroadcastd ymm11, dword ptr [r9 + 2*r13 - 236756] | |
vpblendd ymm4, ymm3, ymm4, 240 | |
vinserti128 ymm3, ymm3, xmmword ptr [r9 + 2*r13 - 211168], 1 | |
vpermt2w ymm4, ymm12, ymm7 | |
vpsrldq xmm12, xmm13, 10 | |
vpblendd ymm4, ymm4, ymm11, 32 | |
vpmovsxbw ymm11, xmmword ptr [rip + .LCPI1_113] | |
vpermt2w ymm4, ymm11, ymm6 | |
vpbroadcastd ymm11, dword ptr [r9 + 2*r13 - 223956] | |
vpblendd ymm4, ymm4, ymm11, 192 | |
vpmovsxbw ymm11, xmmword ptr [rip + .LCPI1_114] | |
vpermt2w ymm4, ymm11, ymm10 | |
vpblendd ymm3, ymm4, ymm3, 136 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_115] | |
vmovdqa ymmword ptr [rsp + 544], ymm3 | |
vpsrldq xmm3, xmm8, 14 | |
vmovdqa xmm8, xmmword ptr [rsp + 224] | |
vpunpckldq xmm0, xmm0, xmm3 | |
vpbroadcastd ymm3, dword ptr [r9 + 2*r13 - 236752] | |
vpblendw xmm0, xmm0, xmm1, 8 | |
vpsrldq xmm1, xmm9, 14 | |
vmovdqa xmm9, xmmword ptr [r9 + 2*r13 - 275152] | |
vpunpcklqdq xmm0, xmm0, xmm1 | |
vpbroadcastw xmm1, word ptr [r9 + 2*r13 - 268754] | |
vpblendw xmm0, xmm0, xmm1, 32 | |
vpsrld xmm1, xmm2, 16 | |
vpblendd xmm0, xmm0, xmm1, 8 | |
vpblendw xmm5, xmm0, xmm15, 128 | |
vpbroadcastw ymm0, word ptr [r9 + 2*r13 - 300752] | |
vmovdqa xmm15, xmmword ptr [r9 + 2*r13 - 268752] | |
vpunpcklwd xmm0, xmm0, xmm13 | |
vpbroadcastw xmm1, xmm15 | |
vpunpckldq xmm0, xmm0, xmm18 | |
insertq xmm0, xmm14, 16, 48 | |
vpunpcklqdq xmm0, xmm0, xmm9 | |
vpblendw xmm0, xmm0, xmm1, 32 | |
vpbroadcastd xmm1, dword ptr [r9 + 2*r13 - 262352] | |
vpblendd xmm1, xmm0, xmm1, 8 | |
vmovdqa xmm0, xmmword ptr [r9 + 2*r13 - 255952] | |
vpbroadcastw xmm2, xmm0 | |
vpblendw xmm1, xmm1, xmm2, 128 | |
vmovdqa ymm2, ymmword ptr [r9 + 2*r13 - 249568] | |
vpblendd ymm1, ymm1, ymm2, 240 | |
vpermt2w ymm1, ymm4, ymm7 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_116] | |
vpblendd ymm1, ymm1, ymm3, 32 | |
vpbroadcastq ymm3, qword ptr [r9 + 2*r13 - 223952] | |
vpermt2w ymm1, ymm4, ymm6 | |
vmovdqa ymm4, ymmword ptr [rsp + 256] | |
vpblendd ymm1, ymm1, ymm3, 192 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_117] | |
vpermt2w ymm1, ymm3, ymm10 | |
vpbroadcastd ymm3, dword ptr [r9 + 2*r13 - 211152] | |
vpblendd ymm1, ymm1, ymm3, 128 | |
vpsrld xmm3, xmm9, 16 | |
vmovdqa ymmword ptr [rsp + 512], ymm1 | |
vmovdqa ymm1, ymmword ptr [r9 + 2*r13 - 281568] | |
vpermt2w ymm4, ymm26, ymm1 | |
vpunpcklqdq xmm3, xmm4, xmm3 | |
vpbroadcastw xmm4, word ptr [r9 + 2*r13 - 268750] | |
vpblendw xmm4, xmm3, xmm4, 32 | |
vmovapd xmm3, xmmword ptr [r9 + 2*r13 - 262352] | |
vpslldq xmm11, xmm3, 10 | |
vpblendd xmm4, xmm4, xmm11, 8 | |
vpbroadcastw xmm11, word ptr [r9 + 2*r13 - 255950] | |
vpblendw xmm4, xmm4, xmm11, 128 | |
vpshuflw ymm11, ymm2, 85 | |
vpblendw ymm11, ymm11, ymm7, 2 | |
vpblendd ymm4, ymm4, ymm11, 240 | |
vpmovsxbw ymm11, xmmword ptr [rip + .LCPI1_119] | |
vmovdqa64 ymm16, ymm4 | |
vmovdqa ymm4, ymmword ptr [rsp + 288] | |
vpermt2w ymm4, ymm28, ymm1 | |
vshufps xmm1, xmm4, xmm9, 212 | |
vpslldq xmm4, xmm15, 6 | |
vpblendw xmm1, xmm1, xmm4, 32 | |
vpbroadcastd ymm4, dword ptr [r9 + 2*r13 - 262348] | |
vpblendd xmm1, xmm1, xmm4, 8 | |
vpslldq xmm4, xmm0, 10 | |
vpblendw xmm1, xmm1, xmm4, 128 | |
vpbroadcastd ymm4, dword ptr [r9 + 2*r13 - 249548] | |
vpblendd ymm4, ymm1, ymm4, 240 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_120] | |
vpermt2w ymm4, ymm1, ymm7 | |
vmovdqa ymm1, ymmword ptr [r9 + 2*r13 - 236768] | |
vpblendd ymm4, ymm4, ymm1, 32 | |
vpermt2w ymm4, ymm11, ymm6 | |
vpbroadcastd ymm11, dword ptr [r9 + 2*r13 - 223948] | |
vpblendd ymm4, ymm4, ymm11, 192 | |
vpmovsxbw ymm11, xmmword ptr [rip + .LCPI1_121] | |
vpermt2w ymm4, ymm11, ymm10 | |
vpbroadcastd ymm11, dword ptr [r9 + 2*r13 - 211148] | |
vpblendd ymm4, ymm4, ymm11, 128 | |
vpsrlq xmm11, xmm13, 48 | |
vmovdqa ymmword ptr [rsp + 288], ymm4 | |
vpsrlq xmm4, xmm8, 48 | |
vpunpcklwd xmm4, xmm4, xmm11 | |
vpsrlq xmm11, xmm18, 48 | |
vpunpckldq xmm4, xmm4, xmm11 | |
vpsrlq xmm11, xmm9, 48 | |
vpblendw xmm4, xmm4, xmm14, 8 | |
vpunpcklqdq xmm4, xmm4, xmm11 | |
vpbroadcastw xmm11, word ptr [r9 + 2*r13 - 268746] | |
vpblendw xmm4, xmm4, xmm11, 32 | |
vpmovzxwd xmm11, xmm3 | |
vpblendd xmm4, xmm4, xmm11, 8 | |
vpbroadcastw xmm11, word ptr [r9 + 2*r13 - 255946] | |
vpblendw xmm4, xmm4, xmm11, 128 | |
vpsrldq xmm11, xmm8, 10 | |
vpunpcklwd xmm11, xmm11, xmm12 | |
vpsrldq xmm12, xmm18, 10 | |
vpunpckldq xmm11, xmm11, xmm12 | |
vpbroadcastw xmm12, word ptr [r9 + 2*r13 - 281542] | |
vpblendw xmm11, xmm11, xmm12, 8 | |
vpsrldq xmm12, xmm9, 10 | |
vpunpcklqdq xmm11, xmm11, xmm12 | |
vpsllq xmm12, xmm3, 16 | |
vpsrld xmm3, xmm3, 16 | |
vpblendw xmm11, xmm11, xmm15, 32 | |
vpblendd xmm11, xmm11, xmm12, 8 | |
vpbroadcastw xmm12, word ptr [r9 + 2*r13 - 255942] | |
vpblendw xmm12, xmm11, xmm12, 128 | |
vpsrldq xmm11, xmm13, 14 | |
vpsrldq xmm13, xmm8, 14 | |
vpunpcklwd xmm11, xmm13, xmm11 | |
vpsrldq xmm13, xmm18, 14 | |
vmovdqa64 ymm18, ymm22 | |
vmovdqa64 ymm22, ymm5 | |
vpermt2w ymm12, ymm20, ymm2 | |
vpermt2w ymm18, ymm19, ymm2 | |
vpmovsxbw ymm20, xmmword ptr [rip + .LCPI1_104] | |
vpunpckldq xmm11, xmm11, xmm13 | |
vpbroadcastw xmm13, word ptr [r9 + 2*r13 - 281538] | |
vpblendw xmm11, xmm11, xmm13, 8 | |
vpsrldq xmm13, xmm9, 14 | |
vpunpcklqdq xmm11, xmm11, xmm13 | |
vpbroadcastw xmm13, word ptr [r9 + 2*r13 - 268738] | |
vpblendw xmm11, xmm11, xmm13, 32 | |
vpblendd xmm3, xmm11, xmm3, 8 | |
vpmovsxbw ymm11, xmmword ptr [rip + .LCPI1_123] | |
vpermt2w ymm29, ymm11, ymm2 | |
vpmovsxbw ymm11, xmmword ptr [rip + .LCPI1_124] | |
vpermt2w ymm21, ymm11, ymm2 | |
vpmovsxbw ymm11, xmmword ptr [rip + .LCPI1_125] | |
vpermt2w ymm22, ymm11, ymm2 | |
vpblendw xmm11, xmm3, xmm0, 128 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_127] | |
vpermt2w ymm11, ymm3, ymm2 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_128] | |
vpermt2w ymm2, ymm23, ymm7 | |
vpmovsxbw ymm23, xmmword ptr [rip + .LCPI1_118] | |
vpermt2w ymm2, ymm3, ymm1 | |
vpbroadcastw xmm3, word ptr [r9 + 2*r13 - 294344] | |
vpblendw ymm2, ymm2, ymm6, 8 | |
vpblendd ymm13, ymm4, ymm2, 240 | |
vpbroadcastw ymm2, word ptr [r9 + 2*r13 - 300744] | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_129] | |
vpunpcklwd xmm2, xmm2, xmm3 | |
vunpcklps xmm2, xmm2, dword ptr [r9 + 2*r13 - 287944]{1to4} | |
vpsrldq xmm3, xmm14, 2 | |
vpblendw xmm2, xmm2, xmm3, 8 | |
vpslld xmm3, xmm15, 16 | |
vpblendd xmm2, xmm9, xmm2, 3 | |
vpblendw xmm2, xmm2, xmm3, 32 | |
vpbroadcastd xmm3, dword ptr [r9 + 2*r13 - 262344] | |
vpblendd xmm2, xmm2, xmm3, 8 | |
vpsllq xmm3, xmm0, 48 | |
vpslld xmm0, xmm0, 16 | |
vpblendw xmm2, xmm2, xmm3, 128 | |
vpbroadcastq ymm3, qword ptr [r9 + 2*r13 - 249544] | |
vpblendd ymm2, ymm2, ymm3, 240 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_130] | |
vpermt2w ymm2, ymm3, ymm7 | |
vpbroadcastd ymm3, dword ptr [r9 + 2*r13 - 236744] | |
vpblendd ymm3, ymm2, ymm3, 32 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_131] | |
vpermt2w ymm3, ymm2, ymm6 | |
vmovdqa ymm2, ymmword ptr [r9 + 2*r13 - 223968] | |
vpblendd ymm3, ymm3, ymm2, 192 | |
vpermt2w ymm3, ymm4, ymm10 | |
vpbroadcastd ymm4, dword ptr [r9 + 2*r13 - 211144] | |
vpblendd ymm5, ymm3, ymm4, 128 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_132] | |
vmovdqa ymm3, ymm12 | |
vpermt2w ymm3, ymm4, ymm7 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_133] | |
vpermt2w ymm3, ymm4, ymm1 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_134] | |
vpermt2w ymm3, ymm4, ymm6 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_135] | |
vpermt2w ymm3, ymm4, ymm2 | |
vpbroadcastd ymm4, dword ptr [r9 + 2*r13 - 287940] | |
vpblendw ymm3, ymm3, ymm10, 32 | |
vpblendd ymm8, ymm12, ymm3, 240 | |
vpbroadcastw ymm3, word ptr [r9 + 2*r13 - 300740] | |
vpbroadcastw xmm12, word ptr [r9 + 2*r13 - 294340] | |
vpunpcklwd xmm3, xmm3, xmm12 | |
vpblendd xmm3, xmm3, xmm4, 2 | |
vpsrldq xmm4, xmm14, 6 | |
vmovdqa64 ymm14, ymm21 | |
vpblendw xmm3, xmm3, xmm4, 8 | |
vpsrlq xmm4, xmm15, 16 | |
vmovdqa64 ymm15, ymm18 | |
vpermt2w ymm15, ymm27, ymm7 | |
vpmovsxbw ymm18, xmmword ptr [rip + .LCPI1_107] | |
vshufps xmm3, xmm3, xmm9, 244 | |
vmovdqa xmm9, xmmword ptr [r9 + 2*r13 - 179168] | |
vpblendw xmm3, xmm3, xmm4, 32 | |
vpbroadcastd ymm4, dword ptr [r9 + 2*r13 - 262340] | |
vpermt2w ymm15, ymm20, ymm1 | |
vpblendd xmm3, xmm3, xmm4, 8 | |
vmovdqa64 ymm4, ymm22 | |
vpmovsxbw ymm22, xmmword ptr [rip + .LCPI1_122] | |
vpblendw xmm0, xmm3, xmm0, 128 | |
vpbroadcastd ymm3, dword ptr [r9 + 2*r13 - 249540] | |
vpblendd ymm0, ymm0, ymm3, 240 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_136] | |
vpermt2w ymm0, ymm3, ymm7 | |
vpbroadcastd ymm3, dword ptr [r9 + 2*r13 - 236740] | |
vpblendd ymm0, ymm0, ymm3, 32 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_137] | |
vpermt2w ymm0, ymm3, ymm6 | |
vpbroadcastd ymm3, dword ptr [r9 + 2*r13 - 223940] | |
vpblendd ymm0, ymm0, ymm3, 192 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_138] | |
vpermt2w ymm29, ymm3, ymm7 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_139] | |
vpermt2w ymm14, ymm3, ymm7 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_140] | |
vpermt2w ymm4, ymm3, ymm7 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_141] | |
vpermt2w ymm11, ymm3, ymm7 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_142] | |
vmovaps xmm7, xmmword ptr [r9 + 2*r13 - 159968] | |
vpermt2w ymm29, ymm3, ymm1 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_143] | |
vpermt2w ymm14, ymm3, ymm1 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_144] | |
vpermt2w ymm4, ymm3, ymm1 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_145] | |
vpermt2w ymm16, ymm3, ymm1 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_146] | |
vpermt2w ymm11, ymm3, ymm1 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_101] | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_148] | |
vpermt2w ymm15, ymm1, ymm6 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_147] | |
vpermt2w ymm15, ymm23, ymm2 | |
vpermt2w ymm29, ymm1, ymm6 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_149] | |
vpermt2w ymm14, ymm1, ymm6 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_150] | |
vpermt2w ymm4, ymm1, ymm6 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_151] | |
vpermt2w ymm16, ymm1, ymm6 | |
vmovdqa ymm1, ymm11 | |
vpermt2w ymm1, ymm3, ymm6 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_152] | |
vpermt2w ymm29, ymm3, ymm2 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_153] | |
vpermt2w ymm14, ymm3, ymm2 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_154] | |
vpermt2w ymm4, ymm3, ymm2 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_155] | |
vpermt2w ymm16, ymm3, ymm2 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_156] | |
vpermt2w ymm13, ymm3, ymm2 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_157] | |
vpermt2w ymm1, ymm3, ymm2 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_158] | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_159] | |
vpermt2w ymm15, ymm2, ymm10 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_160] | |
vpermt2w ymm29, ymm2, ymm10 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_161] | |
vpermt2w ymm14, ymm2, ymm10 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_162] | |
vpermt2w ymm4, ymm2, ymm10 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_163] | |
vpermt2w ymm16, ymm2, ymm10 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_164] | |
vpermt2w ymm13, ymm2, ymm10 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_165] | |
vpermt2w ymm0, ymm2, ymm10 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_166] | |
vpermt2w ymm1, ymm2, ymm10 | |
vmovdqa ymm2, ymmword ptr [r9 + 2*r13 - 211168] | |
vmovdqa xmm10, xmmword ptr [r9 + 2*r13 - 166368] | |
vpermt2w ymm15, ymm3, ymm2 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_167] | |
vpblendd ymm6, ymm0, ymm2, 128 | |
vpmovsxbw ymm0, xmmword ptr [rip + .LCPI1_168] | |
vpermt2w ymm29, ymm3, ymm2 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_169] | |
vpermt2w ymm1, ymm0, ymm2 | |
vmovdqa ymm0, ymmword ptr [r9 + 2*r13 - 204768] | |
vpermt2w ymm14, ymm3, ymm2 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_170] | |
vpermt2w ymm17, ymm22, ymm0 | |
vmovdqa64 ymmword ptr [rsp + 1088], ymm17 | |
vpmovsxbw xmm17, qword ptr [rip + .LCPI1_106] | |
vpermt2w ymm4, ymm3, ymm2 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_171] | |
vpermt2w ymm16, ymm3, ymm2 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_172] | |
vpermt2w ymm13, ymm3, ymm2 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_173] | |
vpermt2w ymm8, ymm3, ymm2 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_174] | |
vmovdqa ymm3, ymmword ptr [rsp + 576] | |
vpermt2w ymm15, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_175] | |
vmovdqa ymmword ptr [rsp + 992], ymm15 | |
vmovdqa xmm15, xmmword ptr [rsp + 352] | |
vpermt2w ymm3, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_176] | |
vmovdqa ymmword ptr [rsp + 576], ymm3 | |
vmovdqa ymm3, ymmword ptr [rsp + 544] | |
vpermt2w ymm29, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_177] | |
vmovdqa64 ymmword ptr [rsp + 864], ymm29 | |
vmovdqa64 ymm29, ymm25 | |
vpermt2w ymm31, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_178] | |
vmovdqa64 ymmword ptr [rsp + 1312], ymm31 | |
vpermt2w ymm14, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_179] | |
vmovdqa ymmword ptr [rsp + 1024], ymm14 | |
vpmovsxbw ymm14, xmmword ptr [rip + .LCPI1_107] | |
vpermt2w ymm3, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_180] | |
vmovdqa ymmword ptr [rsp + 544], ymm3 | |
vmovdqa ymm3, ymmword ptr [rsp + 512] | |
vpermt2w ymm4, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_181] | |
vmovdqa ymmword ptr [rsp + 1280], ymm4 | |
vmovdqa xmm4, xmmword ptr [r9 + 2*r13 - 185568] | |
vpermt2w ymm3, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_182] | |
vmovdqa ymmword ptr [rsp + 512], ymm3 | |
vmovdqa ymm3, ymmword ptr [rsp + 288] | |
vpermt2w ymm16, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_183] | |
vmovdqa64 ymmword ptr [rsp + 1248], ymm16 | |
vpermt2w ymm3, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_184] | |
vmovdqa ymmword ptr [rsp + 288], ymm3 | |
vmovdqa xmm3, xmmword ptr [r9 + 2*r13 - 198368] | |
vpermt2w ymm13, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_185] | |
vmovdqa ymmword ptr [rsp + 1216], ymm13 | |
vpmovsxbw ymm13, xmmword ptr [rip + .LCPI1_95] | |
vpermt2w ymm5, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_186] | |
vmovdqa ymmword ptr [rsp + 1184], ymm5 | |
vmovdqa xmm5, xmmword ptr [r9 + 2*r13 - 172768] | |
vpermt2w ymm8, ymm2, ymm0 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_187] | |
vmovdqa ymmword ptr [rsp + 1152], ymm8 | |
vmovdqa xmm8, xmmword ptr [r9 + 2*r13 - 153568] | |
vpermt2w ymm6, ymm2, ymm0 | |
vpblendw ymm0, ymm1, ymm0, 128 | |
vmovdqa ymm2, ymmword ptr [r9 + 2*r13 - 140768] | |
vpblendd ymm0, ymm11, ymm0, 240 | |
vpbroadcastd ymm11, dword ptr [r9 + 2*r13 - 134368] | |
vmovdqa ymmword ptr [rsp + 1056], ymm0 | |
vmovdqa ymmword ptr [rsp + 1120], ymm6 | |
vmovdqa xmm6, xmmword ptr [r9 + 2*r13 - 191968] | |
vpunpcklwd xmm1, xmm3, xmm6 | |
vpunpckldq xmm1, xmm1, xmm4 | |
insertq xmm1, xmm9, 16, 48 | |
vpunpcklqdq xmm0, xmm1, xmm5 | |
vpbroadcastw xmm1, xmm10 | |
vpblendw xmm0, xmm0, xmm1, 32 | |
vpbroadcastw xmm1, xmm8 | |
vinsertps xmm0, xmm0, xmm7, 48 | |
vpblendw xmm0, xmm0, xmm1, 128 | |
vinserti128 ymm1, ymm0, xmmword ptr [r9 + 2*r13 - 147168], 1 | |
vpermt2w ymm1, ymm25, ymm2 | |
vpblendd ymm12, ymm1, ymm11, 32 | |
vmovdqa ymm1, ymmword ptr [r9 + 2*r13 - 127968] | |
vmovdqa xmm11, xmmword ptr [r9 + 2*r13 - 121568] | |
vpermt2w ymm12, ymm13, ymm1 | |
vinserti128 ymm0, ymm0, xmm11, 1 | |
vpmovsxbw ymm13, xmmword ptr [rip + .LCPI1_96] | |
vshufpd ymm12, ymm12, ymm0, 2 | |
vmovdqa ymm0, ymmword ptr [r9 + 2*r13 - 115168] | |
vpermt2w ymm12, ymm13, ymm0 | |
vpbroadcastd ymm13, dword ptr [r9 + 2*r13 - 108768] | |
vpblendd ymm12, ymm12, ymm13, 128 | |
vpsrld xmm13, xmm4, 16 | |
vmovdqa ymmword ptr [rsp + 256], ymm12 | |
vpsrld xmm12, xmm3, 16 | |
vpblendw xmm12, xmm12, xmm6, 2 | |
vpunpckldq xmm12, xmm12, xmm13 | |
vpmovsxbw xmm13, qword ptr [rip + .LCPI1_102] | |
vpermt2w xmm12, xmm13, xmm9 | |
vpsrld xmm13, xmm5, 16 | |
vpunpcklqdq xmm12, xmm12, xmm13 | |
vpbroadcastw xmm13, word ptr [r9 + 2*r13 - 166366] | |
vpblendw xmm12, xmm12, xmm13, 32 | |
vpslldq xmm13, xmm7, 10 | |
vpblendd xmm12, xmm12, xmm13, 8 | |
vpbroadcastw xmm13, word ptr [r9 + 2*r13 - 153566] | |
vpblendw xmm12, xmm12, xmm13, 128 | |
vpbroadcastw xmm13, word ptr [r9 + 2*r13 - 198364] | |
vmovdqa64 ymm21, ymm12 | |
vpbroadcastw xmm12, word ptr [r9 + 2*r13 - 191964] | |
vpunpcklwd xmm12, xmm13, xmm12 | |
vpmovsxbw xmm13, qword ptr [rip + .LCPI1_106] | |
vpblendd xmm12, xmm12, xmm4, 2 | |
vpermt2w xmm12, xmm13, xmm9 | |
vpslldq xmm13, xmm10, 6 | |
vshufps xmm12, xmm12, xmm5, 212 | |
vpblendw xmm12, xmm12, xmm13, 32 | |
vpslldq xmm13, xmm8, 10 | |
vinsertps xmm12, xmm12, xmm7, 112 | |
vpblendw xmm12, xmm12, xmm13, 128 | |
vpbroadcastd ymm13, dword ptr [r9 + 2*r13 - 147164] | |
vpblendd ymm13, ymm12, ymm13, 240 | |
vinserti128 ymm12, ymm12, xmmword ptr [r9 + 2*r13 - 134368], 1 | |
vpermt2w ymm13, ymm14, ymm2 | |
vpbroadcastd ymm14, dword ptr [r9 + 2*r13 - 121564] | |
vpblendd ymm12, ymm13, ymm12, 34 | |
vpmovsxbw ymm13, xmmword ptr [rip + .LCPI1_108] | |
vpermt2w ymm12, ymm13, ymm1 | |
vpbroadcastd ymm13, dword ptr [r9 + 2*r13 - 108764] | |
vpblendd ymm12, ymm12, ymm14, 192 | |
vpmovsxbw ymm14, xmmword ptr [rip + .LCPI1_109] | |
vpermt2w ymm12, ymm14, ymm0 | |
vpbroadcastq ymm14, qword ptr [r9 + 2*r13 - 147160] | |
vpblendd ymm12, ymm12, ymm13, 128 | |
vpsrlq xmm13, xmm3, 48 | |
vmovdqa ymmword ptr [rsp + 224], ymm12 | |
vpsrlq xmm12, xmm6, 48 | |
vpunpcklwd xmm12, xmm13, xmm12 | |
vpsrlq xmm13, xmm4, 48 | |
vpunpckldq xmm12, xmm12, xmm13 | |
vpsrlq xmm13, xmm5, 48 | |
vpblendw xmm12, xmm12, xmm9, 8 | |
vpunpcklqdq xmm12, xmm12, xmm13 | |
vpbroadcastw xmm13, word ptr [r9 + 2*r13 - 166362] | |
vpblendw xmm12, xmm12, xmm13, 32 | |
vpmovzxwd xmm13, xmm7 | |
vpblendd xmm12, xmm12, xmm13, 8 | |
vpbroadcastw xmm13, word ptr [r9 + 2*r13 - 153562] | |
vpblendw xmm12, xmm12, xmm13, 128 | |
vpbroadcastw xmm13, word ptr [r9 + 2*r13 - 198360] | |
vmovdqa64 ymm31, ymm12 | |
vpbroadcastw xmm12, word ptr [r9 + 2*r13 - 191960] | |
vpunpcklwd xmm12, xmm13, xmm12 | |
vpsrldq xmm13, xmm9, 2 | |
vpsrldq xmm9, xmm9, 6 | |
vinsertps xmm12, xmm12, xmm4, 156 | |
vpblendw xmm12, xmm12, xmm13, 8 | |
vpslld xmm13, xmm10, 16 | |
vpblendd xmm12, xmm5, xmm12, 3 | |
vpblendw xmm12, xmm12, xmm13, 32 | |
vpsllq xmm13, xmm8, 48 | |
vinsertps xmm12, xmm12, xmm7, 176 | |
vpblendw xmm12, xmm12, xmm13, 128 | |
vpblendd ymm13, ymm12, ymm14, 240 | |
vpbroadcastd ymm14, dword ptr [r9 + 2*r13 - 134360] | |
vinserti128 ymm11, ymm12, xmm11, 1 | |
vpmovsxbw ymm12, xmmword ptr [rip + .LCPI1_103] | |
vpermt2w ymm13, ymm24, ymm2 | |
vpblendd ymm13, ymm13, ymm14, 32 | |
vmovdqa xmm14, xmmword ptr [rsp + 384] | |
vpermt2w ymm13, ymm12, ymm1 | |
vpbroadcastd ymm12, dword ptr [r9 + 2*r13 - 108760] | |
vpblendd ymm11, ymm13, ymm11, 204 | |
vpmovsxbw ymm13, xmmword ptr [rip + .LCPI1_111] | |
vpermt2w ymm11, ymm13, ymm0 | |
vmovdqa xmm13, xmmword ptr [rsp + 448] | |
vpblendd ymm11, ymm11, ymm12, 128 | |
vpsrldq xmm12, xmm3, 10 | |
vpsrldq xmm3, xmm3, 14 | |
vmovdqa ymmword ptr [rsp + 192], ymm11 | |
vpsrldq xmm11, xmm6, 10 | |
vpsrldq xmm6, xmm6, 14 | |
vpunpcklwd xmm11, xmm12, xmm11 | |
vpsrldq xmm12, xmm4, 10 | |
vpunpcklwd xmm3, xmm3, xmm6 | |
vpunpckldq xmm11, xmm11, xmm12 | |
vpbroadcastw xmm12, word ptr [r9 + 2*r13 - 179158] | |
vpblendw xmm11, xmm11, xmm12, 8 | |
vpsrldq xmm12, xmm5, 10 | |
vpunpcklqdq xmm11, xmm11, xmm12 | |
vpsllq xmm12, xmm7, 16 | |
vpblendw xmm11, xmm11, xmm10, 32 | |
vpsrlq xmm10, xmm10, 16 | |
vpblendd xmm11, xmm11, xmm12, 8 | |
vpbroadcastw xmm12, word ptr [r9 + 2*r13 - 153558] | |
vpblendw xmm11, xmm11, xmm12, 128 | |
vpbroadcastw xmm12, word ptr [r9 + 2*r13 - 198356] | |
vmovdqa64 ymm16, ymm11 | |
vpbroadcastw xmm11, word ptr [r9 + 2*r13 - 191956] | |
vpunpcklwd xmm11, xmm12, xmm11 | |
vpmovsxbw ymm12, xmmword ptr [rip + .LCPI1_112] | |
vinsertps xmm11, xmm11, xmm4, 220 | |
vpsrldq xmm4, xmm4, 14 | |
vpunpckldq xmm3, xmm3, xmm4 | |
vpbroadcastw xmm4, word ptr [r9 + 2*r13 - 179154] | |
vpblendw xmm9, xmm11, xmm9, 8 | |
vpslld xmm11, xmm8, 16 | |
vshufps xmm9, xmm9, xmm5, 244 | |
vpblendw xmm9, xmm9, xmm10, 32 | |
vpbroadcastd ymm10, dword ptr [r9 + 2*r13 - 147156] | |
vpblendd xmm9, xmm9, xmm7, 8 | |
vpblendw xmm9, xmm9, xmm11, 128 | |
vpbroadcastd ymm11, dword ptr [r9 + 2*r13 - 134356] | |
vpblendw xmm3, xmm3, xmm4, 8 | |
vpsrldq xmm4, xmm5, 14 | |
vpunpcklqdq xmm3, xmm3, xmm4 | |
vpbroadcastw xmm4, word ptr [r9 + 2*r13 - 166354] | |
vpblendd ymm10, ymm9, ymm10, 240 | |
vinserti128 ymm9, ymm9, xmmword ptr [r9 + 2*r13 - 108768], 1 | |
vpermt2w ymm10, ymm12, ymm2 | |
vpblendd ymm10, ymm10, ymm11, 32 | |
vpmovsxbw ymm11, xmmword ptr [rip + .LCPI1_113] | |
vpblendw xmm3, xmm3, xmm4, 32 | |
vpsrld xmm4, xmm7, 16 | |
vpblendd xmm3, xmm3, xmm4, 8 | |
vmovdqa xmm4, xmmword ptr [r9 + 2*r13 - 172752] | |
vpblendw xmm3, xmm3, xmm8, 128 | |
vmovdqa64 ymm25, ymm3 | |
vpbroadcastw ymm3, word ptr [r9 + 2*r13 - 198352] | |
vpermt2w ymm10, ymm11, ymm1 | |
vpbroadcastd ymm11, dword ptr [r9 + 2*r13 - 121556] | |
vpunpcklwd xmm3, xmm3, xmm14 | |
vpunpckldq xmm5, xmm3, xmm15 | |
vmovdqa xmm3, xmmword ptr [r9 + 2*r13 - 179152] | |
vpblendd ymm10, ymm10, ymm11, 192 | |
vpmovsxbw ymm11, xmmword ptr [rip + .LCPI1_114] | |
insertq xmm5, xmm3, 16, 48 | |
vpunpcklqdq xmm6, xmm5, xmm4 | |
vmovdqa xmm5, xmmword ptr [r9 + 2*r13 - 166352] | |
vpermt2w ymm10, ymm11, ymm0 | |
vpblendd ymm9, ymm10, ymm9, 136 | |
vpmovsxbw ymm10, xmmword ptr [rip + .LCPI1_115] | |
vmovdqa ymmword ptr [rsp + 160], ymm9 | |
vpbroadcastd ymm9, dword ptr [r9 + 2*r13 - 134352] | |
vpbroadcastw xmm7, xmm5 | |
vmovdqa xmm12, xmm5 | |
vmovdqa xmm5, xmmword ptr [r9 + 2*r13 - 153552] | |
vpblendw xmm6, xmm6, xmm7, 32 | |
vpbroadcastd xmm7, dword ptr [r9 + 2*r13 - 159952] | |
vpbroadcastw xmm8, xmm5 | |
vpblendd xmm7, xmm6, xmm7, 8 | |
vpblendw xmm7, xmm7, xmm8, 128 | |
vmovdqa ymm8, ymmword ptr [r9 + 2*r13 - 147168] | |
vpblendd ymm7, ymm7, ymm8, 240 | |
vpermt2w ymm21, ymm19, ymm8 | |
vpermt2w ymm7, ymm10, ymm2 | |
vpmovsxbw ymm10, xmmword ptr [rip + .LCPI1_116] | |
vpblendd ymm7, ymm7, ymm9, 32 | |
vpbroadcastq ymm9, qword ptr [r9 + 2*r13 - 121552] | |
vpermt2w ymm7, ymm10, ymm1 | |
vmovdqa ymm10, ymmword ptr [rsp + 416] | |
vpblendd ymm7, ymm7, ymm9, 192 | |
vpmovsxbw ymm9, xmmword ptr [rip + .LCPI1_117] | |
vpermt2w ymm7, ymm9, ymm0 | |
vpbroadcastd ymm9, dword ptr [r9 + 2*r13 - 108752] | |
vpblendd ymm7, ymm7, ymm9, 128 | |
vpsrld xmm9, xmm4, 16 | |
vmovdqa ymmword ptr [rsp + 480], ymm7 | |
vmovdqa ymm7, ymmword ptr [r9 + 2*r13 - 179168] | |
vpermt2w ymm10, ymm26, ymm7 | |
vpunpcklqdq xmm9, xmm10, xmm9 | |
vpbroadcastw xmm10, word ptr [r9 + 2*r13 - 166350] | |
vpblendw xmm10, xmm9, xmm10, 32 | |
vmovapd xmm9, xmmword ptr [r9 + 2*r13 - 159952] | |
vpslldq xmm11, xmm9, 10 | |
vpblendd xmm10, xmm10, xmm11, 8 | |
vpbroadcastw xmm11, word ptr [r9 + 2*r13 - 153550] | |
vpblendw xmm10, xmm10, xmm11, 128 | |
vpshuflw ymm11, ymm8, 85 | |
vpblendw ymm11, ymm11, ymm2, 2 | |
vpblendd ymm6, ymm10, ymm11, 240 | |
vmovdqa ymm10, ymmword ptr [rsp + 128] | |
vpmovsxbw ymm11, xmmword ptr [rip + .LCPI1_119] | |
vmovdqa64 ymm30, ymm6 | |
vmovdqa xmm6, xmm12 | |
vmovdqa64 xmm26, xmm6 | |
vpermt2w ymm10, ymm28, ymm7 | |
vshufps xmm7, xmm10, xmm4, 212 | |
vpslldq xmm10, xmm12, 6 | |
vpsrldq xmm12, xmm14, 10 | |
vpblendw xmm7, xmm7, xmm10, 32 | |
vpbroadcastd ymm10, dword ptr [r9 + 2*r13 - 159948] | |
vpblendd xmm7, xmm7, xmm10, 8 | |
vpslldq xmm10, xmm5, 10 | |
vpblendw xmm7, xmm7, xmm10, 128 | |
vpbroadcastd ymm10, dword ptr [r9 + 2*r13 - 147148] | |
vpblendd ymm10, ymm7, ymm10, 240 | |
vpmovsxbw ymm7, xmmword ptr [rip + .LCPI1_120] | |
vpermt2w ymm10, ymm7, ymm2 | |
vmovdqa ymm7, ymmword ptr [r9 + 2*r13 - 134368] | |
vpblendd ymm10, ymm10, ymm7, 32 | |
vpermt2w ymm10, ymm11, ymm1 | |
vpbroadcastd ymm11, dword ptr [r9 + 2*r13 - 121548] | |
vpblendd ymm10, ymm10, ymm11, 192 | |
vpmovsxbw ymm11, xmmword ptr [rip + .LCPI1_121] | |
vpermt2w ymm10, ymm11, ymm0 | |
vpbroadcastd ymm11, dword ptr [r9 + 2*r13 - 108748] | |
vpblendd ymm10, ymm10, ymm11, 128 | |
vpsrlq xmm11, xmm14, 48 | |
vmovdqa ymmword ptr [rsp + 128], ymm10 | |
vpsrlq xmm10, xmm13, 48 | |
vpunpcklwd xmm10, xmm10, xmm11 | |
vpsrlq xmm11, xmm15, 48 | |
vpunpckldq xmm10, xmm10, xmm11 | |
vpsrlq xmm11, xmm4, 48 | |
vpblendw xmm10, xmm10, xmm3, 8 | |
vpunpcklqdq xmm10, xmm10, xmm11 | |
vpbroadcastw xmm11, word ptr [r9 + 2*r13 - 166346] | |
vpblendw xmm10, xmm10, xmm11, 32 | |
vpmovzxwd xmm11, xmm9 | |
vpblendd xmm10, xmm10, xmm11, 8 | |
vpbroadcastw xmm11, word ptr [r9 + 2*r13 - 153546] | |
vpblendw xmm10, xmm10, xmm11, 128 | |
vpsrldq xmm11, xmm13, 10 | |
vpsrldq xmm13, xmm13, 14 | |
vpunpcklwd xmm11, xmm11, xmm12 | |
vpsrldq xmm12, xmm15, 10 | |
vpunpckldq xmm11, xmm11, xmm12 | |
vpbroadcastw xmm12, word ptr [r9 + 2*r13 - 179142] | |
vpblendw xmm11, xmm11, xmm12, 8 | |
vpsrldq xmm12, xmm4, 10 | |
vpunpcklqdq xmm11, xmm11, xmm12 | |
vpsllq xmm12, xmm9, 16 | |
vpsrld xmm9, xmm9, 16 | |
vpblendw xmm11, xmm11, xmm6, 32 | |
vpblendd xmm11, xmm11, xmm12, 8 | |
vpbroadcastw xmm12, word ptr [r9 + 2*r13 - 153542] | |
vpblendw xmm11, xmm11, xmm12, 128 | |
vpsrldq xmm12, xmm14, 14 | |
vpunpcklwd xmm12, xmm13, xmm12 | |
vpsrldq xmm13, xmm15, 14 | |
vpmovsxbw xmm15, qword ptr [rip + .LCPI1_102] | |
vpunpckldq xmm12, xmm12, xmm13 | |
vpbroadcastw xmm13, word ptr [r9 + 2*r13 - 179138] | |
vpblendw xmm12, xmm12, xmm13, 8 | |
vpsrldq xmm13, xmm4, 14 | |
vpunpcklqdq xmm12, xmm12, xmm13 | |
vpbroadcastw xmm13, word ptr [r9 + 2*r13 - 166338] | |
vpblendw xmm12, xmm12, xmm13, 32 | |
vpblendd xmm9, xmm12, xmm9, 8 | |
vpmovsxbw ymm12, xmmword ptr [rip + .LCPI1_123] | |
vpblendw xmm9, xmm9, xmm5, 128 | |
vpermt2w ymm31, ymm12, ymm8 | |
vpmovsxbw ymm12, xmmword ptr [rip + .LCPI1_124] | |
vpermt2w ymm16, ymm12, ymm8 | |
vpmovsxbw ymm12, xmmword ptr [rip + .LCPI1_125] | |
vpermt2w ymm25, ymm12, ymm8 | |
vpmovsxbw ymm12, xmmword ptr [rip + .LCPI1_126] | |
vpermt2w ymm11, ymm12, ymm8 | |
vpmovsxbw ymm12, xmmword ptr [rip + .LCPI1_127] | |
vpermt2w ymm9, ymm12, ymm8 | |
vpbroadcastd ymm12, dword ptr [rip + .LCPI1_66] | |
vpermt2w ymm8, ymm12, ymm2 | |
vpmovsxbw ymm12, xmmword ptr [rip + .LCPI1_128] | |
vpermt2w ymm8, ymm12, ymm7 | |
vpmovsxbw ymm12, xmmword ptr [rip + .LCPI1_129] | |
vpblendw ymm8, ymm8, ymm1, 8 | |
vpblendd ymm13, ymm10, ymm8, 240 | |
vpbroadcastw ymm8, word ptr [r9 + 2*r13 - 198344] | |
vpbroadcastw xmm10, word ptr [r9 + 2*r13 - 191944] | |
vpunpcklwd xmm8, xmm8, xmm10 | |
vunpcklps xmm8, xmm8, dword ptr [r9 + 2*r13 - 185544]{1to4} | |
vpsrldq xmm10, xmm3, 2 | |
vpsrldq xmm3, xmm3, 6 | |
vpblendw xmm8, xmm8, xmm10, 8 | |
vpslld xmm10, xmm6, 16 | |
vpblendd xmm8, xmm4, xmm8, 3 | |
vpblendw xmm8, xmm8, xmm10, 32 | |
vpbroadcastd xmm10, dword ptr [r9 + 2*r13 - 159944] | |
vpblendd xmm8, xmm8, xmm10, 8 | |
vpsllq xmm10, xmm5, 48 | |
vpblendw xmm8, xmm8, xmm10, 128 | |
vpbroadcastq ymm10, qword ptr [r9 + 2*r13 - 147144] | |
vpblendd ymm8, ymm8, ymm10, 240 | |
vpmovsxbw ymm10, xmmword ptr [rip + .LCPI1_130] | |
vpermt2w ymm8, ymm10, ymm2 | |
vpbroadcastd ymm10, dword ptr [r9 + 2*r13 - 134344] | |
vpblendd ymm10, ymm8, ymm10, 32 | |
vpmovsxbw ymm8, xmmword ptr [rip + .LCPI1_131] | |
vpermt2w ymm10, ymm8, ymm1 | |
vmovdqa ymm8, ymmword ptr [r9 + 2*r13 - 121568] | |
vpblendd ymm10, ymm10, ymm8, 192 | |
vpermt2w ymm10, ymm12, ymm0 | |
vpbroadcastd ymm12, dword ptr [r9 + 2*r13 - 108744] | |
vpblendd ymm14, ymm10, ymm12, 128 | |
vpmovsxbw ymm12, xmmword ptr [rip + .LCPI1_132] | |
vmovdqa ymm10, ymm11 | |
vpermt2w ymm10, ymm12, ymm2 | |
vpmovsxbw ymm12, xmmword ptr [rip + .LCPI1_133] | |
vpermt2w ymm10, ymm12, ymm7 | |
vpmovsxbw ymm12, xmmword ptr [rip + .LCPI1_134] | |
vpermt2w ymm10, ymm12, ymm1 | |
vpmovsxbw ymm12, xmmword ptr [rip + .LCPI1_135] | |
vpermt2w ymm10, ymm12, ymm8 | |
vpbroadcastw xmm12, word ptr [r9 + 2*r13 - 191940] | |
vpblendw ymm10, ymm10, ymm0, 32 | |
vpblendd ymm6, ymm11, ymm10, 240 | |
vpbroadcastw ymm10, word ptr [r9 + 2*r13 - 198340] | |
vpbroadcastd ymm11, dword ptr [r9 + 2*r13 - 185540] | |
vpunpcklwd xmm10, xmm10, xmm12 | |
vpmovsxbw ymm12, xmmword ptr [rip + .LCPI1_109] | |
vpblendd xmm10, xmm10, xmm11, 2 | |
vmovdqa ymm11, ymmword ptr [r9 + 2*r13 - 38368] | |
vpblendw xmm3, xmm10, xmm3, 8 | |
vmovaps xmm10, xmmword ptr [r9 + 2*r13 - 57568] | |
vshufps xmm3, xmm3, xmm4, 244 | |
vpsrlq xmm4, xmm26, 16 | |
vpblendw xmm3, xmm3, xmm4, 32 | |
vpbroadcastd ymm4, dword ptr [r9 + 2*r13 - 159940] | |
vpblendd xmm3, xmm3, xmm4, 8 | |
vpslld xmm4, xmm5, 16 | |
vmovdqa64 ymm5, ymm21 | |
vpermt2w ymm5, ymm27, ymm2 | |
vpblendw xmm3, xmm3, xmm4, 128 | |
vpbroadcastd ymm4, dword ptr [r9 + 2*r13 - 147140] | |
vpermt2w ymm5, ymm20, ymm7 | |
vmovdqa64 ymm20, ymm30 | |
vpmovsxbw ymm30, xmmword ptr [rip + .LCPI1_101] | |
vpblendd ymm3, ymm3, ymm4, 240 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_136] | |
vpermt2w ymm5, ymm30, ymm1 | |
vmovdqa64 xmm30, xmmword ptr [r9 + 2*r13 - 51152] | |
vpermt2w ymm5, ymm23, ymm8 | |
vmovdqa64 ymm23, ymm13 | |
vmovdqa ymm13, ymmword ptr [r9 + 2*r13 - 25568] | |
vpermt2w ymm3, ymm4, ymm2 | |
vpbroadcastd ymm4, dword ptr [r9 + 2*r13 - 134340] | |
vpblendd ymm3, ymm3, ymm4, 32 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_137] | |
vpermt2w ymm3, ymm4, ymm1 | |
vpbroadcastd ymm4, dword ptr [r9 + 2*r13 - 121540] | |
vpblendd ymm3, ymm3, ymm4, 192 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_138] | |
vpermt2w ymm31, ymm4, ymm2 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_139] | |
vpermt2w ymm16, ymm4, ymm2 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_140] | |
vpermt2w ymm25, ymm4, ymm2 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_141] | |
vpermt2w ymm9, ymm4, ymm2 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_142] | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_148] | |
vpermt2w ymm31, ymm2, ymm7 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_143] | |
vpermt2w ymm16, ymm2, ymm7 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_144] | |
vpermt2w ymm25, ymm2, ymm7 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_145] | |
vpermt2w ymm20, ymm2, ymm7 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_146] | |
vpermt2w ymm9, ymm2, ymm7 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_147] | |
vpermt2w ymm31, ymm2, ymm1 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_149] | |
vpermt2w ymm16, ymm2, ymm1 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_150] | |
vpermt2w ymm25, ymm2, ymm1 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_151] | |
vpermt2w ymm20, ymm2, ymm1 | |
vmovdqa ymm2, ymm9 | |
vpermt2w ymm2, ymm4, ymm1 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_152] | |
vpermt2w ymm31, ymm1, ymm8 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_153] | |
vpermt2w ymm16, ymm1, ymm8 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_154] | |
vpermt2w ymm25, ymm1, ymm8 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_155] | |
vpermt2w ymm20, ymm1, ymm8 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_156] | |
vpermt2w ymm23, ymm1, ymm8 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_157] | |
vpermt2w ymm2, ymm1, ymm8 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_158] | |
vpbroadcastd ymm8, dword ptr [r9 + 2*r13 - 31968] | |
vpermt2w ymm5, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_160] | |
vmovdqa64 ymm27, ymm5 | |
vpermt2w ymm31, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_161] | |
vpermt2w ymm16, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_162] | |
vmovdqa64 ymm21, ymm16 | |
vmovdqa64 xmm16, xmmword ptr [r9 + 2*r13 - 19168] | |
vpermt2w ymm25, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_163] | |
vmovdqa64 ymm19, ymm25 | |
vpmovsxbw ymm25, xmmword ptr [rip + .LCPI1_115] | |
vpermt2w ymm20, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_164] | |
vpermt2w ymm23, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_165] | |
vpermt2w ymm3, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_166] | |
vpermt2w ymm2, ymm1, ymm0 | |
vmovdqa ymm0, ymmword ptr [r9 + 2*r13 - 108768] | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_159] | |
vpermt2w ymm27, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_167] | |
vpblendd ymm4, ymm3, ymm0, 128 | |
vmovdqa ymm3, ymmword ptr [rsp + 224] | |
vpermt2w ymm31, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_169] | |
vpermt2w ymm21, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_170] | |
vpermt2w ymm19, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_171] | |
vpermt2w ymm20, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_172] | |
vpermt2w ymm23, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_173] | |
vpermt2w ymm6, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_168] | |
vpermt2w ymm2, ymm1, ymm0 | |
vmovdqa ymm0, ymmword ptr [r9 + 2*r13 - 102368] | |
vmovdqa ymm1, ymmword ptr [rsp + 256] | |
vpermt2w ymm1, ymm22, ymm0 | |
vmovdqa64 xmm22, xmmword ptr [rsp + 752] | |
vmovdqa ymmword ptr [rsp + 256], ymm1 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_174] | |
vpermt2w ymm27, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_175] | |
vpermt2w ymm3, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_176] | |
vmovdqa ymmword ptr [rsp + 224], ymm3 | |
vmovdqa ymm3, ymmword ptr [rsp + 192] | |
vpermt2w ymm31, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_177] | |
vpermt2w ymm3, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_178] | |
vmovdqa ymmword ptr [rsp + 192], ymm3 | |
vmovdqa ymm3, ymmword ptr [rsp + 160] | |
vpermt2w ymm21, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_179] | |
vpermt2w ymm3, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_180] | |
vmovdqa ymmword ptr [rsp + 160], ymm3 | |
vmovdqa ymm3, ymmword ptr [rsp + 480] | |
vpermt2w ymm19, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_181] | |
vpermt2w ymm3, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_182] | |
vmovdqa ymmword ptr [rsp + 480], ymm3 | |
vmovdqa ymm3, ymmword ptr [rsp + 128] | |
vpermt2w ymm20, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_183] | |
vpermt2w ymm3, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_184] | |
vmovdqa ymmword ptr [rsp + 128], ymm3 | |
vpermt2w ymm23, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_185] | |
vpermt2w ymm14, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_186] | |
vmovdqa ymmword ptr [rsp + 448], ymm14 | |
vmovdqa ymm14, ymmword ptr [r9 + 2*r13 - 12768] | |
vpermt2w ymm6, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_187] | |
vmovdqa ymmword ptr [rsp + 416], ymm6 | |
vpermt2w ymm4, ymm1, ymm0 | |
vmovdqa xmm1, xmmword ptr [r9 + 2*r13 - 83168] | |
vmovdqa ymmword ptr [rsp + 384], ymm4 | |
vpblendw ymm4, ymm2, ymm0, 128 | |
vmovdqa xmm0, xmmword ptr [r9 + 2*r13 - 95968] | |
vmovdqa xmm2, xmmword ptr [r9 + 2*r13 - 89568] | |
vpblendd ymm4, ymm9, ymm4, 240 | |
vmovdqa xmm9, xmmword ptr [r9 + 2*r13 - 63968] | |
vmovdqa ymmword ptr [rsp + 352], ymm4 | |
vmovdqa xmm4, xmmword ptr [r9 + 2*r13 - 70368] | |
vpunpcklwd xmm3, xmm0, xmm2 | |
vpunpckldq xmm5, xmm3, xmm1 | |
vmovdqa xmm3, xmmword ptr [r9 + 2*r13 - 76768] | |
vpbroadcastw xmm6, xmm9 | |
insertq xmm5, xmm3, 16, 48 | |
vpunpcklqdq xmm5, xmm5, xmm4 | |
vpblendw xmm5, xmm5, xmm6, 32 | |
vmovdqa xmm6, xmmword ptr [r9 + 2*r13 - 51168] | |
vinsertps xmm5, xmm5, xmm10, 48 | |
vpbroadcastw xmm7, xmm6 | |
vpblendw xmm7, xmm5, xmm7, 128 | |
vinserti128 ymm5, ymm7, xmmword ptr [r9 + 2*r13 - 44768], 1 | |
vinserti32x4 ymm7, ymm7, xmm16, 1 | |
vpermt2w ymm5, ymm29, ymm11 | |
vpblendd ymm8, ymm5, ymm8, 32 | |
vpmovsxbw ymm5, xmmword ptr [rip + .LCPI1_95] | |
vpermt2w ymm8, ymm5, ymm13 | |
vpmovsxbw ymm5, xmmword ptr [rip + .LCPI1_96] | |
vshufpd ymm7, ymm8, ymm7, 2 | |
vpbroadcastd ymm8, dword ptr [r9 + 2*r13 - 6368] | |
vpermt2w ymm7, ymm5, ymm14 | |
vpblendd ymm5, ymm7, ymm8, 128 | |
vpsrld xmm7, xmm0, 16 | |
vpsrld xmm8, xmm1, 16 | |
vpblendw xmm7, xmm7, xmm2, 2 | |
vmovdqa ymmword ptr [rsp + 960], ymm5 | |
vpunpckldq xmm7, xmm7, xmm8 | |
vpsrld xmm8, xmm4, 16 | |
vpermt2w xmm7, xmm15, xmm3 | |
vpbroadcastd ymm15, dword ptr [r9 + 2*r13 - 19164] | |
vpunpcklqdq xmm7, xmm7, xmm8 | |
vpbroadcastw xmm8, word ptr [r9 + 2*r13 - 63966] | |
vpblendw xmm7, xmm7, xmm8, 32 | |
vpslldq xmm8, xmm10, 10 | |
vpblendd xmm7, xmm7, xmm8, 8 | |
vpbroadcastw xmm8, word ptr [r9 + 2*r13 - 51166] | |
vpblendw xmm5, xmm7, xmm8, 128 | |
vpbroadcastw xmm7, word ptr [r9 + 2*r13 - 89564] | |
vpbroadcastw xmm8, word ptr [r9 + 2*r13 - 95964] | |
vmovdqa64 ymm28, ymm5 | |
vpmovsxbw ymm5, xmmword ptr [rip + .LCPI1_108] | |
vpunpcklwd xmm7, xmm8, xmm7 | |
vpslldq xmm8, xmm9, 6 | |
vpblendd xmm7, xmm7, xmm1, 2 | |
vpermt2w xmm7, xmm17, xmm3 | |
vpmovsxbw ymm17, xmmword ptr [rip + .LCPI1_123] | |
vshufps xmm7, xmm7, xmm4, 212 | |
vpblendw xmm7, xmm7, xmm8, 32 | |
vpslldq xmm8, xmm6, 10 | |
vinsertps xmm7, xmm7, xmm10, 112 | |
vpblendw xmm7, xmm7, xmm8, 128 | |
vpbroadcastd ymm8, dword ptr [r9 + 2*r13 - 44764] | |
vpblendd ymm8, ymm7, ymm8, 240 | |
vinserti128 ymm7, ymm7, xmmword ptr [r9 + 2*r13 - 31968], 1 | |
vpermt2w ymm8, ymm18, ymm11 | |
vpmovsxbw ymm18, xmmword ptr [rip + .LCPI1_103] | |
vpblendd ymm7, ymm8, ymm7, 34 | |
vpbroadcastd ymm8, dword ptr [r9 + 2*r13 - 6364] | |
vpermt2w ymm7, ymm5, ymm13 | |
vpblendd ymm7, ymm7, ymm15, 192 | |
vpbroadcastw xmm15, word ptr [r9 + 2*r13 - 95960] | |
vpermt2w ymm7, ymm12, ymm14 | |
vpblendd ymm5, ymm7, ymm8, 128 | |
vpsrlq xmm7, xmm2, 48 | |
vpsrlq xmm8, xmm0, 48 | |
vpunpcklwd xmm7, xmm8, xmm7 | |
vpsrlq xmm8, xmm1, 48 | |
vmovdqa ymmword ptr [rsp + 928], ymm5 | |
vpbroadcastq ymm5, qword ptr [r9 + 2*r13 - 44760] | |
vpunpckldq xmm7, xmm7, xmm8 | |
vpsrlq xmm8, xmm4, 48 | |
vpblendw xmm7, xmm7, xmm3, 8 | |
vpunpcklqdq xmm7, xmm7, xmm8 | |
vpbroadcastw xmm8, word ptr [r9 + 2*r13 - 63962] | |
vpblendw xmm7, xmm7, xmm8, 32 | |
vpmovzxwd xmm8, xmm10 | |
vpblendd xmm7, xmm7, xmm8, 8 | |
vpbroadcastw xmm8, word ptr [r9 + 2*r13 - 51162] | |
vpblendw xmm12, xmm7, xmm8, 128 | |
vpbroadcastw xmm7, word ptr [r9 + 2*r13 - 89560] | |
vpmovsxbw ymm8, xmmword ptr [rip + .LCPI1_111] | |
vpunpcklwd xmm7, xmm15, xmm7 | |
vpsrldq xmm15, xmm3, 2 | |
vpsrldq xmm3, xmm3, 6 | |
vinsertps xmm7, xmm7, xmm1, 156 | |
vpblendw xmm7, xmm7, xmm15, 8 | |
vpslld xmm15, xmm9, 16 | |
vpblendd xmm7, xmm4, xmm7, 3 | |
vpblendw xmm7, xmm7, xmm15, 32 | |
vpsllq xmm15, xmm6, 48 | |
vinsertps xmm7, xmm7, xmm10, 176 | |
vpblendw xmm7, xmm7, xmm15, 128 | |
vpbroadcastd ymm15, dword ptr [r9 + 2*r13 - 31960] | |
vpblendd ymm5, ymm7, ymm5, 240 | |
vinserti32x4 ymm7, ymm7, xmm16, 1 | |
vpmovsxbw ymm16, xmmword ptr [rip + .LCPI1_112] | |
vpermt2w ymm5, ymm24, ymm11 | |
vmovdqa64 xmm24, xmmword ptr [r9 + 2*r13 - 63952] | |
vpblendd ymm5, ymm5, ymm15, 32 | |
vpbroadcastd ymm15, dword ptr [r9 + 2*r13 - 6360] | |
vpermt2w ymm5, ymm18, ymm13 | |
vpmovsxbw ymm18, xmmword ptr [rip + .LCPI1_113] | |
vpblendd ymm5, ymm5, ymm7, 204 | |
vpsrldq xmm7, xmm0, 10 | |
vpsrldq xmm0, xmm0, 14 | |
vpermt2w ymm5, ymm8, ymm14 | |
vpblendd ymm5, ymm5, ymm15, 128 | |
vpbroadcastw xmm15, word ptr [r9 + 2*r13 - 95956] | |
vmovdqa ymmword ptr [rsp + 896], ymm5 | |
vpsrldq xmm5, xmm2, 10 | |
vpsrldq xmm2, xmm2, 14 | |
vpunpcklwd xmm5, xmm7, xmm5 | |
vpsrldq xmm7, xmm1, 10 | |
vpunpcklwd xmm0, xmm0, xmm2 | |
vmovdqa ymm2, ymmword ptr [rsp + 1696] | |
vpunpckldq xmm5, xmm5, xmm7 | |
vpbroadcastw xmm7, word ptr [r9 + 2*r13 - 76758] | |
vpblendw xmm5, xmm5, xmm7, 8 | |
vpsrldq xmm7, xmm4, 10 | |
vpunpcklqdq xmm5, xmm5, xmm7 | |
vpsllq xmm7, xmm10, 16 | |
vpblendw xmm5, xmm5, xmm9, 32 | |
vpblendd xmm5, xmm5, xmm7, 8 | |
vpbroadcastw xmm7, word ptr [r9 + 2*r13 - 51158] | |
vpblendw xmm8, xmm5, xmm7, 128 | |
vpbroadcastw xmm5, word ptr [r9 + 2*r13 - 89556] | |
vpmovsxbw ymm7, xmmword ptr [rip + .LCPI1_114] | |
vpunpcklwd xmm5, xmm15, xmm5 | |
vmovdqa xmm15, xmmword ptr [rsp + 768] | |
vinsertps xmm5, xmm5, xmm1, 220 | |
vpsrldq xmm1, xmm1, 14 | |
vpblendw xmm3, xmm5, xmm3, 8 | |
vpsrlq xmm5, xmm9, 16 | |
vpslld xmm9, xmm6, 16 | |
vpunpckldq xmm0, xmm0, xmm1 | |
vpbroadcastw xmm1, word ptr [r9 + 2*r13 - 76754] | |
vshufps xmm3, xmm3, xmm4, 244 | |
vpblendw xmm3, xmm3, xmm5, 32 | |
vpbroadcastd ymm5, dword ptr [r9 + 2*r13 - 44756] | |
vpblendd xmm3, xmm3, xmm10, 8 | |
vpblendw xmm3, xmm3, xmm9, 128 | |
vpbroadcastd ymm9, dword ptr [r9 + 2*r13 - 31956] | |
vpblendw xmm0, xmm0, xmm1, 8 | |
vpsrldq xmm1, xmm4, 14 | |
vmovdqa ymm4, ymmword ptr [r9 + 2*r13 - 31968] | |
vpunpcklqdq xmm0, xmm0, xmm1 | |
vpbroadcastw xmm1, word ptr [r9 + 2*r13 - 63954] | |
vpblendd ymm5, ymm3, ymm5, 240 | |
vinserti128 ymm3, ymm3, xmmword ptr [r9 + 2*r13 - 6368], 1 | |
vpermt2w ymm5, ymm16, ymm11 | |
vpmovsxbw ymm16, xmmword ptr [rip + .LCPI1_116] | |
vpblendd ymm5, ymm5, ymm9, 32 | |
vpbroadcastd ymm9, dword ptr [r9 + 2*r13 - 19156] | |
vpermt2w ymm5, ymm18, ymm13 | |
vpblendw xmm0, xmm0, xmm1, 32 | |
vpsrld xmm1, xmm10, 16 | |
vmovdqa ymm10, ymmword ptr [r9 + 2*r13 - 44768] | |
vpmovsxbw ymm18, xmmword ptr [rip + .LCPI1_117] | |
vpblendd xmm0, xmm0, xmm1, 8 | |
vmovdqa xmm1, xmmword ptr [r9 + 2*r13 - 76752] | |
vpblendd ymm5, ymm5, ymm9, 192 | |
vmovdqa xmm9, xmmword ptr [r9 + 2*r13 - 70352] | |
vpermt2w ymm12, ymm17, ymm10 | |
vpmovsxbw ymm17, xmmword ptr [rip + .LCPI1_140] | |
vpermt2w ymm5, ymm7, ymm14 | |
vmovdqa64 xmm29, xmm1 | |
vpblendd ymm3, ymm5, ymm3, 136 | |
vpblendw xmm5, xmm0, xmm6, 128 | |
vpbroadcastw ymm0, word ptr [r9 + 2*r13 - 95952] | |
vpmovsxbw ymm6, xmmword ptr [rip + .LCPI1_119] | |
vmovdqa ymmword ptr [rsp + 832], ymm3 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_120] | |
vpunpcklwd xmm0, xmm0, xmm15 | |
vpunpckldq xmm0, xmm0, xmm22 | |
insertq xmm0, xmm1, 16, 48 | |
vpbroadcastw xmm1, xmm24 | |
vpunpcklqdq xmm0, xmm0, xmm9 | |
vpblendw xmm0, xmm0, xmm1, 32 | |
vpbroadcastd xmm1, dword ptr [r9 + 2*r13 - 57552] | |
vpblendd xmm0, xmm0, xmm1, 8 | |
vpbroadcastw xmm1, xmm30 | |
vpblendw xmm0, xmm0, xmm1, 128 | |
vpbroadcastd ymm1, dword ptr [r9 + 2*r13 - 31952] | |
vpblendd ymm0, ymm0, ymm10, 240 | |
vpermt2w ymm0, ymm25, ymm11 | |
vpmovsxbw ymm25, xmmword ptr [rip + .LCPI1_121] | |
vpblendd ymm0, ymm0, ymm1, 32 | |
vpbroadcastq ymm1, qword ptr [r9 + 2*r13 - 19152] | |
vpermt2w ymm0, ymm16, ymm13 | |
vmovapd xmm16, xmmword ptr [r9 + 2*r13 - 57552] | |
vpblendd ymm0, ymm0, ymm1, 192 | |
vpbroadcastd ymm1, dword ptr [r9 + 2*r13 - 6352] | |
vpermt2w ymm0, ymm18, ymm14 | |
vpmovsxbw ymm18, xmmword ptr [rip + .LCPI1_128] | |
vpblendd ymm0, ymm0, ymm1, 128 | |
vpmovsxbw xmm1, qword ptr [rip + .LCPI1_110] | |
vmovdqa ymmword ptr [rsp + 800], ymm0 | |
vmovdqa ymm0, ymmword ptr [r9 + 2*r13 - 76768] | |
vpermt2w ymm2, ymm1, ymm0 | |
vpsrld xmm1, xmm9, 16 | |
vpunpcklqdq xmm1, xmm2, xmm1 | |
vpbroadcastw xmm2, word ptr [r9 + 2*r13 - 63950] | |
vpblendw xmm1, xmm1, xmm2, 32 | |
vpslldq xmm2, xmm16, 10 | |
vpblendd xmm1, xmm1, xmm2, 8 | |
vpbroadcastw xmm2, word ptr [r9 + 2*r13 - 51150] | |
vpblendw xmm1, xmm1, xmm2, 128 | |
vpshuflw ymm2, ymm10, 85 | |
vpblendw ymm2, ymm2, ymm11, 2 | |
vpblendd ymm7, ymm1, ymm2, 240 | |
vpmovsxbw xmm2, qword ptr [rip + .LCPI1_100] | |
vmovdqa ymm1, ymmword ptr [rsp + 1728] | |
vpermt2w ymm1, ymm2, ymm0 | |
vpsrldq xmm2, xmm15, 10 | |
vshufps xmm0, xmm1, xmm9, 212 | |
vpslldq xmm1, xmm24, 6 | |
vpblendw xmm0, xmm0, xmm1, 32 | |
vpbroadcastd ymm1, dword ptr [r9 + 2*r13 - 57548] | |
vpblendd xmm0, xmm0, xmm1, 8 | |
vpslldq xmm1, xmm30, 10 | |
vpblendw xmm0, xmm0, xmm1, 128 | |
vpbroadcastd ymm1, dword ptr [r9 + 2*r13 - 44748] | |
vpblendd ymm0, ymm0, ymm1, 240 | |
vpbroadcastd ymm1, dword ptr [r9 + 2*r13 - 19148] | |
vpermt2w ymm0, ymm3, ymm11 | |
vmovdqa xmm3, xmmword ptr [rsp + 784] | |
vpblendd ymm0, ymm0, ymm4, 32 | |
vpermt2w ymm0, ymm6, ymm13 | |
vmovdqa64 xmm6, xmm22 | |
vpblendd ymm0, ymm0, ymm1, 192 | |
vpbroadcastd ymm1, dword ptr [r9 + 2*r13 - 6348] | |
vpermt2w ymm0, ymm25, ymm14 | |
vpblendd ymm0, ymm0, ymm1, 128 | |
vpsrlq xmm1, xmm15, 48 | |
vmovdqa64 ymm25, ymm0 | |
vpsrlq xmm0, xmm3, 48 | |
vpunpcklwd xmm0, xmm0, xmm1 | |
vpsrlq xmm1, xmm22, 48 | |
vmovapd xmm22, xmm15 | |
vmovdqa xmm15, xmm6 | |
vpunpckldq xmm0, xmm0, xmm1 | |
vmovdqa64 xmm1, xmm29 | |
vpblendw xmm0, xmm0, xmm1, 8 | |
vpsrlq xmm1, xmm9, 48 | |
vpunpcklqdq xmm0, xmm0, xmm1 | |
vpbroadcastw xmm1, word ptr [r9 + 2*r13 - 63946] | |
vpblendw xmm0, xmm0, xmm1, 32 | |
vpmovzxwd xmm1, xmm16 | |
vpblendd xmm0, xmm0, xmm1, 8 | |
vpbroadcastw xmm1, word ptr [r9 + 2*r13 - 51146] | |
vpblendw xmm0, xmm0, xmm1, 128 | |
vpsrldq xmm1, xmm3, 10 | |
vpunpcklwd xmm1, xmm1, xmm2 | |
vpsrldq xmm2, xmm6, 10 | |
vpunpckldq xmm1, xmm1, xmm2 | |
vpbroadcastw xmm2, word ptr [r9 + 2*r13 - 76742] | |
vpblendw xmm1, xmm1, xmm2, 8 | |
vpsrldq xmm2, xmm9, 10 | |
vpunpcklqdq xmm1, xmm1, xmm2 | |
vmovdqa64 xmm2, xmm24 | |
vpblendw xmm1, xmm1, xmm2, 32 | |
vpsllq xmm2, xmm16, 16 | |
vpblendd xmm1, xmm1, xmm2, 8 | |
vpbroadcastw xmm2, word ptr [r9 + 2*r13 - 51142] | |
vpblendw xmm6, xmm1, xmm2, 128 | |
vpsrldq xmm1, xmm22, 14 | |
vpsrldq xmm2, xmm3, 14 | |
vpmovsxbw ymm3, xmmword ptr [rip + .LCPI1_125] | |
vmovdqa64 xmm22, xmm29 | |
vpunpcklwd xmm1, xmm2, xmm1 | |
vpsrldq xmm2, xmm15, 14 | |
vmovdqa64 xmm15, xmm30 | |
vpmovsxbw ymm30, xmmword ptr [rip + .LCPI1_131] | |
vpunpckldq xmm1, xmm1, xmm2 | |
vpbroadcastw xmm2, word ptr [r9 + 2*r13 - 76738] | |
vpermt2w ymm5, ymm3, ymm10 | |
vpblendw xmm1, xmm1, xmm2, 8 | |
vpsrldq xmm2, xmm9, 14 | |
vpunpcklqdq xmm1, xmm1, xmm2 | |
vpbroadcastw xmm2, word ptr [r9 + 2*r13 - 63938] | |
vpermt2w ymm5, ymm17, ymm11 | |
vpmovsxbw ymm17, xmmword ptr [rip + .LCPI1_145] | |
vpblendw xmm1, xmm1, xmm2, 32 | |
vpsrld xmm2, xmm16, 16 | |
vpmovsxbw ymm16, xmmword ptr [rip + .LCPI1_129] | |
vpblendd xmm1, xmm1, xmm2, 8 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_97] | |
vpermt2w ymm7, ymm17, ymm4 | |
vpmovsxbw ymm17, xmmword ptr [rip + .LCPI1_151] | |
vpblendw xmm3, xmm1, xmm15, 128 | |
vpbroadcastd ymm1, dword ptr [rip + .LCPI1_66] | |
vmovdqa64 ymm26, ymm2 | |
vpermt2w ymm28, ymm2, ymm10 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_124] | |
vpermt2w ymm7, ymm17, ymm13 | |
vpmovsxbw ymm17, xmmword ptr [rip + .LCPI1_148] | |
vpermt2w ymm8, ymm2, ymm10 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_126] | |
vpermt2w ymm6, ymm2, ymm10 | |
vpmovsxbw ymm2, xmmword ptr [rip + .LCPI1_127] | |
vpermt2w ymm3, ymm2, ymm10 | |
vpermt2w ymm10, ymm1, ymm11 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_130] | |
vpermt2w ymm10, ymm18, ymm4 | |
vpmovsxbw ymm18, xmmword ptr [rip + .LCPI1_132] | |
vpblendw ymm2, ymm10, ymm13, 8 | |
vpbroadcastw xmm10, word ptr [r9 + 2*r13 - 89544] | |
vpblendd ymm2, ymm0, ymm2, 240 | |
vpbroadcastw ymm0, word ptr [r9 + 2*r13 - 95944] | |
vpunpcklwd xmm0, xmm0, xmm10 | |
vunpcklps xmm0, xmm0, dword ptr [r9 + 2*r13 - 83144]{1to4} | |
vpsrldq xmm10, xmm29, 2 | |
vmovdqa64 xmm29, xmm15 | |
vpblendw xmm0, xmm0, xmm10, 8 | |
vpslld xmm10, xmm24, 16 | |
vpblendd xmm0, xmm9, xmm0, 3 | |
vpblendw xmm0, xmm0, xmm10, 32 | |
vpbroadcastd xmm10, dword ptr [r9 + 2*r13 - 57544] | |
vpblendd xmm0, xmm0, xmm10, 8 | |
vpsllq xmm10, xmm15, 48 | |
vpbroadcastd ymm15, dword ptr [r9 + 2*r13 - 6344] | |
vpblendw xmm0, xmm0, xmm10, 128 | |
vpbroadcastq ymm10, qword ptr [r9 + 2*r13 - 44744] | |
vpblendd ymm0, ymm0, ymm10, 240 | |
vpbroadcastd ymm10, dword ptr [r9 + 2*r13 - 31944] | |
vpermt2w ymm0, ymm1, ymm11 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_133] | |
vpblendd ymm10, ymm0, ymm10, 32 | |
vmovdqa ymm0, ymmword ptr [r9 + 2*r13 - 19168] | |
vpermt2w ymm10, ymm30, ymm13 | |
vpblendd ymm10, ymm10, ymm0, 192 | |
vpermt2w ymm10, ymm16, ymm14 | |
vpmovsxbw ymm16, xmmword ptr [rip + .LCPI1_134] | |
vpblendd ymm10, ymm10, ymm15, 128 | |
vmovdqa ymm15, ymm6 | |
vpermt2w ymm15, ymm18, ymm11 | |
vpmovsxbw ymm18, xmmword ptr [rip + .LCPI1_135] | |
vpermt2w ymm15, ymm1, ymm4 | |
vpbroadcastd ymm1, dword ptr [r9 + 2*r13 - 83140] | |
vpermt2w ymm15, ymm16, ymm13 | |
vpbroadcastw xmm16, word ptr [r9 + 2*r13 - 89540] | |
vpermt2w ymm15, ymm18, ymm0 | |
vpmovsxbw ymm18, xmmword ptr [rip + .LCPI1_136] | |
vpblendw ymm15, ymm15, ymm14, 32 | |
vpblendd ymm6, ymm6, ymm15, 240 | |
vpbroadcastw ymm15, word ptr [r9 + 2*r13 - 95940] | |
vpunpcklwd xmm15, xmm15, xmm16 | |
vpmovsxbw ymm16, xmmword ptr [rip + .LCPI1_139] | |
vpblendd xmm1, xmm15, xmm1, 2 | |
vpsrldq xmm15, xmm22, 6 | |
vpblendw xmm1, xmm1, xmm15, 8 | |
vpmovsxbw ymm15, xmmword ptr [rip + .LCPI1_138] | |
vshufps xmm1, xmm1, xmm9, 244 | |
vpsrlq xmm9, xmm24, 16 | |
vpblendw xmm1, xmm1, xmm9, 32 | |
vpbroadcastd ymm9, dword ptr [r9 + 2*r13 - 57540] | |
vpermt2w ymm8, ymm16, ymm11 | |
vpmovsxbw ymm16, xmmword ptr [rip + .LCPI1_143] | |
vpermt2w ymm12, ymm15, ymm11 | |
vpmovsxbw ymm15, xmmword ptr [rip + .LCPI1_147] | |
vpblendd xmm1, xmm1, xmm9, 8 | |
vpslld xmm9, xmm29, 16 | |
vpblendw xmm1, xmm1, xmm9, 128 | |
vpbroadcastd ymm9, dword ptr [r9 + 2*r13 - 44740] | |
vpermt2w ymm8, ymm16, ymm4 | |
vpmovsxbw ymm16, xmmword ptr [rip + .LCPI1_150] | |
vpblendd ymm1, ymm1, ymm9, 240 | |
vpbroadcastd ymm9, dword ptr [r9 + 2*r13 - 31940] | |
vpermt2w ymm1, ymm18, ymm11 | |
vpmovsxbw ymm18, xmmword ptr [rip + .LCPI1_137] | |
vpblendd ymm1, ymm1, ymm9, 32 | |
vpbroadcastd ymm9, dword ptr [r9 + 2*r13 - 19140] | |
vpermt2w ymm1, ymm18, ymm13 | |
vpmovsxbw ymm18, xmmword ptr [rip + .LCPI1_141] | |
vpblendd ymm9, ymm1, ymm9, 192 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_98] | |
vpermt2w ymm3, ymm18, ymm11 | |
vpmovsxbw ymm18, xmmword ptr [rip + .LCPI1_104] | |
vpermt2w ymm28, ymm1, ymm11 | |
vmovdqa64 ymm22, ymm1 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_142] | |
vpmovsxbw ymm11, xmmword ptr [rip + .LCPI1_146] | |
vpermt2w ymm28, ymm18, ymm4 | |
vmovdqa64 ymm18, ymmword ptr [rsp + 800] | |
vpermt2w ymm12, ymm1, ymm4 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_144] | |
vpermt2w ymm3, ymm11, ymm4 | |
vpmovsxbw ymm11, xmmword ptr [rip + .LCPI1_101] | |
vpermt2w ymm12, ymm15, ymm13 | |
vpmovsxbw ymm15, xmmword ptr [rip + .LCPI1_153] | |
vpermt2w ymm5, ymm1, ymm4 | |
vpermt2w ymm28, ymm11, ymm13 | |
vpmovsxbw ymm11, xmmword ptr [rip + .LCPI1_149] | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_158] | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_159] | |
vpermt2w ymm5, ymm16, ymm13 | |
vpmovsxbw ymm16, xmmword ptr [rip + .LCPI1_154] | |
vpermt2w ymm8, ymm11, ymm13 | |
vmovdqa ymm11, ymm3 | |
vpermt2w ymm11, ymm17, ymm13 | |
vpmovsxbw ymm13, xmmword ptr [rip + .LCPI1_152] | |
vpmovsxbw ymm17, xmmword ptr [rip + .LCPI1_118] | |
vpermt2w ymm5, ymm16, ymm0 | |
vpmovsxbw ymm16, xmmword ptr [rip + .LCPI1_155] | |
vpermt2w ymm8, ymm15, ymm0 | |
vpmovsxbw ymm15, xmmword ptr [rip + .LCPI1_161] | |
vpermt2w ymm28, ymm17, ymm0 | |
vpermt2w ymm12, ymm13, ymm0 | |
vpmovsxbw ymm13, xmmword ptr [rip + .LCPI1_160] | |
vmovdqa64 ymm17, ymmword ptr [rsp + 832] | |
vpermt2w ymm7, ymm16, ymm0 | |
vpmovsxbw ymm16, xmmword ptr [rip + .LCPI1_156] | |
vpermt2w ymm28, ymm1, ymm14 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_163] | |
vpermt2w ymm8, ymm15, ymm14 | |
vpmovsxbw ymm15, xmmword ptr [rip + .LCPI1_164] | |
vpermt2w ymm12, ymm13, ymm14 | |
vpmovsxbw ymm13, xmmword ptr [rip + .LCPI1_167] | |
vpermt2w ymm2, ymm16, ymm0 | |
vpmovsxbw ymm16, xmmword ptr [rip + .LCPI1_157] | |
vpermt2w ymm7, ymm1, ymm14 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_169] | |
vpermt2w ymm2, ymm15, ymm14 | |
vmovdqa ymm15, ymmword ptr [rsp + 896] | |
vpermt2w ymm11, ymm16, ymm0 | |
vpmovsxbw ymm0, xmmword ptr [rip + .LCPI1_162] | |
vmovdqa64 ymm16, ymm28 | |
vpmovsxbw ymm28, xmmword ptr [rip + .LCPI1_95] | |
vpermt2w ymm5, ymm0, ymm14 | |
vpmovsxbw ymm0, xmmword ptr [rip + .LCPI1_165] | |
vpermt2w ymm9, ymm0, ymm14 | |
vpmovsxbw ymm0, xmmword ptr [rip + .LCPI1_166] | |
vpermt2w ymm11, ymm0, ymm14 | |
vmovdqa ymm0, ymmword ptr [r9 + 2*r13 - 6368] | |
vmovdqa ymm14, ymmword ptr [rsp + 960] | |
vpermt2w ymm8, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_171] | |
vpermt2w ymm12, ymm13, ymm0 | |
vpmovsxbw ymm13, xmmword ptr [rip + .LCPI1_170] | |
vpermt2w ymm16, ymm4, ymm0 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_174] | |
vpermt2w ymm7, ymm1, ymm0 | |
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI1_173] | |
vpermt2w ymm5, ymm13, ymm0 | |
vpmovsxbw ymm13, xmmword ptr [rip + .LCPI1_172] | |
vpermt2w ymm6, ymm1, ymm0 | |
vpblendd ymm1, ymm9, ymm0, 128 | |
vpmovsxbw ymm9, xmmword ptr [rip + .LCPI1_168] | |
vpermt2w ymm2, ymm13, ymm0 | |
vmovdqa ymm13, ymmword ptr [rsp + 928] | |
vpermt2w ymm11, ymm9, ymm0 | |
vmovdqa ymm0, ymmword ptr [r9 + 2*r13 + 32] | |
vpmovsxbw ymm9, xmmword ptr [rip + .LCPI1_122] | |
add r13, 16 | |
vpermt2w ymm16, ymm4, ymm0 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_175] | |
vpermt2w ymm14, ymm9, ymm0 | |
vmovaps ymm9, ymmword ptr [rsp + 704] | |
vpermt2w ymm13, ymm4, ymm0 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_176] | |
vmovups ymmword ptr [r15 - 307680], ymm9 | |
vmovaps ymm9, ymmword ptr [rsp + 672] | |
vpermt2w ymm12, ymm4, ymm0 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_177] | |
vpermt2w ymm15, ymm4, ymm0 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_178] | |
vpermt2w ymm8, ymm4, ymm0 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_179] | |
vpermt2w ymm17, ymm4, ymm0 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_180] | |
vpermt2w ymm5, ymm4, ymm0 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_181] | |
vpermt2w ymm18, ymm4, ymm0 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_182] | |
vpermt2w ymm7, ymm4, ymm0 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_183] | |
vpermt2w ymm25, ymm4, ymm0 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_184] | |
vpermt2w ymm2, ymm4, ymm0 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_185] | |
vpermt2w ymm10, ymm4, ymm0 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_186] | |
vpermt2w ymm6, ymm4, ymm0 | |
vpmovsxbw ymm4, xmmword ptr [rip + .LCPI1_187] | |
vpermt2w ymm1, ymm4, ymm0 | |
vmovaps ymm4, ymmword ptr [rsp + 1344] | |
vpblendw ymm0, ymm11, ymm0, 128 | |
vpmovsxbw ymm11, xmmword ptr [rip + .LCPI1_96] | |
vpblendd ymm0, ymm3, ymm0, 240 | |
vmovups ymmword ptr [r15 - 307648], ymm4 | |
vmovaps ymm4, ymmword ptr [rsp + 1664] | |
vmovups ymmword ptr [r15 - 307616], ymm9 | |
vmovaps ymm9, ymmword ptr [rsp + 1632] | |
vmovups ymmword ptr [r15 - 307584], ymm4 | |
vmovaps ymm4, ymmword ptr [rsp + 1600] | |
vmovups ymmword ptr [r15 - 307552], ymm9 | |
vmovaps ymm9, ymmword ptr [rsp + 640] | |
vmovups ymmword ptr [r15 - 307520], ymm4 | |
vmovaps ymm4, ymmword ptr [rsp + 1568] | |
vmovups ymmword ptr [r15 - 307488], ymm9 | |
vmovaps ymm9, ymmword ptr [rsp + 608] | |
vmovups ymmword ptr [r15 - 307456], ymm4 | |
vmovaps ymm4, ymmword ptr [rsp + 1536] | |
vmovups ymmword ptr [r15 - 307424], ymm9 | |
vmovaps ymm9, ymmword ptr [rsp + 320] | |
vmovups ymmword ptr [r15 - 307392], ymm4 | |
vmovaps ymm4, ymmword ptr [rsp + 1504] | |
vmovups ymmword ptr [r15 - 307360], ymm9 | |
vmovaps ymm9, ymmword ptr [rsp + 1472] | |
vmovups ymmword ptr [r15 - 307328], ymm4 | |
vmovaps ymm4, ymmword ptr [rsp + 1440] | |
vmovups ymmword ptr [r15 - 307296], ymm9 | |
vmovaps ymm9, ymmword ptr [rsp + 1408] | |
vmovups ymmword ptr [r15 - 307264], ymm4 | |
vmovaps ymm4, ymmword ptr [rsp + 1376] | |
vmovups ymmword ptr [r15 - 307232], ymm9 | |
vmovaps ymm9, ymmword ptr [rsp + 1088] | |
vmovups ymmword ptr [r15 - 307200], ymm4 | |
vmovaps ymm4, ymmword ptr [rsp + 992] | |
vmovups ymmword ptr [r15 - 205280], ymm9 | |
vmovaps ymm9, ymmword ptr [rsp + 576] | |
vmovups ymmword ptr [r15 - 205248], ymm4 | |
vmovaps ymm4, ymmword ptr [rsp + 864] | |
vmovups ymmword ptr [r15 - 205216], ymm9 | |
vmovaps ymm9, ymmword ptr [rsp + 1312] | |
vmovups ymmword ptr [r15 - 205184], ymm4 | |
vmovaps ymm4, ymmword ptr [rsp + 1024] | |
vmovups ymmword ptr [r15 - 205152], ymm9 | |
vmovaps ymm9, ymmword ptr [rsp + 544] | |
vmovups ymmword ptr [r15 - 205120], ymm4 | |
vmovaps ymm4, ymmword ptr [rsp + 1280] | |
vmovups ymmword ptr [r15 - 205088], ymm9 | |
vmovaps ymm9, ymmword ptr [rsp + 512] | |
vmovups ymmword ptr [r15 - 205056], ymm4 | |
vmovaps ymm4, ymmword ptr [rsp + 1248] | |
vmovups ymmword ptr [r15 - 205024], ymm9 | |
vmovaps ymm9, ymmword ptr [rsp + 288] | |
vmovups ymmword ptr [r15 - 204992], ymm4 | |
vmovaps ymm4, ymmword ptr [rsp + 1216] | |
vmovups ymmword ptr [r15 - 204960], ymm9 | |
vmovaps ymm9, ymmword ptr [rsp + 1184] | |
vmovups ymmword ptr [r15 - 204928], ymm4 | |
vmovaps ymm4, ymmword ptr [rsp + 1152] | |
vmovups ymmword ptr [r15 - 204896], ymm9 | |
vmovaps ymm9, ymmword ptr [rsp + 1120] | |
vmovups ymmword ptr [r15 - 204864], ymm4 | |
vmovaps ymm4, ymmword ptr [rsp + 1056] | |
vmovups ymmword ptr [r15 - 204832], ymm9 | |
vmovaps ymm9, ymmword ptr [rsp + 256] | |
vmovups ymmword ptr [r15 - 204800], ymm4 | |
vmovaps ymm4, ymmword ptr [rsp + 224] | |
vmovups ymmword ptr [r15 - 102880], ymm9 | |
vmovdqu64 ymmword ptr [r15 - 102848], ymm27 | |
vmovaps ymm9, ymmword ptr [rsp + 192] | |
vmovups ymmword ptr [r15 - 102816], ymm4 | |
vmovaps ymm4, ymmword ptr [rsp + 160] | |
vmovdqu64 ymmword ptr [r15 - 102784], ymm31 | |
vmovups ymmword ptr [r15 - 102752], ymm9 | |
vmovdqu64 ymmword ptr [r15 - 102720], ymm21 | |
vmovaps ymm9, ymmword ptr [rsp + 480] | |
vmovups ymmword ptr [r15 - 102688], ymm4 | |
vmovaps ymm4, ymmword ptr [rsp + 128] | |
vmovdqu64 ymmword ptr [r15 - 102656], ymm19 | |
vmovups ymmword ptr [r15 - 102624], ymm9 | |
vmovdqu64 ymmword ptr [r15 - 102592], ymm20 | |
vmovaps ymm9, ymmword ptr [rsp + 448] | |
vmovups ymmword ptr [r15 - 102560], ymm4 | |
vmovaps ymm4, ymmword ptr [rsp + 416] | |
vmovdqu64 ymmword ptr [r15 - 102528], ymm23 | |
vmovups ymmword ptr [r15 - 102496], ymm9 | |
vmovaps ymm9, ymmword ptr [rsp + 384] | |
vmovups ymmword ptr [r15 - 102464], ymm4 | |
vmovaps ymm4, ymmword ptr [rsp + 352] | |
vmovups ymmword ptr [r15 - 102432], ymm9 | |
vmovups ymmword ptr [r15 - 102400], ymm4 | |
vmovdqu ymmword ptr [r15 - 480], ymm14 | |
vmovdqu64 ymmword ptr [r15 - 448], ymm16 | |
vmovdqu ymmword ptr [r15 - 416], ymm13 | |
vmovdqu ymmword ptr [r15 - 384], ymm12 | |
vmovdqu ymmword ptr [r15 - 352], ymm15 | |
vmovdqu ymmword ptr [r15 - 320], ymm8 | |
vmovdqu64 ymmword ptr [r15 - 288], ymm17 | |
vmovdqu ymmword ptr [r15 - 256], ymm5 | |
vmovdqu64 ymmword ptr [r15 - 224], ymm18 | |
vmovdqu ymmword ptr [r15 - 192], ymm7 | |
vmovdqu64 ymmword ptr [r15 - 160], ymm25 | |
vmovdqu ymmword ptr [r15 - 128], ymm2 | |
vmovdqu ymmword ptr [r15 - 96], ymm10 | |
vmovdqu ymmword ptr [r15 - 64], ymm6 | |
vmovdqu ymmword ptr [r15 - 32], ymm1 | |
vmovdqu ymmword ptr [r15], ymm0 | |
add r15, 512 | |
cmp r13, 48 | |
jb .LBB1_9 | |
inc rsi | |
add r14, 55296000 | |
cmp rsi, rdx | |
jne .LBB1_8 | |
jmp .LBB1_11 | |
.LBB1_14: | |
xor eax, eax | |
lea rsp, [rbp - 40] | |
.loc 1 4 3 epilogue_begin | |
pop rbx | |
pop r12 | |
pop r13 | |
pop r14 | |
pop r15 | |
pop rbp | |
.cfi_def_cfa rsp, 8 | |
vzeroupper | |
ret | |
.Ltmp3: | |
.Lfunc_end1: | |
.size turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack, .Lfunc_end1-turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack | |
.cfi_endproc | |
.section .text.turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32,"ax",@progbits | |
.p2align 4, 0x90 | |
.type turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32,@function | |
# -----------------------------------------------------------------------------
# dispatch_2: tiled multiply-accumulate kernel (16x16 f32 output tiles).
#
# Syntax : GAS, .intel_syntax noprefix. Leaf function (contains no call).
# In     : rsi, rdx = pointers to caller-provided state records. Presumably the
#          IREE HAL dispatch state and workgroup state -- TODO confirm against
#          the runtime ABI. rdi is overwritten without being read.
#          Fields read here:
#            [rsi+12], [rsi+16] dword, [rsi+20] word  -> loop steps
#            [rsi+24] pointer -> r8; reads [r8], [r8+8], [r8+24], [r8+32], [r8+36]
#            [rsi+32] pointer -> two base pointers at +0 and +8
#            [rdx], [rdx+4] dword, [rdx+8] word       -> loop start indices
# Out    : eax = 0.
# Saved  : rbx, r12-r15, rbp are pushed/popped; vzeroupper before ret.
# Stack  : 120 bytes reserved below the five pushes (rsp = rbp-160). Slots
#          [rbp-224]..[rbp-288] lie in the 128 bytes below rsp, so this relies
#          on the System V red zone and on the function staying a leaf.
#
# Loop nest (outer to inner):
#   .LBB2_2  index [rbp-48], start word [rdx+8], step [rbp-144], bound [rbp-160]
#   .LBB2_4  index [rbp-64], start dword [rdx+4], step [rbp-192], bound [rbp-104]
#   .LBB2_6  index r8, start dword [rdx], step [rbp-216], bound 540
#   .LBB2_7/.LBB2_8  zero one 16x16 dword tile at rbx
#   .LBB2_11 reduction: rax = 0..102400 step 32 (3200 iterations)
#
# Frame slots:
#   [rbp-56]/[rbp-72]  output tile byte pointer (outer / middle level), rbx inner
#   [rbp-80]           f32 operand pointer biased by +60 (outer), r12 middle
#   [rbp-96]           f16 operand pointer (outer), r11 inner
#   [rbp-88]           middle-loop start, [rbp-112] inner-loop start
#   [rbp-120]          [rbp-104] * 138240, [rbp-168] = [rbp-48] * [rbp-120]
#   [rbp-128],[rbp-136],[rbp-152]  outer-loop pointer strides
#   [rbp-176],[rbp-184]            middle-loop pointer strides
#   [rbp-200],[rbp-208]            inner-loop pointer strides
#   [rbp-224]..[rbp-288]           element indices of accumulator rows 0..8
# -----------------------------------------------------------------------------
turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32: | |
.Lfunc_begin2: | |
.loc 1 1 0 is_stmt 1 | |
.cfi_startproc | |
push rbp | |
.cfi_def_cfa_offset 16 | |
.cfi_offset rbp, -16 | |
mov rbp, rsp | |
.cfi_def_cfa_register rbp | |
push r15 | |
push r14 | |
push r13 | |
push r12 | |
push rbx | |
sub rsp, 120 | |
.cfi_offset rbx, -56 | |
.cfi_offset r12, -48 | |
.cfi_offset r13, -40 | |
.cfi_offset r14, -32 | |
.cfi_offset r15, -24 | |
.Ltmp4: | |
.loc 1 4 3 prologue_end | |
# Early exit when the outer start index (word [rdx+8]) is already >= the
# outer bound (qword [r8+24]); signed compare.
mov r8, qword ptr [rsi + 24] | |
mov rax, rdx | |
movzx edx, word ptr [rdx + 8] | |
mov rcx, qword ptr [r8 + 24] | |
mov qword ptr [rbp - 48], rdx | |
mov qword ptr [rbp - 160], rcx | |
cmp rcx, rdx | |
jle .LBB2_15 | |
.loc 1 0 3 is_stmt 0 | |
# Loop-invariant setup. edx/edi are the low/high dwords of a 64-bit bound
# (combined below as (rdi << 32) + rdx). The movabs constant equals
# 138240 << 32, so r13 ends up as that 64-bit bound times 138240.
mov edx, dword ptr [r8 + 32] | |
mov edi, dword ptr [r8 + 36] | |
movabs r10, 593736278999040 | |
mov r12d, dword ptr [rax + 4] | |
mov rbx, qword ptr [rbp - 48] | |
mov r14d, dword ptr [rax] | |
mov rcx, qword ptr [rsi + 32] | |
# bextr control words: 15361 = start bit 1, length 60; 15106 = start bit 2,
# length 59. The results are rescaled by 2 and 4 in the lea instructions below.
mov r9d, 15361 | |
mov r11d, 15106 | |
mov r15d, dword ptr [rsi + 12] | |
bextr r9, qword ptr [r8], r9 | |
bextr r8, qword ptr [r8 + 8], r11 | |
.loc 1 4 3 | |
imul r10, rdi | |
imul r13, rdx, 138240 | |
imul r11, r14, 102400 | |
mov qword ptr [rbp - 112], r14 | |
# r14 (dword [rdx] of the second argument, zero-extended) is scaled in place;
# the unscaled value was saved to [rbp-112] just above.
shl r14, 10 | |
shl rdi, 32 | |
mov qword ptr [rbp - 88], r12 | |
mov qword ptr [rbp - 216], r15 | |
add r13, r10 | |
imul r10, r12, 552960 | |
mov rax, r13 | |
imul rax, rbx | |
lea rax, [r10 + 4*rax] | |
imul r10, rbx, 55296000 | |
add r14, rax | |
mov rax, qword ptr [rcx + 8] | |
add r11, r10 | |
lea r10, [rdi + rdx] | |
add rdi, rdx | |
lea r11, [r11 + 2*r9] | |
lea r9, [r14 + 4*r8] | |
mov rdx, rdi | |
imul rdx, rbx | |
mov qword ptr [rbp - 104], r10 | |
# r14 = base pointer [rcx+8] plus 4 * extracted offset; every accumulator
# load/store below is addressed as [r14 + 4*element_index].
lea r14, [rax + 4*r8] | |
mov r8d, dword ptr [rsi + 16] | |
movzx esi, word ptr [rsi + 20] | |
add r9, rax | |
imul rax, rdx, 204800 | |
imul rdx, r12, 204800 | |
mov qword ptr [rbp - 56], r9 | |
mov r9, qword ptr [rcx] | |
mov rcx, r15 | |
shl rcx, 10 | |
mov qword ptr [rbp - 208], rcx | |
imul rcx, r10, 138240 | |
add rdx, rax | |
imul rdi, rsi | |
mov qword ptr [rbp - 120], rcx | |
imul rcx, rsi, 55296000 | |
imul r13, rsi | |
mov qword ptr [rbp - 144], rsi | |
mov qword ptr [rbp - 192], r8 | |
# The +60 bias pairs with the -60..0 displacements used in .LBB2_11.
lea rax, [r9 + rdx + 60] | |
add r11, r9 | |
mov qword ptr [rbp - 96], r11 | |
imul rdx, rdi, 204800 | |
mov qword ptr [rbp - 128], rcx | |
imul rcx, r8, 204800 | |
shl r13, 2 | |
mov qword ptr [rbp - 80], rax | |
mov qword ptr [rbp - 152], r13 | |
mov qword ptr [rbp - 136], rdx | |
imul rdx, r8, 552960 | |
mov qword ptr [rbp - 176], rcx | |
mov qword ptr [rbp - 184], rdx | |
imul rdx, r15, 102400 | |
mov qword ptr [rbp - 200], rdx | |
jmp .LBB2_2 | |
.p2align 4, 0x90 | |
# Outer-loop latch: advance the outer index and the three outer-level
# pointers by their strides, then leave once index >= bound (signed).
.LBB2_14: | |
.loc 1 0 3 | |
mov rsi, qword ptr [rbp - 128] | |
mov rdx, qword ptr [rbp - 136] | |
mov rax, qword ptr [rbp - 48] | |
mov rcx, qword ptr [rbp - 56] | |
.loc 1 4 3 | |
add rax, qword ptr [rbp - 144] | |
add rcx, qword ptr [rbp - 152] | |
add qword ptr [rbp - 96], rsi | |
add qword ptr [rbp - 80], rdx | |
mov qword ptr [rbp - 56], rcx | |
mov qword ptr [rbp - 48], rax | |
cmp rax, qword ptr [rbp - 160] | |
jge .LBB2_15 | |
# Outer-loop body: skip the middle loop entirely when its bound is <= its
# start index; otherwise seed the middle-level pointers.
.LBB2_2: | |
.loc 1 0 3 | |
mov rax, qword ptr [rbp - 104] | |
cmp rax, qword ptr [rbp - 88] | |
.loc 1 4 3 | |
jle .LBB2_14 | |
.loc 1 0 3 | |
mov rax, qword ptr [rbp - 48] | |
mov r12, qword ptr [rbp - 80] | |
mov rdx, qword ptr [rbp - 56] | |
mov rcx, qword ptr [rbp - 88] | |
imul rax, qword ptr [rbp - 120] | |
mov qword ptr [rbp - 72], rdx | |
mov qword ptr [rbp - 64], rcx | |
mov qword ptr [rbp - 168], rax | |
jmp .LBB2_4 | |
.p2align 4, 0x90 | |
# Middle-loop latch: advance r12, the middle index and the output pointer.
.LBB2_13: | |
mov rcx, qword ptr [rbp - 64] | |
mov rax, qword ptr [rbp - 72] | |
.loc 1 4 3 | |
add r12, qword ptr [rbp - 176] | |
add rcx, qword ptr [rbp - 192] | |
add rax, qword ptr [rbp - 184] | |
mov qword ptr [rbp - 72], rax | |
mov qword ptr [rbp - 64], rcx | |
cmp rcx, qword ptr [rbp - 104] | |
jge .LBB2_14 | |
# Middle-loop body: the inner tile loop runs only when its start index is
# <= 539 (unsigned compare on the low dword of [rbp-112]).
.LBB2_4: | |
.loc 1 0 3 | |
cmp dword ptr [rbp - 112], 539 | |
.loc 1 4 3 | |
ja .LBB2_13 | |
.loc 1 0 3 | |
# rcx = element index of the first tile for this (outer, middle) pair.
imul rcx, qword ptr [rbp - 64], 138240 | |
mov r11, qword ptr [rbp - 96] | |
mov rbx, qword ptr [rbp - 72] | |
mov r8, qword ptr [rbp - 112] | |
add rcx, qword ptr [rbp - 168] | |
.p2align 4, 0x90 | |
# Inner tile loop: one 16x16 output tile per iteration; rax = r8 * 256 is the
# tile's element offset within the row of tiles.
.LBB2_6: | |
mov rax, r8 | |
shl rax, 8 | |
mov rdx, rbx | |
xor esi, esi | |
.p2align 4, 0x90 | |
# Zero-fill the tile at rbx: 16 rows (rsi) of 16 dwords (rdi), 64 bytes apart.
# NOTE(review): the vector loads that follow appear to re-read this same tile
# through r14 (both addresses reduce to base + 4*(rcx + r8*256)); worth
# confirming before any attempt to fold the fill into register zeroing.
.LBB2_7: | |
xor edi, edi | |
.p2align 4, 0x90 | |
.LBB2_8: | |
.loc 1 4 3 | |
mov dword ptr [rdx + 4*rdi], 0 | |
inc rdi | |
cmp rdi, 16 | |
jne .LBB2_8 | |
inc rsi | |
add rdx, 64 | |
cmp rsi, 16 | |
jne .LBB2_7 | |
# Load the 16 accumulator rows (element offsets 0,16,...,240 from rcx+rax)
# into zmm0..zmm15. Rows 0..8 spill their element index to [rbp-288]..[rbp-224]
# because the scratch registers rdx/rsi/rax are reused; rows 9..15 keep theirs
# in rsi, r15, r10, rdx, rdi, r13, r9 until the store-back.
lea rdx, [rcx + rax] | |
lea rsi, [rcx + rax + 32] | |
lea r15, [rcx + rax + 160] | |
lea r10, [rcx + rax + 176] | |
lea rdi, [rcx + rax + 208] | |
lea r13, [rcx + rax + 224] | |
lea r9, [rcx + rax + 240] | |
mov qword ptr [rbp - 288], rdx | |
vmovups zmm0, zmmword ptr [r14 + 4*rdx] | |
lea rdx, [rcx + rax + 16] | |
mov qword ptr [rbp - 272], rsi | |
vmovups zmm1, zmmword ptr [r14 + 4*rsi] | |
lea rsi, [rcx + rax + 64] | |
vmovups zmm9, zmmword ptr [r14 + 4*r15] | |
vmovups zmm12, zmmword ptr [r14 + 4*r10] | |
vmovups zmm14, zmmword ptr [r14 + 4*rdi] | |
vmovups zmm13, zmmword ptr [r14 + 4*r13] | |
vmovups zmm15, zmmword ptr [r14 + 4*r9] | |
mov qword ptr [rbp - 280], rdx | |
vmovups zmm2, zmmword ptr [r14 + 4*rdx] | |
lea rdx, [rcx + rax + 48] | |
mov qword ptr [rbp - 256], rsi | |
vmovups zmm3, zmmword ptr [r14 + 4*rsi] | |
lea rsi, [rcx + rax + 96] | |
mov qword ptr [rbp - 264], rdx | |
vmovups zmm4, zmmword ptr [r14 + 4*rdx] | |
lea rdx, [rcx + rax + 80] | |
mov qword ptr [rbp - 240], rsi | |
vmovups zmm5, zmmword ptr [r14 + 4*rsi] | |
lea rsi, [rcx + rax + 128] | |
mov qword ptr [rbp - 248], rdx | |
vmovups zmm6, zmmword ptr [r14 + 4*rdx] | |
lea rdx, [rcx + rax + 112] | |
mov qword ptr [rbp - 224], rsi | |
vmovups zmm7, zmmword ptr [r14 + 4*rsi] | |
lea rsi, [rcx + rax + 144] | |
mov qword ptr [rbp - 232], rdx | |
vmovups zmm8, zmmword ptr [r14 + 4*rdx] | |
lea rdx, [rcx + rax + 192] | |
# rax becomes the reduction byte counter. Flags are dead here (the next flag
# consumer follows the add in .LBB2_11), so the xor zeroing idiom is safe.
xor eax, eax | |
vmovups zmm10, zmmword ptr [r14 + 4*rsi] | |
vmovups zmm11, zmmword ptr [r14 + 4*rdx] | |
.p2align 4, 0x90 | |
# Reduction loop. Per iteration: convert 16 halves at [r11 + rax] to f32 in
# zmm16, then for each of the 16 rows broadcast one f32 from r12 + 2*rax
# (displacements -60..0) and fuse-multiply-add into that row's accumulator.
# rax advances 32 bytes per step up to 102400, i.e. 3200 steps.
.LBB2_11: | |
.loc 1 0 3 | |
vcvtph2ps zmm16, ymmword ptr [r11 + rax] | |
vfmadd231ps zmm0, zmm16, dword ptr [r12 + 2*rax - 60]{1to16} | |
vfmadd231ps zmm2, zmm16, dword ptr [r12 + 2*rax - 56]{1to16} | |
vfmadd231ps zmm1, zmm16, dword ptr [r12 + 2*rax - 52]{1to16} | |
vfmadd231ps zmm4, zmm16, dword ptr [r12 + 2*rax - 48]{1to16} | |
vfmadd231ps zmm3, zmm16, dword ptr [r12 + 2*rax - 44]{1to16} | |
vfmadd231ps zmm6, zmm16, dword ptr [r12 + 2*rax - 40]{1to16} | |
vfmadd231ps zmm5, zmm16, dword ptr [r12 + 2*rax - 36]{1to16} | |
vfmadd231ps zmm8, zmm16, dword ptr [r12 + 2*rax - 32]{1to16} | |
vfmadd231ps zmm7, zmm16, dword ptr [r12 + 2*rax - 28]{1to16} | |
vfmadd231ps zmm10, zmm16, dword ptr [r12 + 2*rax - 24]{1to16} | |
vfmadd231ps zmm9, zmm16, dword ptr [r12 + 2*rax - 20]{1to16} | |
vfmadd231ps zmm12, zmm16, dword ptr [r12 + 2*rax - 16]{1to16} | |
vfmadd231ps zmm11, zmm16, dword ptr [r12 + 2*rax - 12]{1to16} | |
vfmadd231ps zmm14, zmm16, dword ptr [r12 + 2*rax - 8]{1to16} | |
vfmadd231ps zmm13, zmm16, dword ptr [r12 + 2*rax - 4]{1to16} | |
vfmadd231ps zmm15, zmm16, dword ptr [r12 + 2*rax]{1to16} | |
.loc 1 4 3 | |
add rax, 32 | |
cmp rax, 102400 | |
jne .LBB2_11 | |
.loc 1 0 3 | |
# Store the accumulators back to the rows they were loaded from and advance
# the tile index (r8), output pointer (rbx) and f16 pointer (r11).
mov rax, qword ptr [rbp - 288] | |
.loc 1 4 3 | |
add r8, qword ptr [rbp - 216] | |
add rbx, qword ptr [rbp - 208] | |
add r11, qword ptr [rbp - 200] | |
vmovups zmmword ptr [r14 + 4*rax], zmm0 | |
mov rax, qword ptr [rbp - 280] | |
vmovups zmmword ptr [r14 + 4*rax], zmm2 | |
mov rax, qword ptr [rbp - 272] | |
vmovups zmmword ptr [r14 + 4*rax], zmm1 | |
mov rax, qword ptr [rbp - 264] | |
vmovups zmmword ptr [r14 + 4*rax], zmm4 | |
mov rax, qword ptr [rbp - 256] | |
vmovups zmmword ptr [r14 + 4*rax], zmm3 | |
mov rax, qword ptr [rbp - 248] | |
vmovups zmmword ptr [r14 + 4*rax], zmm6 | |
mov rax, qword ptr [rbp - 240] | |
vmovups zmmword ptr [r14 + 4*rax], zmm5 | |
mov rax, qword ptr [rbp - 232] | |
vmovups zmmword ptr [r14 + 4*rax], zmm8 | |
mov rax, qword ptr [rbp - 224] | |
vmovups zmmword ptr [r14 + 4*rax], zmm7 | |
vmovups zmmword ptr [r14 + 4*rsi], zmm10 | |
vmovups zmmword ptr [r14 + 4*r15], zmm9 | |
vmovups zmmword ptr [r14 + 4*r10], zmm12 | |
vmovups zmmword ptr [r14 + 4*rdx], zmm11 | |
vmovups zmmword ptr [r14 + 4*rdi], zmm14 | |
vmovups zmmword ptr [r14 + 4*r13], zmm13 | |
vmovups zmmword ptr [r14 + 4*r9], zmm15 | |
cmp r8, 540 | |
jl .LBB2_6 | |
jmp .LBB2_13 | |
# Common exit: return 0, restore callee-saved registers, clear upper vector
# state before returning.
.LBB2_15: | |
xor eax, eax | |
.loc 1 4 3 epilogue_begin | |
add rsp, 120 | |
pop rbx | |
pop r12 | |
pop r13 | |
pop r14 | |
pop r15 | |
pop rbp | |
.cfi_def_cfa rsp, 8 | |
vzeroupper | |
ret | |
.Ltmp5: | |
.Lfunc_end2: | |
.size turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32, .Lfunc_end2-turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32 | |
.cfi_endproc | |
.section .text.turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32,"ax",@progbits | |
.p2align 4, 0x90 | |
.type turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32,@function | |
turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32: | |
.Lfunc_begin3: | |
.loc 1 1 0 is_stmt 1 | |
.cfi_startproc | |
push rbp | |
.cfi_def_cfa_offset 16 | |
.cfi_offset rbp, -16 | |
mov rbp, rsp | |
.cfi_def_cfa_register rbp | |
.Ltmp6: | |
push r15 | |
push r14 | |
push r13 | |
push r12 | |
push rbx | |
sub rsp, 128 | |
.cfi_offset rbx, -56 | |
.cfi_offset r12, -48 | |
.cfi_offset r13, -40 | |
.cfi_offset r14, -32 | |
.cfi_offset r15, -24 | |
.loc 1 4 3 prologue_end | |
mov rcx, qword ptr [rsi + 24] | |
movzx edi, word ptr [rdx + 8] | |
mov r15d, dword ptr [rcx + 20] | |
mov r9d, dword ptr [rcx + 16] | |
mov r12, rdi | |
shl edi, 6 | |
mov qword ptr [rbp - 56], rdi | |
shl r15, 32 | |
lea rax, [r15 + r9] | |
mov qword ptr [rbp - 112], rax | |
cmp rdi, rax | |
jge .LBB3_22 | |
.loc 1 0 3 is_stmt 0 | |
mov eax, dword ptr [rsi + 12] | |
mov r10d, dword ptr [rcx + 24] | |
mov r11d, dword ptr [rcx + 28] | |
mov r13d, dword ptr [rdx] | |
mov edx, dword ptr [rdx + 4] | |
movabs rbx, 37108517437440 | |
mov edi, dword ptr [rsi + 16] | |
movzx r8d, word ptr [rsi + 20] | |
mov rsi, qword ptr [rsi + 32] | |
.loc 1 4 3 | |
or r15, r9 | |
movabs r14, 148434069749760 | |
sub r15, qword ptr [rbp - 56] | |
mov qword ptr [rbp - 48], rax | |
imul rbx, r11 | |
imul rax, r10, 8640 | |
imul r9, rdx, 2211840 | |
imul r14, r11 | |
shl r11, 32 | |
shl rdx, 6 | |
mov qword ptr [rbp - 216], r13 | |
mov qword ptr [rbp - 104], rdx | |
add rax, rbx | |
imul rbx, r13, 3840 | |
add rbx, r9 | |
add rbx, qword ptr [rsi + 8] | |
mov r9, rax | |
imul r9, r12 | |
mov rsi, qword ptr [rsi] | |
imul rax, r8 | |
shl r9, 8 | |
shl rax, 8 | |
mov qword ptr [rbp - 160], rax | |
add rbx, r9 | |
mov r9d, 15106 | |
mov qword ptr [rbp - 64], rbx | |
lea rbx, [r11 + r10] | |
or r11, r10 | |
imul r10, r10, 34560 | |
bextr r9, qword ptr [rcx], r9 | |
sub r11, rdx | |
mov rdx, qword ptr [rbp - 48] | |
mov qword ptr [rbp - 208], rbx | |
mov qword ptr [rbp - 136], r11 | |
add r10, r14 | |
mov qword ptr [rbp - 280], r10 | |
mov r10, qword ptr [rcx + 8] | |
lea rcx, [rsi + 4*r9] | |
imul rsi, rdi, 2211840 | |
shl rdi, 6 | |
imul rax, rdx, 960 | |
imul rdx, rdx, 3840 | |
mov qword ptr [rbp - 192], rdi | |
mov qword ptr [rbp - 184], rcx | |
imul rcx, r13, 960 | |
mov qword ptr [rbp - 200], rsi | |
imul r12, r10 | |
imul r9, r10, 552960 | |
imul r10, r8 | |
shl r8d, 6 | |
mov qword ptr [rbp - 248], rax | |
mov qword ptr [rbp - 240], rdx | |
mov qword ptr [rbp - 144], r8 | |
mov qword ptr [rbp - 176], rcx | |
shl r12, 6 | |
shl r10, 6 | |
mov qword ptr [rbp - 272], r9 | |
mov qword ptr [rbp - 120], r12 | |
mov qword ptr [rbp - 152], r10 | |
jmp .LBB3_2 | |
.p2align 4, 0x90 | |
.LBB3_21: | |
.loc 1 0 3 | |
mov rdx, qword ptr [rbp - 152] | |
mov rsi, qword ptr [rbp - 64] | |
mov rax, qword ptr [rbp - 56] | |
mov rcx, qword ptr [rbp - 144] | |
mov r15, qword ptr [rbp - 168] | |
.loc 1 4 3 | |
add rsi, qword ptr [rbp - 160] | |
add qword ptr [rbp - 120], rdx | |
add rax, rcx | |
sub r15, rcx | |
mov qword ptr [rbp - 64], rsi | |
mov qword ptr [rbp - 56], rax | |
cmp rax, qword ptr [rbp - 112] | |
jge .LBB3_22 | |
.LBB3_2: | |
cmp r15, 64 | |
mov ecx, 64 | |
mov eax, 1 | |
mov qword ptr [rbp - 168], r15 | |
cmovl rcx, r15 | |
cmp rcx, 2 | |
cmovl rcx, rax | |
mov qword ptr [rbp - 288], rcx | |
cmp qword ptr [rbp - 104], rbx | |
jge .LBB3_21 | |
.loc 1 0 3 | |
mov rcx, qword ptr [rbp - 64] | |
mov rax, qword ptr [rbp - 112] | |
mov rdx, qword ptr [rbp - 104] | |
.loc 1 4 3 | |
sub rax, qword ptr [rbp - 56] | |
mov qword ptr [rbp - 72], rcx | |
mov rcx, qword ptr [rbp - 136] | |
mov qword ptr [rbp - 256], rax | |
jmp .LBB3_4 | |
.p2align 4, 0x90 | |
.LBB3_20: | |
.loc 1 0 3 | |
mov rsi, qword ptr [rbp - 72] | |
mov rax, qword ptr [rbp - 192] | |
mov rdx, qword ptr [rbp - 128] | |
mov rcx, qword ptr [rbp - 224] | |
mov rbx, qword ptr [rbp - 208] | |
.loc 1 4 3 | |
add rsi, qword ptr [rbp - 200] | |
add rdx, rax | |
sub rcx, rax | |
mov qword ptr [rbp - 72], rsi | |
cmp rdx, rbx | |
jge .LBB3_21 | |
.LBB3_4: | |
cmp rcx, 64 | |
mov eax, 64 | |
mov qword ptr [rbp - 128], rdx | |
mov qword ptr [rbp - 224], rcx | |
cmovl rax, rcx | |
sub rbx, rdx | |
mov edx, 64 | |
cmp rbx, 64 | |
mov qword ptr [rbp - 264], rax | |
mov qword ptr [rbp - 296], rbx | |
cmovl rdx, rbx | |
cmp dword ptr [rbp - 216], 8 | |
ja .LBB3_20 | |
.loc 1 0 3 | |
mov rsi, qword ptr [rbp - 128] | |
mov rax, rsi | |
sar rax, 63 | |
mov rcx, rax | |
xor rcx, rsi | |
lea rsi, [rcx + 15] | |
test rcx, rcx | |
cmovns rsi, rcx | |
mov rcx, qword ptr [rbp - 176] | |
sar rsi, 4 | |
xor rsi, rax | |
.loc 1 4 3 | |
add rsi, qword ptr [rbp - 120] | |
mov qword ptr [rbp - 80], rcx | |
imul rax, rsi, 552960 | |
add rax, qword ptr [rbp - 184] | |
mov rsi, qword ptr [rbp - 72] | |
mov qword ptr [rbp - 232], rax | |
mov qword ptr [rbp - 88], rsi | |
jmp .LBB3_6 | |
.p2align 4, 0x90 | |
.LBB3_19: | |
.loc 1 0 3 | |
mov rcx, qword ptr [rbp - 80] | |
mov rax, qword ptr [rbp - 88] | |
.loc 1 4 3 | |
add rcx, qword ptr [rbp - 248] | |
add rax, qword ptr [rbp - 240] | |
mov qword ptr [rbp - 88], rax | |
mov qword ptr [rbp - 80], rcx | |
cmp rcx, 8640 | |
jge .LBB3_20 | |
.LBB3_6: | |
.loc 1 0 3 | |
cmp qword ptr [rbp - 256], 0 | |
.loc 1 4 3 | |
jle .LBB3_19 | |
.loc 1 0 3 | |
mov rsi, qword ptr [rbp - 80] | |
mov rax, rsi | |
sar rax, 63 | |
mov rcx, rax | |
xor rcx, rsi | |
lea rsi, [rcx + 15] | |
test rcx, rcx | |
cmovns rsi, rcx | |
shr rsi, 4 | |
xor rsi, rax | |
mov rax, qword ptr [rbp - 88] | |
.loc 1 4 3 | |
shl rsi, 10 | |
add rsi, qword ptr [rbp - 232] | |
mov qword ptr [rbp - 96], rax | |
mov qword ptr [rbp - 48], rsi | |
xor esi, esi | |
jmp .LBB3_8 | |
.p2align 4, 0x90 | |
.LBB3_18: | |
.loc 1 0 3 | |
mov rax, qword ptr [rbp - 96] | |
mov rcx, qword ptr [rbp - 48] | |
.loc 1 4 3 | |
inc rsi | |
add rax, qword ptr [rbp - 280] | |
add rcx, qword ptr [rbp - 272] | |
mov qword ptr [rbp - 96], rax | |
mov qword ptr [rbp - 48], rcx | |
cmp rsi, qword ptr [rbp - 288] | |
je .LBB3_19 | |
.LBB3_8: | |
.loc 1 0 3 | |
cmp qword ptr [rbp - 296], 0 | |
.loc 1 4 3 | |
jle .LBB3_18 | |
.loc 1 0 3 | |
mov rbx, qword ptr [rbp - 48] | |
mov r12, qword ptr [rbp - 96] | |
mov r9, qword ptr [rbp - 264] | |
xor r13d, r13d | |
jmp .LBB3_10 | |
.p2align 4, 0x90 | |
.LBB3_17: | |
.loc 1 4 3 | |
add r13, 16 | |
add r9, -16 | |
add r12, 552960 | |
add rbx, 552960 | |
cmp r13, rdx | |
jge .LBB3_18 | |
.LBB3_10: | |
cmp r9, 16 | |
mov r10d, 16 | |
mov eax, 1 | |
mov r14, rbx | |
mov rcx, r12 | |
cmovl r10, r9 | |
cmp r10, 2 | |
cmovl r10, rax | |
xor eax, eax | |
jmp .LBB3_11 | |
.p2align 4, 0x90 | |
.LBB3_16: | |
lea rdi, [rax + 16] | |
add rcx, 64 | |
add r14, 1024 | |
cmp rax, 944 | |
mov rax, rdi | |
jae .LBB3_17 | |
.LBB3_11: | |
.loc 1 0 3 | |
cmp rdx, r13 | |
.loc 1 4 3 | |
jle .LBB3_16 | |
.loc 1 0 3 | |
mov rdi, r14 | |
mov r15, rcx | |
xor r11d, r11d | |
.p2align 4, 0x90 | |
.LBB3_13: | |
xor r8d, r8d | |
.p2align 4, 0x90 | |
.LBB3_14: | |
.loc 1 4 3 | |
vmovss xmm0, dword ptr [rdi + 4*r8] | |
vmovss dword ptr [r15 + 4*r8], xmm0 | |
inc r8 | |
cmp r8, 16 | |
jne .LBB3_14 | |
inc r11 | |
add r15, 34560 | |
add rdi, 64 | |
cmp r11, r10 | |
jne .LBB3_13 | |
jmp .LBB3_16 | |
.LBB3_22: | |
xor eax, eax | |
.loc 1 4 3 epilogue_begin | |
add rsp, 128 | |
pop rbx | |
pop r12 | |
pop r13 | |
pop r14 | |
pop r15 | |
pop rbp | |
.cfi_def_cfa rsp, 8 | |
ret | |
.Ltmp7: | |
.Lfunc_end3: | |
.size turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32, .Lfunc_end3-turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32 | |
.cfi_endproc | |
# iree_hal_executable_library_query
# Returns in rax the address of iree_hal_executable_library_query_v0 when
# edi == 4, and 0 otherwise. Exported via .globl; the
# .Liree_hal_executable_library_query$local label names the same address.
# In:  edi   Out: rax   Scratch: rcx, flags
# NOTE(review): edi is presumably the requested library interface version --
# confirm against the caller.
.section .text.iree_hal_executable_library_query,"ax",@progbits | |
.globl iree_hal_executable_library_query | |
.p2align 4, 0x90 | |
.type iree_hal_executable_library_query,@function | |
iree_hal_executable_library_query: | |
.Liree_hal_executable_library_query$local: | |
.type .Liree_hal_executable_library_query$local,@function | |
.Lfunc_begin4: | |
.cfi_startproc | |
# rax = 0 is the result when the comparison below fails.
xor eax, eax | |
cmp edi, 4 | |
# lea does not modify flags, so the cmp result is still live for the cmove.
lea rcx, [rip + iree_hal_executable_library_query_v0] | |
cmove rax, rcx | |
ret | |
.Lfunc_end4: | |
.size iree_hal_executable_library_query, .Lfunc_end4-iree_hal_executable_library_query | |
.size .Liree_hal_executable_library_query$local, .Lfunc_end4-iree_hal_executable_library_query | |
.cfi_endproc | |
# iree_h2f_ieee
# Builds a 32-bit float in xmm0 from the low 16 bits of edi, which are split
# into a sign bit (0x8000), a 5-bit exponent field (0x7C00) and a 10-bit
# mantissa field (0x03FF).
# In:  edi (low 16 bits used)   Out: xmm0
# Scratch: eax, ecx, edx, edi, xmm1, flags
.section .text.iree_h2f_ieee,"ax",@progbits | |
.p2align 4, 0x90 | |
.type iree_h2f_ieee,@function | |
iree_h2f_ieee: | |
.Lfunc_begin5: | |
.cfi_startproc | |
# eax = sign bit moved up to bit 31, ecx = mantissa field, dx = exponent field.
mov eax, edi | |
and eax, 32768 | |
mov edx, edi | |
mov ecx, edi | |
and ecx, 1023 | |
shl eax, 16 | |
and dx, 31744 | |
# Exponent field == 0: zero or subnormal input.
je .LBB5_6 | |
and edi, 31744 | |
cmp edi, 31744 | |
# Exponent field not all ones: ordinary normal number.
jne .LBB5_5 | |
test cx, cx | |
# Exponent all ones and mantissa zero: infinity.
je .LBB5_4 | |
# Exponent all ones, mantissa non-zero: result bits are sign | 0x7FC00000;
# the input mantissa bits are not carried into the result.
or eax, 2143289344 | |
vmovd xmm0, eax | |
ret | |
.LBB5_6: | |
# Zero/subnormal: result = (float)mantissa * s, where s has bit pattern
# sign | 0x33800000 (magnitude 2^-24).
movzx ecx, cx | |
or eax, 864026624 | |
vcvtsi2ss xmm0, xmm0, ecx | |
vmovd xmm1, eax | |
vmulss xmm0, xmm0, xmm1 | |
ret | |
.LBB5_5: | |
# Normal: bits = sign + ((exponent | mantissa) << 13) + 0x38000000.
# The constant is 112 << 23, i.e. it rebiases the exponent by 112.
movzx ecx, cx | |
movzx edx, dx | |
add edx, ecx | |
shl edx, 13 | |
lea eax, [rdx + rax + 939524096] | |
vmovd xmm0, eax | |
ret | |
.LBB5_4: | |
# Infinity: bits = sign | 0x7F800000.
or eax, 2139095040 | |
vmovd xmm0, eax | |
ret | |
.Lfunc_end5: | |
.size iree_h2f_ieee, .Lfunc_end5-iree_h2f_ieee | |
.cfi_endproc | |
# iree_f2h_ieee
# Converts the 32-bit float in xmm0 into a 16-bit half-precision bit pattern
# (1 sign bit, 5 exponent bits, 10 mantissa bits) returned in the low 16 bits
# of eax.
# In:  xmm0   Out: eax (low 16 bits meaningful)
# Scratch: ecx, edx, esi, edi, r8d, flags
# Register roles after the first nine instructions:
#   edi = raw float bits          esi = 8-bit exponent field (bits 23..30)
#   eax = 23-bit mantissa field   ecx = sign (0 or 1)
#   edx = exponent field - 112 (the exponent rebiased for half precision)
# NOTE(review): this listing looks compiler-generated; the rounding fix in the
# first return path below should be mirrored in whatever source it was built from.
.section .text.iree_f2h_ieee,"ax",@progbits | |
.p2align 4, 0x90 | |
.type iree_f2h_ieee,@function | |
iree_f2h_ieee: | |
.Lfunc_begin6: | |
.cfi_startproc | |
vmovd edi, xmm0 | |
# 2071 = 0x0817 is a BEXTR control word: start bit 23, length 8.
mov edx, 2071 | |
bextr esi, edi, edx | |
mov eax, edi | |
mov ecx, edi | |
shr ecx, 31 | |
and eax, 8388607 | |
lea edx, [rsi - 112] | |
lea r8d, [rsi - 113] | |
# Unsigned range check: taken unless the rebiased exponent is in 1..29.
cmp r8d, 28 | |
ja .LBB6_2 | |
# Normal half result. Adding 0x1000 rounds on the highest of the 13 mantissa
# bits that are about to be dropped.
add eax, 4096 | |
shl ecx, 15 | |
shl edx, 10 | |
shr eax, 13 | |
or eax, ecx | |
# The rounded mantissa can carry out to 0x400. It has to be ADDED to the
# exponent field so that the carry increments the exponent; OR-ing it in lost
# the carry whenever the exponent's low bit was already set (1.9999f used to
# come out as 0x3C00 = 1.0 instead of 0x4000 = 2.0). The exponent is at most
# 29 here, so the sum stays below bit 15 and cannot disturb the sign bit.
add eax, edx | |
ret | |
.LBB6_2: | |
# Input bits all zero (+0.0) return 0.
test edi, edi | |
je .LBB6_6 | |
# Exponent field above 112 here means a rebiased exponent of 30 or more.
cmp esi, 112 | |
ja .LBB6_7 | |
# Exponent field 102..112 yields a subnormal half; anything smaller falls
# through and returns 0 (the sign bit is not kept on that path).
cmp esi, 102 | |
jae .LBB6_10 | |
.LBB6_6: | |
xor eax, eax | |
ret | |
.LBB6_7: | |
# Rebiased exponent 143 corresponds to an all-ones float exponent (inf/NaN).
cmp edx, 143 | |
jne .LBB6_11 | |
shl ecx, 15 | |
test eax, eax | |
# Zero mantissa: infinity, packed at .LBB6_14.
je .LBB6_14 | |
# Non-zero mantissa: sign | 0x7C00 | (mantissa >> 13).
shr eax, 13 | |
or ecx, eax | |
or ecx, 31744 | |
mov eax, ecx | |
ret | |
.LBB6_10: | |
# Subnormal half: restore the implicit leading 1 (0x800000), shift right by
# (113 - exponent field), add 0x2000 when bit 12 is set (lea adds 2 * 0x1000),
# then drop 13 bits and merge the sign.
mov dl, 113 | |
or eax, 8388608 | |
shl ecx, 15 | |
sub dl, sil | |
shrx eax, eax, edx | |
mov edx, eax | |
and edx, 4096 | |
lea eax, [rax + 2*rdx] | |
shr eax, 13 | |
or eax, ecx | |
ret | |
.LBB6_11: | |
# Finite value with rebiased exponent >= 30. Round only when bit 12 is set.
test edi, 4096 | |
je .LBB6_13 | |
# Candidate A (mantissa < 0x7FE000): mantissa + 0x2000, exponent unchanged.
# Candidate B (otherwise, the add would overflow 23 bits): mantissa 0 and
# exponent + 1 (esi - 111 == edx + 1). The cmovb pair selects A when "below".
lea edi, [rax + 8192] | |
add esi, -111 | |
xor r8d, r8d | |
cmp eax, 8380416 | |
cmovb esi, edx | |
cmovb r8d, edi | |
mov eax, r8d | |
mov edx, esi | |
.LBB6_13: | |
shl ecx, 15 | |
# Exponent 31 or more does not fit: fall into the infinity return.
cmp edx, 31 | |
jb .LBB6_15 | |
.LBB6_14: | |
# Infinity: sign | 0x7C00.
or ecx, 31744 | |
mov eax, ecx | |
ret | |
.LBB6_15: | |
# Pack sign | exponent << 10 | mantissa >> 13. Any carry was already resolved
# above, so the fields do not overlap and OR is sufficient here.
shr eax, 13 | |
shl edx, 10 | |
or eax, ecx | |
or eax, edx | |
ret | |
.Lfunc_end6: | |
.size iree_f2h_ieee, .Lfunc_end6-iree_f2h_ieee | |
.cfi_endproc | |
# __gnu_h2f_ieee
# Builds a 32-bit float in xmm0 from the low 16 bits of edi, which are split
# into a sign bit (0x8000), a 5-bit exponent field (0x7C00) and a 10-bit
# mantissa field (0x03FF).
# In:  edi (low 16 bits used)   Out: xmm0
# Scratch: eax, ecx, edx, edi, xmm1, flags
.section .text.__gnu_h2f_ieee,"ax",@progbits | |
.p2align 4, 0x90 | |
.type __gnu_h2f_ieee,@function | |
__gnu_h2f_ieee: | |
.Lfunc_begin7: | |
.cfi_startproc | |
# eax = sign bit moved up to bit 31, ecx = mantissa field, dx = exponent field.
mov eax, edi | |
and eax, 32768 | |
mov edx, edi | |
mov ecx, edi | |
and ecx, 1023 | |
shl eax, 16 | |
and dx, 31744 | |
# Exponent field == 0: zero or subnormal input.
je .LBB7_6 | |
and edi, 31744 | |
cmp edi, 31744 | |
# Exponent field not all ones: ordinary normal number.
jne .LBB7_5 | |
test cx, cx | |
# Exponent all ones and mantissa zero: infinity.
je .LBB7_4 | |
# Exponent all ones, mantissa non-zero: result bits are sign | 0x7FC00000.
or eax, 2143289344 | |
vmovd xmm0, eax | |
ret | |
.LBB7_6: | |
# Zero/subnormal: result = (float)mantissa * s, where s has bit pattern
# sign | 0x33800000 (magnitude 2^-24).
movzx ecx, cx | |
or eax, 864026624 | |
vcvtsi2ss xmm0, xmm0, ecx | |
vmovd xmm1, eax | |
vmulss xmm0, xmm0, xmm1 | |
ret | |
.LBB7_5: | |
# Normal: bits = sign + ((exponent | mantissa) << 13) + 0x38000000 (112 << 23).
movzx ecx, cx | |
movzx edx, dx | |
add edx, ecx | |
shl edx, 13 | |
lea eax, [rdx + rax + 939524096] | |
vmovd xmm0, eax | |
ret | |
.LBB7_4: | |
# Infinity: bits = sign | 0x7F800000.
or eax, 2139095040 | |
vmovd xmm0, eax | |
ret | |
.Lfunc_end7: | |
.size __gnu_h2f_ieee, .Lfunc_end7-__gnu_h2f_ieee | |
.cfi_endproc | |
# __extendhfsf2
# Same half-to-float widening as the integer-argument variants, except that the
# 16-bit input is taken from the low bits of xmm0 rather than from edi.
# In:  xmm0 (low 16 bits used)   Out: xmm0
# Scratch: eax, ecx, edx, esi, xmm1, flags
.section .text.__extendhfsf2,"ax",@progbits | |
.p2align 4, 0x90 | |
.type __extendhfsf2,@function | |
__extendhfsf2: | |
.Lfunc_begin8: | |
.cfi_startproc | |
# ecx = raw input bits; eax = sign moved to bit 31 (the and with 0x80000000
# discards everything else that was shifted up); edx = mantissa field;
# esi = exponent field.
vmovd ecx, xmm0 | |
mov eax, ecx | |
shl eax, 16 | |
mov edx, ecx | |
and edx, 1023 | |
mov esi, ecx | |
and eax, -2147483648 | |
and esi, 31744 | |
# Exponent field == 0: zero or subnormal input.
je .LBB8_6 | |
cmp esi, 31744 | |
# Exponent field not all ones: ordinary normal number.
jne .LBB8_5 | |
test dx, dx | |
# Exponent all ones and mantissa zero: infinity.
je .LBB8_4 | |
# Exponent all ones, mantissa non-zero: result bits are sign | 0x7FC00000.
or eax, 2143289344 | |
vmovd xmm0, eax | |
ret | |
.LBB8_6: | |
# Zero/subnormal: result = (float)mantissa * s, where s has bit pattern
# sign | 0x33800000 (magnitude 2^-24).
movzx ecx, dx | |
or eax, 864026624 | |
vcvtsi2ss xmm0, xmm1, ecx | |
vmovd xmm1, eax | |
vmulss xmm0, xmm0, xmm1 | |
ret | |
.LBB8_5: | |
# Normal: bits = sign + ((input & 0x7FFF) << 13) + 0x38000000 (112 << 23).
and ecx, 32767 | |
shl ecx, 13 | |
lea eax, [rcx + rax + 939524096] | |
vmovd xmm0, eax | |
ret | |
.LBB8_4: | |
# Infinity: bits = sign | 0x7F800000.
or eax, 2139095040 | |
vmovd xmm0, eax | |
ret | |
.Lfunc_end8: | |
.size __extendhfsf2, .Lfunc_end8-__extendhfsf2 | |
.cfi_endproc | |
# __gnu_f2h_ieee
# Alias entry point: tail-jumps to iree_f2h_ieee without touching any register,
# so the argument in xmm0 and the result in eax are exactly those of
# iree_f2h_ieee, which returns directly to this function's caller.
.section .text.__gnu_f2h_ieee,"ax",@progbits | |
.p2align 4, 0x90 | |
.type __gnu_f2h_ieee,@function | |
__gnu_f2h_ieee: | |
.Lfunc_begin9: | |
.cfi_startproc | |
jmp iree_f2h_ieee | |
.Lfunc_end9: | |
.size __gnu_f2h_ieee, .Lfunc_end9-__gnu_f2h_ieee | |
.cfi_endproc | |
# __truncsfhf2
# Calls iree_f2h_ieee on the float in xmm0 and returns the resulting 16-bit
# pattern in the low 16 bits of xmm0 instead of in eax.
# In:  xmm0   Out: xmm0 (low 16 bits meaningful)
# Scratch: rax plus whatever iree_f2h_ieee clobbers
.section .text.__truncsfhf2,"ax",@progbits | |
.p2align 4, 0x90 | |
.type __truncsfhf2,@function | |
__truncsfhf2: | |
.Lfunc_begin10: | |
.cfi_startproc | |
# The push reserves an 8-byte stack slot; the value of rax stored there is
# irrelevant. NOTE(review): presumably it also keeps rsp 16-byte aligned at the
# call, as the SysV ABI requires.
push rax | |
.cfi_def_cfa_offset 16 | |
call iree_f2h_ieee | |
# Bounce the 16-bit result through the stack slot into xmm0. Only 2 of the 4
# bytes loaded were written by the store; the upper 2 are leftover slot content.
mov word ptr [rsp + 4], ax | |
vmovss xmm0, dword ptr [rsp + 4] | |
# Releases the slot; rax is overwritten with the slot content.
pop rax | |
.cfi_def_cfa_offset 8 | |
ret | |
.Lfunc_end10: | |
.size __truncsfhf2, .Lfunc_end10-__truncsfhf2 | |
.cfi_endproc | |
# Constant pool for ceilf (single-precision bit patterns):
#   .LCPI11_0 = 0x7b800000 = 2^120
#   .LCPI11_1 = 0x80000000 = -0.0
#   .LCPI11_2 = 0x3f800000 = 1.0
.section .rodata.cst4,"aM",@progbits,4 | |
.p2align 2, 0x0 | |
.LCPI11_0: | |
.long 0x7b800000 | |
.LCPI11_1: | |
.long 0x80000000 | |
.LCPI11_2: | |
.long 0x3f800000 | |
# ceilf
# Rounds the float in xmm0 up to an integral value using integer operations on
# its bit pattern.
# In:  xmm0   Out: xmm0
# Scratch: eax, ecx, edx, esi, edi, xmm1, k1, flags, 8 bytes below rsp
.section .text.ceilf,"ax",@progbits | |
.p2align 4, 0x90 | |
.type ceilf,@function | |
ceilf: | |
.Lfunc_begin11: | |
.cfi_startproc | |
# eax = raw bits; ecx = exponent field (BEXTR control 0x0817: start 23, len 8).
vmovd eax, xmm0 | |
mov ecx, 2071 | |
bextr ecx, eax, ecx | |
# Exponent field > 149: no fraction bits remain (or inf/NaN); return x as is.
cmp ecx, 149 | |
ja .LBB11_7 | |
# Exponent field < 127: |x| < 1, handled at .LBB11_4.
cmp ecx, 127 | |
jb .LBB11_4 | |
# edx = mask of the fraction bits for this exponent (0x7FFFFF >> e).
add ecx, -127 | |
mov edx, 8388607 | |
shrx edx, edx, ecx | |
# No fraction bits set: x is already integral.
test edx, eax | |
je .LBB11_7 | |
# x + 2^120 is computed and only stored below rsp; its value is never used.
# NOTE(review): presumably kept solely for its floating-point exception side
# effect -- confirm.
vaddss xmm0, xmm0, dword ptr [rip + .LCPI11_0] | |
xor esi, esi | |
test eax, eax | |
mov edi, -8388608 | |
# Negative x: add nothing, so clearing the fraction truncates toward zero.
# Non-negative x: add the fraction mask first, which carries into the integer
# part.
cmovs edx, esi | |
# ecx = 0xFF800000 arithmetically shifted right by e: keeps sign, exponent and
# the integer part of the mantissa.
sarx ecx, edi, ecx | |
add edx, eax | |
and edx, ecx | |
vmovss dword ptr [rsp - 8], xmm0 | |
vmovd xmm0, edx | |
ret | |
.LBB11_4: | |
# |x| < 1. The sum is again only stored, never used as a value.
vaddss xmm1, xmm0, dword ptr [rip + .LCPI11_0] | |
vmovss dword ptr [rsp - 4], xmm1 | |
test eax, eax | |
# Negative input (including -0.0): return -0.0.
js .LBB11_5 | |
# Non-negative: result is 1.0, unless the bits are all zero (+0.0), in which
# case the masked move copies x back. sete uses the flags of the test above;
# the intervening vmovss does not modify flags.
vmovss xmm1, dword ptr [rip + .LCPI11_2] | |
sete al | |
kmovd k1, eax | |
vmovss xmm1 {k1}, xmm1, xmm0 | |
vmovaps xmm0, xmm1 | |
.LBB11_7: | |
ret | |
.LBB11_5: | |
vmovss xmm0, dword ptr [rip + .LCPI11_1] | |
ret | |
.Lfunc_end11: | |
.size ceilf, .Lfunc_end11-ceilf | |
.cfi_endproc | |
# Constant pools for expf.
# Single precision:
#   .LCPI12_0 = 0x42b17217 ~  88.72   (upper input bound checked at .LBB12_1)
#   .LCPI12_1 = 0xc2cff1b4 ~ -103.97  (lower input bound checked at .LBB12_6)
#   .LCPI12_2 = 0x10000000 = 2^-95    (multiplied by itself on underflow)
#   .LCPI12_3 = 0x70000000 = 2^97     (multiplied by itself on overflow)
.section .rodata.cst4,"aM",@progbits,4 | |
.p2align 2, 0x0 | |
.LCPI12_0: | |
.long 0x42b17217 | |
.LCPI12_1: | |
.long 0xc2cff1b4 | |
.LCPI12_2: | |
.long 0x10000000 | |
.LCPI12_3: | |
.long 0x70000000 | |
# Double precision:
#   .LCPI12_4  ~ 46.17 (32 / ln 2), scales x before rounding
#   .LCPI12_5  =  1.5 * 2^52, added to round to an integer in the low bits
#   .LCPI12_6  = -1.5 * 2^52, removes that shift again
#   .LCPI12_7 .. .LCPI12_10 are polynomial coefficients (.LCPI12_10 = 1.0)
.section .rodata.cst8,"aM",@progbits,8 | |
.p2align 3, 0x0 | |
.LCPI12_4: | |
.quad 0x40471547652b82fe | |
.LCPI12_5: | |
.quad 0x4338000000000000 | |
.LCPI12_6: | |
.quad 0xc338000000000000 | |
.LCPI12_7: | |
.quad 0x3ebc6af84b912394 | |
.LCPI12_8: | |
.quad 0x3f2ebfce50fac4f3 | |
.LCPI12_9: | |
.quad 0x3f962e42ff0c52d6 | |
.LCPI12_10: | |
.quad 0x3ff0000000000000 | |
# expf
# Computes e^x for the float in xmm0 using double-precision arithmetic and the
# external table __exp2f_data (8-byte entries, indexed by 5 bits).
# In:  xmm0   Out: xmm0
# Scratch: rax, rcx, rdx, xmm1-xmm3, flags, 8 bytes below rsp
.section .text.expf,"ax",@progbits | |
.p2align 4, 0x90 | |
.type expf,@function | |
expf: | |
.Lfunc_begin12: | |
.cfi_startproc | |
# ecx = raw bits; eax = bits 20..30 of x (BEXTR control 0x0B14: start 20,
# length 11), i.e. the exponent plus the top three mantissa bits, sign excluded.
vmovd ecx, xmm0 | |
mov eax, 2836 | |
bextr eax, ecx, eax | |
# Large-magnitude and special inputs are screened at .LBB12_1.
cmp eax, 1067 | |
jae .LBB12_1 | |
.LBB12_8: | |
# Main path: z = x * .LCPI12_4; kd = z rounded to an integer via the
# add/subtract of 1.5 * 2^52; r = z - kd.
vcvtss2sd xmm0, xmm0, xmm0 | |
vmulsd xmm0, xmm0, qword ptr [rip + .LCPI12_4] | |
lea rdx, [rip + __exp2f_data] | |
vaddsd xmm1, xmm0, qword ptr [rip + .LCPI12_5] | |
# rax = bit pattern of the shifted sum; its low bits hold the rounded integer.
vmovq rax, xmm1 | |
vaddsd xmm1, xmm1, qword ptr [rip + .LCPI12_6] | |
# Low 5 bits select the table entry; the integer shifted left by 47 is added
# to that entry's bit pattern to form the scale factor.
mov ecx, eax | |
and ecx, 31 | |
shl rax, 47 | |
add rax, qword ptr [rdx + 8*rcx] | |
vsubsd xmm0, xmm0, xmm1 | |
# Polynomial in r: (.LCPI12_7 * r + .LCPI12_8) * r^2 + (.LCPI12_9 * r + 1.0),
# then multiplied by the scale factor and narrowed back to float.
vmulsd xmm2, xmm0, qword ptr [rip + .LCPI12_7] | |
vmovq xmm1, rax | |
vmulsd xmm3, xmm0, xmm0 | |
vaddsd xmm2, xmm2, qword ptr [rip + .LCPI12_8] | |
vmulsd xmm0, xmm0, qword ptr [rip + .LCPI12_9] | |
vaddsd xmm0, xmm0, qword ptr [rip + .LCPI12_10] | |
vmulsd xmm2, xmm3, xmm2 | |
vaddsd xmm0, xmm0, xmm2 | |
vmulsd xmm0, xmm0, xmm1 | |
vcvtsd2ss xmm1, xmm0, xmm0 | |
.LBB12_9: | |
vmovaps xmm0, xmm1 | |
ret | |
.LBB12_1: | |
# Bits 0xFF800000 (-infinity): return 0.0 (xmm1 was just zeroed).
vxorps xmm1, xmm1, xmm1 | |
cmp ecx, -8388608 | |
je .LBB12_9 | |
# Exponent all ones (+infinity or NaN): return x + x.
cmp eax, 2040 | |
jae .LBB12_3 | |
# x above the upper bound: return 2^97 * 2^97. The first factor is passed
# through a stack slot before the multiply.
vucomiss xmm0, dword ptr [rip + .LCPI12_0] | |
jbe .LBB12_6 | |
mov dword ptr [rsp - 8], 1879048192 | |
vmovss xmm0, dword ptr [rsp - 8] | |
vmulss xmm0, xmm0, dword ptr [rip + .LCPI12_3] | |
ret | |
.LBB12_3: | |
vaddss xmm0, xmm0, xmm0 | |
ret | |
.LBB12_6: | |
# x below the lower bound: return 2^-95 * 2^-95. Otherwise rejoin the main
# path.
vmovss xmm1, dword ptr [rip + .LCPI12_1] | |
vucomiss xmm1, xmm0 | |
jbe .LBB12_8 | |
mov dword ptr [rsp - 4], 268435456 | |
vmovss xmm0, dword ptr [rsp - 4] | |
vmulss xmm0, xmm0, dword ptr [rip + .LCPI12_2] | |
ret | |
.Lfunc_end12: | |
.size expf, .Lfunc_end12-expf | |
.cfi_endproc | |
# Constants for __math_oflowf.
# .LCPI13_0 is a two-entry float table: [0] = 0xf0000000 = -2^97,
# [1] = 0x70000000 = +2^97. .LCPI13_1 = 0x70000000 = 2^97.
.section .rodata.cst8,"aM",@progbits,8 | |
.p2align 2, 0x0 | |
.LCPI13_0: | |
.long 0xf0000000 | |
.long 0x70000000 | |
.section .rodata.cst4,"aM",@progbits,4 | |
.p2align 2, 0x0 | |
.LCPI13_1: | |
.long 0x70000000 | |
# __math_oflowf
# Returns (edi != 0 ? -2^97 : +2^97) * 2^97 in xmm0; the product exceeds the
# single-precision range.
# In:  edi (zero / non-zero selector)   Out: xmm0
# Scratch: rax, rcx, flags, 4 bytes below rsp
.section .text.__math_oflowf,"ax",@progbits | |
.p2align 4, 0x90 | |
.type __math_oflowf,@function | |
__math_oflowf: | |
.Lfunc_begin13: | |
.cfi_startproc | |
# rax = (edi == 0) is the table index; lea does not disturb the test flags.
xor eax, eax | |
test edi, edi | |
lea rcx, [rip + .LCPI13_0] | |
sete al | |
vmovss xmm0, dword ptr [rcx + 4*rax] | |
# The selected factor is stored to the stack and reloaded before the multiply.
vmovss dword ptr [rsp - 4], xmm0 | |
vmovss xmm0, dword ptr [rsp - 4] | |
vmulss xmm0, xmm0, dword ptr [rip + .LCPI13_1] | |
ret | |
.Lfunc_end13: | |
.size __math_oflowf, .Lfunc_end13-__math_oflowf | |
.cfi_endproc | |
# Constants for __math_uflowf.
# .LCPI14_0 is a two-entry float table: [0] = 0x90000000 = -2^-95,
# [1] = 0x10000000 = +2^-95. .LCPI14_1 = 0x10000000 = 2^-95.
.section .rodata.cst8,"aM",@progbits,8 | |
.p2align 2, 0x0 | |
.LCPI14_0: | |
.long 0x90000000 | |
.long 0x10000000 | |
.section .rodata.cst4,"aM",@progbits,4 | |
.p2align 2, 0x0 | |
.LCPI14_1: | |
.long 0x10000000 | |
# __math_uflowf
# Returns (edi != 0 ? -2^-95 : +2^-95) * 2^-95 in xmm0; the product is below
# the smallest single-precision subnormal.
# In:  edi (zero / non-zero selector)   Out: xmm0
# Scratch: rax, rcx, flags, 4 bytes below rsp
.section .text.__math_uflowf,"ax",@progbits | |
.p2align 4, 0x90 | |
.type __math_uflowf,@function | |
__math_uflowf: | |
.Lfunc_begin14: | |
.cfi_startproc | |
# rax = (edi == 0) is the table index; lea does not disturb the test flags.
xor eax, eax | |
test edi, edi | |
lea rcx, [rip + .LCPI14_0] | |
sete al | |
vmovss xmm0, dword ptr [rcx + 4*rax] | |
# The selected factor is stored to the stack and reloaded before the multiply.
vmovss dword ptr [rsp - 4], xmm0 | |
vmovss xmm0, dword ptr [rsp - 4] | |
vmulss xmm0, xmm0, dword ptr [rip + .LCPI14_1] | |
ret | |
.Lfunc_end14: | |
.size __math_uflowf, .Lfunc_end14-__math_uflowf | |
.cfi_endproc | |
# .LCPI15_0 = 0x80000000, the single-precision sign-bit mask.
.section .rodata.cst4,"aM",@progbits,4 | |
.p2align 2, 0x0 | |
.LCPI15_0: | |
.long 0x80000000 | |
# __math_xflowf
# Returns (edi != 0 ? -y : y) * y in xmm0, where y is the float passed in xmm0.
# In:  edi (zero / non-zero selector), xmm0 = y   Out: xmm0
# Scratch: eax, xmm1, k1, flags, 4 bytes below rsp
.section .text.__math_xflowf,"ax",@progbits | |
.p2align 4, 0x90 | |
.type __math_xflowf,@function | |
__math_xflowf: | |
.Lfunc_begin15: | |
.cfi_startproc | |
# xmm1 = -y (sign bit flipped by the broadcast mask).
vxorps xmm1, xmm0, dword ptr [rip + .LCPI15_0]{1to4} | |
# When edi == 0 the mask bit is set and the masked move replaces -y with y.
test edi, edi | |
sete al | |
kmovd k1, eax | |
vmovss xmm1 {k1}, xmm1, xmm0 | |
# The first factor goes through a stack slot and is multiplied from memory.
vmovss dword ptr [rsp - 4], xmm1 | |
vmulss xmm0, xmm0, dword ptr [rsp - 4] | |
ret | |
.Lfunc_end15: | |
.size __math_xflowf, .Lfunc_end15-__math_xflowf | |
.cfi_endproc | |
# feclearexcept
# Stub: ignores its argument, touches no floating-point state and always
# returns 0 in eax.
.section .text.feclearexcept,"ax",@progbits | |
.p2align 4, 0x90 | |
.type feclearexcept,@function | |
feclearexcept: | |
.Lfunc_begin16: | |
.cfi_startproc | |
xor eax, eax | |
ret | |
.Lfunc_end16: | |
.size feclearexcept, .Lfunc_end16-feclearexcept | |
.cfi_endproc | |
# feraiseexcept
# Stub: ignores its argument, raises nothing and always returns 0 in eax.
.section .text.feraiseexcept,"ax",@progbits | |
.p2align 4, 0x90 | |
.type feraiseexcept,@function | |
feraiseexcept: | |
.Lfunc_begin17: | |
.cfi_startproc | |
xor eax, eax | |
ret | |
.Lfunc_end17: | |
.size feraiseexcept, .Lfunc_end17-feraiseexcept | |
.cfi_endproc | |
# fetestexcept
# Stub: ignores its argument, reads no floating-point status and always
# returns 0 in eax.
.section .text.fetestexcept,"ax",@progbits | |
.p2align 4, 0x90 | |
.type fetestexcept,@function | |
fetestexcept: | |
.Lfunc_begin18: | |
.cfi_startproc | |
xor eax, eax | |
ret | |
.Lfunc_end18: | |
.size fetestexcept, .Lfunc_end18-fetestexcept | |
.cfi_endproc | |
# fegetround
# Stub: does not read the hardware rounding mode and always returns 0 in eax.
.section .text.fegetround,"ax",@progbits | |
.p2align 4, 0x90 | |
.type fegetround,@function | |
fegetround: | |
.Lfunc_begin19: | |
.cfi_startproc | |
xor eax, eax | |
ret | |
.Lfunc_end19: | |
.size fegetround, .Lfunc_end19-fegetround | |
.cfi_endproc | |
# __fesetround
# Stub: ignores its argument, leaves the rounding mode untouched and always
# returns 0 in eax.
.section .text.__fesetround,"ax",@progbits | |
.p2align 4, 0x90 | |
.type __fesetround,@function | |
__fesetround: | |
.Lfunc_begin20: | |
.cfi_startproc | |
xor eax, eax | |
ret | |
.Lfunc_end20: | |
.size __fesetround, .Lfunc_end20-__fesetround | |
.cfi_endproc | |
# fegetenv
# Stub: writes nothing through its pointer argument and always returns 0 in
# eax.
.section .text.fegetenv,"ax",@progbits | |
.p2align 4, 0x90 | |
.type fegetenv,@function | |
fegetenv: | |
.Lfunc_begin21: | |
.cfi_startproc | |
xor eax, eax | |
ret | |
.Lfunc_end21: | |
.size fegetenv, .Lfunc_end21-fegetenv | |
.cfi_endproc | |
# fesetenv
# Stub: ignores its argument, changes no floating-point state and always
# returns 0 in eax.
.section .text.fesetenv,"ax",@progbits | |
.p2align 4, 0x90 | |
.type fesetenv,@function | |
fesetenv: | |
.Lfunc_begin22: | |
.cfi_startproc | |
xor eax, eax | |
ret | |
.Lfunc_end22: | |
.size fesetenv, .Lfunc_end22-fesetenv | |
.cfi_endproc | |
# Constant pool for floorf (single-precision bit patterns):
#   .LCPI23_0 = 0x7b800000 = 2^120
#   .LCPI23_1 = 0xbf800000 = -1.0
.section .rodata.cst4,"aM",@progbits,4 | |
.p2align 2, 0x0 | |
.LCPI23_0: | |
.long 0x7b800000 | |
.LCPI23_1: | |
.long 0xbf800000 | |
# floorf
# Rounds the float in xmm0 down to an integral value using integer operations
# on its bit pattern.
# In:  xmm0   Out: xmm0
# Scratch: eax, ecx, edx, esi, xmm1, k1, flags, 8 bytes below rsp
.section .text.floorf,"ax",@progbits | |
.p2align 4, 0x90 | |
.type floorf,@function | |
floorf: | |
.Lfunc_begin23: | |
.cfi_startproc | |
# eax = raw bits; ecx = exponent field (BEXTR control 0x0817: start 23, len 8).
vmovd eax, xmm0 | |
mov ecx, 2071 | |
bextr ecx, eax, ecx | |
# Exponent field > 149: no fraction bits remain (or inf/NaN); return x as is.
cmp ecx, 149 | |
jbe .LBB23_1 | |
ret | |
.LBB23_1: | |
# Exponent field < 127: |x| < 1, handled at .LBB23_4.
cmp ecx, 127 | |
jb .LBB23_4 | |
# edx = mask of the fraction bits for this exponent (0x7FFFFF >> e).
add ecx, -127 | |
mov edx, 8388607 | |
shrx edx, edx, ecx | |
# No fraction bits set: x is already integral.
test edx, eax | |
je .LBB23_6 | |
# x + 2^120 is computed and only stored below rsp; its value is never used.
# NOTE(review): presumably kept solely for its floating-point exception side
# effect -- confirm.
vaddss xmm0, xmm0, dword ptr [rip + .LCPI23_0] | |
# ecx = 0xFF800000 arithmetically shifted right by e: keeps sign, exponent and
# the integer part of the mantissa.
mov esi, -8388608 | |
sarx ecx, esi, ecx | |
# esi = all ones for negative x, zero otherwise; so the fraction mask is added
# only for negative x (moving away from zero) before the fraction is cleared.
mov esi, eax | |
sar esi, 31 | |
and esi, edx | |
add esi, eax | |
and esi, ecx | |
vmovss dword ptr [rsp - 8], xmm0 | |
vmovd xmm0, esi | |
ret | |
.LBB23_4: | |
# |x| < 1. The sum is again only stored, never used as a value.
vaddss xmm1, xmm0, dword ptr [rip + .LCPI23_0] | |
vmovss dword ptr [rsp - 4], xmm1 | |
vxorps xmm1, xmm1, xmm1 | |
test eax, eax | |
js .LBB23_7 | |
# Sign bit clear: result is +0.0.
vmovaps xmm0, xmm1 | |
.LBB23_6: | |
ret | |
.LBB23_7: | |
# Sign bit set: result is -1.0, unless x compares equal to zero (-0.0), in
# which case the masked move copies x back.
vcmpeqss k1, xmm0, xmm1 | |
vmovss xmm1, dword ptr [rip + .LCPI23_1] | |
vmovss xmm1 {k1}, xmm1, xmm0 | |
vmovaps xmm0, xmm1 | |
ret | |
.Lfunc_end23: | |
.size floorf, .Lfunc_end23-floorf | |
.cfi_endproc | |
# fmaf
# Computes x * y + z for floats in xmm0, xmm1, xmm2 by working in double
# precision, with a fix-up of the double result's last bit before narrowing
# in the one case where narrowing an inexact sum could round the wrong way.
# In:  xmm0 = x, xmm1 = y, xmm2 = z   Out: xmm0
# Scratch: rax, rcx, rdx, xmm1-xmm3, k1, flags
.section .text.fmaf,"ax",@progbits | |
.p2align 4, 0x90 | |
.type fmaf,@function | |
fmaf: | |
.Lfunc_begin24: | |
.cfi_startproc | |
vcvtss2sd xmm0, xmm0, xmm0 | |
# rdx = 0x7FF0000000000000, the double-precision exponent mask.
movabs rdx, 9218868437227405312 | |
vcvtss2sd xmm1, xmm1, xmm1 | |
vcvtss2sd xmm2, xmm2, xmm2 | |
# xmm1 = xy (double), xmm0 = xy + z, rax = bit pattern of that sum.
vmulsd xmm1, xmm0, xmm1 | |
vaddsd xmm0, xmm1, xmm2 | |
vmovq rax, xmm0 | |
# cl = 1 unless the low 29 bits of the sum are exactly 0x10000000, i.e. unless
# the bits that narrowing to float discards sit exactly on a halfway point.
mov ecx, eax | |
and ecx, 536870911 | |
cmp ecx, 268435456 | |
setne cl | |
# andn gives ~sum & exponent-mask; it is zero (dl = 1) when the exponent is
# all ones, i.e. the sum is infinity or NaN.
andn rdx, rax, rdx | |
sete dl | |
or dl, cl | |
# Not a halfway pattern, or inf/NaN: plain narrowing is enough.
jne .LBB24_4 | |
# Halfway pattern: check whether the double addition was exact by testing
# (sum - xy) == z and (sum - z) == xy. Any mismatch or unordered compare goes
# to the fix-up at .LBB24_3.
vsubsd xmm3, xmm0, xmm1 | |
vucomisd xmm3, xmm2 | |
jne .LBB24_3 | |
jp .LBB24_3 | |
vsubsd xmm3, xmm0, xmm2 | |
vucomisd xmm3, xmm1 | |
jne .LBB24_3 | |
jp .LBB24_3 | |
.LBB24_4: | |
vcvtsd2ss xmm0, xmm0, xmm0 | |
ret | |
.LBB24_3: | |
# Inexact halfway case. cl = sign of the sum. Two candidate error terms are
# formed, xy + (z - sum) and (xy - sum) + z, and one is selected by a mask
# derived from the z-vs-xy comparison xor-ed with the sign.
# The flag-setting instructions are interleaved with vector instructions that
# leave flags untouched.
test rax, rax | |
vsubsd xmm3, xmm1, xmm0 | |
vsubsd xmm0, xmm2, xmm0 | |
sets cl | |
vucomisd xmm2, xmm1 | |
vaddsd xmm0, xmm1, xmm0 | |
vaddsd xmm3, xmm3, xmm2 | |
vxorpd xmm1, xmm1, xmm1 | |
setbe dl | |
xor dl, cl | |
kmovd k1, edx | |
vmovsd xmm0 {k1}, xmm0, xmm3 | |
# dl = (0 <= err, or unordered) xor sign. The next four instructions turn dl
# into +1 (dl = 1) or -1 (dl = 0) and add it to the sum's bit pattern,
# stepping the sum by one unit in the last place before narrowing.
vucomisd xmm1, xmm0 | |
setbe dl | |
xor dl, cl | |
movzx ecx, dl | |
dec rcx | |
or rcx, 1 | |
add rcx, rax | |
vmovq xmm0, rcx | |
vcvtsd2ss xmm0, xmm0, xmm0 | |
ret | |
.Lfunc_end24: | |
.size fmaf, .Lfunc_end24-fmaf | |
.cfi_endproc | |
# fmodf
# Computes the floating-point remainder of x / y (x in xmm0, y in xmm1) with
# integer shift-and-subtract on the mantissas; the result takes the sign of x.
# In:  xmm0 = x, xmm1 = y   Out: xmm0
# Scratch: eax, ecx, edx, esi, edi, r8, r9, xmm1, k1, flags
# Register roles in the main path:
#   eax = bits of x, edx = bits / mantissa of y,
#   ecx = exponent of x (counts down), edi = exponent of y,
#   esi = working mantissa of x (the running remainder).
.section .text.fmodf,"ax",@progbits | |
.p2align 4, 0x90 | |
.type fmodf,@function | |
fmodf: | |
.Lfunc_begin25: | |
.cfi_startproc | |
# esi = bits(y) << 1 (sign shifted out); zero means y is +0.0 or -0.0.
vmovd edx, xmm1 | |
mov esi, edx | |
add esi, edx | |
je .LBB25_2 | |
# r8b = y is NaN (|bits| >= 0x7F800001); r9b = exponent field of x is 255
# (x is infinity or NaN). ecx = exponent field of x.
mov r8d, edx | |
vmovd eax, xmm0 | |
mov edi, 2071 | |
and r8d, 2147483647 | |
bextr ecx, eax, edi | |
cmp r8d, 2139095041 | |
setae r8b | |
cmp ecx, 255 | |
sete r9b | |
or r9b, r8b | |
cmp r9b, 1 | |
jne .LBB25_3 | |
.LBB25_2: | |
# Invalid operands: return (x * y) / (x * y).
vmulss xmm0, xmm0, xmm1 | |
vdivss xmm0, xmm0, xmm0 | |
ret | |
.LBB25_3: | |
# Compare magnitudes via the sign-stripped bit patterns; |x| <= |y| is
# handled at .LBB25_4.
lea r8d, [rax + rax] | |
cmp r8d, esi | |
jbe .LBB25_4 | |
# edi = exponent field of y. A zero exponent field means a subnormal operand
# that has to be normalised by hand (.LBB25_6 for x, .LBB25_11 for y).
bextr edi, edx, edi | |
test ecx, ecx | |
je .LBB25_6 | |
# Normal x: esi = mantissa with the implicit leading 1 restored.
mov esi, eax | |
and esi, 8388607 | |
or esi, 8388608 | |
test edi, edi | |
je .LBB25_11 | |
.LBB25_14: | |
# Normal y: edx = mantissa with the implicit leading 1 restored.
and edx, 8388607 | |
or edx, 8388608 | |
cmp ecx, edi | |
jg .LBB25_16 | |
.LBB25_21: | |
# Exponents equal: one final trial subtraction.
mov edi, esi | |
sub edi, edx | |
jns .LBB25_22 | |
jmp .LBB25_23 | |
.LBB25_4: | |
# |x| <= |y|: sete still sees the flags of the cmp above (vpxor leaves flags
# alone). Equal magnitudes return x * 0; otherwise x is returned unchanged.
vpxor xmm1, xmm1, xmm1 | |
sete al | |
vmulss xmm1, xmm0, xmm1 | |
kmovd k1, eax | |
vmovss xmm0 {k1}, xmm0, xmm1 | |
ret | |
.LBB25_6: | |
# Subnormal x: count down ecx once per leading zero bit of the mantissa...
mov esi, eax | |
xor ecx, ecx | |
shl esi, 9 | |
js .LBB25_8 | |
.p2align 4, 0x90 | |
.LBB25_7: | |
dec ecx | |
add esi, esi | |
jns .LBB25_7 | |
.LBB25_8: | |
# ...then shift the bits of x left by (1 - ecx) to normalise the mantissa.
mov sil, 1 | |
sub sil, cl | |
shlx esi, eax, esi | |
test edi, edi | |
jne .LBB25_14 | |
.LBB25_11: | |
# Subnormal y: same normalisation, with edi as the exponent.
mov r8d, edx | |
xor edi, edi | |
shl r8d, 9 | |
js .LBB25_13 | |
.p2align 4, 0x90 | |
.LBB25_12: | |
dec edi | |
add r8d, r8d | |
jns .LBB25_12 | |
.LBB25_13: | |
mov r8b, 1 | |
sub r8b, dil | |
shlx edx, edx, r8d | |
cmp ecx, edi | |
jg .LBB25_16 | |
jmp .LBB25_21 | |
.p2align 4, 0x90 | |
.LBB25_19: | |
# Loop step: double the remainder and lower x's exponent until it reaches y's.
add esi, esi | |
dec ecx | |
cmp ecx, edi | |
jle .LBB25_20 | |
.LBB25_16: | |
# Trial subtraction: keep remainder - y when it is non-negative. The jne uses
# the flags of the sub (mov leaves flags alone); an exactly zero remainder
# ends the computation at .LBB25_18.
mov r8d, esi | |
sub r8d, edx | |
js .LBB25_19 | |
mov esi, r8d | |
jne .LBB25_19 | |
jmp .LBB25_18 | |
.LBB25_20: | |
# Exponents now equal: final trial subtraction.
mov ecx, edi | |
mov edi, esi | |
sub edi, edx | |
js .LBB25_23 | |
.LBB25_22: | |
# As above, je tests the flags of the preceding sub.
mov esi, edi | |
je .LBB25_18 | |
.LBB25_23: | |
# Renormalise: shift the remainder left until bit 23 is set, lowering the
# exponent once per shift.
cmp esi, 8388607 | |
ja .LBB25_24 | |
.p2align 4, 0x90 | |
.LBB25_25: | |
lea edx, [rsi + rsi] | |
dec ecx | |
cmp esi, 4194304 | |
mov esi, edx | |
jb .LBB25_25 | |
# eax = sign bit of x only.
and eax, -2147483648 | |
test ecx, ecx | |
jle .LBB25_28 | |
.LBB25_27: | |
# Normal result: drop the implicit 1, then pack exponent, mantissa and sign.
add edx, -8388608 | |
shl ecx, 23 | |
or ecx, edx | |
or ecx, eax | |
vmovd xmm0, ecx | |
ret | |
.LBB25_18: | |
# Remainder is exactly zero: return x * 0.
vpxor xmm1, xmm1, xmm1 | |
vmulss xmm0, xmm0, xmm1 | |
ret | |
.LBB25_24: | |
mov edx, esi | |
and eax, -2147483648 | |
test ecx, ecx | |
jg .LBB25_27 | |
.LBB25_28: | |
# Subnormal result: shift the mantissa right by (1 - exponent) and add sign.
mov sil, 1 | |
sub sil, cl | |
shrx ecx, edx, esi | |
or ecx, eax | |
vmovd xmm0, ecx | |
ret | |
.Lfunc_end25: | |
.size fmodf, .Lfunc_end25-fmodf | |
.cfi_endproc | |
# __math_invalidf
# Returns (x - x) / (x - x) for the float x in xmm0. For finite x this is
# 0 / 0, which yields NaN.
# In:  xmm0   Out: xmm0
.section .text.__math_invalidf,"ax",@progbits | |
.p2align 4, 0x90 | |
.type __math_invalidf,@function | |
__math_invalidf: | |
.Lfunc_begin26: | |
.cfi_startproc | |
vsubss xmm0, xmm0, xmm0 | |
vdivss xmm0, xmm0, xmm0 | |
ret | |
.Lfunc_end26: | |
.size __math_invalidf, .Lfunc_end26-__math_invalidf | |
.cfi_endproc | |
# Constant pools for powf.
# Single precision:
#   .LCPI27_0  = 0x3f800000 = 1.0
#   .LCPI27_1  = 0x80000000 = sign-bit mask
#   .LCPI27_2  = 0x4b000000 = 2^23   (scales subnormal x into the normal range)
#   .LCPI27_12 = 0x10000000 = 2^-95  (second factor of the underflow product)
#   .LCPI27_20 = 0x70000000 = 2^97   (second factor of the overflow product)
.section .rodata.cst4,"aM",@progbits,4 | |
.p2align 2, 0x0 | |
.LCPI27_0: | |
.long 0x3f800000 | |
.LCPI27_1: | |
.long 0x80000000 | |
.LCPI27_2: | |
.long 0x4b000000 | |
.LCPI27_12: | |
.long 0x10000000 | |
.LCPI27_20: | |
.long 0x70000000 | |
# Double precision / 8-byte entries:
#   .LCPI27_3         = -1.0
#   .LCPI27_4 .. _8   = polynomial coefficients of the logarithm step
#   .LCPI27_9         ~ 128 (just below), upper bound checked at .LBB27_25
#   .LCPI27_10        = -150.0, lower bound checked at .LBB27_27
#   .LCPI27_11        = float pair {-2^-95, +2^-95}, indexed by (sign == 0)
#   .LCPI27_13 / _14  = +/- 0x42e8000000000000, the round-to-integer shift
#   .LCPI27_15 .. _18 = polynomial coefficients of the exponential step
#   .LCPI27_19        = float pair {-2^97, +2^97}, indexed by (sign == 0)
.section .rodata.cst8,"aM",@progbits,8 | |
.p2align 3, 0x0 | |
.LCPI27_3: | |
.quad 0xbff0000000000000 | |
.LCPI27_4: | |
.quad 0x3fd27616c9496e0b | |
.LCPI27_5: | |
.quad 0xbfd71969a075c67a | |
.LCPI27_6: | |
.quad 0x3fdec70a6ca7badd | |
.LCPI27_7: | |
.quad 0xbfe7154748bef6c8 | |
.LCPI27_8: | |
.quad 0x3ff71547652ab82b | |
.LCPI27_9: | |
.quad 0x405fffffffd1d571 | |
.LCPI27_10: | |
.quad 0xc062c00000000000 | |
.LCPI27_11: | |
.long 0x90000000 | |
.long 0x10000000 | |
.LCPI27_13: | |
.quad 0x42e8000000000000 | |
.LCPI27_14: | |
.quad 0xc2e8000000000000 | |
.LCPI27_15: | |
.quad 0x3fac6af84b912394 | |
.LCPI27_16: | |
.quad 0x3fcebfce50fac4f3 | |
.LCPI27_17: | |
.quad 0x3fe62e42ff0c52d6 | |
.LCPI27_18: | |
.quad 0x3ff0000000000000 | |
.LCPI27_19: | |
.long 0xf0000000 | |
.long 0x70000000 | |
# powf
# Computes x raised to the power y (x in xmm0, y in xmm1) in three stages: a
# table-driven logarithm of x in double precision (__powf_log2_data), a
# multiply by y, and a table-driven exponential (__exp2f_data).
# In:  xmm0 = x, xmm1 = y   Out: xmm0
# Scratch: rax, rcx, rdx, rsi, rdi, xmm1-xmm6, k1, k2, flags, 12 bytes below rsp
# Register roles on the main path:
#   edx = bits of x (possibly adjusted), eax = bits of y,
#   ecx = 0 or 0x10000; it is added to the integer that is shifted left by 47
#         at .LBB27_29, so 0x10000 lands on bit 63 and negates the result.
.section .text.powf,"ax",@progbits | |
.p2align 4, 0x90 | |
.type powf,@function | |
powf: | |
.Lfunc_begin27: | |
.cfi_startproc | |
vmovd edx, xmm0 | |
vmovd eax, xmm1 | |
# Unsigned range trick: the branch is taken unless x is a positive normal
# number (bits in 0x00800000 .. 0x7F7FFFFF).
lea ecx, [rdx - 2139095040] | |
cmp ecx, -2130706432 | |
jb .LBB27_2 | |
# Second screen, on y: taken when y is zero, infinity or NaN.
lea esi, [rax + rax + 16777216] | |
xor ecx, ecx | |
cmp esi, 16777216 | |
jbe .LBB27_2 | |
.LBB27_24: | |
# Logarithm step. tmp = bits(x) - 0x3F330000. esi = ((tmp >> 19) & 15) * 16 is
# the byte offset of a 16-byte table entry; the top 9 bits of tmp are
# subtracted from x to scale it near 1; eax = tmp >> 23 (arithmetic) is the
# integer part of the logarithm.
lea eax, [rdx - 1060306944] | |
mov esi, eax | |
mov edi, eax | |
shr esi, 19 | |
and edi, -8388608 | |
sar eax, 23 | |
sub edx, edi | |
shl esi, 4 | |
lea rdi, [rip + __powf_log2_data] | |
vmovd xmm0, edx | |
movzx esi, sil | |
# rdx = 0x7FFF800000000000, mask applied to the product's bits below.
movabs rdx, 9223231299366420480 | |
# r = z * entry[0] - 1.0, followed by a polynomial in r; entry[1] plus the
# integer part forms the leading term.
vcvtss2sd xmm0, xmm0, xmm0 | |
vmulsd xmm0, xmm0, qword ptr [rsi + rdi] | |
vaddsd xmm0, xmm0, qword ptr [rip + .LCPI27_3] | |
vmulsd xmm5, xmm0, qword ptr [rip + .LCPI27_6] | |
vmulsd xmm4, xmm0, qword ptr [rip + .LCPI27_4] | |
vaddsd xmm5, xmm5, qword ptr [rip + .LCPI27_7] | |
vaddsd xmm4, xmm4, qword ptr [rip + .LCPI27_5] | |
vcvtsi2sd xmm2, xmm2, eax | |
vmulsd xmm3, xmm0, xmm0 | |
vaddsd xmm2, xmm2, qword ptr [rsi + rdi + 8] | |
vmulsd xmm0, xmm0, qword ptr [rip + .LCPI27_8] | |
# rsi = 0x405F000000000001, threshold for the masked product bits.
movabs rsi, 4638426141214900225 | |
vmulsd xmm6, xmm3, xmm3 | |
vmulsd xmm3, xmm3, xmm5 | |
vmulsd xmm4, xmm4, xmm6 | |
vaddsd xmm0, xmm2, xmm0 | |
vaddsd xmm0, xmm0, xmm3 | |
vaddsd xmm0, xmm4, xmm0 | |
# xmm0 = y * log(x). A large-magnitude product goes to the range checks at
# .LBB27_25 before the exponential step.
vcvtss2sd xmm1, xmm1, xmm1 | |
vmulsd xmm0, xmm0, xmm1 | |
vmovq rax, xmm0 | |
and rdx, rax | |
cmp rdx, rsi | |
jae .LBB27_25 | |
.LBB27_29: | |
# Exponential step: round the product to an integer via the add/subtract of
# the shift constant; the low 5 bits pick an 8-byte table entry; the integer
# (plus ecx) shifted left by 47 is added to that entry's bits to form the
# scale; the remainder r feeds a polynomial.
vaddsd xmm1, xmm0, qword ptr [rip + .LCPI27_13] | |
lea rdx, [rip + __exp2f_data] | |
vmovq rax, xmm1 | |
vaddsd xmm1, xmm1, qword ptr [rip + .LCPI27_14] | |
add ecx, eax | |
and eax, 31 | |
shl rcx, 47 | |
add rcx, qword ptr [rdx + 8*rax] | |
vsubsd xmm0, xmm0, xmm1 | |
vmulsd xmm2, xmm0, qword ptr [rip + .LCPI27_15] | |
vmovq xmm1, rcx | |
vmulsd xmm3, xmm0, xmm0 | |
vaddsd xmm2, xmm2, qword ptr [rip + .LCPI27_16] | |
vmulsd xmm0, xmm0, qword ptr [rip + .LCPI27_17] | |
vaddsd xmm0, xmm0, qword ptr [rip + .LCPI27_18] | |
vmulsd xmm2, xmm3, xmm2 | |
vaddsd xmm0, xmm0, xmm2 | |
vmulsd xmm0, xmm0, xmm1 | |
vcvtsd2ss xmm0, xmm0, xmm0 | |
.LBB27_30: | |
ret | |
.LBB27_2: | |
# Special-case dispatch. ecx = bits(y) << 1. The first compare is true when y
# is zero, infinity or NaN; the second when x is zero, infinity or NaN.
lea ecx, [rax + rax] | |
lea esi, [rcx - 1] | |
cmp esi, -16777217 | |
jae .LBB27_3 | |
lea ecx, [rdx + rdx - 1] | |
cmp ecx, -16777217 | |
jae .LBB27_10 | |
# Finite non-zero x. Negative x needs the integer test on y at .LBB27_16.
xor ecx, ecx | |
test edx, edx | |
js .LBB27_16 | |
# Positive normal x goes straight to the main path.
cmp edx, 8388607 | |
ja .LBB27_24 | |
.LBB27_23: | |
# Subnormal x: multiply by 2^23, strip the sign, and subtract 23 << 23 from
# the bits to compensate for the scaling in the exponent.
vmulss xmm0, xmm0, dword ptr [rip + .LCPI27_2] | |
vmovd edx, xmm0 | |
and edx, 2147483647 | |
add edx, -192937984 | |
jmp .LBB27_24 | |
.LBB27_25: | |
# Product above the upper bound: return (+/-2^97) * 2^97, the sign chosen by
# whether ecx is non-zero.
vucomisd xmm0, qword ptr [rip + .LCPI27_9] | |
jbe .LBB27_27 | |
xor eax, eax | |
test ecx, ecx | |
lea rcx, [rip + .LCPI27_19] | |
sete al | |
vmovss xmm0, dword ptr [rcx + 4*rax] | |
vmovss dword ptr [rsp - 8], xmm0 | |
vmovss xmm0, dword ptr [rsp - 8] | |
vmulss xmm0, xmm0, dword ptr [rip + .LCPI27_20] | |
ret | |
.LBB27_16: | |
# Negative x: classify y by its exponent field. Below 127 (|y| < 1) y cannot
# be an integer here, so the result is invalid; above 150 y is an even
# integer; in between the mantissa bits are inspected at .LBB27_18.
mov ecx, 2071 | |
bextr ecx, eax, ecx | |
cmp ecx, 127 | |
jb .LBB27_31 | |
cmp ecx, 150 | |
jbe .LBB27_18 | |
.LBB27_20: | |
xor ecx, ecx | |
.LBB27_21: | |
# Continue with |x|; ecx already carries the result-sign flag.
vmovd edx, xmm0 | |
and edx, 2147483647 | |
cmp edx, 8388607 | |
ja .LBB27_24 | |
jmp .LBB27_23 | |
.LBB27_27: | |
# Product below the lower bound: return (+/-2^-95) * 2^-95. Otherwise resume
# the exponential step.
vmovsd xmm1, qword ptr [rip + .LCPI27_10] | |
vucomisd xmm1, xmm0 | |
jb .LBB27_29 | |
xor eax, eax | |
test ecx, ecx | |
lea rcx, [rip + .LCPI27_11] | |
sete al | |
vmovss xmm0, dword ptr [rcx + 4*rax] | |
vmovss dword ptr [rsp - 4], xmm0 | |
vmovss xmm0, dword ptr [rsp - 4] | |
vmulss xmm0, xmm0, dword ptr [rip + .LCPI27_12] | |
ret | |
.LBB27_18: | |
# dl = 150 - exponent field (the byte -106 equals 150 mod 256) is the number
# of fraction bits in y. bzhi extracts them and sets ZF when they are all
# zero, i.e. when y is an integer.
mov dl, -106 | |
sub dl, cl | |
bzhi ecx, eax, edx | |
je .LBB27_19 | |
.LBB27_31: | |
# Negative x with non-integer y: return (x - x) / (x - x).
vsubss xmm0, xmm0, xmm0 | |
vdivss xmm0, xmm0, xmm0 | |
ret | |
.LBB27_19: | |
# y is an integer: test its units bit. Odd y keeps ecx = 0x10000 (negative
# result); even y clears it at .LBB27_20.
mov ecx, 1 | |
shlx edx, ecx, edx | |
mov ecx, 65536 | |
test edx, eax | |
jne .LBB27_21 | |
jmp .LBB27_20 | |
.LBB27_3: | |
# y is zero, infinity or NaN. Return 1.0 when y is +/-0 or x is exactly 1.0.
vmovss xmm2, dword ptr [rip + .LCPI27_0] | |
test ecx, ecx | |
sete sil | |
cmp edx, 1065353216 | |
sete dil | |
or dil, sil | |
je .LBB27_5 | |
vmovaps xmm0, xmm2 | |
ret | |
.LBB27_10: | |
# x is zero, infinity or NaN. Start from x * x; negate it when x is negative
# and y is an odd integer; take the reciprocal when y is negative.
vmulss xmm0, xmm0, xmm0 | |
test edx, edx | |
jns .LBB27_13 | |
# Skip the parity test unless y's exponent field is in 127..150.
mov ecx, 2071 | |
bextr ecx, eax, ecx | |
lea edx, [rcx - 151] | |
cmp edx, -24 | |
jb .LBB27_13 | |
# xmm1 = -(x * x). cl = y has fraction bits (not an integer); dl = y's units
# bit is clear (even). Either mask restores the positive value. setne reads
# the ZF written by bzhi; the intervening movzx leaves flags alone.
vxorps xmm1, xmm0, dword ptr [rip + .LCPI27_1]{1to4} | |
mov dl, -106 | |
sub dl, cl | |
bzhi ecx, eax, edx | |
movzx edx, dl | |
setne cl | |
bt eax, edx | |
setae dl | |
kmovd k1, ecx | |
kmovd k2, edx | |
vmovss xmm1 {k2}, xmm1, xmm0 | |
vmovss xmm1 {k1}, xmm1, xmm0 | |
vmovaps xmm0, xmm1 | |
.LBB27_13: | |
test eax, eax | |
jns .LBB27_30 | |
# Negative y: return 1.0 / value, passed through a stack slot.
vmovss xmm1, dword ptr [rip + .LCPI27_0] | |
vdivss xmm0, xmm1, xmm0 | |
vmovss dword ptr [rsp - 12], xmm0 | |
vmovss xmm0, dword ptr [rsp - 12] | |
ret | |
.LBB27_5: | |
# y is infinity or NaN and x != 1. If either operand is NaN (doubled bits
# above 0xFF000000) return x + y.
add edx, edx | |
cmp edx, -16777215 | |
setae sil | |
cmp ecx, -16777215 | |
setae cl | |
or cl, sil | |
cmp cl, 1 | |
jne .LBB27_7 | |
vaddss xmm0, xmm0, xmm1 | |
ret | |
.LBB27_7: | |
# y is +/-infinity. |x| == 1 returns 1.0. Otherwise cl = (|x| > 1) and
# al = (y >= 0); when they differ the result is 0.0, else y * y. setae reuses
# the flags of the cmp above because the je in between does not change them.
vmovaps xmm0, xmm2 | |
cmp edx, 2130706432 | |
je .LBB27_30 | |
setae cl | |
test eax, eax | |
vmulss xmm0, xmm1, xmm1 | |
vxorps xmm1, xmm1, xmm1 | |
setns al | |
xor al, cl | |
kmovd k1, eax | |
vmovss xmm0 {k1}, xmm0, xmm1 | |
ret | |
.Lfunc_end27: | |
.size powf, .Lfunc_end27-powf | |
.cfi_endproc | |
# Constant pool for roundf (single-precision bit patterns):
#   .LCPI28_0 = 0x7fffffff  absolute-value mask
#   .LCPI28_1 = 0x4b000000  = +2^23      .LCPI28_2 = 0xcb000000 = -2^23
#   .LCPI28_3 = 0x3f000000  = +0.5       .LCPI28_4 = 0xbf000000 = -0.5
#   .LCPI28_5 = 0x3f800000  = +1.0       .LCPI28_6 = 0xbf800000 = -1.0
#   .LCPI28_7 = 0x80000000  sign-bit mask
.section .rodata.cst4,"aM",@progbits,4 | |
.p2align 2, 0x0 | |
.LCPI28_0: | |
.long 0x7fffffff | |
.LCPI28_1: | |
.long 0x4b000000 | |
.LCPI28_2: | |
.long 0xcb000000 | |
.LCPI28_3: | |
.long 0x3f000000 | |
.LCPI28_4: | |
.long 0xbf000000 | |
.LCPI28_5: | |
.long 0x3f800000 | |
.LCPI28_6: | |
.long 0xbf800000 | |
.LCPI28_7: | |
.long 0x80000000 | |
# roundf
# Rounds the float in xmm0 to the nearest integral value, with halfway cases
# going away from zero. It works on |x| and restores the sign at the end.
# In:  xmm0   Out: xmm0
# Scratch: eax, ecx, xmm1, xmm2, k1, flags, 4 bytes below rsp
.section .text.roundf,"ax",@progbits | |
.p2align 4, 0x90 | |
.type roundf,@function | |
roundf: | |
.Lfunc_begin28: | |
.cfi_startproc | |
# eax = raw bits; ecx = exponent field (BEXTR control 0x0817: start 23, len 8).
vmovd eax, xmm0 | |
mov ecx, 2071 | |
bextr ecx, eax, ecx | |
# Exponent field > 149: already integral (or inf/NaN); return x unchanged.
cmp ecx, 149 | |
ja .LBB28_8 | |
# xmm1 = |x|; xmm2 = |x| + 2^23, which pushes the fraction bits out of the
# mantissa.
vpandd xmm1, xmm0, dword ptr [rip + .LCPI28_0]{1to4} | |
vaddss xmm2, xmm1, dword ptr [rip + .LCPI28_1] | |
cmp ecx, 125 | |
ja .LBB28_3 | |
# Exponent field <= 125 (|x| < 0.5): return x * 0, a zero carrying x's sign.
# The sum in xmm2 is only stored, never used as a value.
vxorps xmm1, xmm1, xmm1 | |
vmovss dword ptr [rsp - 4], xmm2 | |
vmulss xmm0, xmm0, xmm1 | |
ret | |
.LBB28_3: | |
# y = (|x| + 2^23) - 2^23 - |x|: the distance from |x| to the integer that the
# addition rounded it to.
vaddss xmm0, xmm2, dword ptr [rip + .LCPI28_2] | |
vsubss xmm0, xmm0, xmm1 | |
vucomiss xmm0, dword ptr [rip + .LCPI28_3] | |
jbe .LBB28_5 | |
# y > 0.5: result = |x| + y - 1.0.
vaddss xmm0, xmm1, xmm0 | |
vaddss xmm0, xmm0, dword ptr [rip + .LCPI28_6] | |
jmp .LBB28_7 | |
.LBB28_5: | |
# Result = |x| + y, plus 1.0 when y <= -0.5. The jb reads the flags of the
# vucomiss; the vaddss in between does not modify them.
vmovss xmm2, dword ptr [rip + .LCPI28_4] | |
vucomiss xmm2, xmm0 | |
vaddss xmm0, xmm1, xmm0 | |
jb .LBB28_7 | |
vaddss xmm0, xmm0, dword ptr [rip + .LCPI28_5] | |
.LBB28_7: | |
# Restore the sign: a negative input selects the negated result.
vxorps xmm1, xmm0, dword ptr [rip + .LCPI28_7]{1to4} | |
test eax, eax | |
sets al | |
kmovd k1, eax | |
vmovss xmm0 {k1}, xmm0, xmm1 | |
.LBB28_8: | |
ret | |
.Lfunc_end28: | |
.size roundf, .Lfunc_end28-roundf | |
.cfi_endproc | |
# __unnamed_1: NUL-terminated library name string (28 characters + NUL = 29
# bytes).
.type __unnamed_1,@object | |
.section .rodata.__unnamed_1,"a",@progbits | |
__unnamed_1: | |
.asciz "mmt3d_kernel_linked_llvm_cpu" | |
.size __unnamed_1, 29 | |
# iree_hal_executable_library_query_v0_header: 24-byte record laid out as
#   +0  .long 4        NOTE(review): presumably a version number; it matches the
#                      value iree_hal_executable_library_query compares against
#                      -- confirm
#   +4  4 zero bytes   keeps the following pointer 8-byte aligned
#   +8  .quad          pointer to the name string above
#   +16 two .long 0    NOTE(review): meaning not visible in this listing
.type iree_hal_executable_library_query_v0_header,@object | |
.section .data.rel.ro.iree_hal_executable_library_query_v0_header,"aw",@progbits | |
.p2align 4, 0x0 | |
iree_hal_executable_library_query_v0_header: | |
.long 4 | |
.zero 4 | |
.quad __unnamed_1 | |
.long 0 | |
.long 0 | |
.size iree_hal_executable_library_query_v0_header, 24 | |
# iree_hal_executable_library_query_v0_funcs: table of four 8-byte function
# pointers (32 bytes), one per dispatch entry point, in dispatch order 0..3.
# The order matches the name table iree_hal_executable_library_query_v0_names.
.type iree_hal_executable_library_query_v0_funcs,@object | |
.section .data.rel.ro.iree_hal_executable_library_query_v0_funcs,"aw",@progbits | |
.p2align 4, 0x0 | |
iree_hal_executable_library_query_v0_funcs: | |
.quad turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32 | |
.quad turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack | |
.quad turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32 | |
.quad turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32 | |
.size iree_hal_executable_library_query_v0_funcs, 32 | |
# iree_hal_executable_library_query_v0_attrs: 16 bytes of zeros, 8-byte aligned.
# NOTE(review): presumably one 4-byte attribute record per dispatch function
# (4 functions x 4 bytes) -- confirm against the library ABI header.
.type iree_hal_executable_library_query_v0_attrs,@object | |
.section .rodata.iree_hal_executable_library_query_v0_attrs,"a",@progbits | |
.p2align 3, 0x0 | |
iree_hal_executable_library_query_v0_attrs: | |
.zero 16 | |
.size iree_hal_executable_library_query_v0_attrs, 16 | |
# __unnamed_2 .. __unnamed_5: NUL-terminated name strings of the four dispatch
# entry points, each in its own read-only section.
.type __unnamed_2,@object | |
.section .rodata.__unnamed_2,"a",@progbits | |
__unnamed_2: | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32" | |
.size __unnamed_2, 58 | |
.type __unnamed_3,@object | |
.section .rodata.__unnamed_3,"a",@progbits | |
__unnamed_3: | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack" | |
.size __unnamed_3, 78 | |
.type __unnamed_4,@object | |
.section .rodata.__unnamed_4,"a",@progbits | |
__unnamed_4: | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32" | |
.size __unnamed_4, 94 | |
.type __unnamed_5,@object | |
.section .rodata.__unnamed_5,"a",@progbits | |
__unnamed_5: | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32" | |
.size __unnamed_5, 60 | |
# iree_hal_executable_library_query_v0_names: table of four 8-byte pointers
# (32 bytes) to the strings above, in dispatch order 0..3.
.type iree_hal_executable_library_query_v0_names,@object | |
.section .data.rel.ro.iree_hal_executable_library_query_v0_names,"aw",@progbits | |
.p2align 4, 0x0 | |
iree_hal_executable_library_query_v0_names: | |
.quad __unnamed_2 | |
.quad __unnamed_3 | |
.quad __unnamed_4 | |
.quad __unnamed_5 | |
.size iree_hal_executable_library_query_v0_names, 32 | |
# __unnamed_6 .. __unnamed_9: four separate copies of the same NUL-terminated
# source file name (17 characters + NUL = 18 bytes each).
.type __unnamed_6,@object | |
.section .rodata.__unnamed_6,"a",@progbits | |
__unnamed_6: | |
.asciz "mmt3d_kernel.mlir" | |
.size __unnamed_6, 18 | |
.type __unnamed_7,@object | |
.section .rodata.__unnamed_7,"a",@progbits | |
__unnamed_7: | |
.asciz "mmt3d_kernel.mlir" | |
.size __unnamed_7, 18 | |
.type __unnamed_8,@object | |
.section .rodata.__unnamed_8,"a",@progbits | |
__unnamed_8: | |
.asciz "mmt3d_kernel.mlir" | |
.size __unnamed_8, 18 | |
.type __unnamed_9,@object | |
.section .rodata.__unnamed_9,"a",@progbits | |
__unnamed_9: | |
.asciz "mmt3d_kernel.mlir" | |
.size __unnamed_9, 18 | |
# iree_hal_executable_library_query_v0_source_locations: four 16-byte records
# (64 bytes), each laid out as { .long 4, .long 17, .quad string pointer }.
# The 17 equals the length of the file name without its NUL terminator.
# NOTE(review): the leading 4 is presumably a source line number -- confirm
# against the library ABI header.
.type iree_hal_executable_library_query_v0_source_locations,@object | |
.section .data.rel.ro.iree_hal_executable_library_query_v0_source_locations,"aw",@progbits | |
.p2align 4, 0x0 | |
iree_hal_executable_library_query_v0_source_locations: | |
.long 4 | |
.long 17 | |
.quad __unnamed_6 | |
.long 4 | |
.long 17 | |
.quad __unnamed_7 | |
.long 4 | |
.long 17 | |
.quad __unnamed_8 | |
.long 4 | |
.long 17 | |
.quad __unnamed_9 | |
.size iree_hal_executable_library_query_v0_source_locations, 64 | |
# Stage tables for dispatch_0_pack_f32. Both symbols below are declared with
# size 0 and emit no data bytes; only their 8-byte-aligned addresses are used,
# by record 0 of iree_hal_executable_library_query_v0_stage_location_tables.

# Stage-name table for dispatch 0 (empty).
.type iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32_stage_names,@object | |
.section .rodata.iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32_stage_names,"a",@progbits | |
.p2align 3, 0x0 | |
iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32_stage_names: | |
.size iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32_stage_names, 0 | |
# Stage source-location table for dispatch 0 (empty).
.type iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32_stage_source_locations,@object | |
.section .rodata.iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32_stage_source_locations,"a",@progbits | |
.p2align 3, 0x0 | |
iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32_stage_source_locations: | |
.size iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32_stage_source_locations, 0 | |
# Stage tables for dispatch_1_generic_Dx8640x3200_f16_pack. Both symbols below
# are declared with size 0 and emit no data bytes; only their 8-byte-aligned
# addresses are used, by record 1 of
# iree_hal_executable_library_query_v0_stage_location_tables.

# Stage-name table for dispatch 1 (empty).
.type iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack_stage_names,@object | |
.section .rodata.iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack_stage_names,"a",@progbits | |
.p2align 3, 0x0 | |
iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack_stage_names: | |
.size iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack_stage_names, 0 | |
# Stage source-location table for dispatch 1 (empty).
.type iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack_stage_source_locations,@object | |
.section .rodata.iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack_stage_source_locations,"a",@progbits | |
.p2align 3, 0x0 | |
iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack_stage_source_locations: | |
.size iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack_stage_source_locations, 0 | |
# Stage tables for dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32.
# Both symbols below are declared with size 0 and emit no data bytes; only
# their 8-byte-aligned addresses are used, by record 2 of
# iree_hal_executable_library_query_v0_stage_location_tables.

# Stage-name table for dispatch 2 (empty).
.type iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32_stage_names,@object | |
.section .rodata.iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32_stage_names,"a",@progbits | |
.p2align 3, 0x0 | |
iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32_stage_names: | |
.size iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32_stage_names, 0 | |
# Stage source-location table for dispatch 2 (empty).
.type iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32_stage_source_locations,@object | |
.section .rodata.iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32_stage_source_locations,"a",@progbits | |
.p2align 3, 0x0 | |
iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32_stage_source_locations: | |
.size iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32_stage_source_locations, 0 | |
# Stage tables for dispatch_3_unpack_f32. Both symbols below are declared with
# size 0 and emit no data bytes; only their 8-byte-aligned addresses are used,
# by record 3 of iree_hal_executable_library_query_v0_stage_location_tables.

# Stage-name table for dispatch 3 (empty).
.type iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32_stage_names,@object | |
.section .rodata.iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32_stage_names,"a",@progbits | |
.p2align 3, 0x0 | |
iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32_stage_names: | |
.size iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32_stage_names, 0 | |
# Stage source-location table for dispatch 3 (empty).
.type iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32_stage_source_locations,@object | |
.section .rodata.iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32_stage_source_locations,"a",@progbits | |
.p2align 3, 0x0 | |
iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32_stage_source_locations: | |
.size iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32_stage_source_locations, 0 | |
# iree_hal_executable_library_query_v0_stage_location_tables: table of four
# 24-byte records (96 bytes total, 16-byte aligned), one per dispatch 0..3.
# Each record is:
#   .long 0     32-bit value, zero in every record.
#               NOTE(review): presumably the number of stages for the dispatch
#               (the tables it points at are all size 0) - confirm against
#               IREE's executable library header.
#   .zero 4     padding so the following pointers sit on an 8-byte boundary.
#   .quad sym   address of that dispatch's *_stage_names symbol.
#   .quad sym   address of that dispatch's *_stage_source_locations symbol.
# The table's own address is stored in iree_hal_executable_library_query_v0.
.type iree_hal_executable_library_query_v0_stage_location_tables,@object | |
.section .data.rel.ro.iree_hal_executable_library_query_v0_stage_location_tables,"aw",@progbits | |
.p2align 4, 0x0 | |
iree_hal_executable_library_query_v0_stage_location_tables: | |
# Record 0: dispatch_0_pack_f32
.long 0 | |
.zero 4 | |
.quad iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32_stage_names | |
.quad iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32_stage_source_locations | |
# Record 1: dispatch_1_generic_Dx8640x3200_f16_pack
.long 0 | |
.zero 4 | |
.quad iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack_stage_names | |
.quad iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack_stage_source_locations | |
# Record 2: dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32
.long 0 | |
.zero 4 | |
.quad iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32_stage_names | |
.quad iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32_stage_source_locations | |
# Record 3: dispatch_3_unpack_f32
.long 0 | |
.zero 4 | |
.quad iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32_stage_names | |
.quad iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32_stage_source_locations | |
.size iree_hal_executable_library_query_v0_stage_location_tables, 96 | |
# iree_hal_executable_library_query_v0: 104-byte, 16-byte-aligned descriptor
# object that ties together the per-library tables in this file. Byte offsets
# are given on each field below.
# NOTE(review): the field meanings are not visible in this file; the layout
# presumably mirrors the library struct in IREE's executable_library.h
# (header, imports, exports, constants, sources) - confirm there before
# relying on the hedged names below.
.type iree_hal_executable_library_query_v0,@object | |
.section .data.rel.ro.iree_hal_executable_library_query_v0,"aw",@progbits | |
.p2align 4, 0x0 | |
iree_hal_executable_library_query_v0: | |
# +0: address of the _header object (defined outside this part of the file).
.quad iree_hal_executable_library_query_v0_header | |
# +8: 16 zero bytes. NOTE(review): presumably an empty import table - confirm.
.zero 16 | |
# +24: 32-bit value 4, then 4 bytes of padding. The value matches the four
# entries in each of the tables referenced below; presumably the export count.
.long 4 | |
.zero 4 | |
# +32: address of the _funcs table (defined outside this part of the file).
.quad iree_hal_executable_library_query_v0_funcs | |
# +40: address of the 16-byte, all-zero _attrs object.
.quad iree_hal_executable_library_query_v0_attrs | |
# +48: address of the table of four name-string pointers.
.quad iree_hal_executable_library_query_v0_names | |
# +56: null pointer. NOTE(review): presumably an optional per-export table
# that is absent in this build - confirm which field this is.
.quad 0 | |
# +64: address of the table of four source-location records.
.quad iree_hal_executable_library_query_v0_source_locations | |
# +72: address of the table of four stage-location records.
.quad iree_hal_executable_library_query_v0_stage_location_tables | |
# +80: 4 + 4 zero bytes. NOTE(review): presumably a zero 32-bit count plus
# alignment padding - confirm.
.zero 4 | |
.zero 4 | |
# +88: 16 trailing zero bytes. NOTE(review): presumably an empty count/pointer
# pair - confirm.
.zero 16 | |
.size iree_hal_executable_library_query_v0, 104 | |
# __powf_log2_data: read-only table of 37 eight-byte values (296 bytes,
# 8-byte aligned), each written as a raw 64-bit hexadecimal pattern.
# NOTE(review): the symbol name and entry count match the powf log2 lookup
# data found in musl / ARM optimized-routines (16 {invc, logc} pairs of
# IEEE-754 doubles followed by 5 polynomial coefficients). That source is not
# part of this file - confirm before relying on the split marked below.
.type __powf_log2_data,@object | |
.section .rodata.__powf_log2_data,"a",@progbits | |
.p2align 3, 0x0 | |
__powf_log2_data: | |
# Entries 0..31: presumably 16 two-value table rows (TODO confirm).
# The row at entries 18..19 is 0x3ff0000000000000 (the binary64 bit pattern
# of 1.0) followed by 0.
.quad 0x3ff661ec79f8f3be | |
.quad 0xbfdefec65b963019 | |
.quad 0x3ff571ed4aaf883d | |
.quad 0xbfdb0b6832d4fca4 | |
.quad 0x3ff49539f0f010b0 | |
.quad 0xbfd7418b0a1fb77b | |
.quad 0x3ff3c995b0b80385 | |
.quad 0xbfd39de91a6dcf7b | |
.quad 0x3ff30d190c8864a5 | |
.quad 0xbfd01d9bf3f2b631 | |
.quad 0x3ff25e227b0b8ea0 | |
.quad 0xbfc97c1d1b3b7af0 | |
.quad 0x3ff1bb4a4a1a343f | |
.quad 0xbfc2f9e393af3c9f | |
.quad 0x3ff12358f08ae5ba | |
.quad 0xbfb960cbbf788d5c | |
.quad 0x3ff0953f419900a7 | |
.quad 0xbfaa6f9db6475fce | |
.quad 0x3ff0000000000000 | |
.quad 0x0000000000000000 | |
.quad 0x3fee608cfd9a47ac | |
.quad 0x3fb338ca9f24f53d | |
.quad 0x3feca4b31f026aa0 | |
.quad 0x3fc476a9543891ba | |
.quad 0x3feb2036576afce6 | |
.quad 0x3fce840b4ac4e4d2 | |
.quad 0x3fe9c2d163a1aa2d | |
.quad 0x3fd40645f0c6651c | |
.quad 0x3fe886e6037841ed | |
.quad 0x3fd88e9c2c1b9ff8 | |
.quad 0x3fe767dcf5534862 | |
.quad 0x3fdce0a44eb17bcc | |
# Entries 32..36: presumably the 5 polynomial coefficients (TODO confirm).
.quad 0x3fd27616c9496e0b | |
.quad 0xbfd71969a075c67a | |
.quad 0x3fdec70a6ca7badd | |
.quad 0xbfe7154748bef6c8 | |
.quad 0x3ff71547652ab82b | |
.size __powf_log2_data, 296 | |
# __exp2f_data: read-only table of 41 eight-byte values (328 bytes, 8-byte
# aligned). The first 32 entries are printed as decimal integers and the last
# 9 as hexadecimal patterns.
# NOTE(review): the symbol name and entry count match the exp2f lookup data
# found in musl / ARM optimized-routines (a 32-entry integer table followed by
# 9 double-precision constants: a scaled shift, 3 polynomial coefficients, a
# shift, a scaled 1/ln2, and 3 scaled polynomial coefficients). That source is
# not part of this file - confirm before relying on the split marked below.
.type __exp2f_data,@object | |
.section .rodata.__exp2f_data,"a",@progbits | |
.p2align 3, 0x0 | |
__exp2f_data: | |
# Entries 0..31: presumably the 32-entry lookup table (TODO confirm).
# Entry 0, 4607182418800017408, equals 0x3ff0000000000000, the binary64 bit
# pattern of 1.0.
.quad 4607182418800017408 | |
.quad 4607140297302181236 | |
.quad 4607100335213349135 | |
.quad 4607062579818421073 | |
.quad 4607027079437701499 | |
.quad 4606993883449571754 | |
.quad 4606963042313658936 | |
.quad 4606934607594512097 | |
.quad 4606908631985796885 | |
.quad 4606885169335019979 | |
.quad 4606864274668794914 | |
.quad 4606846004218661165 | |
.quad 4606830415447468583 | |
.quad 4606817567076339586 | |
.quad 4606807519112221737 | |
.quad 4606800332876043653 | |
.quad 4606796071031487437 | |
.quad 4606794797614391156 | |
.quad 4606796578062795143 | |
.quad 4606801479247646227 | |
.quad 4606809569504174299 | |
.quad 4606820918663955941 | |
.quad 4606835598087680144 | |
.quad 4606853680698631517 | |
.quad 4606875241016906669 | |
.quad 4606900355194379847 | |
.quad 4606929101050434204 | |
.quad 4606961558108475497 | |
.quad 4606997807633245319 | |
.quad 4607037932668951391 | |
.quad 4607082018078232794 | |
.quad 4607130150581978432 | |
# Entries 32..40: presumably the 9 double-precision constants (TODO confirm).
.quad 0x42e8000000000000 | |
.quad 0x3fac6af84b912394 | |
.quad 0x3fcebfce50fac4f3 | |
.quad 0x3fe62e42ff0c52d6 | |
.quad 0x4338000000000000 | |
.quad 0x40471547652b82fe | |
.quad 0x3ebc6af84b912394 | |
.quad 0x3f2ebfce50fac4f3 | |
.quad 0x3f962e42ff0c52d6 | |
.size __exp2f_data, 328 | |
# .debug_abbrev: DWARF abbreviation table referenced by every compile unit in
# .debug_info. Each abbreviation is encoded as: abbreviation code, DIE tag,
# has-children flag, then (attribute, form) byte pairs terminated by a 0,0
# pair. A final single 0 byte ends the table. Constant names below are the
# standard DWARF names for the numeric values.
.section .debug_abbrev,"",@progbits | |
# --- Abbreviation 1: tag 17 = DW_TAG_compile_unit, has children ---
.byte 1 | |
.byte 17 | |
.byte 1 | |
# 37 = DW_AT_producer, 14 = DW_FORM_strp
.byte 37 | |
.byte 14 | |
# 19 = DW_AT_language, 5 = DW_FORM_data2
.byte 19 | |
.byte 5 | |
# 3 = DW_AT_name, 14 = DW_FORM_strp
.byte 3 | |
.byte 14 | |
# 16 = DW_AT_stmt_list, 23 = DW_FORM_sec_offset
.byte 16 | |
.byte 23 | |
# "\264B" = bytes 0xB4 0x42 = ULEB128 0x2134 = DW_AT_GNU_pubnames,
# 25 = DW_FORM_flag_present
.ascii "\264B" | |
.byte 25 | |
# 17 = DW_AT_low_pc, 1 = DW_FORM_addr
.byte 17 | |
.byte 1 | |
# 18 = DW_AT_high_pc, 6 = DW_FORM_data4
.byte 18 | |
.byte 6 | |
# End of abbreviation 1
.byte 0 | |
.byte 0 | |
# --- Abbreviation 2: tag 46 = DW_TAG_subprogram, no children ---
.byte 2 | |
.byte 46 | |
.byte 0 | |
# 17 = DW_AT_low_pc, 1 = DW_FORM_addr
.byte 17 | |
.byte 1 | |
# 18 = DW_AT_high_pc, 6 = DW_FORM_data4
.byte 18 | |
.byte 6 | |
# 64 = DW_AT_frame_base, 24 = DW_FORM_exprloc
.byte 64 | |
.byte 24 | |
# 110 = DW_AT_linkage_name, 14 = DW_FORM_strp
.byte 110 | |
.byte 14 | |
# 3 = DW_AT_name, 14 = DW_FORM_strp
.byte 3 | |
.byte 14 | |
# 58 = DW_AT_decl_file, 11 = DW_FORM_data1
.byte 58 | |
.byte 11 | |
# 59 = DW_AT_decl_line, 11 = DW_FORM_data1
.byte 59 | |
.byte 11 | |
# 73 = DW_AT_type, 19 = DW_FORM_ref4 (offset relative to the same unit)
.byte 73 | |
.byte 19 | |
# 63 = DW_AT_external, 25 = DW_FORM_flag_present
.byte 63 | |
.byte 25 | |
# End of abbreviation 2
.byte 0 | |
.byte 0 | |
# --- Abbreviation 3: tag 36 = DW_TAG_base_type, no children ---
.byte 3 | |
.byte 36 | |
.byte 0 | |
# 3 = DW_AT_name, 14 = DW_FORM_strp
.byte 3 | |
.byte 14 | |
# 62 = DW_AT_encoding, 11 = DW_FORM_data1
.byte 62 | |
.byte 11 | |
# 11 = DW_AT_byte_size, 11 = DW_FORM_data1
.byte 11 | |
.byte 11 | |
# End of abbreviation 3
.byte 0 | |
.byte 0 | |
# --- Abbreviation 4: tag 46 = DW_TAG_subprogram, no children ---
# Same attribute list as abbreviation 2 except that DW_AT_type uses
# DW_FORM_ref_addr (a .debug_info section offset) instead of DW_FORM_ref4.
.byte 4 | |
.byte 46 | |
.byte 0 | |
# 17 = DW_AT_low_pc, 1 = DW_FORM_addr
.byte 17 | |
.byte 1 | |
# 18 = DW_AT_high_pc, 6 = DW_FORM_data4
.byte 18 | |
.byte 6 | |
# 64 = DW_AT_frame_base, 24 = DW_FORM_exprloc
.byte 64 | |
.byte 24 | |
# 110 = DW_AT_linkage_name, 14 = DW_FORM_strp
.byte 110 | |
.byte 14 | |
# 3 = DW_AT_name, 14 = DW_FORM_strp
.byte 3 | |
.byte 14 | |
# 58 = DW_AT_decl_file, 11 = DW_FORM_data1
.byte 58 | |
.byte 11 | |
# 59 = DW_AT_decl_line, 11 = DW_FORM_data1
.byte 59 | |
.byte 11 | |
# 73 = DW_AT_type, 16 = DW_FORM_ref_addr
.byte 73 | |
.byte 16 | |
# 63 = DW_AT_external, 25 = DW_FORM_flag_present
.byte 63 | |
.byte 25 | |
# End of abbreviation 4
.byte 0 | |
.byte 0 | |
# End of the abbreviation table
.byte 0 | |
# .debug_info, compile unit 0: describes the dispatch_0 function spanning
# .Lfunc_begin0 .. .Lfunc_end0 and also holds the "int" base-type DIE that the
# other three compile units refer to. The unit occupies 75 bytes including its
# length field; the subprogram DIE starts at unit offset 38 and the base-type
# DIE at unit offset 67.
.section .debug_info,"",@progbits | |
.Lcu_begin0: | |
# Unit header: length of the rest of the unit, DWARF version 4, offset of the
# abbreviation table, address size 8.
.long .Ldebug_info_end0-.Ldebug_info_start0 | |
.Ldebug_info_start0: | |
.short 4 | |
.long .debug_abbrev | |
.byte 8 | |
# DIE using abbreviation 1 (DW_TAG_compile_unit): producer "IREE", language
# code 44 (0x2c), name "-", line-table reference, low_pc, and high_pc given as
# a 4-byte length.
.byte 1 | |
.long .Linfo_string0 | |
.short 44 | |
.long .Linfo_string1 | |
.long .Lline_table_start0 | |
.quad .Lfunc_begin0 | |
.long .Lfunc_end0-.Lfunc_begin0 | |
# DIE using abbreviation 2 (DW_TAG_subprogram): low_pc and high_pc length.
.byte 2 | |
.quad .Lfunc_begin0 | |
.long .Lfunc_end0-.Lfunc_begin0 | |
# DW_AT_frame_base: 1-byte expression, 86 = 0x56 = DW_OP_reg6 (rbp on x86-64).
.byte 1 | |
.byte 86 | |
# Linkage name and name both point at the same string; decl_file 1,
# decl_line 1.
.long .Linfo_string2 | |
.long .Linfo_string2 | |
.byte 1 | |
.byte 1 | |
# DW_AT_type as a unit-relative offset: 67 is the base-type DIE below.
.long 67 | |
# DIE using abbreviation 3 (DW_TAG_base_type): name "int",
# encoding 5 = DW_ATE_signed, byte size 4.
.byte 3 | |
.long .Linfo_string3 | |
.byte 5 | |
.byte 4 | |
# End of the compile-unit DIE's children.
.byte 0 | |
.Ldebug_info_end0: | |
# .debug_info, compile unit 1: describes the function spanning
# .Lfunc_begin1 .. .Lfunc_end1 (named by .Linfo_string4). The unit occupies
# 68 bytes including its length field; the subprogram DIE starts at unit
# offset 38.
.Lcu_begin1: | |
# Unit header: length of the rest of the unit, DWARF version 4, offset of the
# abbreviation table, address size 8.
.long .Ldebug_info_end1-.Ldebug_info_start1 | |
.Ldebug_info_start1: | |
.short 4 | |
.long .debug_abbrev | |
.byte 8 | |
# DIE using abbreviation 1 (DW_TAG_compile_unit): producer, language code 44
# (0x2c), name, line-table reference, low_pc, and high_pc given as a 4-byte
# length.
.byte 1 | |
.long .Linfo_string0 | |
.short 44 | |
.long .Linfo_string1 | |
.long .Lline_table_start0 | |
.quad .Lfunc_begin1 | |
.long .Lfunc_end1-.Lfunc_begin1 | |
# DIE using abbreviation 4 (DW_TAG_subprogram): low_pc and high_pc length.
.byte 4 | |
.quad .Lfunc_begin1 | |
.long .Lfunc_end1-.Lfunc_begin1 | |
# DW_AT_frame_base: 1-byte expression, 86 = 0x56 = DW_OP_reg6 (rbp on x86-64).
.byte 1 | |
.byte 86 | |
# Linkage name and name both point at the same string; decl_file 1,
# decl_line 1.
.long .Linfo_string4 | |
.long .Linfo_string4 | |
.byte 1 | |
.byte 1 | |
# DW_AT_type as a section-relative reference: .debug_info+67 is the "int"
# base-type DIE that lives in compile unit 0.
.long .debug_info+67 | |
# End of the compile-unit DIE's children.
.byte 0 | |
.Ldebug_info_end1: | |
# .debug_info, compile unit 2: describes the function spanning
# .Lfunc_begin2 .. .Lfunc_end2 (named by .Linfo_string5). The unit occupies
# 68 bytes including its length field; the subprogram DIE starts at unit
# offset 38.
.Lcu_begin2: | |
# Unit header: length of the rest of the unit, DWARF version 4, offset of the
# abbreviation table, address size 8.
.long .Ldebug_info_end2-.Ldebug_info_start2 | |
.Ldebug_info_start2: | |
.short 4 | |
.long .debug_abbrev | |
.byte 8 | |
# DIE using abbreviation 1 (DW_TAG_compile_unit): producer, language code 44
# (0x2c), name, line-table reference, low_pc, and high_pc given as a 4-byte
# length.
.byte 1 | |
.long .Linfo_string0 | |
.short 44 | |
.long .Linfo_string1 | |
.long .Lline_table_start0 | |
.quad .Lfunc_begin2 | |
.long .Lfunc_end2-.Lfunc_begin2 | |
# DIE using abbreviation 4 (DW_TAG_subprogram): low_pc and high_pc length.
.byte 4 | |
.quad .Lfunc_begin2 | |
.long .Lfunc_end2-.Lfunc_begin2 | |
# DW_AT_frame_base: 1-byte expression, 86 = 0x56 = DW_OP_reg6 (rbp on x86-64).
.byte 1 | |
.byte 86 | |
# Linkage name and name both point at the same string; decl_file 1,
# decl_line 1.
.long .Linfo_string5 | |
.long .Linfo_string5 | |
.byte 1 | |
.byte 1 | |
# DW_AT_type as a section-relative reference: .debug_info+67 is the "int"
# base-type DIE that lives in compile unit 0.
.long .debug_info+67 | |
# End of the compile-unit DIE's children.
.byte 0 | |
.Ldebug_info_end2: | |
# .debug_info, compile unit 3: describes the function spanning
# .Lfunc_begin3 .. .Lfunc_end3 (named by .Linfo_string6). The unit occupies
# 68 bytes including its length field; the subprogram DIE starts at unit
# offset 38.
.Lcu_begin3: | |
# Unit header: length of the rest of the unit, DWARF version 4, offset of the
# abbreviation table, address size 8.
.long .Ldebug_info_end3-.Ldebug_info_start3 | |
.Ldebug_info_start3: | |
.short 4 | |
.long .debug_abbrev | |
.byte 8 | |
# DIE using abbreviation 1 (DW_TAG_compile_unit): producer, language code 44
# (0x2c), name, line-table reference, low_pc, and high_pc given as a 4-byte
# length.
.byte 1 | |
.long .Linfo_string0 | |
.short 44 | |
.long .Linfo_string1 | |
.long .Lline_table_start0 | |
.quad .Lfunc_begin3 | |
.long .Lfunc_end3-.Lfunc_begin3 | |
# DIE using abbreviation 4 (DW_TAG_subprogram): low_pc and high_pc length.
.byte 4 | |
.quad .Lfunc_begin3 | |
.long .Lfunc_end3-.Lfunc_begin3 | |
# DW_AT_frame_base: 1-byte expression, 86 = 0x56 = DW_OP_reg6 (rbp on x86-64).
.byte 1 | |
.byte 86 | |
# Linkage name and name both point at the same string; decl_file 1,
# decl_line 1.
.long .Linfo_string6 | |
.long .Linfo_string6 | |
.byte 1 | |
.byte 1 | |
# DW_AT_type as a section-relative reference: .debug_info+67 is the "int"
# base-type DIE that lives in compile unit 0.
.long .debug_info+67 | |
# End of the compile-unit DIE's children.
.byte 0 | |
.Ldebug_info_end3: | |
# .debug_str: NUL-terminated strings referenced from .debug_info through the
# .Linfo_stringN labels. The "MS" flags with entry size 1 mark the section as
# mergeable strings.
.section .debug_str,"MS",@progbits,1 | |
# Producer string used by every compile unit.
.Linfo_string0: | |
.asciz "IREE" | |
# Compile-unit name used by every compile unit.
.Linfo_string1: | |
.asciz "-" | |
# Name and linkage name of the subprogram in compile unit 0.
.Linfo_string2: | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32" | |
# Name of the base type in compile unit 0.
.Linfo_string3: | |
.asciz "int" | |
# Name and linkage name of the subprogram in compile unit 1.
.Linfo_string4: | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack" | |
# Name and linkage name of the subprogram in compile unit 2.
.Linfo_string5: | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32" | |
# Name and linkage name of the subprogram in compile unit 3.
.Linfo_string6: | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32" | |
# Name-lookup sets for compile unit 0. Each set is: length of the rest of the
# set, version 2, reference to the unit in .debug_info, size of that unit in
# bytes (75), then (DIE offset, NUL-terminated name) entries ended by a zero
# offset.

# .debug_pubnames: one entry, unit offset 38 = the subprogram DIE.
.section .debug_pubnames,"",@progbits | |
.long .LpubNames_end0-.LpubNames_start0 | |
.LpubNames_start0: | |
.short 2 | |
.long .Lcu_begin0 | |
.long 75 | |
.long 38 | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32" | |
.long 0 | |
.LpubNames_end0: | |
# .debug_pubtypes: one entry, unit offset 67 = the "int" base-type DIE.
.section .debug_pubtypes,"",@progbits | |
.long .LpubTypes_end0-.LpubTypes_start0 | |
.LpubTypes_start0: | |
.short 2 | |
.long .Lcu_begin0 | |
.long 75 | |
.long 67 | |
.asciz "int" | |
.long 0 | |
.LpubTypes_end0: | |
# Name-lookup sets for compile unit 1. Each set is: length of the rest of the
# set, version 2, reference to the unit in .debug_info, size of that unit in
# bytes (68), then (DIE offset, NUL-terminated name) entries ended by a zero
# offset.

# .debug_pubnames: one entry, unit offset 38 = the subprogram DIE.
.section .debug_pubnames,"",@progbits | |
.long .LpubNames_end1-.LpubNames_start1 | |
.LpubNames_start1: | |
.short 2 | |
.long .Lcu_begin1 | |
.long 68 | |
.long 38 | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack" | |
.long 0 | |
.LpubNames_end1: | |
# .debug_pubtypes: no entries for this unit, only the zero terminator.
.section .debug_pubtypes,"",@progbits | |
.long .LpubTypes_end1-.LpubTypes_start1 | |
.LpubTypes_start1: | |
.short 2 | |
.long .Lcu_begin1 | |
.long 68 | |
.long 0 | |
.LpubTypes_end1: | |
# Name-lookup sets for compile unit 2. Each set is: length of the rest of the
# set, version 2, reference to the unit in .debug_info, size of that unit in
# bytes (68), then (DIE offset, NUL-terminated name) entries ended by a zero
# offset.

# .debug_pubnames: one entry, unit offset 38 = the subprogram DIE.
.section .debug_pubnames,"",@progbits | |
.long .LpubNames_end2-.LpubNames_start2 | |
.LpubNames_start2: | |
.short 2 | |
.long .Lcu_begin2 | |
.long 68 | |
.long 38 | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32" | |
.long 0 | |
.LpubNames_end2: | |
# .debug_pubtypes: no entries for this unit, only the zero terminator.
.section .debug_pubtypes,"",@progbits | |
.long .LpubTypes_end2-.LpubTypes_start2 | |
.LpubTypes_start2: | |
.short 2 | |
.long .Lcu_begin2 | |
.long 68 | |
.long 0 | |
.LpubTypes_end2: | |
# Name-lookup sets for compile unit 3. Each set is: length of the rest of the
# set, version 2, reference to the unit in .debug_info, size of that unit in
# bytes (68), then (DIE offset, NUL-terminated name) entries ended by a zero
# offset.

# .debug_pubnames: one entry, unit offset 38 = the subprogram DIE.
.section .debug_pubnames,"",@progbits | |
.long .LpubNames_end3-.LpubNames_start3 | |
.LpubNames_start3: | |
.short 2 | |
.long .Lcu_begin3 | |
.long 68 | |
.long 38 | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32" | |
.long 0 | |
.LpubNames_end3: | |
# .debug_pubtypes: no entries for this unit, only the zero terminator.
.section .debug_pubtypes,"",@progbits | |
.long .LpubTypes_end3-.LpubTypes_start3 | |
.LpubTypes_start3: | |
.short 2 | |
.long .Lcu_begin3 | |
.long 68 | |
.long 0 | |
.LpubTypes_end3: | |
# Empty .note.GNU-stack section: the conventional ELF marker that this object
# does not need an executable stack.
.section ".note.GNU-stack","",@progbits | |
# Start of .debug_line. .Lline_table_start0 is the label that every
# compile-unit DIE in .debug_info references as its line table. No line-table
# data is written out after the label here; presumably the assembler generates
# it from the .file/.loc directives - confirm.
.section .debug_line,"",@progbits | |
.Lline_table_start0: |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment