Created
April 19, 2024 20:16
-
-
Save pashu123/9ec48d44ece3b40897700d96eb65b9c6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.text | |
.file "mmt3d_kernel_linked_llvm_cpu" | |
# ---------------------------------------------------------------------------
# turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32
#
# Syntax: GNU as, AT&T operand order, x86-64 with AVX-512 (zmm / k registers).
# The .LBB*/.Lfunc_* labels and .loc directives suggest this is compiler
# output rather than hand-written code.
#
# What the code visibly does: walks a 4-deep loop nest and, in the innermost
# loop, loads sixteen 64-byte rows (zero-masked per row) from one buffer,
# interleaves them with an unpck/vshuff64x2 network, and stores sixteen
# 64-byte vectors contiguously to a second buffer.
#
# Inputs (as read by the code):
#   %rdi  incoming value is never read (overwritten by the first load).
#   %rsi  pointer to a record; the code reads
#           u32 at +12 (cx), u32 at +16 (cy), u16 at +20 (cz),
#           pointer at +24 -> parameter words P[] (32-bit each),
#           pointer at +32 -> table of buffer pointers (src = [0], dst = [1]).
#   %rdx  pointer to a record; the code reads
#           u32 at +0 (ix), u32 at +4 (iy), u16 at +8 (iz).
#   NOTE(review): these offsets look like IREE's HAL dispatch-state and
#   workgroup-state structs (workgroup counts / ids, push constants,
#   binding pointers) - confirm against the runtime headers.
#
# Derived values used in the comments below:
#   A = (P[1] << 32) + P[0]        B = (P[3] << 32) + P[2]
#   C = 64-bit value at P + 16     T = ceil(A / 16) (signed)
#
# Returns: %eax = 0.
#
# Stack: %rbp frame, five callee-saved pushes plus 184 bytes, so %rsp sits at
# %rbp-224. Slots down to -352(%rbp) are used, i.e. 128 bytes below %rsp.
# That is only safe because the function makes no calls (red zone).
#
# Stack slots written during setup (all relative to %rbp):
#    -64  z0, outer index; starts at iz*64, steps by cz*64
#    -72  outer src cursor = src + 192000 + ix*512 + iy*819200 + iz*3200*A*256
#   -136  outer dst cursor = dst + 960 + ix*8192 + iy*819200 + iz*C*13107200
#   -128  iy*4              -168  T - iy*4          -144  B
#   -176  cz*C*13107200     -184  cz*64             -192  cz*3200*A*256
#   -208  ix*128            -216  cy*819200         -224  cy*4
#   -232  ix                -240  T                 -248  A
#   -272  cx*8192           -280  cx*128            -288  cx*512
#   -312  12800*A           -320  204800*C
#   -120 and -112 first hold the pointer table and P[1]; inside .LBB0_10 they
#   are reused as 16-bit mask spill slots.
# ---------------------------------------------------------------------------
.section .text.turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32,"ax",@progbits | |
.p2align 4, 0x90 | |
.type turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32,@function | |
turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32: | |
.Lfunc_begin0: | |
.file 1 "-" | |
.loc 1 1 0 | |
.cfi_startproc | |
# Prologue: set up the %rbp frame and save the callee-saved registers used.
pushq %rbp | |
.cfi_def_cfa_offset 16 | |
.cfi_offset %rbp, -16 | |
movq %rsp, %rbp | |
.cfi_def_cfa_register %rbp | |
.Ltmp0: | |
pushq %r15 | |
pushq %r14 | |
pushq %r13 | |
pushq %r12 | |
pushq %rbx | |
subq $184, %rsp | |
.cfi_offset %rbx, -56 | |
.cfi_offset %r12, -48 | |
.cfi_offset %r13, -40 | |
.cfi_offset %r14, -32 | |
.cfi_offset %r15, -24 | |
.loc 1 4 3 prologue_end | |
# %rdi = P (parameter words). ecx = P[1], eax = P[0], r12d = P[3].
movq 24(%rsi), %rdi | |
movl 4(%rdi), %ecx | |
movl (%rdi), %eax | |
movl 12(%rdi), %r12d | |
# r10 = A = (P[1] << 32) + P[0]; r8 = A - 1.
movq %rcx, %r8 | |
shlq $32, %r8 | |
leaq (%r8,%rax), %r10 | |
leaq -1(%r8,%rax), %r8 | |
# r15 = T = ceil(A / 16), signed:
#   A > 0 : ((A - 1) / 16) + 1      A <= 0 : -((-A) / 16)
# The "+15 when negative, then sar 4" pair is a truncating divide by 16.
movq %r10, %r9 | |
negq %r9 | |
testq %r10, %r10 | |
movq %r10, -248(%rbp) | |
cmovleq %r9, %r8 | |
leaq 15(%r8), %r15 | |
testq %r8, %r8 | |
cmovnsq %r8, %r15 | |
sarq $4, %r15 | |
movq %r15, %r8 | |
negq %r8 | |
incq %r15 | |
testq %r10, %r10 | |
# The load below does not touch flags, so the cmov still sees "test A".
movl 8(%rdi), %r10d | |
cmovleq %r8, %r15 | |
# r8d = iz (16-bit, zero-extended); r9 = B; r11 = z0 = iz * 64.
movzwl 8(%rdx), %r8d | |
shlq $32, %r12 | |
leaq (%r12,%r10), %r9 | |
movl %r8d, %r11d | |
shll $6, %r11d | |
movq %r9, -144(%rbp) | |
movq %r11, -64(%rbp) | |
# Nothing to do when the starting outer index is already past B (signed).
cmpq %r9, %r11 | |
jge .LBB0_17 | |
.loc 1 0 3 is_stmt 0 | |
# One-time setup: compute base cursors and all loop strides, spilling them to
# the stack slots listed in the header. Instruction order here is the
# scheduler's; several registers are reused for unrelated values.
movq 32(%rsi), %r9 | |
movq 16(%rdi), %rdi | |
movq %rcx, -112(%rbp) | |
.loc 1 4 3 | |
# r12 = B - z0: outer-dimension elements remaining from this start index.
orq %r10, %r12 | |
movl 12(%rsi), %r13d | |
subq -64(%rbp), %r12 | |
movq %r15, -240(%rbp) | |
movq %r9, -120(%rbp) | |
movq 8(%r9), %r11 | |
movq %rcx, %r9 | |
movl (%rdx), %ecx | |
movl 4(%rdx), %edx | |
# dst base offset = ix*8192 + iy*819200 + iz*C*13107200.
movq %rdi, %r10 | |
imulq %r8, %r10 | |
imulq $13107200, %r10, %r10 | |
imulq $819200, %rdx, %rbx | |
movq %rcx, %r14 | |
shlq $13, %r14 | |
movq %rcx, -232(%rbp) | |
addq %rbx, %r14 | |
addq %r10, %r14 | |
# +960 bias: the sixteen stores in .LBB0_11 use displacements -960..0.
leaq 960(%r11,%r14), %r10 | |
# 13743895347200 = 3200 << 32, so r14 below becomes 3200 * A (mod 2^64);
# rax becomes 12800 * P[0], completed to 12800 * A further down.
movabsq $13743895347200, %r11 | |
imulq $3200, %rax, %r14 | |
imulq $12800, %rax, %rax | |
imulq %r9, %r11 | |
movq %r10, -136(%rbp) | |
movl 16(%rsi), %r10d | |
movzwl 20(%rsi), %esi | |
addq %r11, %r14 | |
# src base offset = ix*512 + iy*819200 + (3200*A*iz << 8).
movq %rcx, %r11 | |
shlq $9, %r11 | |
shlq $7, %rcx | |
addq %rbx, %r11 | |
movq -120(%rbp), %rbx | |
imulq %r14, %r8 | |
movq %rcx, -208(%rbp) | |
movq %r15, %rcx | |
imulq %rsi, %r14 | |
shlq $8, %r8 | |
addq %r8, %r11 | |
imulq $204800, %rdi, %r8 | |
imulq %rsi, %rdi | |
shll $6, %esi | |
movq (%rbx), %r9 | |
movq %rsi, -184(%rbp) | |
leaq (,%rdx,4), %rsi | |
shlq $8, %r14 | |
movq %r14, -192(%rbp) | |
movq %r8, -320(%rbp) | |
# 54975581388800 = 12800 << 32; combined with rax this yields 12800 * A.
movabsq $54975581388800, %r8 | |
imulq -112(%rbp), %r8 | |
subq %rsi, %rcx | |
movq %rsi, -128(%rbp) | |
movq %rcx, -168(%rbp) | |
leaq (,%r10,4), %rcx | |
# +192000 bias: the sixteen loads in .LBB0_11 use displacements
# -191936..+64 with the index register starting at -16 (i.e. -64 bytes).
leaq 192000(%r9,%r11), %rdx | |
movq %rcx, -224(%rbp) | |
movq %r13, %rcx | |
shlq $13, %rcx | |
movq %rcx, -272(%rbp) | |
movq %rdx, -72(%rbp) | |
movq %r13, %rdx | |
shlq $7, %rdx | |
shlq $9, %r13 | |
movq %rdx, -280(%rbp) | |
imulq $13107200, %rdi, %rdx | |
movq %r13, -288(%rbp) | |
addq %r8, %rax | |
movq %rax, -312(%rbp) | |
movq %rdx, -176(%rbp) | |
imulq $819200, %r10, %rdx | |
movq %rdx, -216(%rbp) | |
jmp .LBB0_2 | |
.p2align 4, 0x90 | |
# --- Loop 1 latch: advance the outer index by cz*64 and both base cursors by
# their per-iteration strides; exit once z0 >= B.
.LBB0_16: | |
.loc 1 0 3 | |
movq -176(%rbp), %rsi | |
movq -72(%rbp), %rdx | |
movq -64(%rbp), %rax | |
movq -184(%rbp), %rcx | |
movq -200(%rbp), %r12 | |
.loc 1 4 3 | |
addq -192(%rbp), %rdx | |
addq %rsi, -136(%rbp) | |
addq %rcx, %rax | |
subq %rcx, %r12 | |
movq %rdx, -72(%rbp) | |
movq %rax, -64(%rbp) | |
cmpq -144(%rbp), %rax | |
jge .LBB0_17 | |
# --- Loop 1 header. r12 = outer elements remaining; r15 = T on every entry
# (set before the first entry, reloaded in .LBB0_15 otherwise).
# -328(%rbp) = clamp(r12, 1, 64): trip count of loop 4 (.LBB0_8).
.LBB0_2: | |
cmpq $64, %r12 | |
movl $64, %ecx | |
movl $1, %eax | |
movq %r12, -200(%rbp) | |
cmovlq %r12, %rcx | |
cmpq $2, %rcx | |
cmovlq %rax, %rcx | |
movq %rcx, -328(%rbp) | |
# Skip loop 2 entirely when T <= iy*4 (signed).
cmpq -128(%rbp), %r15 | |
jle .LBB0_16 | |
.loc 1 0 3 | |
# Seed loop 2: copy the cursors, -296(%rbp) = B - z0, rcx = T - iy*4,
# rdx = iy*4.
movq -72(%rbp), %rdx | |
movq -136(%rbp), %rcx | |
movq -144(%rbp), %rax | |
.loc 1 4 3 | |
subq -64(%rbp), %rax | |
movq %rdx, -160(%rbp) | |
movq %rcx, -152(%rbp) | |
movq -168(%rbp), %rcx | |
movq -128(%rbp), %rdx | |
movq %rax, -296(%rbp) | |
jmp .LBB0_4 | |
.p2align 4, 0x90 | |
# --- Loop 2 latch: index += cy*4, remaining -= cy*4, both cursors +=
# cy*819200. %r15 is restored to T here because the loop body clobbers it.
.LBB0_15: | |
.loc 1 0 3 | |
movq -216(%rbp), %rsi | |
movq -224(%rbp), %rax | |
movq -256(%rbp), %rdx | |
movq -264(%rbp), %rcx | |
movq -240(%rbp), %r15 | |
.loc 1 4 3 | |
addq %rsi, -152(%rbp) | |
addq %rsi, -160(%rbp) | |
addq %rax, %rdx | |
subq %rax, %rcx | |
cmpq %r15, %rdx | |
jge .LBB0_16 | |
# --- Loop 2 header. rdx = current 16-row group index t, rcx = T - t.
#   -352(%rbp) = clamp(T - t, 1, 4)            trip count of loop 5 (.LBB0_10)
#   -336(%rbp) = T - t
#   rdi        = min(A - 16*t, 16 * min(4, T - t))   rows valid from here
.LBB0_4: | |
cmpq $4, %rcx | |
movl $4, %esi | |
movq -248(%rbp), %rdi | |
movl $1, %eax | |
movq %rdx, -256(%rbp) | |
movq %rcx, -264(%rbp) | |
cmovlq %rcx, %rsi | |
cmpq $2, %rsi | |
cmovlq %rax, %rsi | |
subq %rdx, %r15 | |
movl $4, %eax | |
cmpq $4, %r15 | |
movq %rsi, -352(%rbp) | |
movq %r15, -336(%rbp) | |
cmovlq %r15, %rax | |
shlq $4, %rdx | |
shlq $4, %rax | |
subq %rdx, %rdi | |
cmpq %rdi, %rax | |
cmovlq %rax, %rdi | |
# Loop 3 starts at column ix*128 and runs while < 3200; ix > 24 (unsigned,
# low 32 bits of the saved ix) means the start is already out of range.
cmpl $24, -232(%rbp) | |
ja .LBB0_15 | |
.loc 1 0 3 | |
movq -152(%rbp), %rcx | |
movq -160(%rbp), %rax | |
movq %rcx, -80(%rbp) | |
movq -208(%rbp), %rcx | |
movq %rax, -88(%rbp) | |
jmp .LBB0_6 | |
.p2align 4, 0x90 | |
# --- Loop 3 latch: column += cx*128, dst += cx*8192, src += cx*512;
# leave when column >= 3200.
.LBB0_14: | |
movq -304(%rbp), %rcx | |
movq -80(%rbp), %rax | |
movq -88(%rbp), %rdx | |
.loc 1 4 3 | |
addq -272(%rbp), %rax | |
addq -288(%rbp), %rdx | |
addq -280(%rbp), %rcx | |
movq %rax, -80(%rbp) | |
movq %rdx, -88(%rbp) | |
cmpq $3200, %rcx | |
jge .LBB0_15 | |
# --- Loop 3 header. rcx = current column; skipped body when B - z0 <= 0.
.LBB0_6: | |
.loc 1 0 3 | |
cmpq $0, -296(%rbp) | |
movq %rcx, -304(%rbp) | |
.loc 1 4 3 | |
jle .LBB0_14 | |
.loc 1 0 3 | |
movq -88(%rbp), %rax | |
movq -80(%rbp), %rdx | |
xorl %ecx, %ecx | |
movq %rax, -104(%rbp) | |
movq %rdx, -96(%rbp) | |
jmp .LBB0_8 | |
.p2align 4, 0x90 | |
# --- Loop 4 latch: counter++, dst += 204800*C, src += 12800*A; leave when
# the counter reaches -328(%rbp).
.LBB0_13: | |
movq -344(%rbp), %rcx | |
movq -96(%rbp), %rax | |
movq -104(%rbp), %rdx | |
.loc 1 4 3 | |
addq -320(%rbp), %rax | |
addq -312(%rbp), %rdx | |
incq %rcx | |
movq %rax, -96(%rbp) | |
movq %rdx, -104(%rbp) | |
cmpq -328(%rbp), %rcx | |
je .LBB0_14 | |
# --- Loop 4 header. rcx counts from 0; body skipped when T - t <= 0.
.LBB0_8: | |
.loc 1 0 3 | |
cmpq $0, -336(%rbp) | |
movq %rcx, -344(%rbp) | |
.loc 1 4 3 | |
jle .LBB0_13 | |
.loc 1 0 3 | |
# r13 = src cursor, r12 = dst cursor, r8 = 16-row tile counter.
movq -104(%rbp), %r13 | |
movq -96(%rbp), %r12 | |
xorl %r8d, %r8d | |
.p2align 4, 0x90 | |
# --- Loop 5 header (one 16-row tile per iteration).
# r14 = rdi - 16*r8 = number of valid rows in this tile. For each row i in
# 0..15 a mask is built as (r14 > i) ? all-ones : zero via set{g,ge} + neg.
# Masks for rows 0..9 are spilled as 16-bit values to
#   -120, -112, -56, -54, -52, -50, -48, -46, -44, -42 (%rbp);
# rows 10..15 stay in k4, k5, k6, k7, k1, k2.
.LBB0_10: | |
.loc 1 4 3 | |
movq %r8, %rax | |
shlq $4, %rax | |
movq %rdi, %r14 | |
movq %r12, %rdx | |
subq %rax, %r14 | |
xorl %eax, %eax | |
testq %r14, %r14 | |
setg %al | |
negl %eax | |
kmovd %eax, %k1 | |
xorl %eax, %eax | |
cmpq $2, %r14 | |
setge %al | |
kmovw %k1, -120(%rbp) | |
negl %eax | |
kmovd %eax, %k1 | |
xorl %eax, %eax | |
cmpq $3, %r14 | |
setge %al | |
kmovw %k1, -112(%rbp) | |
negl %eax | |
kmovd %eax, %k1 | |
xorl %eax, %eax | |
cmpq $4, %r14 | |
setge %al | |
kmovw %k1, -56(%rbp) | |
negl %eax | |
kmovd %eax, %k1 | |
xorl %eax, %eax | |
cmpq $5, %r14 | |
setge %al | |
kmovw %k1, -54(%rbp) | |
negl %eax | |
kmovd %eax, %k1 | |
xorl %eax, %eax | |
cmpq $6, %r14 | |
setge %al | |
kmovw %k1, -52(%rbp) | |
negl %eax | |
kmovd %eax, %k1 | |
xorl %eax, %eax | |
cmpq $7, %r14 | |
setge %al | |
kmovw %k1, -50(%rbp) | |
negl %eax | |
kmovd %eax, %k1 | |
xorl %eax, %eax | |
cmpq $8, %r14 | |
setge %al | |
xorl %esi, %esi | |
kmovw %k1, -48(%rbp) | |
negl %eax | |
cmpq $9, %r14 | |
setge %sil | |
xorl %ebx, %ebx | |
kmovd %eax, %k1 | |
negl %esi | |
cmpq $10, %r14 | |
kmovw %k1, -46(%rbp) | |
setge %bl | |
xorl %r9d, %r9d | |
kmovd %esi, %k1 | |
negl %ebx | |
cmpq $11, %r14 | |
kmovw %k1, -44(%rbp) | |
setge %r9b | |
xorl %r15d, %r15d | |
kmovd %ebx, %k1 | |
negl %r9d | |
cmpq $12, %r14 | |
kmovw %k1, -42(%rbp) | |
setge %r15b | |
xorl %r11d, %r11d | |
kmovd %r9d, %k4 | |
negl %r15d | |
cmpq $13, %r14 | |
setge %r11b | |
xorl %eax, %eax | |
kmovd %r15d, %k5 | |
negl %r11d | |
cmpq $14, %r14 | |
setge %al | |
xorl %r10d, %r10d | |
kmovd %r11d, %k6 | |
negl %eax | |
cmpq $15, %r14 | |
setge %r10b | |
xorl %ecx, %ecx | |
kmovd %eax, %k7 | |
negl %r10d | |
cmpq $16, %r14 | |
# r14 is now repurposed as the column index for .LBB0_11; the mov does not
# disturb the flags consumed by the setge that follows.
movq $-16, %r14 | |
setge %cl | |
kmovd %r10d, %k1 | |
negl %ecx | |
kmovd %ecx, %k2 | |
.p2align 4, 0x90 | |
# --- Loop 6 (innermost): r14 runs -16, 0, ..., 96, i.e. 8 iterations of 16
# 32-bit columns. Each iteration:
#   * loads 16 rows spaced 12800 bytes apart (row i at displacement
#     -191936 + 12800*i), zero-masked with that row's mask; %k3 is reloaded
#     from the spill slots for rows 0..9;
#   * runs a vunpck{l,h}ps / vunpck{l,h}pd / vshuff64x2 network over the 16
#     vectors (looks like a 16x16 32-bit transpose - confirm);
#   * writes 16 vectors (1024 bytes) with aligned stores, so %rdx-960 must be
#     64-byte aligned.
.LBB0_11: | |
.loc 1 0 3 | |
kmovw -120(%rbp), %k3 | |
.loc 1 4 3 | |
vmovups -63936(%r13,%r14,4), %zmm10 {%k4} {z} | |
vmovups -51136(%r13,%r14,4), %zmm11 {%k5} {z} | |
vmovups -38336(%r13,%r14,4), %zmm12 {%k6} {z} | |
vmovups -25536(%r13,%r14,4), %zmm13 {%k7} {z} | |
vmovups -12736(%r13,%r14,4), %zmm14 {%k1} {z} | |
vmovups 64(%r13,%r14,4), %zmm15 {%k2} {z} | |
vmovups -191936(%r13,%r14,4), %zmm0 {%k3} {z} | |
kmovw -112(%rbp), %k3 | |
vunpcklps %zmm11, %zmm10, %zmm17 | |
vunpckhps %zmm11, %zmm10, %zmm10 | |
vunpcklps %zmm13, %zmm12, %zmm11 | |
vunpckhps %zmm13, %zmm12, %zmm12 | |
vunpcklps %zmm15, %zmm14, %zmm13 | |
vunpckhps %zmm15, %zmm14, %zmm14 | |
vunpcklpd %zmm13, %zmm11, %zmm20 | |
vunpckhpd %zmm13, %zmm11, %zmm11 | |
vunpcklpd %zmm14, %zmm12, %zmm13 | |
vunpckhpd %zmm14, %zmm12, %zmm12 | |
vmovups -179136(%r13,%r14,4), %zmm1 {%k3} {z} | |
kmovw -56(%rbp), %k3 | |
vmovups -166336(%r13,%r14,4), %zmm2 {%k3} {z} | |
kmovw -54(%rbp), %k3 | |
vunpcklps %zmm1, %zmm0, %zmm16 | |
vunpckhps %zmm1, %zmm0, %zmm0 | |
vmovups -153536(%r13,%r14,4), %zmm3 {%k3} {z} | |
kmovw -52(%rbp), %k3 | |
vmovups -140736(%r13,%r14,4), %zmm4 {%k3} {z} | |
kmovw -50(%rbp), %k3 | |
vunpcklps %zmm3, %zmm2, %zmm1 | |
vunpckhps %zmm3, %zmm2, %zmm2 | |
vunpcklpd %zmm1, %zmm16, %zmm15 | |
vunpckhpd %zmm1, %zmm16, %zmm1 | |
vunpcklpd %zmm2, %zmm0, %zmm16 | |
vunpckhpd %zmm2, %zmm0, %zmm0 | |
vmovups -127936(%r13,%r14,4), %zmm5 {%k3} {z} | |
kmovw -48(%rbp), %k3 | |
vmovups -115136(%r13,%r14,4), %zmm6 {%k3} {z} | |
kmovw -46(%rbp), %k3 | |
vunpcklps %zmm5, %zmm4, %zmm3 | |
vunpckhps %zmm5, %zmm4, %zmm4 | |
vmovups -102336(%r13,%r14,4), %zmm7 {%k3} {z} | |
kmovw -44(%rbp), %k3 | |
vmovups -89536(%r13,%r14,4), %zmm8 {%k3} {z} | |
kmovw -42(%rbp), %k3 | |
vunpcklps %zmm7, %zmm6, %zmm5 | |
vunpckhps %zmm7, %zmm6, %zmm6 | |
vunpcklpd %zmm5, %zmm3, %zmm2 | |
vunpckhpd %zmm5, %zmm3, %zmm3 | |
vunpcklpd %zmm6, %zmm4, %zmm5 | |
vunpckhpd %zmm6, %zmm4, %zmm4 | |
vmovups -76736(%r13,%r14,4), %zmm9 {%k3} {z} | |
vshuff64x2 $136, %zmm2, %zmm15, %zmm14 | |
vshuff64x2 $136, %zmm4, %zmm0, %zmm19 | |
vshuff64x2 $136, %zmm5, %zmm16, %zmm18 | |
vshuff64x2 $221, %zmm2, %zmm15, %zmm2 | |
vshuff64x2 $221, %zmm4, %zmm0, %zmm0 | |
# All loads for this iteration are issued; advance the column index.
addq $16, %r14 | |
vunpcklps %zmm9, %zmm8, %zmm7 | |
vunpckhps %zmm9, %zmm8, %zmm8 | |
vunpcklpd %zmm10, %zmm8, %zmm21 | |
vunpcklpd %zmm17, %zmm7, %zmm6 | |
vunpckhpd %zmm17, %zmm7, %zmm7 | |
vunpckhpd %zmm10, %zmm8, %zmm8 | |
vshuff64x2 $136, %zmm3, %zmm1, %zmm17 | |
vshuff64x2 $221, %zmm3, %zmm1, %zmm1 | |
vshuff64x2 $221, %zmm5, %zmm16, %zmm3 | |
vshuff64x2 $136, %zmm20, %zmm6, %zmm4 | |
vshuff64x2 $221, %zmm20, %zmm6, %zmm6 | |
vshuff64x2 $136, %zmm11, %zmm7, %zmm5 | |
vshuff64x2 $136, %zmm12, %zmm8, %zmm16 | |
vshuff64x2 $221, %zmm11, %zmm7, %zmm7 | |
vshuff64x2 $221, %zmm13, %zmm21, %zmm9 | |
vshuff64x2 $136, %zmm13, %zmm21, %zmm15 | |
vshuff64x2 $221, %zmm12, %zmm8, %zmm8 | |
vshuff64x2 $136, %zmm4, %zmm14, %zmm10 | |
vshuff64x2 $136, %zmm5, %zmm17, %zmm11 | |
vshuff64x2 $136, %zmm6, %zmm2, %zmm20 | |
vshuff64x2 $136, %zmm7, %zmm1, %zmm21 | |
vshuff64x2 $136, %zmm9, %zmm3, %zmm22 | |
vshuff64x2 $136, %zmm8, %zmm0, %zmm23 | |
vshuff64x2 $221, %zmm4, %zmm14, %zmm4 | |
vshuff64x2 $136, %zmm15, %zmm18, %zmm12 | |
vshuff64x2 $221, %zmm15, %zmm18, %zmm14 | |
vshuff64x2 $136, %zmm16, %zmm19, %zmm13 | |
vshuff64x2 $221, %zmm5, %zmm17, %zmm5 | |
vshuff64x2 $221, %zmm16, %zmm19, %zmm15 | |
vshuff64x2 $221, %zmm6, %zmm2, %zmm2 | |
vshuff64x2 $221, %zmm7, %zmm1, %zmm1 | |
vshuff64x2 $221, %zmm9, %zmm3, %zmm3 | |
vshuff64x2 $221, %zmm8, %zmm0, %zmm0 | |
# Sixteen aligned 64-byte stores covering [rdx-960, rdx+64).
vmovapd %zmm10, -960(%rdx) | |
vmovapd %zmm11, -896(%rdx) | |
vmovapd %zmm12, -832(%rdx) | |
vmovapd %zmm13, -768(%rdx) | |
vmovapd %zmm20, -704(%rdx) | |
vmovapd %zmm21, -640(%rdx) | |
vmovapd %zmm22, -576(%rdx) | |
vmovapd %zmm23, -512(%rdx) | |
vmovapd %zmm4, -448(%rdx) | |
vmovapd %zmm5, -384(%rdx) | |
vmovapd %zmm14, -320(%rdx) | |
vmovapd %zmm15, -256(%rdx) | |
vmovapd %zmm2, -192(%rdx) | |
vmovapd %zmm1, -128(%rdx) | |
vmovapd %zmm3, -64(%rdx) | |
vmovapd %zmm0, (%rdx) | |
addq $1024, %rdx | |
# Unsigned compare: r14 has already wrapped from -16 to 0 on the first pass.
cmpq $112, %r14 | |
jb .LBB0_11 | |
# Loop 5 latch: next 16-row tile. Both cursors move by 204800 bytes
# (16 rows * 12800 on the load side); stop after -352(%rbp) tiles.
incq %r8 | |
addq $204800, %r12 | |
addq $204800, %r13 | |
cmpq -352(%rbp), %r8 | |
jne .LBB0_10 | |
jmp .LBB0_13 | |
# --- Exit: return 0, release the frame and restore callee-saved registers.
.LBB0_17: | |
xorl %eax, %eax | |
.loc 1 4 3 epilogue_begin | |
addq $184, %rsp | |
popq %rbx | |
popq %r12 | |
popq %r13 | |
popq %r14 | |
popq %r15 | |
popq %rbp | |
.cfi_def_cfa %rsp, 8 | |
# Clear upper vector state before returning to the caller.
vzeroupper | |
retq | |
.Ltmp1: | |
.Lfunc_end0: | |
.size turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32, .Lfunc_end0-turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32 | |
.cfi_endproc | |
# Read-only constant pool with 16-byte entries: section flags "aM" mark it
# allocatable and mergeable, and the trailing 16 is the entity size. The
# alignment directive below pads to a 16-byte boundary with zero bytes.
# None of the .LCPI1_* labels is referenced by the dispatch_0 function above.
# NOTE(review): the "1" in the label prefix suggests these belong to a later
# function in this file - confirm against the code that loads them.
.section .rodata.cst16,"aM",@progbits,16 | |
.p2align 4, 0x0 | |
# The next four entries each hold eight 16-bit values (8 * 2 = 16 bytes).
# NOTE(review): values such as 24..27 exceed an 8-element index range, which
# would fit element selectors for a two-source shuffle - confirm with the
# instruction that consumes each table.
.LCPI1_0: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 26 | |
.short 2 | |
.short 26 | |
.short 3 | |
.short 27 | |
.LCPI1_1: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 25 | |
.short 2 | |
.short 0 | |
.short 0 | |
.short 24 | |
.LCPI1_6: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 9 | |
.short 2 | |
.short 0 | |
.short 0 | |
.short 8 | |
.LCPI1_15: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 10 | |
.short 2 | |
.short 10 | |
.short 3 | |
.short 11 | |
.LCPI1_95: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 16 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_96: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 16 | |
.byte 0 | |
.byte 0 | |
.LCPI1_97: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 17 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_98: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 17 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_99: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 16 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_101: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 17 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_103: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 20 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_104: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 17 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_105: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 20 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_107: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 18 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_108: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 18 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_109: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 18 | |
.byte 0 | |
.byte 0 | |
.LCPI1_111: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 20 | |
.byte 0 | |
.byte 0 | |
.LCPI1_112: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 22 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_113: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 22 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_114: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 22 | |
.byte 0 | |
.byte 0 | |
.LCPI1_115: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 24 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_116: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 24 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_117: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 24 | |
.byte 0 | |
.byte 0 | |
.LCPI1_118: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 17 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_119: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 26 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_120: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 26 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_121: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 26 | |
.byte 0 | |
.byte 0 | |
.LCPI1_122: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 16 | |
.LCPI1_123: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 19 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_124: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 21 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_125: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 23 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_126: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 29 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_127: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 31 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_128: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 27 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_129: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 28 | |
.byte 0 | |
.byte 0 | |
.LCPI1_130: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 28 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_131: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 28 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_132: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 29 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_133: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 29 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_134: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 29 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_135: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 29 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_136: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 30 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_137: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 30 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_138: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 19 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_139: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 21 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_140: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 23 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_141: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 31 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_142: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 19 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_143: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 21 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_144: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 23 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_145: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 25 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_146: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 31 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_147: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 19 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_148: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 31 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_149: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 21 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_150: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 23 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_151: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 25 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_152: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 19 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_153: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 21 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_154: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 23 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_155: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 25 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_156: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 27 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_157: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 31 | |
.byte 0 | |
.byte 0 | |
.byte 0 | |
.LCPI1_158: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 17 | |
.byte 0 | |
.byte 0 | |
.LCPI1_159: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 17 | |
.byte 0 | |
.LCPI1_160: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 19 | |
.byte 0 | |
.byte 0 | |
.LCPI1_161: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 21 | |
.byte 0 | |
.byte 0 | |
.LCPI1_162: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 23 | |
.byte 0 | |
.byte 0 | |
.LCPI1_163: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 25 | |
.byte 0 | |
.byte 0 | |
.LCPI1_164: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 27 | |
.byte 0 | |
.byte 0 | |
.LCPI1_165: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 30 | |
.byte 0 | |
.byte 0 | |
.LCPI1_166: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 31 | |
.byte 0 | |
.byte 0 | |
.LCPI1_167: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 19 | |
.byte 0 | |
.LCPI1_168: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 31 | |
.byte 0 | |
.LCPI1_169: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 21 | |
.byte 0 | |
.LCPI1_170: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 23 | |
.byte 0 | |
.LCPI1_171: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 25 | |
.byte 0 | |
.LCPI1_172: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 27 | |
.byte 0 | |
.LCPI1_173: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 29 | |
.byte 0 | |
.LCPI1_174: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 17 | |
.LCPI1_175: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 18 | |
.LCPI1_176: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 19 | |
.LCPI1_177: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 20 | |
# Constant-pool tables .LCPI1_178 .. .LCPI1_187: 16 one-byte entries each.
# Every table holds 0..14 in entries 0-14; only the last entry differs (21..30).
# NOTE(review): presumably byte-encoded shuffle indices that are widened on load
# (the visible code loads other .LCPI1_* byte tables with vpmovsxbw); the
# consumers of these ten tables are not in the visible code -- TODO confirm.
# .LCPI1_178: entries 0-14 = 0..14, entry 15 = 21
.LCPI1_178: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 21 | |
# .LCPI1_179: entries 0-14 = 0..14, entry 15 = 22
.LCPI1_179: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 22 | |
# .LCPI1_180: entries 0-14 = 0..14, entry 15 = 23
.LCPI1_180: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 23 | |
# .LCPI1_181: entries 0-14 = 0..14, entry 15 = 24
.LCPI1_181: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 24 | |
# .LCPI1_182: entries 0-14 = 0..14, entry 15 = 25
.LCPI1_182: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 25 | |
# .LCPI1_183: entries 0-14 = 0..14, entry 15 = 26
.LCPI1_183: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 26 | |
# .LCPI1_184: entries 0-14 = 0..14, entry 15 = 27
.LCPI1_184: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 27 | |
# .LCPI1_185: entries 0-14 = 0..14, entry 15 = 28
.LCPI1_185: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 28 | |
# .LCPI1_186: entries 0-14 = 0..14, entry 15 = 29
.LCPI1_186: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 29 | |
# .LCPI1_187: entries 0-14 = 0..14, entry 15 = 30
.LCPI1_187: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 3 | |
.byte 4 | |
.byte 5 | |
.byte 6 | |
.byte 7 | |
.byte 8 | |
.byte 9 | |
.byte 10 | |
.byte 11 | |
.byte 12 | |
.byte 13 | |
.byte 14 | |
.byte 30 | |
# Mergeable read-only section of 32-byte constants, aligned to 32 bytes.
# Each table below is 16 two-byte lanes: an identity prefix 0..k, then one
# lane holding 16, then zero-filled lanes (.zero 2).
# NOTE(review): presumably index vectors for a 16-lane two-source word permute
# (an index >= 16 would address the second source), with the zero lanes being
# don't-care; the consumers are not in the visible code -- TODO confirm.
.section .rodata.cst32,"aM",@progbits,32 | |
.p2align 5, 0x0 | |
# .LCPI1_2: lanes 0-8 = 0..8, lane 9 = 16, lanes 10-15 zero
.LCPI1_2: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 16 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_3: lanes 0-10 = 0..10, lane 11 = 16, lanes 12-15 zero
.LCPI1_3: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 16 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_4: lanes 0-12 = 0..12, lane 13 = 16, lanes 14-15 zero
.LCPI1_4: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 16 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_5: lanes 0-14 = 0..14, lane 15 = 16
.LCPI1_5: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 16 | |
# Tables .LCPI1_7 .. .LCPI1_14: 16 two-byte lanes each. Identity prefix 0..k,
# then one lane holding 17, then zero-filled lanes (.zero 2).
# NOTE(review): presumably two-source word-permute index vectors (17 would
# address the second source of a 16-lane permute); consumers not visible -- TODO confirm.
# .LCPI1_7: lanes 0-7 = 0..7, lane 8 = 17, lanes 9-15 zero
.LCPI1_7: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 17 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_8: lanes 0-8 = 0..8, lane 9 = 17, lanes 10-15 zero
.LCPI1_8: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 17 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_9: lanes 0-9 = 0..9, lane 10 = 17, lanes 11-15 zero
.LCPI1_9: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 17 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_10: lanes 0-10 = 0..10, lane 11 = 17, lanes 12-15 zero
.LCPI1_10: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 17 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_11: lanes 0-11 = 0..11, lane 12 = 17, lanes 13-15 zero
.LCPI1_11: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 17 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_12: lanes 0-12 = 0..12, lane 13 = 17, lanes 14-15 zero
.LCPI1_12: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 17 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_13: lanes 0-13 = 0..13, lane 14 = 17, lane 15 zero
.LCPI1_13: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 17 | |
.zero 2 | |
# .LCPI1_14: lanes 0-14 = 0..14, lane 15 = 17
.LCPI1_14: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 17 | |
# Tables .LCPI1_16 .. .LCPI1_19: 16 two-byte lanes each. Identity prefix 0..k,
# then one lane holding 18, then zero-filled lanes (.zero 2).
# NOTE(review): presumably two-source word-permute index vectors (18 would
# address the second source of a 16-lane permute); consumers not visible -- TODO confirm.
# .LCPI1_16: lanes 0-8 = 0..8, lane 9 = 18, lanes 10-15 zero
.LCPI1_16: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 18 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_17: lanes 0-10 = 0..10, lane 11 = 18, lanes 12-15 zero
.LCPI1_17: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 18 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_18: lanes 0-12 = 0..12, lane 13 = 18, lanes 14-15 zero
.LCPI1_18: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 18 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_19: lanes 0-14 = 0..14, lane 15 = 18
.LCPI1_19: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 18 | |
# Tables .LCPI1_20 .. .LCPI1_27: 16 two-byte lanes each. Identity prefix 0..k,
# then one lane holding 19, then zero-filled lanes (.zero 2).
# NOTE(review): presumably two-source word-permute index vectors (19 would
# address the second source of a 16-lane permute); consumers not visible -- TODO confirm.
# .LCPI1_20: lanes 0-7 = 0..7, lane 8 = 19, lanes 9-15 zero
.LCPI1_20: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 19 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_21: lanes 0-8 = 0..8, lane 9 = 19, lanes 10-15 zero
.LCPI1_21: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 19 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_22: lanes 0-9 = 0..9, lane 10 = 19, lanes 11-15 zero
.LCPI1_22: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 19 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_23: lanes 0-10 = 0..10, lane 11 = 19, lanes 12-15 zero
.LCPI1_23: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 19 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_24: lanes 0-11 = 0..11, lane 12 = 19, lanes 13-15 zero
.LCPI1_24: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 19 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_25: lanes 0-12 = 0..12, lane 13 = 19, lanes 14-15 zero
.LCPI1_25: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 19 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_26: lanes 0-13 = 0..13, lane 14 = 19, lane 15 zero
.LCPI1_26: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 19 | |
.zero 2 | |
# .LCPI1_27: lanes 0-14 = 0..14, lane 15 = 19
.LCPI1_27: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 19 | |
# Tables .LCPI1_28 .. .LCPI1_31: 16 two-byte lanes each. Identity prefix 0..k,
# then one lane holding 20, then zero-filled lanes (.zero 2).
# NOTE(review): presumably two-source word-permute index vectors (20 would
# address the second source of a 16-lane permute); consumers not visible -- TODO confirm.
# .LCPI1_28: lanes 0-8 = 0..8, lane 9 = 20, lanes 10-15 zero
.LCPI1_28: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 20 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_29: lanes 0-10 = 0..10, lane 11 = 20, lanes 12-15 zero
.LCPI1_29: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 20 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_30: lanes 0-12 = 0..12, lane 13 = 20, lanes 14-15 zero
.LCPI1_30: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 20 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_31: lanes 0-14 = 0..14, lane 15 = 20
.LCPI1_31: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 20 | |
# Tables .LCPI1_32 .. .LCPI1_39: 16 two-byte lanes each. Identity prefix 0..k,
# then one lane holding 21, then zero-filled lanes (.zero 2).
# NOTE(review): presumably two-source word-permute index vectors (21 would
# address the second source of a 16-lane permute); consumers not visible -- TODO confirm.
# .LCPI1_32: lanes 0-7 = 0..7, lane 8 = 21, lanes 9-15 zero
.LCPI1_32: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 21 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_33: lanes 0-8 = 0..8, lane 9 = 21, lanes 10-15 zero
.LCPI1_33: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 21 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_34: lanes 0-9 = 0..9, lane 10 = 21, lanes 11-15 zero
.LCPI1_34: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 21 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_35: lanes 0-10 = 0..10, lane 11 = 21, lanes 12-15 zero
.LCPI1_35: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 21 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_36: lanes 0-11 = 0..11, lane 12 = 21, lanes 13-15 zero
.LCPI1_36: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 21 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_37: lanes 0-12 = 0..12, lane 13 = 21, lanes 14-15 zero
.LCPI1_37: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 21 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_38: lanes 0-13 = 0..13, lane 14 = 21, lane 15 zero
.LCPI1_38: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 21 | |
.zero 2 | |
# .LCPI1_39: lanes 0-14 = 0..14, lane 15 = 21
.LCPI1_39: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 21 | |
# Tables .LCPI1_40 .. .LCPI1_43: 16 two-byte lanes each. Identity prefix 0..k,
# then one lane holding 22, then zero-filled lanes (.zero 2).
# NOTE(review): presumably two-source word-permute index vectors (22 would
# address the second source of a 16-lane permute); consumers not visible -- TODO confirm.
# .LCPI1_40: lanes 0-8 = 0..8, lane 9 = 22, lanes 10-15 zero
.LCPI1_40: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 22 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_41: lanes 0-10 = 0..10, lane 11 = 22, lanes 12-15 zero
.LCPI1_41: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 22 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_42: lanes 0-12 = 0..12, lane 13 = 22, lanes 14-15 zero
.LCPI1_42: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 22 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_43: lanes 0-14 = 0..14, lane 15 = 22
.LCPI1_43: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 22 | |
# Tables .LCPI1_44 .. .LCPI1_51: 16 two-byte lanes each. Identity prefix 0..k,
# then one lane holding 23, then zero-filled lanes (.zero 2).
# NOTE(review): presumably two-source word-permute index vectors (23 would
# address the second source of a 16-lane permute); consumers not visible -- TODO confirm.
# .LCPI1_44: lanes 0-7 = 0..7, lane 8 = 23, lanes 9-15 zero
.LCPI1_44: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 23 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_45: lanes 0-8 = 0..8, lane 9 = 23, lanes 10-15 zero
.LCPI1_45: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 23 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_46: lanes 0-9 = 0..9, lane 10 = 23, lanes 11-15 zero
.LCPI1_46: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 23 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_47: lanes 0-10 = 0..10, lane 11 = 23, lanes 12-15 zero
.LCPI1_47: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 23 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_48: lanes 0-11 = 0..11, lane 12 = 23, lanes 13-15 zero
.LCPI1_48: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 23 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_49: lanes 0-12 = 0..12, lane 13 = 23, lanes 14-15 zero
.LCPI1_49: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 23 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_50: lanes 0-13 = 0..13, lane 14 = 23, lane 15 zero
.LCPI1_50: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 23 | |
.zero 2 | |
# .LCPI1_51: lanes 0-14 = 0..14, lane 15 = 23
.LCPI1_51: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 23 | |
# Tables .LCPI1_52 .. .LCPI1_55: 16 two-byte lanes each. Identity prefix 0..k,
# then one lane holding 24, then zero-filled lanes (.zero 2).
# NOTE(review): presumably two-source word-permute index vectors (24 would
# address the second source of a 16-lane permute); consumers not visible -- TODO confirm.
# .LCPI1_52: lanes 0-8 = 0..8, lane 9 = 24, lanes 10-15 zero
.LCPI1_52: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 24 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_53: lanes 0-10 = 0..10, lane 11 = 24, lanes 12-15 zero
.LCPI1_53: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 24 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_54: lanes 0-12 = 0..12, lane 13 = 24, lanes 14-15 zero
.LCPI1_54: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 24 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_55: lanes 0-14 = 0..14, lane 15 = 24
.LCPI1_55: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 24 | |
# Tables .LCPI1_56 .. .LCPI1_61: 16 two-byte lanes each. Identity prefix 0..k,
# then one lane holding 25, then zero-filled lanes (.zero 2).
# NOTE(review): presumably two-source word-permute index vectors (25 would
# address the second source of a 16-lane permute); consumers not visible -- TODO confirm.
# .LCPI1_56: lanes 0-9 = 0..9, lane 10 = 25, lanes 11-15 zero
.LCPI1_56: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 25 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_57: lanes 0-10 = 0..10, lane 11 = 25, lanes 12-15 zero
.LCPI1_57: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 25 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_58: lanes 0-11 = 0..11, lane 12 = 25, lanes 13-15 zero
.LCPI1_58: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 25 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_59: lanes 0-12 = 0..12, lane 13 = 25, lanes 14-15 zero
.LCPI1_59: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 25 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_60: lanes 0-13 = 0..13, lane 14 = 25, lane 15 zero
.LCPI1_60: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 25 | |
.zero 2 | |
# .LCPI1_61: lanes 0-14 = 0..14, lane 15 = 25
.LCPI1_61: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 25 | |
# Tables .LCPI1_62 .. .LCPI1_65: 16 two-byte lanes each. Identity prefix 0..k,
# then one lane holding 26, then zero-filled lanes (.zero 2).
# NOTE(review): presumably two-source word-permute index vectors (26 would
# address the second source of a 16-lane permute); consumers not visible -- TODO confirm.
# .LCPI1_62: lanes 0-8 = 0..8, lane 9 = 26, lanes 10-15 zero
.LCPI1_62: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 26 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_63: lanes 0-10 = 0..10, lane 11 = 26, lanes 12-15 zero
.LCPI1_63: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 26 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_64: lanes 0-12 = 0..12, lane 13 = 26, lanes 14-15 zero
.LCPI1_64: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 26 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_65: lanes 0-14 = 0..14, lane 15 = 26
.LCPI1_65: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 26 | |
# Tables .LCPI1_67 .. .LCPI1_71: 16 two-byte lanes each. Identity prefix 0..k,
# then one lane holding 27, then zero-filled lanes (.zero 2).
# NOTE(review): presumably two-source word-permute index vectors (27 would
# address the second source of a 16-lane permute); consumers not visible -- TODO confirm.
# .LCPI1_67: lanes 0-9 = 0..9, lane 10 = 27, lanes 11-15 zero
.LCPI1_67: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 27 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_68: lanes 0-11 = 0..11, lane 12 = 27, lanes 13-15 zero
.LCPI1_68: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 27 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_69: lanes 0-12 = 0..12, lane 13 = 27, lanes 14-15 zero
.LCPI1_69: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 27 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_70: lanes 0-13 = 0..13, lane 14 = 27, lane 15 zero
.LCPI1_70: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 27 | |
.zero 2 | |
# .LCPI1_71: lanes 0-14 = 0..14, lane 15 = 27
.LCPI1_71: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 27 | |
# Tables .LCPI1_72 .. .LCPI1_75: 16 two-byte lanes each. Identity prefix 0..k,
# then one lane holding 28, then zero-filled lanes (.zero 2).
# NOTE(review): presumably two-source word-permute index vectors (28 would
# address the second source of a 16-lane permute); consumers not visible -- TODO confirm.
# .LCPI1_72: lanes 0-8 = 0..8, lane 9 = 28, lanes 10-15 zero
.LCPI1_72: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 28 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_73: lanes 0-10 = 0..10, lane 11 = 28, lanes 12-15 zero
.LCPI1_73: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 28 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_74: lanes 0-12 = 0..12, lane 13 = 28, lanes 14-15 zero
.LCPI1_74: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 28 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_75: lanes 0-14 = 0..14, lane 15 = 28
.LCPI1_75: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 28 | |
# Tables .LCPI1_76 .. .LCPI1_82: 16 two-byte lanes each. Identity prefix 0..k,
# then one lane holding 29, then zero-filled lanes (.zero 2).
# NOTE(review): presumably two-source word-permute index vectors (29 would
# address the second source of a 16-lane permute); consumers not visible -- TODO confirm.
# .LCPI1_76: lanes 0-7 = 0..7, lane 8 = 29, lanes 9-15 zero
.LCPI1_76: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 29 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_77: lanes 0-8 = 0..8, lane 9 = 29, lanes 10-15 zero
.LCPI1_77: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 29 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_78: lanes 0-9 = 0..9, lane 10 = 29, lanes 11-15 zero
.LCPI1_78: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 29 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_79: lanes 0-10 = 0..10, lane 11 = 29, lanes 12-15 zero
.LCPI1_79: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 29 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_80: lanes 0-11 = 0..11, lane 12 = 29, lanes 13-15 zero
.LCPI1_80: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 29 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_81: lanes 0-13 = 0..13, lane 14 = 29, lane 15 zero
.LCPI1_81: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 29 | |
.zero 2 | |
# .LCPI1_82: lanes 0-14 = 0..14, lane 15 = 29
.LCPI1_82: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 29 | |
# Tables .LCPI1_83 .. .LCPI1_86: 16 two-byte lanes each. Identity prefix 0..k,
# then one lane holding 30, then zero-filled lanes (.zero 2).
# NOTE(review): presumably two-source word-permute index vectors (30 would
# address the second source of a 16-lane permute); consumers not visible -- TODO confirm.
# .LCPI1_83: lanes 0-8 = 0..8, lane 9 = 30, lanes 10-15 zero
.LCPI1_83: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 30 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_84: lanes 0-10 = 0..10, lane 11 = 30, lanes 12-15 zero
.LCPI1_84: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 30 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_85: lanes 0-12 = 0..12, lane 13 = 30, lanes 14-15 zero
.LCPI1_85: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 30 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_86: lanes 0-14 = 0..14, lane 15 = 30
.LCPI1_86: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 14 | |
.short 30 | |
# Tables .LCPI1_87 .. .LCPI1_93: 16 two-byte lanes each. Identity prefix 0..k,
# then one lane holding 31, then zero-filled lanes (.zero 2).
# NOTE(review): presumably two-source word-permute index vectors (31 would
# address the second source of a 16-lane permute); consumers not visible -- TODO confirm.
# .LCPI1_87: lanes 0-7 = 0..7, lane 8 = 31, lanes 9-15 zero
.LCPI1_87: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 31 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_88: lanes 0-8 = 0..8, lane 9 = 31, lanes 10-15 zero
.LCPI1_88: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 31 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_89: lanes 0-9 = 0..9, lane 10 = 31, lanes 11-15 zero
.LCPI1_89: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 31 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_90: lanes 0-10 = 0..10, lane 11 = 31, lanes 12-15 zero
.LCPI1_90: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 31 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_91: lanes 0-11 = 0..11, lane 12 = 31, lanes 13-15 zero
.LCPI1_91: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 31 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_92: lanes 0-12 = 0..12, lane 13 = 31, lanes 14-15 zero
.LCPI1_92: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 31 | |
.zero 2 | |
.zero 2 | |
# .LCPI1_93: lanes 0-13 = 0..13, lane 14 = 31, lane 15 zero
.LCPI1_93: | |
.short 0 | |
.short 1 | |
.short 2 | |
.short 3 | |
.short 4 | |
.short 5 | |
.short 6 | |
.short 7 | |
.short 8 | |
.short 9 | |
.short 10 | |
.short 11 | |
.short 12 | |
.short 13 | |
.short 31 | |
.zero 2 | |
# .LCPI1_94: 16 two-byte lanes. Only lanes 8 and 9 are non-zero (11 and 27);
# lanes 0-7 and 10-15 are zero-filled (.zero 2).
# The pair 11, 27 is the same pair stored in the 4-byte constant .LCPI1_66.
# NOTE(review): presumably a word-permute index vector in which only lanes 8-9
# matter; the consumer is not in the visible code -- TODO confirm.
.LCPI1_94: | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.short 11 | |
.short 27 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
.zero 2 | |
# Mergeable read-only section of 4-byte constants, aligned to 4 bytes.
# .LCPI1_66 holds two 16-bit values, 11 and 27 (the same pair that occupies
# lanes 8-9 of .LCPI1_94).
# NOTE(review): presumably loaded as one 32-bit element and broadcast; the
# consumer is not in the visible code -- TODO confirm.
.section .rodata.cst4,"aM",@progbits,4 | |
.p2align 2, 0x0 | |
.LCPI1_66: | |
.short 11 | |
.short 27 | |
# Mergeable read-only section of 8-byte constants. Each table is 8 one-byte
# entries. The function that follows loads each of them with vpmovsxbw, which
# sign-extends the 8 bytes into 8 word lanes of an xmm register.
.section .rodata.cst8,"aM",@progbits,8 | |
# .LCPI1_100: loaded into %xmm25 by the following function; the use of
# %xmm25 is not in the visible code.
.LCPI1_100: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 26 | |
.byte 2 | |
.byte 26 | |
.byte 3 | |
.byte 27 | |
# .LCPI1_102: loaded into %xmm1 and used as the index operand of
# "vpermt2w %xmm6, %xmm1, %xmm0" in the following function.
.LCPI1_102: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 9 | |
.byte 2 | |
.byte 0 | |
.byte 0 | |
.byte 8 | |
# .LCPI1_106: loaded into %xmm1 and used as the index operand of a second
# "vpermt2w %xmm6, %xmm1, %xmm0" in the following function.
.LCPI1_106: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 10 | |
.byte 2 | |
.byte 10 | |
.byte 3 | |
.byte 11 | |
# .LCPI1_110: loaded into %xmm8, which is then copied to %ymm26; the later
# use of %ymm26 is not in the visible code.
.LCPI1_110: | |
.byte 0 | |
.byte 1 | |
.byte 2 | |
.byte 25 | |
.byte 2 | |
.byte 0 | |
.byte 0 | |
.byte 24 | |
.section .text.turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack,"ax",@progbits | |
.p2align 4, 0x90 | |
.type turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack,@function | |
turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack: | |
.Lfunc_begin1: | |
.loc 1 1 0 is_stmt 1 | |
.cfi_startproc | |
pushq %rbp | |
.cfi_def_cfa_offset 16 | |
.cfi_offset %rbp, -16 | |
movq %rsp, %rbp | |
.cfi_def_cfa_register %rbp | |
.Ltmp2: | |
pushq %r15 | |
pushq %r14 | |
pushq %r13 | |
pushq %r12 | |
pushq %rbx | |
andq $-32, %rsp | |
subq $1792, %rsp | |
.cfi_offset %rbx, -56 | |
.cfi_offset %r12, -48 | |
.cfi_offset %r13, -40 | |
.cfi_offset %r14, -32 | |
.cfi_offset %r15, -24 | |
.loc 1 4 3 prologue_end | |
movq 24(%rsi), %r8 | |
movzwl 8(%rdx), %edi | |
movl 12(%r8), %r14d | |
movl 8(%r8), %ecx | |
movl %edi, %r9d | |
shll $6, %r9d | |
movq %r9, 16(%rsp) | |
shlq $32, %r14 | |
leaq (%r14,%rcx), %rax | |
movq %rax, 32(%rsp) | |
cmpq %rax, %r9 | |
jge .LBB1_14 | |
.loc 1 0 3 is_stmt 0 | |
movq 32(%rsi), %rbx | |
movl $15361, %r9d | |
movl 12(%rsi), %r11d | |
movl 16(%rsi), %eax | |
movzwl 20(%rsi), %r10d | |
movl $3538944000, %r15d | |
.loc 1 4 3 | |
orq %rcx, %r14 | |
vpmovsxbw .LCPI1_95(%rip), %ymm28 | |
vpmovsxbw .LCPI1_96(%rip), %ymm11 | |
vpmovsxbw .LCPI1_97(%rip), %ymm26 | |
vpmovsxbw .LCPI1_98(%rip), %ymm22 | |
subq 16(%rsp), %r14 | |
bextrq %r9, (%r8), %r9 | |
imulq %r15, %rdi | |
movq (%rbx), %rsi | |
movq 8(%rbx), %r8 | |
movl (%rdx), %ebx | |
movl 4(%rdx), %edx | |
imulq %r10, %r15 | |
shll $6, %r10d | |
movq %r11, %r12 | |
shlq $6, %r12 | |
movq %r10, 64(%rsp) | |
movq %r15, 72(%rsp) | |
imulq $409600, %rdx, %rcx | |
leaq (,%rdx,4), %r10 | |
movq %rdx, 80(%rsp) | |
movq %rbx, %rdx | |
shlq $11, %rdx | |
movq %rbx, 120(%rsp) | |
movq %r10, 56(%rsp) | |
addq %rcx, %rdi | |
addq %rdi, %rdx | |
leaq (,%rax,4), %rdi | |
imulq $409600, %rax, %rax | |
leaq (%rdx,%r9,2), %rdx | |
movq %rdi, 112(%rsp) | |
movq %rbx, %rdi | |
shlq $6, %rdi | |
leaq 307680(%r8,%rdx), %rdx | |
movq %rax, 104(%rsp) | |
movq %r11, %r8 | |
shlq $11, %r8 | |
shlq $7, %r11 | |
movq %rdi, 96(%rsp) | |
movq %rdx, 24(%rsp) | |
movq %rbx, %rdx | |
shlq $7, %rdx | |
addq %rcx, %rdx | |
leaq 403200(%rsi,%rdx), %rax | |
movq %rax, 48(%rsp) | |
jmp .LBB1_2 | |
.p2align 4, 0x90 | |
.LBB1_13: | |
.loc 1 0 3 | |
movq 24(%rsp), %rdx | |
movq 16(%rsp), %rax | |
movq 64(%rsp), %rcx | |
movq 88(%rsp), %r14 | |
.loc 1 4 3 | |
addq 72(%rsp), %rdx | |
addq %rcx, %rax | |
subq %rcx, %r14 | |
movq %rdx, 24(%rsp) | |
movq %rax, 16(%rsp) | |
cmpq 32(%rsp), %rax | |
jge .LBB1_14 | |
.LBB1_2: | |
cmpq $64, %r14 | |
movl $64, %edx | |
movl $1, %eax | |
movq %r14, 88(%rsp) | |
cmovlq %r14, %rdx | |
cmpq $2, %rdx | |
cmovlq %rax, %rdx | |
cmpl $134, 80(%rsp) | |
ja .LBB1_13 | |
.loc 1 0 3 | |
movq 32(%rsp), %rcx | |
movq 48(%rsp), %rax | |
movq 24(%rsp), %rdi | |
movq 56(%rsp), %r10 | |
.loc 1 4 3 | |
subq 16(%rsp), %rcx | |
movq %rax, 40(%rsp) | |
jmp .LBB1_4 | |
.p2align 4, 0x90 | |
.LBB1_12: | |
.loc 1 0 3 | |
movq 104(%rsp), %rax | |
.loc 1 4 3 | |
addq 112(%rsp), %r10 | |
addq %rax, 40(%rsp) | |
addq %rax, %rdi | |
cmpq $540, %r10 | |
jge .LBB1_13 | |
.LBB1_4: | |
.loc 1 0 3 | |
cmpl $49, 120(%rsp) | |
.loc 1 4 3 | |
ja .LBB1_12 | |
.loc 1 0 3 | |
movq 40(%rsp), %r9 | |
movq 96(%rsp), %rax | |
movq %rdi, %rbx | |
jmp .LBB1_6 | |
.p2align 4, 0x90 | |
.LBB1_11: | |
.loc 1 4 3 | |
addq %r12, %rax | |
addq %r8, %rbx | |
addq %r11, %r9 | |
cmpq $3200, %rax | |
jge .LBB1_12 | |
.LBB1_6: | |
.loc 1 0 3 | |
testq %rcx, %rcx | |
.loc 1 4 3 | |
jle .LBB1_11 | |
.loc 1 0 3 | |
movq %rbx, %r14 | |
xorl %esi, %esi | |
.p2align 4, 0x90 | |
.LBB1_8: | |
movq $-16, %r13 | |
movq %r14, %r15 | |
.p2align 4, 0x90 | |
.LBB1_9: | |
.loc 1 4 3 | |
vpbroadcastw -403148(%r9,%r13,2), %ymm0 | |
vpbroadcastw -396748(%r9,%r13,2), %xmm1 | |
vmovdqa64 -403152(%r9,%r13,2), %xmm23 | |
vmovdqa64 -390352(%r9,%r13,2), %xmm16 | |
vpbroadcastw -191948(%r9,%r13,2), %xmm2 | |
vmovdqa -403168(%r9,%r13,2), %xmm13 | |
vmovdqa -396768(%r9,%r13,2), %xmm12 | |
vmovdqa -390368(%r9,%r13,2), %xmm5 | |
vmovdqa -383968(%r9,%r13,2), %xmm6 | |
vmovdqa -377568(%r9,%r13,2), %xmm14 | |
vpmovsxbw .LCPI1_99(%rip), %ymm27 | |
vmovdqa -345568(%r9,%r13,2), %ymm10 | |
vmovdqa64 %ymm22, %ymm7 | |
vmovdqa64 -326368(%r9,%r13,2), %xmm22 | |
vmovdqa -319968(%r9,%r13,2), %ymm15 | |
vpmovsxbw .LCPI1_100(%rip), %xmm25 | |
vmovdqa64 %ymm26, %ymm17 | |
vpmovsxbw .LCPI1_101(%rip), %ymm29 | |
vpunpcklwd %xmm1, %xmm0, %xmm0 | |
vpbroadcastd -390348(%r9,%r13,2), %ymm1 | |
vmovdqa64 %ymm27, %ymm21 | |
vpblendd $2, %xmm1, %xmm0, %xmm0 | |
vmovdqa -396752(%r9,%r13,2), %xmm1 | |
vmovdqa %ymm0, 320(%rsp) | |
vpsrld $16, %xmm23, %xmm0 | |
vpblendw $1, %xmm0, %xmm1, %xmm0 | |
vmovdqa64 %xmm1, %xmm31 | |
vpsrld $16, %xmm16, %xmm1 | |
vpunpckldq %xmm1, %xmm0, %xmm24 | |
vpbroadcastw -300748(%r9,%r13,2), %ymm0 | |
vpbroadcastw -294348(%r9,%r13,2), %xmm1 | |
vpunpcklwd %xmm1, %xmm0, %xmm0 | |
vpbroadcastd -287948(%r9,%r13,2), %ymm1 | |
vpblendd $2, %xmm1, %xmm0, %xmm0 | |
vmovdqa -294352(%r9,%r13,2), %xmm1 | |
vmovdqa %ymm0, 288(%rsp) | |
vmovdqa -300752(%r9,%r13,2), %xmm0 | |
vmovdqa %xmm1, 192(%rsp) | |
vmovdqa %xmm0, 224(%rsp) | |
vpsrld $16, %xmm0, %xmm0 | |
vpblendw $1, %xmm0, %xmm1, %xmm0 | |
vmovdqa -287952(%r9,%r13,2), %xmm1 | |
vmovdqa %xmm1, 160(%rsp) | |
vpsrld $16, %xmm1, %xmm1 | |
vpunpckldq %xmm1, %xmm0, %xmm0 | |
vpbroadcastd -185548(%r9,%r13,2), %ymm1 | |
vmovdqa %ymm0, 256(%rsp) | |
vpbroadcastw -198348(%r9,%r13,2), %ymm0 | |
vpunpcklwd %xmm2, %xmm0, %xmm0 | |
vpunpcklwd %xmm12, %xmm13, %xmm2 | |
vpblendd $2, %xmm1, %xmm0, %xmm0 | |
vmovdqa -191952(%r9,%r13,2), %xmm1 | |
vpunpckldq %xmm5, %xmm2, %xmm4 | |
vmovaps -364768(%r9,%r13,2), %xmm2 | |
vmovdqa %ymm0, 128(%rsp) | |
vmovdqa -198352(%r9,%r13,2), %xmm0 | |
insertq $48, $16, %xmm6, %xmm4 | |
vmovdqa %xmm1, 384(%rsp) | |
vmovdqa %xmm0, 448(%rsp) | |
vpsrld $16, %xmm0, %xmm0 | |
vpblendw $1, %xmm0, %xmm1, %xmm0 | |
vmovdqa -185552(%r9,%r13,2), %xmm1 | |
vmovdqa %xmm1, 352(%rsp) | |
vpsrld $16, %xmm1, %xmm1 | |
vpunpckldq %xmm1, %xmm0, %xmm0 | |
vpbroadcastw -89548(%r9,%r13,2), %xmm1 | |
vmovdqa %ymm0, 416(%rsp) | |
vpbroadcastw -95948(%r9,%r13,2), %ymm0 | |
vpunpcklwd %xmm1, %xmm0, %xmm0 | |
vpbroadcastd -83148(%r9,%r13,2), %ymm1 | |
vpblendd $2, %xmm1, %xmm0, %xmm0 | |
vmovdqa -89552(%r9,%r13,2), %xmm1 | |
vmovdqa %ymm0, 1728(%rsp) | |
vmovdqa -95952(%r9,%r13,2), %xmm0 | |
vmovdqa %xmm1, 768(%rsp) | |
vmovdqa %xmm0, 784(%rsp) | |
vpsrld $16, %xmm0, %xmm0 | |
vpblendw $1, %xmm0, %xmm1, %xmm0 | |
vmovdqa -83152(%r9,%r13,2), %xmm1 | |
vpsrld $16, %xmm1, %xmm3 | |
vmovdqa %xmm1, 752(%rsp) | |
vpmovsxbw .LCPI1_102(%rip), %xmm1 | |
vpunpckldq %xmm3, %xmm0, %xmm0 | |
vmovdqa -371168(%r9,%r13,2), %xmm3 | |
vmovdqa %ymm0, 1696(%rsp) | |
vpunpcklqdq %xmm14, %xmm4, %xmm0 | |
vpbroadcastw %xmm3, %xmm4 | |
vpblendw $32, %xmm4, %xmm0, %xmm4 | |
vpbroadcastd -339168(%r9,%r13,2), %ymm0 | |
vinsertps $48, %xmm2, %xmm4, %xmm8 | |
vmovdqa -358368(%r9,%r13,2), %xmm4 | |
vpbroadcastw %xmm4, %xmm9 | |
vpblendw $128, %xmm9, %xmm8, %xmm8 | |
vinserti128 $1, -351968(%r9,%r13,2), %ymm8, %ymm9 | |
vinserti32x4 $1, %xmm22, %ymm8, %ymm8 | |
vpermt2w %ymm10, %ymm27, %ymm9 | |
vmovdqa64 %ymm7, %ymm27 | |
vpmovsxbw .LCPI1_103(%rip), %ymm7 | |
vpblendd $32, %ymm0, %ymm9, %ymm0 | |
vmovdqa -332768(%r9,%r13,2), %ymm9 | |
vpermt2w %ymm9, %ymm28, %ymm0 | |
vpmovsxbw .LCPI1_104(%rip), %ymm28 | |
vshufpd $2, %ymm8, %ymm0, %ymm0 | |
vpmovsxbw .LCPI1_105(%rip), %ymm8 | |
vpermt2w %ymm15, %ymm11, %ymm0 | |
vpbroadcastd -313568(%r9,%r13,2), %ymm11 | |
vpblendd $128, %ymm11, %ymm0, %ymm0 | |
vpsrld $16, %xmm5, %xmm11 | |
vmovdqa %ymm0, 704(%rsp) | |
vpsrld $16, %xmm13, %xmm0 | |
vpblendw $2, %xmm12, %xmm0, %xmm0 | |
vpunpckldq %xmm11, %xmm0, %xmm0 | |
vpsrld $16, %xmm14, %xmm11 | |
vpermt2w %xmm6, %xmm1, %xmm0 | |
vpmovsxbw .LCPI1_106(%rip), %xmm1 | |
vpunpcklqdq %xmm11, %xmm0, %xmm0 | |
vpbroadcastw -371166(%r9,%r13,2), %xmm11 | |
vpblendw $32, %xmm11, %xmm0, %xmm0 | |
vpslldq $10, %xmm2, %xmm11 | |
vpblendd $8, %xmm11, %xmm0, %xmm0 | |
vpbroadcastw -358366(%r9,%r13,2), %xmm11 | |
vpblendw $128, %xmm11, %xmm0, %xmm0 | |
vpbroadcastw -403164(%r9,%r13,2), %xmm11 | |
vmovdqa64 %ymm0, %ymm30 | |
vpbroadcastw -396764(%r9,%r13,2), %xmm0 | |
vpunpcklwd %xmm0, %xmm11, %xmm0 | |
vpslldq $6, %xmm3, %xmm11 | |
vpblendd $2, %xmm5, %xmm0, %xmm0 | |
vpermt2w %xmm6, %xmm1, %xmm0 | |
vpmovsxbw .LCPI1_107(%rip), %ymm1 | |
vshufps $212, %xmm14, %xmm0, %xmm0 | |
vpblendw $32, %xmm11, %xmm0, %xmm0 | |
vpslldq $10, %xmm4, %xmm11 | |
vinsertps $112, %xmm2, %xmm0, %xmm0 | |
vpblendw $128, %xmm11, %xmm0, %xmm0 | |
vpbroadcastd -351964(%r9,%r13,2), %ymm11 | |
vpblendd $240, %ymm11, %ymm0, %ymm11 | |
vpermt2w %ymm10, %ymm1, %ymm11 | |
vinserti128 $1, -339168(%r9,%r13,2), %ymm0, %ymm1 | |
vpbroadcastd -326364(%r9,%r13,2), %ymm0 | |
vpblendd $34, %ymm1, %ymm11, %ymm1 | |
vpmovsxbw .LCPI1_108(%rip), %ymm11 | |
vpermt2w %ymm9, %ymm11, %ymm1 | |
vpmovsxbw .LCPI1_109(%rip), %ymm11 | |
vpblendd $192, %ymm0, %ymm1, %ymm0 | |
vpbroadcastd -313564(%r9,%r13,2), %ymm1 | |
vpermt2w %ymm15, %ymm11, %ymm0 | |
vpbroadcastq -351960(%r9,%r13,2), %ymm11 | |
vpblendd $128, %ymm1, %ymm0, %ymm0 | |
vpsrlq $48, %xmm13, %xmm1 | |
vmovdqa %ymm0, 672(%rsp) | |
vpsrlq $48, %xmm12, %xmm0 | |
vpunpcklwd %xmm0, %xmm1, %xmm0 | |
vpsrlq $48, %xmm5, %xmm1 | |
vpunpckldq %xmm1, %xmm0, %xmm0 | |
vpsrlq $48, %xmm14, %xmm1 | |
vpblendw $8, %xmm6, %xmm0, %xmm0 | |
vpunpcklqdq %xmm1, %xmm0, %xmm0 | |
vpbroadcastw -371162(%r9,%r13,2), %xmm1 | |
vpblendw $32, %xmm1, %xmm0, %xmm0 | |
vpmovzxwd %xmm2, %xmm1 | |
vpblendd $8, %xmm1, %xmm0, %xmm0 | |
vpbroadcastw -358362(%r9,%r13,2), %xmm1 | |
vpblendw $128, %xmm1, %xmm0, %xmm0 | |
vpbroadcastw -403160(%r9,%r13,2), %xmm1 | |
vmovdqa64 %ymm0, %ymm20 | |
vpbroadcastw -396760(%r9,%r13,2), %xmm0 | |
vpunpcklwd %xmm0, %xmm1, %xmm0 | |
vpsrldq $2, %xmm6, %xmm1 | |
vinsertps $156, %xmm5, %xmm0, %xmm0 | |
vpblendw $8, %xmm1, %xmm0, %xmm0 | |
vpslld $16, %xmm3, %xmm1 | |
vpblendd $3, %xmm0, %xmm14, %xmm0 | |
vpblendw $32, %xmm1, %xmm0, %xmm0 | |
vpsllq $48, %xmm4, %xmm1 | |
vinsertps $176, %xmm2, %xmm0, %xmm0 | |
vpblendw $128, %xmm1, %xmm0, %xmm0 | |
vpblendd $240, %ymm11, %ymm0, %ymm1 | |
vpbroadcastd -339160(%r9,%r13,2), %ymm11 | |
vinserti32x4 $1, %xmm22, %ymm0, %ymm0 | |
vpermt2w %ymm10, %ymm8, %ymm1 | |
vpmovsxbw .LCPI1_110(%rip), %xmm8 | |
vpblendd $32, %ymm11, %ymm1, %ymm1 | |
vpbroadcastd -313560(%r9,%r13,2), %ymm11 | |
vmovdqa64 %ymm8, %ymm26 | |
vpermt2w %ymm9, %ymm7, %ymm1 | |
vmovdqa -371152(%r9,%r13,2), %xmm7 | |
vpblendd $204, %ymm0, %ymm1, %ymm0 | |
vpmovsxbw .LCPI1_111(%rip), %ymm1 | |
vpermt2w %ymm15, %ymm1, %ymm0 | |
vpsrldq $10, %xmm13, %xmm1 | |
vpblendd $128, %ymm11, %ymm0, %ymm0 | |
vmovdqa64 %ymm0, %ymm22 | |
vpsrldq $10, %xmm12, %xmm0 | |
vpunpcklwd %xmm0, %xmm1, %xmm0 | |
vpsrldq $10, %xmm5, %xmm1 | |
vpunpckldq %xmm1, %xmm0, %xmm0 | |
vpbroadcastw -383958(%r9,%r13,2), %xmm1 | |
vpblendw $8, %xmm1, %xmm0, %xmm0 | |
vpsrldq $10, %xmm14, %xmm1 | |
vpunpcklqdq %xmm1, %xmm0, %xmm0 | |
vpsllq $16, %xmm2, %xmm1 | |
vpblendw $32, %xmm3, %xmm0, %xmm0 | |
vpblendd $8, %xmm1, %xmm0, %xmm0 | |
vpbroadcastw -358358(%r9,%r13,2), %xmm1 | |
vpblendw $128, %xmm1, %xmm0, %xmm0 | |
vpbroadcastw -403156(%r9,%r13,2), %xmm1 | |
vmovdqa64 %ymm0, %ymm18 | |
vpbroadcastw -396756(%r9,%r13,2), %xmm0 | |
vpunpcklwd %xmm0, %xmm1, %xmm0 | |
vpsrldq $6, %xmm6, %xmm1 | |
vpmovsxbw .LCPI1_112(%rip), %ymm6 | |
vinsertps $220, %xmm5, %xmm0, %xmm0 | |
vpblendw $8, %xmm1, %xmm0, %xmm0 | |
vpsrlq $16, %xmm3, %xmm1 | |
vpslld $16, %xmm4, %xmm3 | |
vshufps $244, %xmm14, %xmm0, %xmm0 | |
vpblendw $32, %xmm1, %xmm0, %xmm0 | |
vpbroadcastd -351956(%r9,%r13,2), %ymm1 | |
vpblendd $8, %xmm2, %xmm0, %xmm0 | |
vpblendw $128, %xmm3, %xmm0, %xmm0 | |
vpbroadcastd -339156(%r9,%r13,2), %ymm3 | |
vpblendd $240, %ymm1, %ymm0, %ymm1 | |
vinserti128 $1, -313568(%r9,%r13,2), %ymm0, %ymm0 | |
vpermt2w %ymm10, %ymm6, %ymm1 | |
vpsrldq $10, %xmm31, %xmm6 | |
vpblendd $32, %ymm3, %ymm1, %ymm1 | |
vpmovsxbw .LCPI1_113(%rip), %ymm3 | |
vpermt2w %ymm9, %ymm3, %ymm1 | |
vpbroadcastd -326356(%r9,%r13,2), %ymm3 | |
vpblendd $192, %ymm3, %ymm1, %ymm1 | |
vpmovsxbw .LCPI1_114(%rip), %ymm3 | |
vpermt2w %ymm15, %ymm3, %ymm1 | |
vpsrldq $14, %xmm13, %xmm3 | |
vpblendd $136, %ymm0, %ymm1, %ymm0 | |
vpsrldq $14, %xmm5, %xmm1 | |
vmovdqa %ymm0, 640(%rsp) | |
vpsrldq $14, %xmm12, %xmm0 | |
vmovdqa -383952(%r9,%r13,2), %xmm12 | |
vpunpcklwd %xmm0, %xmm3, %xmm0 | |
vpbroadcastd -339152(%r9,%r13,2), %ymm3 | |
vpunpckldq %xmm1, %xmm0, %xmm0 | |
vpbroadcastw -383954(%r9,%r13,2), %xmm1 | |
vpblendw $8, %xmm1, %xmm0, %xmm0 | |
vpsrldq $14, %xmm14, %xmm1 | |
vmovdqa -377552(%r9,%r13,2), %xmm14 | |
vpunpcklqdq %xmm1, %xmm0, %xmm0 | |
vpbroadcastw -371154(%r9,%r13,2), %xmm1 | |
vpblendw $32, %xmm1, %xmm0, %xmm0 | |
vpsrld $16, %xmm2, %xmm1 | |
vpblendd $8, %xmm1, %xmm0, %xmm0 | |
vpbroadcastw %xmm7, %xmm1 | |
vpblendw $128, %xmm4, %xmm0, %xmm13 | |
vpbroadcastw -403152(%r9,%r13,2), %ymm0 | |
vpmovsxbw .LCPI1_115(%rip), %ymm4 | |
vpunpcklwd %xmm31, %xmm0, %xmm0 | |
vpunpckldq %xmm16, %xmm0, %xmm0 | |
insertq $48, $16, %xmm12, %xmm0 | |
vpunpcklqdq %xmm14, %xmm0, %xmm0 | |
vpblendw $32, %xmm1, %xmm0, %xmm0 | |
vpbroadcastd -364752(%r9,%r13,2), %xmm1 | |
vpblendd $8, %xmm1, %xmm0, %xmm1 | |
vmovdqa -358352(%r9,%r13,2), %xmm0 | |
vpbroadcastw %xmm0, %xmm2 | |
vpblendw $128, %xmm2, %xmm1, %xmm1 | |
vmovdqa -351968(%r9,%r13,2), %ymm2 | |
vpblendd $240, %ymm2, %ymm1, %ymm1 | |
vpermt2w %ymm2, %ymm17, %ymm30 | |
vmovdqa64 %ymm18, %ymm17 | |
vmovdqa64 %ymm13, %ymm18 | |
vpermt2w %ymm10, %ymm4, %ymm1 | |
vpmovsxbw .LCPI1_116(%rip), %ymm4 | |
vpermt2w %ymm10, %ymm27, %ymm30 | |
vpblendd $32, %ymm3, %ymm1, %ymm1 | |
vpbroadcastq -326352(%r9,%r13,2), %ymm3 | |
vpermt2w %ymm9, %ymm4, %ymm1 | |
vpbroadcastw -371150(%r9,%r13,2), %xmm4 | |
vpblendd $192, %ymm3, %ymm1, %ymm1 | |
vpmovsxbw .LCPI1_117(%rip), %ymm3 | |
vpermt2w %ymm15, %ymm3, %ymm1 | |
vpbroadcastd -313552(%r9,%r13,2), %ymm3 | |
vpblendd $128, %ymm3, %ymm1, %ymm1 | |
vpsrld $16, %xmm14, %xmm3 | |
vmovdqa %ymm1, 608(%rsp) | |
vmovdqa -383968(%r9,%r13,2), %ymm1 | |
vpermt2w %ymm1, %ymm8, %ymm24 | |
vpunpcklqdq %xmm3, %xmm24, %xmm3 | |
vpmovsxbw .LCPI1_118(%rip), %ymm24 | |
vpblendw $32, %xmm4, %xmm3, %xmm4 | |
vmovapd -364752(%r9,%r13,2), %xmm3 | |
vpslldq $10, %xmm3, %xmm5 | |
vpblendd $8, %xmm5, %xmm4, %xmm4 | |
vpbroadcastw -358350(%r9,%r13,2), %xmm5 | |
vpblendw $128, %xmm5, %xmm4, %xmm4 | |
vpshuflw $85, %ymm2, %ymm5 | |
vpblendw $2, %ymm10, %ymm5, %ymm5 | |
vpblendd $240, %ymm5, %ymm4, %ymm4 | |
vpmovsxbw .LCPI1_119(%rip), %ymm5 | |
vmovdqa64 %ymm4, %ymm19 | |
vmovdqa 320(%rsp), %ymm4 | |
vpermt2w %ymm1, %ymm25, %ymm4 | |
vmovdqa64 %ymm21, %ymm25 | |
vshufps $212, %xmm14, %xmm4, %xmm1 | |
vpslldq $6, %xmm7, %xmm4 | |
vpblendw $32, %xmm4, %xmm1, %xmm1 | |
vpbroadcastd -364748(%r9,%r13,2), %ymm4 | |
vpblendd $8, %xmm4, %xmm1, %xmm1 | |
vpslldq $10, %xmm0, %xmm4 | |
vpblendw $128, %xmm4, %xmm1, %xmm1 | |
vpbroadcastd -351948(%r9,%r13,2), %ymm4 | |
vpblendd $240, %ymm4, %ymm1, %ymm4 | |
vpmovsxbw .LCPI1_120(%rip), %ymm1 | |
vpermt2w %ymm10, %ymm1, %ymm4 | |
vmovdqa -339168(%r9,%r13,2), %ymm1 | |
vpblendd $32, %ymm1, %ymm4, %ymm4 | |
vpermt2w %ymm1, %ymm28, %ymm30 | |
vpmovsxbw .LCPI1_100(%rip), %xmm28 | |
vpermt2w %ymm9, %ymm5, %ymm4 | |
vpbroadcastd -326348(%r9,%r13,2), %ymm5 | |
vpermt2w %ymm9, %ymm29, %ymm30 | |
vpblendd $192, %ymm5, %ymm4, %ymm4 | |
vpmovsxbw .LCPI1_121(%rip), %ymm5 | |
vpermt2w %ymm15, %ymm5, %ymm4 | |
vpbroadcastd -313548(%r9,%r13,2), %ymm5 | |
vpblendd $128, %ymm5, %ymm4, %ymm4 | |
vpsrlq $48, %xmm31, %xmm5 | |
vmovdqa %ymm4, 320(%rsp) | |
vpsrlq $48, %xmm23, %xmm4 | |
vpunpcklwd %xmm5, %xmm4, %xmm4 | |
vpsrlq $48, %xmm16, %xmm5 | |
vpunpckldq %xmm5, %xmm4, %xmm4 | |
vpsrlq $48, %xmm14, %xmm5 | |
vpblendw $8, %xmm12, %xmm4, %xmm4 | |
vpunpcklqdq %xmm5, %xmm4, %xmm4 | |
vpbroadcastw -371146(%r9,%r13,2), %xmm5 | |
vpblendw $32, %xmm5, %xmm4, %xmm4 | |
vpmovzxwd %xmm3, %xmm5 | |
vpblendd $8, %xmm5, %xmm4, %xmm4 | |
vpbroadcastw -358346(%r9,%r13,2), %xmm5 | |
vpblendw $128, %xmm5, %xmm4, %xmm4 | |
vpsrldq $10, %xmm23, %xmm5 | |
vpunpcklwd %xmm6, %xmm5, %xmm5 | |
vpsrldq $10, %xmm16, %xmm6 | |
vpunpckldq %xmm6, %xmm5, %xmm5 | |
vpbroadcastw -383942(%r9,%r13,2), %xmm6 | |
vpblendw $8, %xmm6, %xmm5, %xmm5 | |
vpsrldq $10, %xmm14, %xmm6 | |
vpunpcklqdq %xmm6, %xmm5, %xmm5 | |
vpsllq $16, %xmm3, %xmm6 | |
vpsrld $16, %xmm3, %xmm3 | |
vpblendw $32, %xmm7, %xmm5, %xmm5 | |
vpblendd $8, %xmm6, %xmm5, %xmm5 | |
vpbroadcastw -358342(%r9,%r13,2), %xmm6 | |
vpblendw $128, %xmm6, %xmm5, %xmm11 | |
vpsrldq $14, %xmm23, %xmm6 | |
vpsrldq $14, %xmm31, %xmm5 | |
vpmovsxbw .LCPI1_122(%rip), %ymm23 | |
vpunpcklwd %xmm5, %xmm6, %xmm5 | |
vpsrldq $14, %xmm16, %xmm6 | |
vpunpckldq %xmm6, %xmm5, %xmm5 | |
vpbroadcastw -383938(%r9,%r13,2), %xmm6 | |
vpblendw $8, %xmm6, %xmm5, %xmm5 | |
vpsrldq $14, %xmm14, %xmm6 | |
vpunpcklqdq %xmm6, %xmm5, %xmm5 | |
vpbroadcastw -371138(%r9,%r13,2), %xmm6 | |
vpblendw $32, %xmm6, %xmm5, %xmm5 | |
vpbroadcastw -396740(%r9,%r13,2), %xmm6 | |
vpblendd $8, %xmm3, %xmm5, %xmm3 | |
vpmovsxbw .LCPI1_123(%rip), %ymm5 | |
vpermt2w %ymm2, %ymm5, %ymm20 | |
vpmovsxbw .LCPI1_124(%rip), %ymm5 | |
vpermt2w %ymm2, %ymm5, %ymm17 | |
vpmovsxbw .LCPI1_125(%rip), %ymm5 | |
vpermt2w %ymm2, %ymm5, %ymm18 | |
vpmovsxbw .LCPI1_126(%rip), %ymm5 | |
vpermt2w %ymm2, %ymm5, %ymm11 | |
vpblendw $128, %xmm0, %xmm3, %xmm5 | |
vpmovsxbw .LCPI1_127(%rip), %ymm3 | |
vpermt2w %ymm2, %ymm3, %ymm5 | |
vpbroadcastd .LCPI1_66(%rip), %ymm3 | |
vpermt2w %ymm10, %ymm3, %ymm2 | |
vpmovsxbw .LCPI1_128(%rip), %ymm3 | |
vpermt2w %ymm1, %ymm3, %ymm2 | |
vpbroadcastw -396744(%r9,%r13,2), %xmm3 | |
vpblendw $8, %ymm9, %ymm2, %ymm2 | |
vpblendd $240, %ymm2, %ymm4, %ymm13 | |
vpbroadcastw -403144(%r9,%r13,2), %ymm2 | |
vpmovsxbw .LCPI1_129(%rip), %ymm4 | |
vpunpcklwd %xmm3, %xmm2, %xmm2 | |
vunpcklps -390344(%r9,%r13,2){1to4}, %xmm2, %xmm2 | |
vpsrldq $2, %xmm12, %xmm3 | |
vpblendw $8, %xmm3, %xmm2, %xmm2 | |
vpslld $16, %xmm7, %xmm3 | |
vpblendd $3, %xmm2, %xmm14, %xmm2 | |
vpblendw $32, %xmm3, %xmm2, %xmm2 | |
vpbroadcastd -364744(%r9,%r13,2), %xmm3 | |
vpblendd $8, %xmm3, %xmm2, %xmm2 | |
vpsllq $48, %xmm0, %xmm3 | |
vpslld $16, %xmm0, %xmm0 | |
vpblendw $128, %xmm3, %xmm2, %xmm2 | |
vpbroadcastq -351944(%r9,%r13,2), %ymm3 | |
vpblendd $240, %ymm3, %ymm2, %ymm2 | |
vpmovsxbw .LCPI1_130(%rip), %ymm3 | |
vpermt2w %ymm10, %ymm3, %ymm2 | |
vpbroadcastd -339144(%r9,%r13,2), %ymm3 | |
vpblendd $32, %ymm3, %ymm2, %ymm3 | |
vpmovsxbw .LCPI1_131(%rip), %ymm2 | |
vpermt2w %ymm9, %ymm2, %ymm3 | |
vmovdqa -326368(%r9,%r13,2), %ymm2 | |
vpblendd $192, %ymm2, %ymm3, %ymm3 | |
vpermt2w %ymm2, %ymm24, %ymm30 | |
vpmovsxbw .LCPI1_105(%rip), %ymm24 | |
vpermt2w %ymm15, %ymm4, %ymm3 | |
vpbroadcastd -313544(%r9,%r13,2), %ymm4 | |
vpblendd $128, %ymm4, %ymm3, %ymm8 | |
vpmovsxbw .LCPI1_132(%rip), %ymm4 | |
vmovdqa %ymm11, %ymm3 | |
vpermt2w %ymm10, %ymm4, %ymm3 | |
vpmovsxbw .LCPI1_133(%rip), %ymm4 | |
vpermt2w %ymm1, %ymm4, %ymm3 | |
vpmovsxbw .LCPI1_134(%rip), %ymm4 | |
vpermt2w %ymm9, %ymm4, %ymm3 | |
vpmovsxbw .LCPI1_135(%rip), %ymm4 | |
vpermt2w %ymm2, %ymm4, %ymm3 | |
vpbroadcastd -390340(%r9,%r13,2), %ymm4 | |
vpblendw $32, %ymm15, %ymm3, %ymm3 | |
vpblendd $240, %ymm3, %ymm11, %ymm11 | |
vpbroadcastw -403140(%r9,%r13,2), %ymm3 | |
vpunpcklwd %xmm6, %xmm3, %xmm3 | |
vpblendd $2, %xmm4, %xmm3, %xmm3 | |
vpsrldq $6, %xmm12, %xmm4 | |
vmovdqa64 %ymm18, %ymm12 | |
vmovdqa64 160(%rsp), %xmm18 | |
vpblendw $8, %xmm4, %xmm3, %xmm3 | |
vpsrlq $16, %xmm7, %xmm4 | |
vmovdqa64 %ymm17, %ymm7 | |
vshufps $244, %xmm14, %xmm3, %xmm3 | |
vpmovsxbw .LCPI1_107(%rip), %ymm14 | |
vpblendw $32, %xmm4, %xmm3, %xmm3 | |
vpbroadcastd -364740(%r9,%r13,2), %ymm4 | |
vpblendd $8, %xmm4, %xmm3, %xmm3 | |
vpblendw $128, %xmm0, %xmm3, %xmm0 | |
vpbroadcastd -351940(%r9,%r13,2), %ymm3 | |
vpblendd $240, %ymm3, %ymm0, %ymm0 | |
vpmovsxbw .LCPI1_136(%rip), %ymm3 | |
vpermt2w %ymm10, %ymm3, %ymm0 | |
vpbroadcastd -339140(%r9,%r13,2), %ymm3 | |
vpblendd $32, %ymm3, %ymm0, %ymm0 | |
vpmovsxbw .LCPI1_137(%rip), %ymm3 | |
vpermt2w %ymm9, %ymm3, %ymm0 | |
vpbroadcastd -326340(%r9,%r13,2), %ymm3 | |
vpblendd $192, %ymm3, %ymm0, %ymm0 | |
vpmovsxbw .LCPI1_138(%rip), %ymm3 | |
vpermt2w %ymm10, %ymm3, %ymm20 | |
vpmovsxbw .LCPI1_139(%rip), %ymm3 | |
vpermt2w %ymm10, %ymm3, %ymm7 | |
vpmovsxbw .LCPI1_140(%rip), %ymm3 | |
vpermt2w %ymm10, %ymm3, %ymm12 | |
vpmovsxbw .LCPI1_141(%rip), %ymm3 | |
vpermt2w %ymm10, %ymm3, %ymm5 | |
vpmovsxbw .LCPI1_142(%rip), %ymm3 | |
vmovdqa -217568(%r9,%r13,2), %ymm10 | |
vpermt2w %ymm1, %ymm3, %ymm20 | |
vpmovsxbw .LCPI1_143(%rip), %ymm3 | |
vpermt2w %ymm1, %ymm3, %ymm7 | |
vpmovsxbw .LCPI1_144(%rip), %ymm3 | |
vpermt2w %ymm1, %ymm3, %ymm12 | |
vpmovsxbw .LCPI1_145(%rip), %ymm3 | |
vpermt2w %ymm1, %ymm3, %ymm19 | |
vpmovsxbw .LCPI1_146(%rip), %ymm3 | |
vpermt2w %ymm1, %ymm3, %ymm5 | |
vpmovsxbw .LCPI1_147(%rip), %ymm1 | |
vpmovsxbw .LCPI1_148(%rip), %ymm3 | |
vpermt2w %ymm9, %ymm1, %ymm20 | |
vpmovsxbw .LCPI1_149(%rip), %ymm1 | |
vpermt2w %ymm9, %ymm1, %ymm7 | |
vpmovsxbw .LCPI1_150(%rip), %ymm1 | |
vpermt2w %ymm9, %ymm1, %ymm12 | |
vpmovsxbw .LCPI1_151(%rip), %ymm1 | |
vpermt2w %ymm9, %ymm1, %ymm19 | |
vmovdqa %ymm5, %ymm1 | |
vpermt2w %ymm9, %ymm3, %ymm1 | |
vpmovsxbw .LCPI1_152(%rip), %ymm3 | |
vpermt2w %ymm2, %ymm3, %ymm20 | |
vpmovsxbw .LCPI1_153(%rip), %ymm3 | |
vpermt2w %ymm2, %ymm3, %ymm7 | |
vpmovsxbw .LCPI1_154(%rip), %ymm3 | |
vpermt2w %ymm2, %ymm3, %ymm12 | |
vpmovsxbw .LCPI1_155(%rip), %ymm3 | |
vpermt2w %ymm2, %ymm3, %ymm19 | |
vpmovsxbw .LCPI1_156(%rip), %ymm3 | |
vpermt2w %ymm2, %ymm3, %ymm13 | |
vpmovsxbw .LCPI1_157(%rip), %ymm3 | |
vpermt2w %ymm2, %ymm3, %ymm1 | |
vpmovsxbw .LCPI1_158(%rip), %ymm2 | |
vpmovsxbw .LCPI1_159(%rip), %ymm3 | |
vpermt2w %ymm15, %ymm2, %ymm30 | |
vpmovsxbw .LCPI1_160(%rip), %ymm2 | |
vpermt2w %ymm15, %ymm2, %ymm20 | |
vpmovsxbw .LCPI1_161(%rip), %ymm2 | |
vpermt2w %ymm15, %ymm2, %ymm7 | |
vpmovsxbw .LCPI1_162(%rip), %ymm2 | |
vpermt2w %ymm15, %ymm2, %ymm12 | |
vpmovsxbw .LCPI1_163(%rip), %ymm2 | |
vpermt2w %ymm15, %ymm2, %ymm19 | |
vpmovsxbw .LCPI1_164(%rip), %ymm2 | |
vpermt2w %ymm15, %ymm2, %ymm13 | |
vpmovsxbw .LCPI1_165(%rip), %ymm2 | |
vpermt2w %ymm15, %ymm2, %ymm0 | |
vpmovsxbw .LCPI1_166(%rip), %ymm2 | |
vpermt2w %ymm15, %ymm2, %ymm1 | |
vmovdqa -313568(%r9,%r13,2), %ymm2 | |
vmovdqa -255968(%r9,%r13,2), %xmm15 | |
vpermt2w %ymm2, %ymm3, %ymm30 | |
vpmovsxbw .LCPI1_167(%rip), %ymm3 | |
vpblendd $128, %ymm2, %ymm0, %ymm9 | |
vpmovsxbw .LCPI1_168(%rip), %ymm0 | |
vpbroadcastw %xmm15, %xmm6 | |
vpermt2w %ymm2, %ymm3, %ymm20 | |
vpmovsxbw .LCPI1_169(%rip), %ymm3 | |
vpermt2w %ymm2, %ymm0, %ymm1 | |
vmovdqa -307168(%r9,%r13,2), %ymm0 | |
vpermt2w %ymm2, %ymm3, %ymm7 | |
vpmovsxbw .LCPI1_170(%rip), %ymm3 | |
vpermt2w %ymm2, %ymm3, %ymm12 | |
vpmovsxbw .LCPI1_171(%rip), %ymm3 | |
vpermt2w %ymm2, %ymm3, %ymm19 | |
vpmovsxbw .LCPI1_172(%rip), %ymm3 | |
vpermt2w %ymm2, %ymm3, %ymm13 | |
vpmovsxbw .LCPI1_173(%rip), %ymm3 | |
vpermt2w %ymm2, %ymm3, %ymm11 | |
vmovdqa 704(%rsp), %ymm2 | |
vmovdqa 672(%rsp), %ymm3 | |
vpermt2w %ymm0, %ymm23, %ymm2 | |
vpbroadcastd .LCPI1_66(%rip), %ymm23 | |
vmovdqa %ymm2, 704(%rsp) | |
vpmovsxbw .LCPI1_174(%rip), %ymm2 | |
vpermt2w %ymm0, %ymm2, %ymm30 | |
vpmovsxbw .LCPI1_175(%rip), %ymm2 | |
vmovdqa64 %ymm30, 1344(%rsp) | |
vpermt2w %ymm0, %ymm2, %ymm3 | |
vpmovsxbw .LCPI1_176(%rip), %ymm2 | |
vmovdqa %ymm3, 672(%rsp) | |
vmovdqa 640(%rsp), %ymm3 | |
vpermt2w %ymm0, %ymm2, %ymm20 | |
vpmovsxbw .LCPI1_177(%rip), %ymm2 | |
vmovdqa64 %ymm20, 1664(%rsp) | |
vpmovsxbw .LCPI1_126(%rip), %ymm20 | |
vpermt2w %ymm0, %ymm2, %ymm22 | |
vpmovsxbw .LCPI1_178(%rip), %ymm2 | |
vmovdqa64 %ymm22, 1632(%rsp) | |
vpermt2w %ymm0, %ymm2, %ymm7 | |
vpmovsxbw .LCPI1_179(%rip), %ymm2 | |
vmovdqa %ymm7, 1600(%rsp) | |
vmovdqa -243168(%r9,%r13,2), %ymm7 | |
vpermt2w %ymm0, %ymm2, %ymm3 | |
vpmovsxbw .LCPI1_180(%rip), %ymm2 | |
vmovdqa %ymm3, 640(%rsp) | |
vmovdqa 608(%rsp), %ymm3 | |
vpermt2w %ymm0, %ymm2, %ymm12 | |
vpmovsxbw .LCPI1_181(%rip), %ymm2 | |
vmovdqa %ymm12, 1568(%rsp) | |
vpermt2w %ymm0, %ymm2, %ymm3 | |
vpmovsxbw .LCPI1_182(%rip), %ymm2 | |
vmovdqa %ymm3, 608(%rsp) | |
vmovdqa 320(%rsp), %ymm3 | |
vpermt2w %ymm0, %ymm2, %ymm19 | |
vpmovsxbw .LCPI1_183(%rip), %ymm2 | |
vmovdqa64 %ymm19, 1536(%rsp) | |
vpmovsxbw .LCPI1_97(%rip), %ymm19 | |
vpermt2w %ymm0, %ymm2, %ymm3 | |
vpmovsxbw .LCPI1_184(%rip), %ymm2 | |
vmovdqa %ymm3, 320(%rsp) | |
vpermt2w %ymm0, %ymm2, %ymm13 | |
vpmovsxbw .LCPI1_185(%rip), %ymm2 | |
vmovdqa %ymm13, 1504(%rsp) | |
vpmovsxbw .LCPI1_95(%rip), %ymm13 | |
vpermt2w %ymm0, %ymm2, %ymm8 | |
vpmovsxbw .LCPI1_186(%rip), %ymm2 | |
vmovdqa %ymm8, 1472(%rsp) | |
vmovdqa -287968(%r9,%r13,2), %xmm8 | |
vpermt2w %ymm0, %ymm2, %ymm11 | |
vpmovsxbw .LCPI1_187(%rip), %ymm2 | |
vmovdqa %ymm11, 1440(%rsp) | |
vpbroadcastd -236768(%r9,%r13,2), %ymm11 | |
vpermt2w %ymm0, %ymm2, %ymm9 | |
vpblendw $128, %ymm0, %ymm1, %ymm2 | |
vmovdqa -300768(%r9,%r13,2), %xmm0 | |
vmovdqa -294368(%r9,%r13,2), %xmm1 | |
vpblendd $240, %ymm2, %ymm5, %ymm2 | |
vmovdqa %ymm2, 1376(%rsp) | |
vmovdqa %ymm9, 1408(%rsp) | |
vmovdqa -275168(%r9,%r13,2), %xmm9 | |
vpunpcklwd %xmm1, %xmm0, %xmm3 | |
vpunpckldq %xmm8, %xmm3, %xmm4 | |
vmovdqa -281568(%r9,%r13,2), %xmm3 | |
insertq $48, $16, %xmm3, %xmm4 | |
vpunpcklqdq %xmm9, %xmm4, %xmm2 | |
vmovdqa -268768(%r9,%r13,2), %xmm4 | |
vpbroadcastw %xmm4, %xmm5 | |
vpblendw $32, %xmm5, %xmm2, %xmm5 | |
vmovaps -262368(%r9,%r13,2), %xmm2 | |
vinsertps $48, %xmm2, %xmm5, %xmm5 | |
vpblendw $128, %xmm6, %xmm5, %xmm5 | |
vinserti128 $1, -249568(%r9,%r13,2), %ymm5, %ymm6 | |
vpermt2w %ymm7, %ymm21, %ymm6 | |
vpblendd $32, %ymm11, %ymm6, %ymm12 | |
vmovdqa -230368(%r9,%r13,2), %ymm6 | |
vmovdqa -223968(%r9,%r13,2), %xmm11 | |
vpermt2w %ymm6, %ymm13, %ymm12 | |
vpmovsxbw .LCPI1_96(%rip), %ymm13 | |
vinserti128 $1, %xmm11, %ymm5, %ymm5 | |
vshufpd $2, %ymm5, %ymm12, %ymm12 | |
vpermt2w %ymm10, %ymm13, %ymm12 | |
vpbroadcastd -211168(%r9,%r13,2), %ymm13 | |
vpblendd $128, %ymm13, %ymm12, %ymm12 | |
vpsrld $16, %xmm8, %xmm13 | |
vmovdqa64 %ymm12, %ymm17 | |
vpsrld $16, %xmm0, %xmm12 | |
vpblendw $2, %xmm1, %xmm12, %xmm12 | |
vpunpckldq %xmm13, %xmm12, %xmm12 | |
vpmovsxbw .LCPI1_102(%rip), %xmm13 | |
vpermt2w %xmm3, %xmm13, %xmm12 | |
vpsrld $16, %xmm9, %xmm13 | |
vpunpcklqdq %xmm13, %xmm12, %xmm12 | |
vpbroadcastw -268766(%r9,%r13,2), %xmm13 | |
vpblendw $32, %xmm13, %xmm12, %xmm12 | |
vpslldq $10, %xmm2, %xmm13 | |
vpblendd $8, %xmm13, %xmm12, %xmm12 | |
vpbroadcastw -255966(%r9,%r13,2), %xmm13 | |
vpblendw $128, %xmm13, %xmm12, %xmm5 | |
vpbroadcastw -294364(%r9,%r13,2), %xmm12 | |
vpbroadcastw -300764(%r9,%r13,2), %xmm13 | |
vmovdqa64 %ymm5, %ymm22 | |
vpunpcklwd %xmm12, %xmm13, %xmm12 | |
vpmovsxbw .LCPI1_106(%rip), %xmm13 | |
vpblendd $2, %xmm8, %xmm12, %xmm12 | |
vpermt2w %xmm3, %xmm13, %xmm12 | |
vpslldq $6, %xmm4, %xmm13 | |
vshufps $212, %xmm9, %xmm12, %xmm12 | |
vpblendw $32, %xmm13, %xmm12, %xmm12 | |
vpslldq $10, %xmm15, %xmm13 | |
vinsertps $112, %xmm2, %xmm12, %xmm12 | |
vpblendw $128, %xmm13, %xmm12, %xmm12 | |
vpbroadcastd -249564(%r9,%r13,2), %ymm13 | |
vpblendd $240, %ymm13, %ymm12, %ymm13 | |
vinserti128 $1, -236768(%r9,%r13,2), %ymm12, %ymm12 | |
vpermt2w %ymm7, %ymm14, %ymm13 | |
vpbroadcastd -223964(%r9,%r13,2), %ymm14 | |
vpblendd $34, %ymm12, %ymm13, %ymm12 | |
vpmovsxbw .LCPI1_108(%rip), %ymm13 | |
vpermt2w %ymm6, %ymm13, %ymm12 | |
vpbroadcastd -211164(%r9,%r13,2), %ymm13 | |
vpblendd $192, %ymm14, %ymm12, %ymm12 | |
vpmovsxbw .LCPI1_109(%rip), %ymm14 | |
vpermt2w %ymm10, %ymm14, %ymm12 | |
vpbroadcastq -249560(%r9,%r13,2), %ymm14 | |
vpblendd $128, %ymm13, %ymm12, %ymm12 | |
vpsrlq $48, %xmm0, %xmm13 | |
vmovdqa %ymm12, 576(%rsp) | |
vpsrlq $48, %xmm1, %xmm12 | |
vpunpcklwd %xmm12, %xmm13, %xmm12 | |
vpsrlq $48, %xmm8, %xmm13 | |
vpunpckldq %xmm13, %xmm12, %xmm12 | |
vpsrlq $48, %xmm9, %xmm13 | |
vpblendw $8, %xmm3, %xmm12, %xmm12 | |
vpunpcklqdq %xmm13, %xmm12, %xmm12 | |
vpbroadcastw -268762(%r9,%r13,2), %xmm13 | |
vpblendw $32, %xmm13, %xmm12, %xmm12 | |
vpmovzxwd %xmm2, %xmm13 | |
vpblendd $8, %xmm13, %xmm12, %xmm12 | |
vpbroadcastw -255962(%r9,%r13,2), %xmm13 | |
vpblendw $128, %xmm13, %xmm12, %xmm5 | |
vpbroadcastw -294360(%r9,%r13,2), %xmm12 | |
vpbroadcastw -300760(%r9,%r13,2), %xmm13 | |
vmovdqa64 %ymm5, %ymm29 | |
vpmovsxbw .LCPI1_105(%rip), %ymm5 | |
vpunpcklwd %xmm12, %xmm13, %xmm12 | |
vpsrldq $2, %xmm3, %xmm13 | |
vpsrldq $6, %xmm3, %xmm3 | |
vinsertps $156, %xmm8, %xmm12, %xmm12 | |
vpblendw $8, %xmm13, %xmm12, %xmm12 | |
vpslld $16, %xmm4, %xmm13 | |
vpblendd $3, %xmm12, %xmm9, %xmm12 | |
vpblendw $32, %xmm13, %xmm12, %xmm12 | |
vpsllq $48, %xmm15, %xmm13 | |
vinsertps $176, %xmm2, %xmm12, %xmm12 | |
vpblendw $128, %xmm13, %xmm12, %xmm12 | |
vpblendd $240, %ymm14, %ymm12, %ymm13 | |
vpbroadcastd -236760(%r9,%r13,2), %ymm14 | |
vinserti128 $1, %xmm11, %ymm12, %ymm11 | |
vpmovsxbw .LCPI1_103(%rip), %ymm12 | |
vpermt2w %ymm7, %ymm5, %ymm13 | |
vpblendd $32, %ymm14, %ymm13, %ymm13 | |
vmovdqa -281552(%r9,%r13,2), %xmm14 | |
vpermt2w %ymm6, %ymm12, %ymm13 | |
vpbroadcastd -211160(%r9,%r13,2), %ymm12 | |
vpblendd $204, %ymm11, %ymm13, %ymm11 | |
vpmovsxbw .LCPI1_111(%rip), %ymm13 | |
vpermt2w %ymm10, %ymm13, %ymm11 | |
vmovapd 192(%rsp), %xmm13 | |
vpblendd $128, %ymm12, %ymm11, %ymm11 | |
vpsrldq $10, %xmm0, %xmm12 | |
vpsrldq $14, %xmm0, %xmm0 | |
vmovdqa64 %ymm11, %ymm31 | |
vpsrldq $10, %xmm1, %xmm11 | |
vpsrldq $14, %xmm1, %xmm1 | |
vpunpcklwd %xmm11, %xmm12, %xmm11 | |
vpsrldq $10, %xmm8, %xmm12 | |
vpunpcklwd %xmm1, %xmm0, %xmm0 | |
vpbroadcastw -281554(%r9,%r13,2), %xmm1 | |
vpunpckldq %xmm12, %xmm11, %xmm11 | |
vpbroadcastw -281558(%r9,%r13,2), %xmm12 | |
vpblendw $8, %xmm12, %xmm11, %xmm11 | |
vpsrldq $10, %xmm9, %xmm12 | |
vpunpcklqdq %xmm12, %xmm11, %xmm11 | |
vpsllq $16, %xmm2, %xmm12 | |
vpblendw $32, %xmm4, %xmm11, %xmm11 | |
vpsrlq $16, %xmm4, %xmm4 | |
vpblendd $8, %xmm12, %xmm11, %xmm11 | |
vpbroadcastw -255958(%r9,%r13,2), %xmm12 | |
vpblendw $128, %xmm12, %xmm11, %xmm5 | |
vpbroadcastw -294356(%r9,%r13,2), %xmm11 | |
vpbroadcastw -300756(%r9,%r13,2), %xmm12 | |
vmovdqa64 %ymm5, %ymm30 | |
vmovdqa64 %ymm30, %ymm21 | |
vpunpcklwd %xmm11, %xmm12, %xmm11 | |
vpmovsxbw .LCPI1_112(%rip), %ymm12 | |
vinsertps $220, %xmm8, %xmm11, %xmm11 | |
vpblendw $8, %xmm3, %xmm11, %xmm3 | |
vpslld $16, %xmm15, %xmm11 | |
vshufps $244, %xmm9, %xmm3, %xmm3 | |
vpblendw $32, %xmm4, %xmm3, %xmm3 | |
vpbroadcastd -249556(%r9,%r13,2), %ymm4 | |
vpblendd $8, %xmm2, %xmm3, %xmm3 | |
vpblendw $128, %xmm11, %xmm3, %xmm3 | |
vpbroadcastd -236756(%r9,%r13,2), %ymm11 | |
vpblendd $240, %ymm4, %ymm3, %ymm4 | |
vinserti128 $1, -211168(%r9,%r13,2), %ymm3, %ymm3 | |
vpermt2w %ymm7, %ymm12, %ymm4 | |
vpsrldq $10, %xmm13, %xmm12 | |
vpblendd $32, %ymm11, %ymm4, %ymm4 | |
vpmovsxbw .LCPI1_113(%rip), %ymm11 | |
vpermt2w %ymm6, %ymm11, %ymm4 | |
vpbroadcastd -223956(%r9,%r13,2), %ymm11 | |
vpblendd $192, %ymm11, %ymm4, %ymm4 | |
vpmovsxbw .LCPI1_114(%rip), %ymm11 | |
vpermt2w %ymm10, %ymm11, %ymm4 | |
vpblendd $136, %ymm3, %ymm4, %ymm3 | |
vpmovsxbw .LCPI1_115(%rip), %ymm4 | |
vmovdqa %ymm3, 544(%rsp) | |
vpsrldq $14, %xmm8, %xmm3 | |
vmovdqa 224(%rsp), %xmm8 | |
vpunpckldq %xmm3, %xmm0, %xmm0 | |
vpbroadcastd -236752(%r9,%r13,2), %ymm3 | |
vpblendw $8, %xmm1, %xmm0, %xmm0 | |
vpsrldq $14, %xmm9, %xmm1 | |
vmovdqa -275152(%r9,%r13,2), %xmm9 | |
vpunpcklqdq %xmm1, %xmm0, %xmm0 | |
vpbroadcastw -268754(%r9,%r13,2), %xmm1 | |
vpblendw $32, %xmm1, %xmm0, %xmm0 | |
vpsrld $16, %xmm2, %xmm1 | |
vpblendd $8, %xmm1, %xmm0, %xmm0 | |
vpblendw $128, %xmm15, %xmm0, %xmm5 | |
vpbroadcastw -300752(%r9,%r13,2), %ymm0 | |
vmovdqa -268752(%r9,%r13,2), %xmm15 | |
vpunpcklwd %xmm13, %xmm0, %xmm0 | |
vpbroadcastw %xmm15, %xmm1 | |
vpunpckldq %xmm18, %xmm0, %xmm0 | |
insertq $48, $16, %xmm14, %xmm0 | |
vpunpcklqdq %xmm9, %xmm0, %xmm0 | |
vpblendw $32, %xmm1, %xmm0, %xmm0 | |
vpbroadcastd -262352(%r9,%r13,2), %xmm1 | |
vpblendd $8, %xmm1, %xmm0, %xmm1 | |
vmovdqa -255952(%r9,%r13,2), %xmm0 | |
vpbroadcastw %xmm0, %xmm2 | |
vpblendw $128, %xmm2, %xmm1, %xmm1 | |
vmovdqa -249568(%r9,%r13,2), %ymm2 | |
vpblendd $240, %ymm2, %ymm1, %ymm1 | |
vpermt2w %ymm7, %ymm4, %ymm1 | |
vpmovsxbw .LCPI1_116(%rip), %ymm4 | |
vpblendd $32, %ymm3, %ymm1, %ymm1 | |
vpbroadcastq -223952(%r9,%r13,2), %ymm3 | |
vpermt2w %ymm6, %ymm4, %ymm1 | |
vmovdqa 256(%rsp), %ymm4 | |
vpblendd $192, %ymm3, %ymm1, %ymm1 | |
vpmovsxbw .LCPI1_117(%rip), %ymm3 | |
vpermt2w %ymm10, %ymm3, %ymm1 | |
vpbroadcastd -211152(%r9,%r13,2), %ymm3 | |
vpblendd $128, %ymm3, %ymm1, %ymm1 | |
vpsrld $16, %xmm9, %xmm3 | |
vmovdqa %ymm1, 512(%rsp) | |
vmovdqa -281568(%r9,%r13,2), %ymm1 | |
vpermt2w %ymm1, %ymm26, %ymm4 | |
vpunpcklqdq %xmm3, %xmm4, %xmm3 | |
vpbroadcastw -268750(%r9,%r13,2), %xmm4 | |
vpblendw $32, %xmm4, %xmm3, %xmm4 | |
vmovapd -262352(%r9,%r13,2), %xmm3 | |
vpslldq $10, %xmm3, %xmm11 | |
vpblendd $8, %xmm11, %xmm4, %xmm4 | |
vpbroadcastw -255950(%r9,%r13,2), %xmm11 | |
vpblendw $128, %xmm11, %xmm4, %xmm4 | |
vpshuflw $85, %ymm2, %ymm11 | |
vpblendw $2, %ymm7, %ymm11, %ymm11 | |
vpblendd $240, %ymm11, %ymm4, %ymm4 | |
vpmovsxbw .LCPI1_119(%rip), %ymm11 | |
vmovdqa64 %ymm4, %ymm16 | |
vmovdqa 288(%rsp), %ymm4 | |
vpermt2w %ymm1, %ymm28, %ymm4 | |
vshufps $212, %xmm9, %xmm4, %xmm1 | |
vpslldq $6, %xmm15, %xmm4 | |
vpblendw $32, %xmm4, %xmm1, %xmm1 | |
vpbroadcastd -262348(%r9,%r13,2), %ymm4 | |
vpblendd $8, %xmm4, %xmm1, %xmm1 | |
vpslldq $10, %xmm0, %xmm4 | |
vpblendw $128, %xmm4, %xmm1, %xmm1 | |
vpbroadcastd -249548(%r9,%r13,2), %ymm4 | |
vpblendd $240, %ymm4, %ymm1, %ymm4 | |
vpmovsxbw .LCPI1_120(%rip), %ymm1 | |
vpermt2w %ymm7, %ymm1, %ymm4 | |
vmovdqa -236768(%r9,%r13,2), %ymm1 | |
vpblendd $32, %ymm1, %ymm4, %ymm4 | |
vpermt2w %ymm6, %ymm11, %ymm4 | |
vpbroadcastd -223948(%r9,%r13,2), %ymm11 | |
vpblendd $192, %ymm11, %ymm4, %ymm4 | |
vpmovsxbw .LCPI1_121(%rip), %ymm11 | |
vpermt2w %ymm10, %ymm11, %ymm4 | |
vpbroadcastd -211148(%r9,%r13,2), %ymm11 | |
vpblendd $128, %ymm11, %ymm4, %ymm4 | |
vpsrlq $48, %xmm13, %xmm11 | |
vmovdqa %ymm4, 288(%rsp) | |
vpsrlq $48, %xmm8, %xmm4 | |
vpunpcklwd %xmm11, %xmm4, %xmm4 | |
vpsrlq $48, %xmm18, %xmm11 | |
vpunpckldq %xmm11, %xmm4, %xmm4 | |
vpsrlq $48, %xmm9, %xmm11 | |
vpblendw $8, %xmm14, %xmm4, %xmm4 | |
vpunpcklqdq %xmm11, %xmm4, %xmm4 | |
vpbroadcastw -268746(%r9,%r13,2), %xmm11 | |
vpblendw $32, %xmm11, %xmm4, %xmm4 | |
vpmovzxwd %xmm3, %xmm11 | |
vpblendd $8, %xmm11, %xmm4, %xmm4 | |
vpbroadcastw -255946(%r9,%r13,2), %xmm11 | |
vpblendw $128, %xmm11, %xmm4, %xmm4 | |
vpsrldq $10, %xmm8, %xmm11 | |
vpunpcklwd %xmm12, %xmm11, %xmm11 | |
vpsrldq $10, %xmm18, %xmm12 | |
vpunpckldq %xmm12, %xmm11, %xmm11 | |
vpbroadcastw -281542(%r9,%r13,2), %xmm12 | |
vpblendw $8, %xmm12, %xmm11, %xmm11 | |
vpsrldq $10, %xmm9, %xmm12 | |
vpunpcklqdq %xmm12, %xmm11, %xmm11 | |
vpsllq $16, %xmm3, %xmm12 | |
vpsrld $16, %xmm3, %xmm3 | |
vpblendw $32, %xmm15, %xmm11, %xmm11 | |
vpblendd $8, %xmm12, %xmm11, %xmm11 | |
vpbroadcastw -255942(%r9,%r13,2), %xmm12 | |
vpblendw $128, %xmm12, %xmm11, %xmm12 | |
vpsrldq $14, %xmm13, %xmm11 | |
vpsrldq $14, %xmm8, %xmm13 | |
vpunpcklwd %xmm11, %xmm13, %xmm11 | |
vpsrldq $14, %xmm18, %xmm13 | |
vmovdqa64 %ymm22, %ymm18 | |
vmovdqa64 %ymm5, %ymm22 | |
vpermt2w %ymm2, %ymm20, %ymm12 | |
vpermt2w %ymm2, %ymm19, %ymm18 | |
vpmovsxbw .LCPI1_104(%rip), %ymm20 | |
vpunpckldq %xmm13, %xmm11, %xmm11 | |
vpbroadcastw -281538(%r9,%r13,2), %xmm13 | |
vpblendw $8, %xmm13, %xmm11, %xmm11 | |
vpsrldq $14, %xmm9, %xmm13 | |
vpunpcklqdq %xmm13, %xmm11, %xmm11 | |
vpbroadcastw -268738(%r9,%r13,2), %xmm13 | |
vpblendw $32, %xmm13, %xmm11, %xmm11 | |
vpblendd $8, %xmm3, %xmm11, %xmm3 | |
vpmovsxbw .LCPI1_123(%rip), %ymm11 | |
vpermt2w %ymm2, %ymm11, %ymm29 | |
vpmovsxbw .LCPI1_124(%rip), %ymm11 | |
vpermt2w %ymm2, %ymm11, %ymm21 | |
vpmovsxbw .LCPI1_125(%rip), %ymm11 | |
vpermt2w %ymm2, %ymm11, %ymm22 | |
vpblendw $128, %xmm0, %xmm3, %xmm11 | |
vpmovsxbw .LCPI1_127(%rip), %ymm3 | |
vpermt2w %ymm2, %ymm3, %ymm11 | |
vpmovsxbw .LCPI1_128(%rip), %ymm3 | |
vpermt2w %ymm7, %ymm23, %ymm2 | |
vpmovsxbw .LCPI1_118(%rip), %ymm23 | |
vpermt2w %ymm1, %ymm3, %ymm2 | |
vpbroadcastw -294344(%r9,%r13,2), %xmm3 | |
vpblendw $8, %ymm6, %ymm2, %ymm2 | |
vpblendd $240, %ymm2, %ymm4, %ymm13 | |
vpbroadcastw -300744(%r9,%r13,2), %ymm2 | |
vpmovsxbw .LCPI1_129(%rip), %ymm4 | |
vpunpcklwd %xmm3, %xmm2, %xmm2 | |
vunpcklps -287944(%r9,%r13,2){1to4}, %xmm2, %xmm2 | |
vpsrldq $2, %xmm14, %xmm3 | |
vpblendw $8, %xmm3, %xmm2, %xmm2 | |
vpslld $16, %xmm15, %xmm3 | |
vpblendd $3, %xmm2, %xmm9, %xmm2 | |
vpblendw $32, %xmm3, %xmm2, %xmm2 | |
vpbroadcastd -262344(%r9,%r13,2), %xmm3 | |
vpblendd $8, %xmm3, %xmm2, %xmm2 | |
vpsllq $48, %xmm0, %xmm3 | |
vpslld $16, %xmm0, %xmm0 | |
vpblendw $128, %xmm3, %xmm2, %xmm2 | |
vpbroadcastq -249544(%r9,%r13,2), %ymm3 | |
vpblendd $240, %ymm3, %ymm2, %ymm2 | |
vpmovsxbw .LCPI1_130(%rip), %ymm3 | |
vpermt2w %ymm7, %ymm3, %ymm2 | |
vpbroadcastd -236744(%r9,%r13,2), %ymm3 | |
vpblendd $32, %ymm3, %ymm2, %ymm3 | |
vpmovsxbw .LCPI1_131(%rip), %ymm2 | |
vpermt2w %ymm6, %ymm2, %ymm3 | |
vmovdqa -223968(%r9,%r13,2), %ymm2 | |
vpblendd $192, %ymm2, %ymm3, %ymm3 | |
vpermt2w %ymm10, %ymm4, %ymm3 | |
vpbroadcastd -211144(%r9,%r13,2), %ymm4 | |
vpblendd $128, %ymm4, %ymm3, %ymm5 | |
vpmovsxbw .LCPI1_132(%rip), %ymm4 | |
vmovdqa %ymm12, %ymm3 | |
vpermt2w %ymm7, %ymm4, %ymm3 | |
vpmovsxbw .LCPI1_133(%rip), %ymm4 | |
vpermt2w %ymm1, %ymm4, %ymm3 | |
vpmovsxbw .LCPI1_134(%rip), %ymm4 | |
vpermt2w %ymm6, %ymm4, %ymm3 | |
vpmovsxbw .LCPI1_135(%rip), %ymm4 | |
vpermt2w %ymm2, %ymm4, %ymm3 | |
vpbroadcastd -287940(%r9,%r13,2), %ymm4 | |
vpblendw $32, %ymm10, %ymm3, %ymm3 | |
vpblendd $240, %ymm3, %ymm12, %ymm8 | |
vpbroadcastw -300740(%r9,%r13,2), %ymm3 | |
vpbroadcastw -294340(%r9,%r13,2), %xmm12 | |
vpunpcklwd %xmm12, %xmm3, %xmm3 | |
vpblendd $2, %xmm4, %xmm3, %xmm3 | |
vpsrldq $6, %xmm14, %xmm4 | |
vmovdqa64 %ymm21, %ymm14 | |
vpblendw $8, %xmm4, %xmm3, %xmm3 | |
vpsrlq $16, %xmm15, %xmm4 | |
vmovdqa64 %ymm18, %ymm15 | |
vpermt2w %ymm7, %ymm27, %ymm15 | |
vpmovsxbw .LCPI1_107(%rip), %ymm18 | |
vshufps $244, %xmm9, %xmm3, %xmm3 | |
vmovdqa -179168(%r9,%r13,2), %xmm9 | |
vpblendw $32, %xmm4, %xmm3, %xmm3 | |
vpbroadcastd -262340(%r9,%r13,2), %ymm4 | |
vpermt2w %ymm1, %ymm20, %ymm15 | |
vpblendd $8, %xmm4, %xmm3, %xmm3 | |
vmovdqa64 %ymm22, %ymm4 | |
vpmovsxbw .LCPI1_122(%rip), %ymm22 | |
vpblendw $128, %xmm0, %xmm3, %xmm0 | |
vpbroadcastd -249540(%r9,%r13,2), %ymm3 | |
vpblendd $240, %ymm3, %ymm0, %ymm0 | |
vpmovsxbw .LCPI1_136(%rip), %ymm3 | |
vpermt2w %ymm7, %ymm3, %ymm0 | |
vpbroadcastd -236740(%r9,%r13,2), %ymm3 | |
vpblendd $32, %ymm3, %ymm0, %ymm0 | |
vpmovsxbw .LCPI1_137(%rip), %ymm3 | |
vpermt2w %ymm6, %ymm3, %ymm0 | |
vpbroadcastd -223940(%r9,%r13,2), %ymm3 | |
vpblendd $192, %ymm3, %ymm0, %ymm0 | |
vpmovsxbw .LCPI1_138(%rip), %ymm3 | |
vpermt2w %ymm7, %ymm3, %ymm29 | |
vpmovsxbw .LCPI1_139(%rip), %ymm3 | |
vpermt2w %ymm7, %ymm3, %ymm14 | |
vpmovsxbw .LCPI1_140(%rip), %ymm3 | |
vpermt2w %ymm7, %ymm3, %ymm4 | |
vpmovsxbw .LCPI1_141(%rip), %ymm3 | |
vpermt2w %ymm7, %ymm3, %ymm11 | |
vpmovsxbw .LCPI1_142(%rip), %ymm3 | |
vmovaps -159968(%r9,%r13,2), %xmm7 | |
vpermt2w %ymm1, %ymm3, %ymm29 | |
vpmovsxbw .LCPI1_143(%rip), %ymm3 | |
vpermt2w %ymm1, %ymm3, %ymm14 | |
vpmovsxbw .LCPI1_144(%rip), %ymm3 | |
vpermt2w %ymm1, %ymm3, %ymm4 | |
vpmovsxbw .LCPI1_145(%rip), %ymm3 | |
vpermt2w %ymm1, %ymm3, %ymm16 | |
vpmovsxbw .LCPI1_146(%rip), %ymm3 | |
vpermt2w %ymm1, %ymm3, %ymm11 | |
vpmovsxbw .LCPI1_101(%rip), %ymm1 | |
vpmovsxbw .LCPI1_148(%rip), %ymm3 | |
vpermt2w %ymm6, %ymm1, %ymm15 | |
vpmovsxbw .LCPI1_147(%rip), %ymm1 | |
vpermt2w %ymm2, %ymm23, %ymm15 | |
vpermt2w %ymm6, %ymm1, %ymm29 | |
vpmovsxbw .LCPI1_149(%rip), %ymm1 | |
vpermt2w %ymm6, %ymm1, %ymm14 | |
vpmovsxbw .LCPI1_150(%rip), %ymm1 | |
vpermt2w %ymm6, %ymm1, %ymm4 | |
vpmovsxbw .LCPI1_151(%rip), %ymm1 | |
vpermt2w %ymm6, %ymm1, %ymm16 | |
vmovdqa %ymm11, %ymm1 | |
vpermt2w %ymm6, %ymm3, %ymm1 | |
vpmovsxbw .LCPI1_152(%rip), %ymm3 | |
vpermt2w %ymm2, %ymm3, %ymm29 | |
vpmovsxbw .LCPI1_153(%rip), %ymm3 | |
vpermt2w %ymm2, %ymm3, %ymm14 | |
vpmovsxbw .LCPI1_154(%rip), %ymm3 | |
vpermt2w %ymm2, %ymm3, %ymm4 | |
vpmovsxbw .LCPI1_155(%rip), %ymm3 | |
vpermt2w %ymm2, %ymm3, %ymm16 | |
vpmovsxbw .LCPI1_156(%rip), %ymm3 | |
vpermt2w %ymm2, %ymm3, %ymm13 | |
vpmovsxbw .LCPI1_157(%rip), %ymm3 | |
vpermt2w %ymm2, %ymm3, %ymm1 | |
vpmovsxbw .LCPI1_158(%rip), %ymm2 | |
vpmovsxbw .LCPI1_159(%rip), %ymm3 | |
vpermt2w %ymm10, %ymm2, %ymm15 | |
vpmovsxbw .LCPI1_160(%rip), %ymm2 | |
vpermt2w %ymm10, %ymm2, %ymm29 | |
vpmovsxbw .LCPI1_161(%rip), %ymm2 | |
vpermt2w %ymm10, %ymm2, %ymm14 | |
vpmovsxbw .LCPI1_162(%rip), %ymm2 | |
vpermt2w %ymm10, %ymm2, %ymm4 | |
vpmovsxbw .LCPI1_163(%rip), %ymm2 | |
vpermt2w %ymm10, %ymm2, %ymm16 | |
vpmovsxbw .LCPI1_164(%rip), %ymm2 | |
vpermt2w %ymm10, %ymm2, %ymm13 | |
vpmovsxbw .LCPI1_165(%rip), %ymm2 | |
vpermt2w %ymm10, %ymm2, %ymm0 | |
vpmovsxbw .LCPI1_166(%rip), %ymm2 | |
vpermt2w %ymm10, %ymm2, %ymm1 | |
vmovdqa -211168(%r9,%r13,2), %ymm2 | |
vmovdqa -166368(%r9,%r13,2), %xmm10 | |
vpermt2w %ymm2, %ymm3, %ymm15 | |
vpmovsxbw .LCPI1_167(%rip), %ymm3 | |
vpblendd $128, %ymm2, %ymm0, %ymm6 | |
vpmovsxbw .LCPI1_168(%rip), %ymm0 | |
vpermt2w %ymm2, %ymm3, %ymm29 | |
vpmovsxbw .LCPI1_169(%rip), %ymm3 | |
vpermt2w %ymm2, %ymm0, %ymm1 | |
vmovdqa -204768(%r9,%r13,2), %ymm0 | |
vpermt2w %ymm2, %ymm3, %ymm14 | |
vpmovsxbw .LCPI1_170(%rip), %ymm3 | |
vpermt2w %ymm0, %ymm22, %ymm17 | |
vmovdqa64 %ymm17, 1088(%rsp) | |
vpmovsxbw .LCPI1_106(%rip), %xmm17 | |
vpermt2w %ymm2, %ymm3, %ymm4 | |
vpmovsxbw .LCPI1_171(%rip), %ymm3 | |
vpermt2w %ymm2, %ymm3, %ymm16 | |
vpmovsxbw .LCPI1_172(%rip), %ymm3 | |
vpermt2w %ymm2, %ymm3, %ymm13 | |
vpmovsxbw .LCPI1_173(%rip), %ymm3 | |
vpermt2w %ymm2, %ymm3, %ymm8 | |
vpmovsxbw .LCPI1_174(%rip), %ymm2 | |
vmovdqa 576(%rsp), %ymm3 | |
vpermt2w %ymm0, %ymm2, %ymm15 | |
vpmovsxbw .LCPI1_175(%rip), %ymm2 | |
vmovdqa %ymm15, 992(%rsp) | |
vmovdqa 352(%rsp), %xmm15 | |
vpermt2w %ymm0, %ymm2, %ymm3 | |
vpmovsxbw .LCPI1_176(%rip), %ymm2 | |
vmovdqa %ymm3, 576(%rsp) | |
vmovdqa 544(%rsp), %ymm3 | |
vpermt2w %ymm0, %ymm2, %ymm29 | |
vpmovsxbw .LCPI1_177(%rip), %ymm2 | |
vmovdqa64 %ymm29, 864(%rsp) | |
vmovdqa64 %ymm25, %ymm29 | |
vpermt2w %ymm0, %ymm2, %ymm31 | |
vpmovsxbw .LCPI1_178(%rip), %ymm2 | |
vmovdqa64 %ymm31, 1312(%rsp) | |
vpermt2w %ymm0, %ymm2, %ymm14 | |
vpmovsxbw .LCPI1_179(%rip), %ymm2 | |
vmovdqa %ymm14, 1024(%rsp) | |
vpmovsxbw .LCPI1_107(%rip), %ymm14 | |
vpermt2w %ymm0, %ymm2, %ymm3 | |
vpmovsxbw .LCPI1_180(%rip), %ymm2 | |
vmovdqa %ymm3, 544(%rsp) | |
vmovdqa 512(%rsp), %ymm3 | |
vpermt2w %ymm0, %ymm2, %ymm4 | |
vpmovsxbw .LCPI1_181(%rip), %ymm2 | |
vmovdqa %ymm4, 1280(%rsp) | |
vmovdqa -185568(%r9,%r13,2), %xmm4 | |
vpermt2w %ymm0, %ymm2, %ymm3 | |
vpmovsxbw .LCPI1_182(%rip), %ymm2 | |
vmovdqa %ymm3, 512(%rsp) | |
vmovdqa 288(%rsp), %ymm3 | |
vpermt2w %ymm0, %ymm2, %ymm16 | |
vpmovsxbw .LCPI1_183(%rip), %ymm2 | |
vmovdqa64 %ymm16, 1248(%rsp) | |
vpermt2w %ymm0, %ymm2, %ymm3 | |
vpmovsxbw .LCPI1_184(%rip), %ymm2 | |
vmovdqa %ymm3, 288(%rsp) | |
vmovdqa -198368(%r9,%r13,2), %xmm3 | |
vpermt2w %ymm0, %ymm2, %ymm13 | |
vpmovsxbw .LCPI1_185(%rip), %ymm2 | |
vmovdqa %ymm13, 1216(%rsp) | |
vpmovsxbw .LCPI1_95(%rip), %ymm13 | |
vpermt2w %ymm0, %ymm2, %ymm5 | |
vpmovsxbw .LCPI1_186(%rip), %ymm2 | |
vmovdqa %ymm5, 1184(%rsp) | |
vmovdqa -172768(%r9,%r13,2), %xmm5 | |
vpermt2w %ymm0, %ymm2, %ymm8 | |
vpmovsxbw .LCPI1_187(%rip), %ymm2 | |
vmovdqa %ymm8, 1152(%rsp) | |
vmovdqa -153568(%r9,%r13,2), %xmm8 | |
vpermt2w %ymm0, %ymm2, %ymm6 | |
vpblendw $128, %ymm0, %ymm1, %ymm0 | |
vmovdqa -140768(%r9,%r13,2), %ymm2 | |
vpblendd $240, %ymm0, %ymm11, %ymm0 | |
vpbroadcastd -134368(%r9,%r13,2), %ymm11 | |
vmovdqa %ymm0, 1056(%rsp) | |
vmovdqa %ymm6, 1120(%rsp) | |
vmovdqa -191968(%r9,%r13,2), %xmm6 | |
vpunpcklwd %xmm6, %xmm3, %xmm1 | |
vpunpckldq %xmm4, %xmm1, %xmm1 | |
insertq $48, $16, %xmm9, %xmm1 | |
vpunpcklqdq %xmm5, %xmm1, %xmm0 | |
vpbroadcastw %xmm10, %xmm1 | |
vpblendw $32, %xmm1, %xmm0, %xmm0 | |
vpbroadcastw %xmm8, %xmm1 | |
vinsertps $48, %xmm7, %xmm0, %xmm0 | |
vpblendw $128, %xmm1, %xmm0, %xmm0 | |
vinserti128 $1, -147168(%r9,%r13,2), %ymm0, %ymm1 | |
vpermt2w %ymm2, %ymm25, %ymm1 | |
vpblendd $32, %ymm11, %ymm1, %ymm12 | |
vmovdqa -127968(%r9,%r13,2), %ymm1 | |
vmovdqa -121568(%r9,%r13,2), %xmm11 | |
vpermt2w %ymm1, %ymm13, %ymm12 | |
vinserti128 $1, %xmm11, %ymm0, %ymm0 | |
vpmovsxbw .LCPI1_96(%rip), %ymm13 | |
vshufpd $2, %ymm0, %ymm12, %ymm12 | |
vmovdqa -115168(%r9,%r13,2), %ymm0 | |
vpermt2w %ymm0, %ymm13, %ymm12 | |
vpbroadcastd -108768(%r9,%r13,2), %ymm13 | |
vpblendd $128, %ymm13, %ymm12, %ymm12 | |
vpsrld $16, %xmm4, %xmm13 | |
vmovdqa %ymm12, 256(%rsp) | |
vpsrld $16, %xmm3, %xmm12 | |
vpblendw $2, %xmm6, %xmm12, %xmm12 | |
vpunpckldq %xmm13, %xmm12, %xmm12 | |
vpmovsxbw .LCPI1_102(%rip), %xmm13 | |
vpermt2w %xmm9, %xmm13, %xmm12 | |
vpsrld $16, %xmm5, %xmm13 | |
vpunpcklqdq %xmm13, %xmm12, %xmm12 | |
vpbroadcastw -166366(%r9,%r13,2), %xmm13 | |
vpblendw $32, %xmm13, %xmm12, %xmm12 | |
vpslldq $10, %xmm7, %xmm13 | |
vpblendd $8, %xmm13, %xmm12, %xmm12 | |
vpbroadcastw -153566(%r9,%r13,2), %xmm13 | |
vpblendw $128, %xmm13, %xmm12, %xmm12 | |
vpbroadcastw -198364(%r9,%r13,2), %xmm13 | |
vmovdqa64 %ymm12, %ymm21 | |
vpbroadcastw -191964(%r9,%r13,2), %xmm12 | |
vpunpcklwd %xmm12, %xmm13, %xmm12 | |
vpmovsxbw .LCPI1_106(%rip), %xmm13 | |
vpblendd $2, %xmm4, %xmm12, %xmm12 | |
vpermt2w %xmm9, %xmm13, %xmm12 | |
vpslldq $6, %xmm10, %xmm13 | |
vshufps $212, %xmm5, %xmm12, %xmm12 | |
vpblendw $32, %xmm13, %xmm12, %xmm12 | |
vpslldq $10, %xmm8, %xmm13 | |
vinsertps $112, %xmm7, %xmm12, %xmm12 | |
vpblendw $128, %xmm13, %xmm12, %xmm12 | |
vpbroadcastd -147164(%r9,%r13,2), %ymm13 | |
vpblendd $240, %ymm13, %ymm12, %ymm13 | |
vinserti128 $1, -134368(%r9,%r13,2), %ymm12, %ymm12 | |
vpermt2w %ymm2, %ymm14, %ymm13 | |
vpbroadcastd -121564(%r9,%r13,2), %ymm14 | |
vpblendd $34, %ymm12, %ymm13, %ymm12 | |
vpmovsxbw .LCPI1_108(%rip), %ymm13 | |
vpermt2w %ymm1, %ymm13, %ymm12 | |
vpbroadcastd -108764(%r9,%r13,2), %ymm13 | |
vpblendd $192, %ymm14, %ymm12, %ymm12 | |
vpmovsxbw .LCPI1_109(%rip), %ymm14 | |
vpermt2w %ymm0, %ymm14, %ymm12 | |
vpbroadcastq -147160(%r9,%r13,2), %ymm14 | |
vpblendd $128, %ymm13, %ymm12, %ymm12 | |
vpsrlq $48, %xmm3, %xmm13 | |
vmovdqa %ymm12, 224(%rsp) | |
vpsrlq $48, %xmm6, %xmm12 | |
vpunpcklwd %xmm12, %xmm13, %xmm12 | |
vpsrlq $48, %xmm4, %xmm13 | |
vpunpckldq %xmm13, %xmm12, %xmm12 | |
vpsrlq $48, %xmm5, %xmm13 | |
vpblendw $8, %xmm9, %xmm12, %xmm12 | |
vpunpcklqdq %xmm13, %xmm12, %xmm12 | |
vpbroadcastw -166362(%r9,%r13,2), %xmm13 | |
vpblendw $32, %xmm13, %xmm12, %xmm12 | |
vpmovzxwd %xmm7, %xmm13 | |
vpblendd $8, %xmm13, %xmm12, %xmm12 | |
vpbroadcastw -153562(%r9,%r13,2), %xmm13 | |
vpblendw $128, %xmm13, %xmm12, %xmm12 | |
vpbroadcastw -198360(%r9,%r13,2), %xmm13 | |
vmovdqa64 %ymm12, %ymm31 | |
vpbroadcastw -191960(%r9,%r13,2), %xmm12 | |
vpunpcklwd %xmm12, %xmm13, %xmm12 | |
vpsrldq $2, %xmm9, %xmm13 | |
vpsrldq $6, %xmm9, %xmm9 | |
vinsertps $156, %xmm4, %xmm12, %xmm12 | |
vpblendw $8, %xmm13, %xmm12, %xmm12 | |
vpslld $16, %xmm10, %xmm13 | |
vpblendd $3, %xmm12, %xmm5, %xmm12 | |
vpblendw $32, %xmm13, %xmm12, %xmm12 | |
vpsllq $48, %xmm8, %xmm13 | |
vinsertps $176, %xmm7, %xmm12, %xmm12 | |
vpblendw $128, %xmm13, %xmm12, %xmm12 | |
vpblendd $240, %ymm14, %ymm12, %ymm13 | |
vpbroadcastd -134360(%r9,%r13,2), %ymm14 | |
vinserti128 $1, %xmm11, %ymm12, %ymm11 | |
vpmovsxbw .LCPI1_103(%rip), %ymm12 | |
vpermt2w %ymm2, %ymm24, %ymm13 | |
vpblendd $32, %ymm14, %ymm13, %ymm13 | |
vmovdqa 384(%rsp), %xmm14 | |
vpermt2w %ymm1, %ymm12, %ymm13 | |
vpbroadcastd -108760(%r9,%r13,2), %ymm12 | |
vpblendd $204, %ymm11, %ymm13, %ymm11 | |
vpmovsxbw .LCPI1_111(%rip), %ymm13 | |
vpermt2w %ymm0, %ymm13, %ymm11 | |
vmovdqa 448(%rsp), %xmm13 | |
vpblendd $128, %ymm12, %ymm11, %ymm11 | |
vpsrldq $10, %xmm3, %xmm12 | |
vpsrldq $14, %xmm3, %xmm3 | |
vmovdqa %ymm11, 192(%rsp) | |
vpsrldq $10, %xmm6, %xmm11 | |
vpsrldq $14, %xmm6, %xmm6 | |
vpunpcklwd %xmm11, %xmm12, %xmm11 | |
vpsrldq $10, %xmm4, %xmm12 | |
vpunpcklwd %xmm6, %xmm3, %xmm3 | |
vpunpckldq %xmm12, %xmm11, %xmm11 | |
vpbroadcastw -179158(%r9,%r13,2), %xmm12 | |
vpblendw $8, %xmm12, %xmm11, %xmm11 | |
vpsrldq $10, %xmm5, %xmm12 | |
vpunpcklqdq %xmm12, %xmm11, %xmm11 | |
vpsllq $16, %xmm7, %xmm12 | |
vpblendw $32, %xmm10, %xmm11, %xmm11 | |
vpsrlq $16, %xmm10, %xmm10 | |
vpblendd $8, %xmm12, %xmm11, %xmm11 | |
vpbroadcastw -153558(%r9,%r13,2), %xmm12 | |
vpblendw $128, %xmm12, %xmm11, %xmm11 | |
vpbroadcastw -198356(%r9,%r13,2), %xmm12 | |
vmovdqa64 %ymm11, %ymm16 | |
vpbroadcastw -191956(%r9,%r13,2), %xmm11 | |
vpunpcklwd %xmm11, %xmm12, %xmm11 | |
vpmovsxbw .LCPI1_112(%rip), %ymm12 | |
vinsertps $220, %xmm4, %xmm11, %xmm11 | |
vpsrldq $14, %xmm4, %xmm4 | |
vpunpckldq %xmm4, %xmm3, %xmm3 | |
vpbroadcastw -179154(%r9,%r13,2), %xmm4 | |
vpblendw $8, %xmm9, %xmm11, %xmm9 | |
vpslld $16, %xmm8, %xmm11 | |
vshufps $244, %xmm5, %xmm9, %xmm9 | |
vpblendw $32, %xmm10, %xmm9, %xmm9 | |
vpbroadcastd -147156(%r9,%r13,2), %ymm10 | |
vpblendd $8, %xmm7, %xmm9, %xmm9 | |
vpblendw $128, %xmm11, %xmm9, %xmm9 | |
vpbroadcastd -134356(%r9,%r13,2), %ymm11 | |
vpblendw $8, %xmm4, %xmm3, %xmm3 | |
vpsrldq $14, %xmm5, %xmm4 | |
vpunpcklqdq %xmm4, %xmm3, %xmm3 | |
vpbroadcastw -166354(%r9,%r13,2), %xmm4 | |
vpblendd $240, %ymm10, %ymm9, %ymm10 | |
vinserti128 $1, -108768(%r9,%r13,2), %ymm9, %ymm9 | |
vpermt2w %ymm2, %ymm12, %ymm10 | |
vpblendd $32, %ymm11, %ymm10, %ymm10 | |
vpmovsxbw .LCPI1_113(%rip), %ymm11 | |
vpblendw $32, %xmm4, %xmm3, %xmm3 | |
vpsrld $16, %xmm7, %xmm4 | |
vpblendd $8, %xmm4, %xmm3, %xmm3 | |
vmovdqa -172752(%r9,%r13,2), %xmm4 | |
vpblendw $128, %xmm8, %xmm3, %xmm3 | |
vmovdqa64 %ymm3, %ymm25 | |
vpbroadcastw -198352(%r9,%r13,2), %ymm3 | |
vpermt2w %ymm1, %ymm11, %ymm10 | |
vpbroadcastd -121556(%r9,%r13,2), %ymm11 | |
vpunpcklwd %xmm14, %xmm3, %xmm3 | |
vpunpckldq %xmm15, %xmm3, %xmm5 | |
vmovdqa -179152(%r9,%r13,2), %xmm3 | |
vpblendd $192, %ymm11, %ymm10, %ymm10 | |
vpmovsxbw .LCPI1_114(%rip), %ymm11 | |
insertq $48, $16, %xmm3, %xmm5 | |
vpunpcklqdq %xmm4, %xmm5, %xmm6 | |
vmovdqa -166352(%r9,%r13,2), %xmm5 | |
vpermt2w %ymm0, %ymm11, %ymm10 | |
vpblendd $136, %ymm9, %ymm10, %ymm9 | |
vpmovsxbw .LCPI1_115(%rip), %ymm10 | |
vmovdqa %ymm9, 160(%rsp) | |
vpbroadcastd -134352(%r9,%r13,2), %ymm9 | |
vpbroadcastw %xmm5, %xmm7 | |
vmovdqa %xmm5, %xmm12 | |
vmovdqa -153552(%r9,%r13,2), %xmm5 | |
vpblendw $32, %xmm7, %xmm6, %xmm6 | |
vpbroadcastd -159952(%r9,%r13,2), %xmm7 | |
vpbroadcastw %xmm5, %xmm8 | |
vpblendd $8, %xmm7, %xmm6, %xmm7 | |
vpblendw $128, %xmm8, %xmm7, %xmm7 | |
vmovdqa -147168(%r9,%r13,2), %ymm8 | |
vpblendd $240, %ymm8, %ymm7, %ymm7 | |
vpermt2w %ymm8, %ymm19, %ymm21 | |
vpermt2w %ymm2, %ymm10, %ymm7 | |
vpmovsxbw .LCPI1_116(%rip), %ymm10 | |
vpblendd $32, %ymm9, %ymm7, %ymm7 | |
vpbroadcastq -121552(%r9,%r13,2), %ymm9 | |
vpermt2w %ymm1, %ymm10, %ymm7 | |
vmovdqa 416(%rsp), %ymm10 | |
vpblendd $192, %ymm9, %ymm7, %ymm7 | |
vpmovsxbw .LCPI1_117(%rip), %ymm9 | |
vpermt2w %ymm0, %ymm9, %ymm7 | |
vpbroadcastd -108752(%r9,%r13,2), %ymm9 | |
vpblendd $128, %ymm9, %ymm7, %ymm7 | |
vpsrld $16, %xmm4, %xmm9 | |
vmovdqa %ymm7, 480(%rsp) | |
vmovdqa -179168(%r9,%r13,2), %ymm7 | |
vpermt2w %ymm7, %ymm26, %ymm10 | |
vpunpcklqdq %xmm9, %xmm10, %xmm9 | |
vpbroadcastw -166350(%r9,%r13,2), %xmm10 | |
vpblendw $32, %xmm10, %xmm9, %xmm10 | |
vmovapd -159952(%r9,%r13,2), %xmm9 | |
vpslldq $10, %xmm9, %xmm11 | |
vpblendd $8, %xmm11, %xmm10, %xmm10 | |
vpbroadcastw -153550(%r9,%r13,2), %xmm11 | |
vpblendw $128, %xmm11, %xmm10, %xmm10 | |
vpshuflw $85, %ymm8, %ymm11 | |
vpblendw $2, %ymm2, %ymm11, %ymm11 | |
vpblendd $240, %ymm11, %ymm10, %ymm6 | |
vmovdqa 128(%rsp), %ymm10 | |
vpmovsxbw .LCPI1_119(%rip), %ymm11 | |
vmovdqa64 %ymm6, %ymm30 | |
vmovdqa %xmm12, %xmm6 | |
vmovdqa64 %xmm6, %xmm26 | |
vpermt2w %ymm7, %ymm28, %ymm10 | |
vshufps $212, %xmm4, %xmm10, %xmm7 | |
vpslldq $6, %xmm12, %xmm10 | |
vpsrldq $10, %xmm14, %xmm12 | |
vpblendw $32, %xmm10, %xmm7, %xmm7 | |
vpbroadcastd -159948(%r9,%r13,2), %ymm10 | |
vpblendd $8, %xmm10, %xmm7, %xmm7 | |
vpslldq $10, %xmm5, %xmm10 | |
vpblendw $128, %xmm10, %xmm7, %xmm7 | |
vpbroadcastd -147148(%r9,%r13,2), %ymm10 | |
vpblendd $240, %ymm10, %ymm7, %ymm10 | |
vpmovsxbw .LCPI1_120(%rip), %ymm7 | |
vpermt2w %ymm2, %ymm7, %ymm10 | |
vmovdqa -134368(%r9,%r13,2), %ymm7 | |
vpblendd $32, %ymm7, %ymm10, %ymm10 | |
vpermt2w %ymm1, %ymm11, %ymm10 | |
vpbroadcastd -121548(%r9,%r13,2), %ymm11 | |
vpblendd $192, %ymm11, %ymm10, %ymm10 | |
vpmovsxbw .LCPI1_121(%rip), %ymm11 | |
vpermt2w %ymm0, %ymm11, %ymm10 | |
vpbroadcastd -108748(%r9,%r13,2), %ymm11 | |
vpblendd $128, %ymm11, %ymm10, %ymm10 | |
vpsrlq $48, %xmm14, %xmm11 | |
vmovdqa %ymm10, 128(%rsp) | |
vpsrlq $48, %xmm13, %xmm10 | |
vpunpcklwd %xmm11, %xmm10, %xmm10 | |
vpsrlq $48, %xmm15, %xmm11 | |
vpunpckldq %xmm11, %xmm10, %xmm10 | |
vpsrlq $48, %xmm4, %xmm11 | |
vpblendw $8, %xmm3, %xmm10, %xmm10 | |
vpunpcklqdq %xmm11, %xmm10, %xmm10 | |
vpbroadcastw -166346(%r9,%r13,2), %xmm11 | |
vpblendw $32, %xmm11, %xmm10, %xmm10 | |
vpmovzxwd %xmm9, %xmm11 | |
vpblendd $8, %xmm11, %xmm10, %xmm10 | |
vpbroadcastw -153546(%r9,%r13,2), %xmm11 | |
vpblendw $128, %xmm11, %xmm10, %xmm10 | |
vpsrldq $10, %xmm13, %xmm11 | |
vpsrldq $14, %xmm13, %xmm13 | |
vpunpcklwd %xmm12, %xmm11, %xmm11 | |
vpsrldq $10, %xmm15, %xmm12 | |
vpunpckldq %xmm12, %xmm11, %xmm11 | |
vpbroadcastw -179142(%r9,%r13,2), %xmm12 | |
vpblendw $8, %xmm12, %xmm11, %xmm11 | |
vpsrldq $10, %xmm4, %xmm12 | |
vpunpcklqdq %xmm12, %xmm11, %xmm11 | |
vpsllq $16, %xmm9, %xmm12 | |
vpsrld $16, %xmm9, %xmm9 | |
vpblendw $32, %xmm6, %xmm11, %xmm11 | |
vpblendd $8, %xmm12, %xmm11, %xmm11 | |
vpbroadcastw -153542(%r9,%r13,2), %xmm12 | |
vpblendw $128, %xmm12, %xmm11, %xmm11 | |
vpsrldq $14, %xmm14, %xmm12 | |
vpunpcklwd %xmm12, %xmm13, %xmm12 | |
vpsrldq $14, %xmm15, %xmm13 | |
vpmovsxbw .LCPI1_102(%rip), %xmm15 | |
vpunpckldq %xmm13, %xmm12, %xmm12 | |
vpbroadcastw -179138(%r9,%r13,2), %xmm13 | |
vpblendw $8, %xmm13, %xmm12, %xmm12 | |
vpsrldq $14, %xmm4, %xmm13 | |
vpunpcklqdq %xmm13, %xmm12, %xmm12 | |
vpbroadcastw -166338(%r9,%r13,2), %xmm13 | |
vpblendw $32, %xmm13, %xmm12, %xmm12 | |
vpblendd $8, %xmm9, %xmm12, %xmm9 | |
vpmovsxbw .LCPI1_123(%rip), %ymm12 | |
vpblendw $128, %xmm5, %xmm9, %xmm9 | |
vpermt2w %ymm8, %ymm12, %ymm31 | |
vpmovsxbw .LCPI1_124(%rip), %ymm12 | |
vpermt2w %ymm8, %ymm12, %ymm16 | |
vpmovsxbw .LCPI1_125(%rip), %ymm12 | |
vpermt2w %ymm8, %ymm12, %ymm25 | |
vpmovsxbw .LCPI1_126(%rip), %ymm12 | |
vpermt2w %ymm8, %ymm12, %ymm11 | |
vpmovsxbw .LCPI1_127(%rip), %ymm12 | |
vpermt2w %ymm8, %ymm12, %ymm9 | |
vpbroadcastd .LCPI1_66(%rip), %ymm12 | |
vpermt2w %ymm2, %ymm12, %ymm8 | |
vpmovsxbw .LCPI1_128(%rip), %ymm12 | |
vpermt2w %ymm7, %ymm12, %ymm8 | |
vpmovsxbw .LCPI1_129(%rip), %ymm12 | |
vpblendw $8, %ymm1, %ymm8, %ymm8 | |
vpblendd $240, %ymm8, %ymm10, %ymm13 | |
vpbroadcastw -198344(%r9,%r13,2), %ymm8 | |
vpbroadcastw -191944(%r9,%r13,2), %xmm10 | |
vpunpcklwd %xmm10, %xmm8, %xmm8 | |
vunpcklps -185544(%r9,%r13,2){1to4}, %xmm8, %xmm8 | |
vpsrldq $2, %xmm3, %xmm10 | |
vpsrldq $6, %xmm3, %xmm3 | |
vpblendw $8, %xmm10, %xmm8, %xmm8 | |
vpslld $16, %xmm6, %xmm10 | |
vpblendd $3, %xmm8, %xmm4, %xmm8 | |
vpblendw $32, %xmm10, %xmm8, %xmm8 | |
vpbroadcastd -159944(%r9,%r13,2), %xmm10 | |
vpblendd $8, %xmm10, %xmm8, %xmm8 | |
vpsllq $48, %xmm5, %xmm10 | |
vpblendw $128, %xmm10, %xmm8, %xmm8 | |
vpbroadcastq -147144(%r9,%r13,2), %ymm10 | |
vpblendd $240, %ymm10, %ymm8, %ymm8 | |
vpmovsxbw .LCPI1_130(%rip), %ymm10 | |
vpermt2w %ymm2, %ymm10, %ymm8 | |
vpbroadcastd -134344(%r9,%r13,2), %ymm10 | |
vpblendd $32, %ymm10, %ymm8, %ymm10 | |
vpmovsxbw .LCPI1_131(%rip), %ymm8 | |
vpermt2w %ymm1, %ymm8, %ymm10 | |
vmovdqa -121568(%r9,%r13,2), %ymm8 | |
vpblendd $192, %ymm8, %ymm10, %ymm10 | |
vpermt2w %ymm0, %ymm12, %ymm10 | |
vpbroadcastd -108744(%r9,%r13,2), %ymm12 | |
vpblendd $128, %ymm12, %ymm10, %ymm14 | |
vpmovsxbw .LCPI1_132(%rip), %ymm12 | |
vmovdqa %ymm11, %ymm10 | |
vpermt2w %ymm2, %ymm12, %ymm10 | |
vpmovsxbw .LCPI1_133(%rip), %ymm12 | |
vpermt2w %ymm7, %ymm12, %ymm10 | |
vpmovsxbw .LCPI1_134(%rip), %ymm12 | |
vpermt2w %ymm1, %ymm12, %ymm10 | |
vpmovsxbw .LCPI1_135(%rip), %ymm12 | |
vpermt2w %ymm8, %ymm12, %ymm10 | |
vpbroadcastw -191940(%r9,%r13,2), %xmm12 | |
vpblendw $32, %ymm0, %ymm10, %ymm10 | |
vpblendd $240, %ymm10, %ymm11, %ymm6 | |
vpbroadcastw -198340(%r9,%r13,2), %ymm10 | |
vpbroadcastd -185540(%r9,%r13,2), %ymm11 | |
vpunpcklwd %xmm12, %xmm10, %xmm10 | |
vpmovsxbw .LCPI1_109(%rip), %ymm12 | |
vpblendd $2, %xmm11, %xmm10, %xmm10 | |
vmovdqa -38368(%r9,%r13,2), %ymm11 | |
vpblendw $8, %xmm3, %xmm10, %xmm3 | |
vmovaps -57568(%r9,%r13,2), %xmm10 | |
vshufps $244, %xmm4, %xmm3, %xmm3 | |
vpsrlq $16, %xmm26, %xmm4 | |
vpblendw $32, %xmm4, %xmm3, %xmm3 | |
vpbroadcastd -159940(%r9,%r13,2), %ymm4 | |
vpblendd $8, %xmm4, %xmm3, %xmm3 | |
vpslld $16, %xmm5, %xmm4 | |
vmovdqa64 %ymm21, %ymm5 | |
vpermt2w %ymm2, %ymm27, %ymm5 | |
vpblendw $128, %xmm4, %xmm3, %xmm3 | |
vpbroadcastd -147140(%r9,%r13,2), %ymm4 | |
vpermt2w %ymm7, %ymm20, %ymm5 | |
vmovdqa64 %ymm30, %ymm20 | |
vpmovsxbw .LCPI1_101(%rip), %ymm30 | |
vpblendd $240, %ymm4, %ymm3, %ymm3 | |
vpmovsxbw .LCPI1_136(%rip), %ymm4 | |
vpermt2w %ymm1, %ymm30, %ymm5 | |
vmovdqa64 -51152(%r9,%r13,2), %xmm30 | |
vpermt2w %ymm8, %ymm23, %ymm5 | |
vmovdqa64 %ymm13, %ymm23 | |
vmovdqa -25568(%r9,%r13,2), %ymm13 | |
vpermt2w %ymm2, %ymm4, %ymm3 | |
vpbroadcastd -134340(%r9,%r13,2), %ymm4 | |
vpblendd $32, %ymm4, %ymm3, %ymm3 | |
vpmovsxbw .LCPI1_137(%rip), %ymm4 | |
vpermt2w %ymm1, %ymm4, %ymm3 | |
vpbroadcastd -121540(%r9,%r13,2), %ymm4 | |
vpblendd $192, %ymm4, %ymm3, %ymm3 | |
vpmovsxbw .LCPI1_138(%rip), %ymm4 | |
vpermt2w %ymm2, %ymm4, %ymm31 | |
vpmovsxbw .LCPI1_139(%rip), %ymm4 | |
vpermt2w %ymm2, %ymm4, %ymm16 | |
vpmovsxbw .LCPI1_140(%rip), %ymm4 | |
vpermt2w %ymm2, %ymm4, %ymm25 | |
vpmovsxbw .LCPI1_141(%rip), %ymm4 | |
vpermt2w %ymm2, %ymm4, %ymm9 | |
vpmovsxbw .LCPI1_142(%rip), %ymm2 | |
vpmovsxbw .LCPI1_148(%rip), %ymm4 | |
vpermt2w %ymm7, %ymm2, %ymm31 | |
vpmovsxbw .LCPI1_143(%rip), %ymm2 | |
vpermt2w %ymm7, %ymm2, %ymm16 | |
vpmovsxbw .LCPI1_144(%rip), %ymm2 | |
vpermt2w %ymm7, %ymm2, %ymm25 | |
vpmovsxbw .LCPI1_145(%rip), %ymm2 | |
vpermt2w %ymm7, %ymm2, %ymm20 | |
vpmovsxbw .LCPI1_146(%rip), %ymm2 | |
vpermt2w %ymm7, %ymm2, %ymm9 | |
vpmovsxbw .LCPI1_147(%rip), %ymm2 | |
vpermt2w %ymm1, %ymm2, %ymm31 | |
vpmovsxbw .LCPI1_149(%rip), %ymm2 | |
vpermt2w %ymm1, %ymm2, %ymm16 | |
vpmovsxbw .LCPI1_150(%rip), %ymm2 | |
vpermt2w %ymm1, %ymm2, %ymm25 | |
vpmovsxbw .LCPI1_151(%rip), %ymm2 | |
vpermt2w %ymm1, %ymm2, %ymm20 | |
vmovdqa %ymm9, %ymm2 | |
vpermt2w %ymm1, %ymm4, %ymm2 | |
vpmovsxbw .LCPI1_152(%rip), %ymm1 | |
vpermt2w %ymm8, %ymm1, %ymm31 | |
vpmovsxbw .LCPI1_153(%rip), %ymm1 | |
vpermt2w %ymm8, %ymm1, %ymm16 | |
vpmovsxbw .LCPI1_154(%rip), %ymm1 | |
vpermt2w %ymm8, %ymm1, %ymm25 | |
vpmovsxbw .LCPI1_155(%rip), %ymm1 | |
vpermt2w %ymm8, %ymm1, %ymm20 | |
vpmovsxbw .LCPI1_156(%rip), %ymm1 | |
vpermt2w %ymm8, %ymm1, %ymm23 | |
vpmovsxbw .LCPI1_157(%rip), %ymm1 | |
vpermt2w %ymm8, %ymm1, %ymm2 | |
vpmovsxbw .LCPI1_158(%rip), %ymm1 | |
vpbroadcastd -31968(%r9,%r13,2), %ymm8 | |
vpermt2w %ymm0, %ymm1, %ymm5 | |
vpmovsxbw .LCPI1_160(%rip), %ymm1 | |
vmovdqa64 %ymm5, %ymm27 | |
vpermt2w %ymm0, %ymm1, %ymm31 | |
vpmovsxbw .LCPI1_161(%rip), %ymm1 | |
vpermt2w %ymm0, %ymm1, %ymm16 | |
vpmovsxbw .LCPI1_162(%rip), %ymm1 | |
vmovdqa64 %ymm16, %ymm21 | |
vmovdqa64 -19168(%r9,%r13,2), %xmm16 | |
vpermt2w %ymm0, %ymm1, %ymm25 | |
vpmovsxbw .LCPI1_163(%rip), %ymm1 | |
vmovdqa64 %ymm25, %ymm19 | |
vpmovsxbw .LCPI1_115(%rip), %ymm25 | |
vpermt2w %ymm0, %ymm1, %ymm20 | |
vpmovsxbw .LCPI1_164(%rip), %ymm1 | |
vpermt2w %ymm0, %ymm1, %ymm23 | |
vpmovsxbw .LCPI1_165(%rip), %ymm1 | |
vpermt2w %ymm0, %ymm1, %ymm3 | |
vpmovsxbw .LCPI1_166(%rip), %ymm1 | |
vpermt2w %ymm0, %ymm1, %ymm2 | |
vmovdqa -108768(%r9,%r13,2), %ymm0 | |
vpmovsxbw .LCPI1_159(%rip), %ymm1 | |
vpermt2w %ymm0, %ymm1, %ymm27 | |
vpmovsxbw .LCPI1_167(%rip), %ymm1 | |
vpblendd $128, %ymm0, %ymm3, %ymm4 | |
vmovdqa 224(%rsp), %ymm3 | |
vpermt2w %ymm0, %ymm1, %ymm31 | |
vpmovsxbw .LCPI1_169(%rip), %ymm1 | |
vpermt2w %ymm0, %ymm1, %ymm21 | |
vpmovsxbw .LCPI1_170(%rip), %ymm1 | |
vpermt2w %ymm0, %ymm1, %ymm19 | |
vpmovsxbw .LCPI1_171(%rip), %ymm1 | |
vpermt2w %ymm0, %ymm1, %ymm20 | |
vpmovsxbw .LCPI1_172(%rip), %ymm1 | |
vpermt2w %ymm0, %ymm1, %ymm23 | |
vpmovsxbw .LCPI1_173(%rip), %ymm1 | |
vpermt2w %ymm0, %ymm1, %ymm6 | |
vpmovsxbw .LCPI1_168(%rip), %ymm1 | |
vpermt2w %ymm0, %ymm1, %ymm2 | |
vmovdqa -102368(%r9,%r13,2), %ymm0 | |
vmovdqa 256(%rsp), %ymm1 | |
vpermt2w %ymm0, %ymm22, %ymm1 | |
vmovdqa64 752(%rsp), %xmm22 | |
vmovdqa %ymm1, 256(%rsp) | |
vpmovsxbw .LCPI1_174(%rip), %ymm1 | |
vpermt2w %ymm0, %ymm1, %ymm27 | |
vpmovsxbw .LCPI1_175(%rip), %ymm1 | |
vpermt2w %ymm0, %ymm1, %ymm3 | |
vpmovsxbw .LCPI1_176(%rip), %ymm1 | |
vmovdqa %ymm3, 224(%rsp) | |
vmovdqa 192(%rsp), %ymm3 | |
vpermt2w %ymm0, %ymm1, %ymm31 | |
vpmovsxbw .LCPI1_177(%rip), %ymm1 | |
vpermt2w %ymm0, %ymm1, %ymm3 | |
vpmovsxbw .LCPI1_178(%rip), %ymm1 | |
vmovdqa %ymm3, 192(%rsp) | |
vmovdqa 160(%rsp), %ymm3 | |
vpermt2w %ymm0, %ymm1, %ymm21 | |
vpmovsxbw .LCPI1_179(%rip), %ymm1 | |
vpermt2w %ymm0, %ymm1, %ymm3 | |
vpmovsxbw .LCPI1_180(%rip), %ymm1 | |
vmovdqa %ymm3, 160(%rsp) | |
vmovdqa 480(%rsp), %ymm3 | |
vpermt2w %ymm0, %ymm1, %ymm19 | |
vpmovsxbw .LCPI1_181(%rip), %ymm1 | |
vpermt2w %ymm0, %ymm1, %ymm3 | |
vpmovsxbw .LCPI1_182(%rip), %ymm1 | |
vmovdqa %ymm3, 480(%rsp) | |
vmovdqa 128(%rsp), %ymm3 | |
vpermt2w %ymm0, %ymm1, %ymm20 | |
vpmovsxbw .LCPI1_183(%rip), %ymm1 | |
vpermt2w %ymm0, %ymm1, %ymm3 | |
vpmovsxbw .LCPI1_184(%rip), %ymm1 | |
vmovdqa %ymm3, 128(%rsp) | |
vpermt2w %ymm0, %ymm1, %ymm23 | |
vpmovsxbw .LCPI1_185(%rip), %ymm1 | |
vpermt2w %ymm0, %ymm1, %ymm14 | |
vpmovsxbw .LCPI1_186(%rip), %ymm1 | |
vmovdqa %ymm14, 448(%rsp) | |
vmovdqa -12768(%r9,%r13,2), %ymm14 | |
vpermt2w %ymm0, %ymm1, %ymm6 | |
vpmovsxbw .LCPI1_187(%rip), %ymm1 | |
vmovdqa %ymm6, 416(%rsp) | |
vpermt2w %ymm0, %ymm1, %ymm4 | |
vmovdqa -83168(%r9,%r13,2), %xmm1 | |
vmovdqa %ymm4, 384(%rsp) | |
vpblendw $128, %ymm0, %ymm2, %ymm4 | |
vmovdqa -95968(%r9,%r13,2), %xmm0 | |
vmovdqa -89568(%r9,%r13,2), %xmm2 | |
vpblendd $240, %ymm4, %ymm9, %ymm4 | |
vmovdqa -63968(%r9,%r13,2), %xmm9 | |
vmovdqa %ymm4, 352(%rsp) | |
vmovdqa -70368(%r9,%r13,2), %xmm4 | |
vpunpcklwd %xmm2, %xmm0, %xmm3 | |
vpunpckldq %xmm1, %xmm3, %xmm5 | |
vmovdqa -76768(%r9,%r13,2), %xmm3 | |
vpbroadcastw %xmm9, %xmm6 | |
insertq $48, $16, %xmm3, %xmm5 | |
vpunpcklqdq %xmm4, %xmm5, %xmm5 | |
vpblendw $32, %xmm6, %xmm5, %xmm5 | |
vmovdqa -51168(%r9,%r13,2), %xmm6 | |
vinsertps $48, %xmm10, %xmm5, %xmm5 | |
vpbroadcastw %xmm6, %xmm7 | |
vpblendw $128, %xmm7, %xmm5, %xmm7 | |
vinserti128 $1, -44768(%r9,%r13,2), %ymm7, %ymm5 | |
vinserti32x4 $1, %xmm16, %ymm7, %ymm7 | |
vpermt2w %ymm11, %ymm29, %ymm5 | |
vpblendd $32, %ymm8, %ymm5, %ymm8 | |
vpmovsxbw .LCPI1_95(%rip), %ymm5 | |
vpermt2w %ymm13, %ymm5, %ymm8 | |
vpmovsxbw .LCPI1_96(%rip), %ymm5 | |
vshufpd $2, %ymm7, %ymm8, %ymm7 | |
vpbroadcastd -6368(%r9,%r13,2), %ymm8 | |
vpermt2w %ymm14, %ymm5, %ymm7 | |
vpblendd $128, %ymm8, %ymm7, %ymm5 | |
vpsrld $16, %xmm0, %xmm7 | |
vpsrld $16, %xmm1, %xmm8 | |
vpblendw $2, %xmm2, %xmm7, %xmm7 | |
vmovdqa %ymm5, 960(%rsp) | |
vpunpckldq %xmm8, %xmm7, %xmm7 | |
vpsrld $16, %xmm4, %xmm8 | |
vpermt2w %xmm3, %xmm15, %xmm7 | |
vpbroadcastd -19164(%r9,%r13,2), %ymm15 | |
vpunpcklqdq %xmm8, %xmm7, %xmm7 | |
vpbroadcastw -63966(%r9,%r13,2), %xmm8 | |
vpblendw $32, %xmm8, %xmm7, %xmm7 | |
vpslldq $10, %xmm10, %xmm8 | |
vpblendd $8, %xmm8, %xmm7, %xmm7 | |
vpbroadcastw -51166(%r9,%r13,2), %xmm8 | |
vpblendw $128, %xmm8, %xmm7, %xmm5 | |
vpbroadcastw -89564(%r9,%r13,2), %xmm7 | |
vpbroadcastw -95964(%r9,%r13,2), %xmm8 | |
vmovdqa64 %ymm5, %ymm28 | |
vpmovsxbw .LCPI1_108(%rip), %ymm5 | |
vpunpcklwd %xmm7, %xmm8, %xmm7 | |
vpslldq $6, %xmm9, %xmm8 | |
vpblendd $2, %xmm1, %xmm7, %xmm7 | |
vpermt2w %xmm3, %xmm17, %xmm7 | |
vpmovsxbw .LCPI1_123(%rip), %ymm17 | |
vshufps $212, %xmm4, %xmm7, %xmm7 | |
vpblendw $32, %xmm8, %xmm7, %xmm7 | |
vpslldq $10, %xmm6, %xmm8 | |
vinsertps $112, %xmm10, %xmm7, %xmm7 | |
vpblendw $128, %xmm8, %xmm7, %xmm7 | |
vpbroadcastd -44764(%r9,%r13,2), %ymm8 | |
vpblendd $240, %ymm8, %ymm7, %ymm8 | |
vinserti128 $1, -31968(%r9,%r13,2), %ymm7, %ymm7 | |
vpermt2w %ymm11, %ymm18, %ymm8 | |
vpmovsxbw .LCPI1_103(%rip), %ymm18 | |
vpblendd $34, %ymm7, %ymm8, %ymm7 | |
vpbroadcastd -6364(%r9,%r13,2), %ymm8 | |
vpermt2w %ymm13, %ymm5, %ymm7 | |
vpblendd $192, %ymm15, %ymm7, %ymm7 | |
vpbroadcastw -95960(%r9,%r13,2), %xmm15 | |
vpermt2w %ymm14, %ymm12, %ymm7 | |
vpblendd $128, %ymm8, %ymm7, %ymm5 | |
vpsrlq $48, %xmm2, %xmm7 | |
vpsrlq $48, %xmm0, %xmm8 | |
vpunpcklwd %xmm7, %xmm8, %xmm7 | |
vpsrlq $48, %xmm1, %xmm8 | |
vmovdqa %ymm5, 928(%rsp) | |
vpbroadcastq -44760(%r9,%r13,2), %ymm5 | |
vpunpckldq %xmm8, %xmm7, %xmm7 | |
vpsrlq $48, %xmm4, %xmm8 | |
vpblendw $8, %xmm3, %xmm7, %xmm7 | |
vpunpcklqdq %xmm8, %xmm7, %xmm7 | |
vpbroadcastw -63962(%r9,%r13,2), %xmm8 | |
vpblendw $32, %xmm8, %xmm7, %xmm7 | |
vpmovzxwd %xmm10, %xmm8 | |
vpblendd $8, %xmm8, %xmm7, %xmm7 | |
vpbroadcastw -51162(%r9,%r13,2), %xmm8 | |
vpblendw $128, %xmm8, %xmm7, %xmm12 | |
vpbroadcastw -89560(%r9,%r13,2), %xmm7 | |
vpmovsxbw .LCPI1_111(%rip), %ymm8 | |
vpunpcklwd %xmm7, %xmm15, %xmm7 | |
vpsrldq $2, %xmm3, %xmm15 | |
vpsrldq $6, %xmm3, %xmm3 | |
vinsertps $156, %xmm1, %xmm7, %xmm7 | |
vpblendw $8, %xmm15, %xmm7, %xmm7 | |
vpslld $16, %xmm9, %xmm15 | |
vpblendd $3, %xmm7, %xmm4, %xmm7 | |
vpblendw $32, %xmm15, %xmm7, %xmm7 | |
vpsllq $48, %xmm6, %xmm15 | |
vinsertps $176, %xmm10, %xmm7, %xmm7 | |
vpblendw $128, %xmm15, %xmm7, %xmm7 | |
vpbroadcastd -31960(%r9,%r13,2), %ymm15 | |
vpblendd $240, %ymm5, %ymm7, %ymm5 | |
vinserti32x4 $1, %xmm16, %ymm7, %ymm7 | |
vpmovsxbw .LCPI1_112(%rip), %ymm16 | |
vpermt2w %ymm11, %ymm24, %ymm5 | |
vmovdqa64 -63952(%r9,%r13,2), %xmm24 | |
vpblendd $32, %ymm15, %ymm5, %ymm5 | |
vpbroadcastd -6360(%r9,%r13,2), %ymm15 | |
vpermt2w %ymm13, %ymm18, %ymm5 | |
vpmovsxbw .LCPI1_113(%rip), %ymm18 | |
vpblendd $204, %ymm7, %ymm5, %ymm5 | |
vpsrldq $10, %xmm0, %xmm7 | |
vpsrldq $14, %xmm0, %xmm0 | |
vpermt2w %ymm14, %ymm8, %ymm5 | |
vpblendd $128, %ymm15, %ymm5, %ymm5 | |
vpbroadcastw -95956(%r9,%r13,2), %xmm15 | |
vmovdqa %ymm5, 896(%rsp) | |
vpsrldq $10, %xmm2, %xmm5 | |
vpsrldq $14, %xmm2, %xmm2 | |
vpunpcklwd %xmm5, %xmm7, %xmm5 | |
vpsrldq $10, %xmm1, %xmm7 | |
vpunpcklwd %xmm2, %xmm0, %xmm0 | |
vmovdqa 1696(%rsp), %ymm2 | |
vpunpckldq %xmm7, %xmm5, %xmm5 | |
vpbroadcastw -76758(%r9,%r13,2), %xmm7 | |
vpblendw $8, %xmm7, %xmm5, %xmm5 | |
vpsrldq $10, %xmm4, %xmm7 | |
vpunpcklqdq %xmm7, %xmm5, %xmm5 | |
vpsllq $16, %xmm10, %xmm7 | |
vpblendw $32, %xmm9, %xmm5, %xmm5 | |
vpblendd $8, %xmm7, %xmm5, %xmm5 | |
vpbroadcastw -51158(%r9,%r13,2), %xmm7 | |
vpblendw $128, %xmm7, %xmm5, %xmm8 | |
vpbroadcastw -89556(%r9,%r13,2), %xmm5 | |
vpmovsxbw .LCPI1_114(%rip), %ymm7 | |
vpunpcklwd %xmm5, %xmm15, %xmm5 | |
vmovdqa 768(%rsp), %xmm15 | |
vinsertps $220, %xmm1, %xmm5, %xmm5 | |
vpsrldq $14, %xmm1, %xmm1 | |
vpblendw $8, %xmm3, %xmm5, %xmm3 | |
vpsrlq $16, %xmm9, %xmm5 | |
vpslld $16, %xmm6, %xmm9 | |
vpunpckldq %xmm1, %xmm0, %xmm0 | |
vpbroadcastw -76754(%r9,%r13,2), %xmm1 | |
vshufps $244, %xmm4, %xmm3, %xmm3 | |
vpblendw $32, %xmm5, %xmm3, %xmm3 | |
vpbroadcastd -44756(%r9,%r13,2), %ymm5 | |
vpblendd $8, %xmm10, %xmm3, %xmm3 | |
vpblendw $128, %xmm9, %xmm3, %xmm3 | |
vpbroadcastd -31956(%r9,%r13,2), %ymm9 | |
vpblendw $8, %xmm1, %xmm0, %xmm0 | |
vpsrldq $14, %xmm4, %xmm1 | |
vmovdqa -31968(%r9,%r13,2), %ymm4 | |
vpunpcklqdq %xmm1, %xmm0, %xmm0 | |
vpbroadcastw -63954(%r9,%r13,2), %xmm1 | |
vpblendd $240, %ymm5, %ymm3, %ymm5 | |
vinserti128 $1, -6368(%r9,%r13,2), %ymm3, %ymm3 | |
vpermt2w %ymm11, %ymm16, %ymm5 | |
vpmovsxbw .LCPI1_116(%rip), %ymm16 | |
vpblendd $32, %ymm9, %ymm5, %ymm5 | |
vpbroadcastd -19156(%r9,%r13,2), %ymm9 | |
vpermt2w %ymm13, %ymm18, %ymm5 | |
vpblendw $32, %xmm1, %xmm0, %xmm0 | |
vpsrld $16, %xmm10, %xmm1 | |
vmovdqa -44768(%r9,%r13,2), %ymm10 | |
vpmovsxbw .LCPI1_117(%rip), %ymm18 | |
vpblendd $8, %xmm1, %xmm0, %xmm0 | |
vmovdqa -76752(%r9,%r13,2), %xmm1 | |
vpblendd $192, %ymm9, %ymm5, %ymm5 | |
vmovdqa -70352(%r9,%r13,2), %xmm9 | |
vpermt2w %ymm10, %ymm17, %ymm12 | |
vpmovsxbw .LCPI1_140(%rip), %ymm17 | |
vpermt2w %ymm14, %ymm7, %ymm5 | |
vmovdqa64 %xmm1, %xmm29 | |
vpblendd $136, %ymm3, %ymm5, %ymm3 | |
vpblendw $128, %xmm6, %xmm0, %xmm5 | |
vpbroadcastw -95952(%r9,%r13,2), %ymm0 | |
vpmovsxbw .LCPI1_119(%rip), %ymm6 | |
vmovdqa %ymm3, 832(%rsp) | |
vpmovsxbw .LCPI1_120(%rip), %ymm3 | |
vpunpcklwd %xmm15, %xmm0, %xmm0 | |
vpunpckldq %xmm22, %xmm0, %xmm0 | |
insertq $48, $16, %xmm1, %xmm0 | |
vpbroadcastw %xmm24, %xmm1 | |
vpunpcklqdq %xmm9, %xmm0, %xmm0 | |
vpblendw $32, %xmm1, %xmm0, %xmm0 | |
vpbroadcastd -57552(%r9,%r13,2), %xmm1 | |
vpblendd $8, %xmm1, %xmm0, %xmm0 | |
vpbroadcastw %xmm30, %xmm1 | |
vpblendw $128, %xmm1, %xmm0, %xmm0 | |
vpbroadcastd -31952(%r9,%r13,2), %ymm1 | |
vpblendd $240, %ymm10, %ymm0, %ymm0 | |
vpermt2w %ymm11, %ymm25, %ymm0 | |
vpmovsxbw .LCPI1_121(%rip), %ymm25 | |
vpblendd $32, %ymm1, %ymm0, %ymm0 | |
vpbroadcastq -19152(%r9,%r13,2), %ymm1 | |
vpermt2w %ymm13, %ymm16, %ymm0 | |
vmovapd -57552(%r9,%r13,2), %xmm16 | |
vpblendd $192, %ymm1, %ymm0, %ymm0 | |
vpbroadcastd -6352(%r9,%r13,2), %ymm1 | |
vpermt2w %ymm14, %ymm18, %ymm0 | |
vpmovsxbw .LCPI1_128(%rip), %ymm18 | |
vpblendd $128, %ymm1, %ymm0, %ymm0 | |
vpmovsxbw .LCPI1_110(%rip), %xmm1 | |
vmovdqa %ymm0, 800(%rsp) | |
vmovdqa -76768(%r9,%r13,2), %ymm0 | |
vpermt2w %ymm0, %ymm1, %ymm2 | |
vpsrld $16, %xmm9, %xmm1 | |
vpunpcklqdq %xmm1, %xmm2, %xmm1 | |
vpbroadcastw -63950(%r9,%r13,2), %xmm2 | |
vpblendw $32, %xmm2, %xmm1, %xmm1 | |
vpslldq $10, %xmm16, %xmm2 | |
vpblendd $8, %xmm2, %xmm1, %xmm1 | |
vpbroadcastw -51150(%r9,%r13,2), %xmm2 | |
vpblendw $128, %xmm2, %xmm1, %xmm1 | |
vpshuflw $85, %ymm10, %ymm2 | |
vpblendw $2, %ymm11, %ymm2, %ymm2 | |
vpblendd $240, %ymm2, %ymm1, %ymm7 | |
vpmovsxbw .LCPI1_100(%rip), %xmm2 | |
vmovdqa 1728(%rsp), %ymm1 | |
vpermt2w %ymm0, %ymm2, %ymm1 | |
vpsrldq $10, %xmm15, %xmm2 | |
vshufps $212, %xmm9, %xmm1, %xmm0 | |
vpslldq $6, %xmm24, %xmm1 | |
vpblendw $32, %xmm1, %xmm0, %xmm0 | |
vpbroadcastd -57548(%r9,%r13,2), %ymm1 | |
vpblendd $8, %xmm1, %xmm0, %xmm0 | |
vpslldq $10, %xmm30, %xmm1 | |
vpblendw $128, %xmm1, %xmm0, %xmm0 | |
vpbroadcastd -44748(%r9,%r13,2), %ymm1 | |
vpblendd $240, %ymm1, %ymm0, %ymm0 | |
vpbroadcastd -19148(%r9,%r13,2), %ymm1 | |
vpermt2w %ymm11, %ymm3, %ymm0 | |
vmovdqa 784(%rsp), %xmm3 | |
vpblendd $32, %ymm4, %ymm0, %ymm0 | |
vpermt2w %ymm13, %ymm6, %ymm0 | |
vmovdqa64 %xmm22, %xmm6 | |
vpblendd $192, %ymm1, %ymm0, %ymm0 | |
vpbroadcastd -6348(%r9,%r13,2), %ymm1 | |
vpermt2w %ymm14, %ymm25, %ymm0 | |
vpblendd $128, %ymm1, %ymm0, %ymm0 | |
vpsrlq $48, %xmm15, %xmm1 | |
vmovdqa64 %ymm0, %ymm25 | |
vpsrlq $48, %xmm3, %xmm0 | |
vpunpcklwd %xmm1, %xmm0, %xmm0 | |
vpsrlq $48, %xmm22, %xmm1 | |
vmovapd %xmm15, %xmm22 | |
vmovdqa %xmm6, %xmm15 | |
vpunpckldq %xmm1, %xmm0, %xmm0 | |
vmovdqa64 %xmm29, %xmm1 | |
vpblendw $8, %xmm1, %xmm0, %xmm0 | |
vpsrlq $48, %xmm9, %xmm1 | |
vpunpcklqdq %xmm1, %xmm0, %xmm0 | |
vpbroadcastw -63946(%r9,%r13,2), %xmm1 | |
vpblendw $32, %xmm1, %xmm0, %xmm0 | |
vpmovzxwd %xmm16, %xmm1 | |
vpblendd $8, %xmm1, %xmm0, %xmm0 | |
vpbroadcastw -51146(%r9,%r13,2), %xmm1 | |
vpblendw $128, %xmm1, %xmm0, %xmm0 | |
vpsrldq $10, %xmm3, %xmm1 | |
vpunpcklwd %xmm2, %xmm1, %xmm1 | |
vpsrldq $10, %xmm6, %xmm2 | |
vpunpckldq %xmm2, %xmm1, %xmm1 | |
vpbroadcastw -76742(%r9,%r13,2), %xmm2 | |
vpblendw $8, %xmm2, %xmm1, %xmm1 | |
vpsrldq $10, %xmm9, %xmm2 | |
vpunpcklqdq %xmm2, %xmm1, %xmm1 | |
vmovdqa64 %xmm24, %xmm2 | |
vpblendw $32, %xmm2, %xmm1, %xmm1 | |
vpsllq $16, %xmm16, %xmm2 | |
vpblendd $8, %xmm2, %xmm1, %xmm1 | |
vpbroadcastw -51142(%r9,%r13,2), %xmm2 | |
vpblendw $128, %xmm2, %xmm1, %xmm6 | |
vpsrldq $14, %xmm22, %xmm1 | |
vpsrldq $14, %xmm3, %xmm2 | |
vpmovsxbw .LCPI1_125(%rip), %ymm3 | |
vmovdqa64 %xmm29, %xmm22 | |
vpunpcklwd %xmm1, %xmm2, %xmm1 | |
vpsrldq $14, %xmm15, %xmm2 | |
vmovdqa64 %xmm30, %xmm15 | |
vpmovsxbw .LCPI1_131(%rip), %ymm30 | |
vpunpckldq %xmm2, %xmm1, %xmm1 | |
vpbroadcastw -76738(%r9,%r13,2), %xmm2 | |
vpermt2w %ymm10, %ymm3, %ymm5 | |
vpblendw $8, %xmm2, %xmm1, %xmm1 | |
vpsrldq $14, %xmm9, %xmm2 | |
vpunpcklqdq %xmm2, %xmm1, %xmm1 | |
vpbroadcastw -63938(%r9,%r13,2), %xmm2 | |
vpermt2w %ymm11, %ymm17, %ymm5 | |
vpmovsxbw .LCPI1_145(%rip), %ymm17 | |
vpblendw $32, %xmm2, %xmm1, %xmm1 | |
vpsrld $16, %xmm16, %xmm2 | |
vpmovsxbw .LCPI1_129(%rip), %ymm16 | |
vpblendd $8, %xmm2, %xmm1, %xmm1 | |
vpmovsxbw .LCPI1_97(%rip), %ymm2 | |
vpermt2w %ymm4, %ymm17, %ymm7 | |
vpmovsxbw .LCPI1_151(%rip), %ymm17 | |
vpblendw $128, %xmm15, %xmm1, %xmm3 | |
vpbroadcastd .LCPI1_66(%rip), %ymm1 | |
vmovdqa64 %ymm2, %ymm26 | |
vpermt2w %ymm10, %ymm2, %ymm28 | |
vpmovsxbw .LCPI1_124(%rip), %ymm2 | |
vpermt2w %ymm13, %ymm17, %ymm7 | |
vpmovsxbw .LCPI1_148(%rip), %ymm17 | |
vpermt2w %ymm10, %ymm2, %ymm8 | |
vpmovsxbw .LCPI1_126(%rip), %ymm2 | |
vpermt2w %ymm10, %ymm2, %ymm6 | |
vpmovsxbw .LCPI1_127(%rip), %ymm2 | |
vpermt2w %ymm10, %ymm2, %ymm3 | |
vpermt2w %ymm11, %ymm1, %ymm10 | |
vpmovsxbw .LCPI1_130(%rip), %ymm1 | |
vpermt2w %ymm4, %ymm18, %ymm10 | |
vpmovsxbw .LCPI1_132(%rip), %ymm18 | |
vpblendw $8, %ymm13, %ymm10, %ymm2 | |
vpbroadcastw -89544(%r9,%r13,2), %xmm10 | |
vpblendd $240, %ymm2, %ymm0, %ymm2 | |
vpbroadcastw -95944(%r9,%r13,2), %ymm0 | |
vpunpcklwd %xmm10, %xmm0, %xmm0 | |
vunpcklps -83144(%r9,%r13,2){1to4}, %xmm0, %xmm0 | |
vpsrldq $2, %xmm29, %xmm10 | |
vmovdqa64 %xmm15, %xmm29 | |
vpblendw $8, %xmm10, %xmm0, %xmm0 | |
vpslld $16, %xmm24, %xmm10 | |
vpblendd $3, %xmm0, %xmm9, %xmm0 | |
vpblendw $32, %xmm10, %xmm0, %xmm0 | |
vpbroadcastd -57544(%r9,%r13,2), %xmm10 | |
vpblendd $8, %xmm10, %xmm0, %xmm0 | |
vpsllq $48, %xmm15, %xmm10 | |
vpbroadcastd -6344(%r9,%r13,2), %ymm15 | |
vpblendw $128, %xmm10, %xmm0, %xmm0 | |
vpbroadcastq -44744(%r9,%r13,2), %ymm10 | |
vpblendd $240, %ymm10, %ymm0, %ymm0 | |
vpbroadcastd -31944(%r9,%r13,2), %ymm10 | |
vpermt2w %ymm11, %ymm1, %ymm0 | |
vpmovsxbw .LCPI1_133(%rip), %ymm1 | |
vpblendd $32, %ymm10, %ymm0, %ymm10 | |
vmovdqa -19168(%r9,%r13,2), %ymm0 | |
vpermt2w %ymm13, %ymm30, %ymm10 | |
vpblendd $192, %ymm0, %ymm10, %ymm10 | |
vpermt2w %ymm14, %ymm16, %ymm10 | |
vpmovsxbw .LCPI1_134(%rip), %ymm16 | |
vpblendd $128, %ymm15, %ymm10, %ymm10 | |
vmovdqa %ymm6, %ymm15 | |
vpermt2w %ymm11, %ymm18, %ymm15 | |
vpmovsxbw .LCPI1_135(%rip), %ymm18 | |
vpermt2w %ymm4, %ymm1, %ymm15 | |
vpbroadcastd -83140(%r9,%r13,2), %ymm1 | |
vpermt2w %ymm13, %ymm16, %ymm15 | |
vpbroadcastw -89540(%r9,%r13,2), %xmm16 | |
vpermt2w %ymm0, %ymm18, %ymm15 | |
vpmovsxbw .LCPI1_136(%rip), %ymm18 | |
vpblendw $32, %ymm14, %ymm15, %ymm15 | |
vpblendd $240, %ymm15, %ymm6, %ymm6 | |
vpbroadcastw -95940(%r9,%r13,2), %ymm15 | |
vpunpcklwd %xmm16, %xmm15, %xmm15 | |
vpmovsxbw .LCPI1_139(%rip), %ymm16 | |
vpblendd $2, %xmm1, %xmm15, %xmm1 | |
vpsrldq $6, %xmm22, %xmm15 | |
vpblendw $8, %xmm15, %xmm1, %xmm1 | |
vpmovsxbw .LCPI1_138(%rip), %ymm15 | |
vshufps $244, %xmm9, %xmm1, %xmm1 | |
vpsrlq $16, %xmm24, %xmm9 | |
vpblendw $32, %xmm9, %xmm1, %xmm1 | |
vpbroadcastd -57540(%r9,%r13,2), %ymm9 | |
vpermt2w %ymm11, %ymm16, %ymm8 | |
vpmovsxbw .LCPI1_143(%rip), %ymm16 | |
vpermt2w %ymm11, %ymm15, %ymm12 | |
vpmovsxbw .LCPI1_147(%rip), %ymm15 | |
vpblendd $8, %xmm9, %xmm1, %xmm1 | |
vpslld $16, %xmm29, %xmm9 | |
vpblendw $128, %xmm9, %xmm1, %xmm1 | |
vpbroadcastd -44740(%r9,%r13,2), %ymm9 | |
vpermt2w %ymm4, %ymm16, %ymm8 | |
vpmovsxbw .LCPI1_150(%rip), %ymm16 | |
vpblendd $240, %ymm9, %ymm1, %ymm1 | |
vpbroadcastd -31940(%r9,%r13,2), %ymm9 | |
vpermt2w %ymm11, %ymm18, %ymm1 | |
vpmovsxbw .LCPI1_137(%rip), %ymm18 | |
vpblendd $32, %ymm9, %ymm1, %ymm1 | |
vpbroadcastd -19140(%r9,%r13,2), %ymm9 | |
vpermt2w %ymm13, %ymm18, %ymm1 | |
vpmovsxbw .LCPI1_141(%rip), %ymm18 | |
vpblendd $192, %ymm9, %ymm1, %ymm9 | |
vpmovsxbw .LCPI1_98(%rip), %ymm1 | |
vpermt2w %ymm11, %ymm18, %ymm3 | |
vpmovsxbw .LCPI1_104(%rip), %ymm18 | |
vpermt2w %ymm11, %ymm1, %ymm28 | |
vmovdqa64 %ymm1, %ymm22 | |
vpmovsxbw .LCPI1_142(%rip), %ymm1 | |
vpmovsxbw .LCPI1_146(%rip), %ymm11 | |
vpermt2w %ymm4, %ymm18, %ymm28 | |
vmovdqa64 800(%rsp), %ymm18 | |
vpermt2w %ymm4, %ymm1, %ymm12 | |
vpmovsxbw .LCPI1_144(%rip), %ymm1 | |
vpermt2w %ymm4, %ymm11, %ymm3 | |
vpmovsxbw .LCPI1_101(%rip), %ymm11 | |
vpermt2w %ymm13, %ymm15, %ymm12 | |
vpmovsxbw .LCPI1_153(%rip), %ymm15 | |
vpermt2w %ymm4, %ymm1, %ymm5 | |
vpermt2w %ymm13, %ymm11, %ymm28 | |
vpmovsxbw .LCPI1_149(%rip), %ymm11 | |
vpmovsxbw .LCPI1_158(%rip), %ymm1 | |
vpmovsxbw .LCPI1_159(%rip), %ymm4 | |
vpermt2w %ymm13, %ymm16, %ymm5 | |
vpmovsxbw .LCPI1_154(%rip), %ymm16 | |
vpermt2w %ymm13, %ymm11, %ymm8 | |
vmovdqa %ymm3, %ymm11 | |
vpermt2w %ymm13, %ymm17, %ymm11 | |
vpmovsxbw .LCPI1_152(%rip), %ymm13 | |
vpmovsxbw .LCPI1_118(%rip), %ymm17 | |
vpermt2w %ymm0, %ymm16, %ymm5 | |
vpmovsxbw .LCPI1_155(%rip), %ymm16 | |
vpermt2w %ymm0, %ymm15, %ymm8 | |
vpmovsxbw .LCPI1_161(%rip), %ymm15 | |
vpermt2w %ymm0, %ymm17, %ymm28 | |
vpermt2w %ymm0, %ymm13, %ymm12 | |
vpmovsxbw .LCPI1_160(%rip), %ymm13 | |
vmovdqa64 832(%rsp), %ymm17 | |
vpermt2w %ymm0, %ymm16, %ymm7 | |
vpmovsxbw .LCPI1_156(%rip), %ymm16 | |
vpermt2w %ymm14, %ymm1, %ymm28 | |
vpmovsxbw .LCPI1_163(%rip), %ymm1 | |
vpermt2w %ymm14, %ymm15, %ymm8 | |
vpmovsxbw .LCPI1_164(%rip), %ymm15 | |
vpermt2w %ymm14, %ymm13, %ymm12 | |
vpmovsxbw .LCPI1_167(%rip), %ymm13 | |
vpermt2w %ymm0, %ymm16, %ymm2 | |
vpmovsxbw .LCPI1_157(%rip), %ymm16 | |
vpermt2w %ymm14, %ymm1, %ymm7 | |
vpmovsxbw .LCPI1_169(%rip), %ymm1 | |
vpermt2w %ymm14, %ymm15, %ymm2 | |
vmovdqa 896(%rsp), %ymm15 | |
vpermt2w %ymm0, %ymm16, %ymm11 | |
vpmovsxbw .LCPI1_162(%rip), %ymm0 | |
vmovdqa64 %ymm28, %ymm16 | |
vpmovsxbw .LCPI1_95(%rip), %ymm28 | |
vpermt2w %ymm14, %ymm0, %ymm5 | |
vpmovsxbw .LCPI1_165(%rip), %ymm0 | |
vpermt2w %ymm14, %ymm0, %ymm9 | |
vpmovsxbw .LCPI1_166(%rip), %ymm0 | |
vpermt2w %ymm14, %ymm0, %ymm11 | |
vmovdqa -6368(%r9,%r13,2), %ymm0 | |
vmovdqa 960(%rsp), %ymm14 | |
vpermt2w %ymm0, %ymm1, %ymm8 | |
vpmovsxbw .LCPI1_171(%rip), %ymm1 | |
vpermt2w %ymm0, %ymm13, %ymm12 | |
vpmovsxbw .LCPI1_170(%rip), %ymm13 | |
vpermt2w %ymm0, %ymm4, %ymm16 | |
vpmovsxbw .LCPI1_174(%rip), %ymm4 | |
vpermt2w %ymm0, %ymm1, %ymm7 | |
vpmovsxbw .LCPI1_173(%rip), %ymm1 | |
vpermt2w %ymm0, %ymm13, %ymm5 | |
vpmovsxbw .LCPI1_172(%rip), %ymm13 | |
vpermt2w %ymm0, %ymm1, %ymm6 | |
vpblendd $128, %ymm0, %ymm9, %ymm1 | |
vpmovsxbw .LCPI1_168(%rip), %ymm9 | |
vpermt2w %ymm0, %ymm13, %ymm2 | |
vmovdqa 928(%rsp), %ymm13 | |
vpermt2w %ymm0, %ymm9, %ymm11 | |
vmovdqa 32(%r9,%r13,2), %ymm0 | |
vpmovsxbw .LCPI1_122(%rip), %ymm9 | |
addq $16, %r13 | |
vpermt2w %ymm0, %ymm4, %ymm16 | |
vpmovsxbw .LCPI1_175(%rip), %ymm4 | |
vpermt2w %ymm0, %ymm9, %ymm14 | |
vmovaps 704(%rsp), %ymm9 | |
vpermt2w %ymm0, %ymm4, %ymm13 | |
vpmovsxbw .LCPI1_176(%rip), %ymm4 | |
vmovups %ymm9, -307680(%r15) | |
vmovaps 672(%rsp), %ymm9 | |
vpermt2w %ymm0, %ymm4, %ymm12 | |
vpmovsxbw .LCPI1_177(%rip), %ymm4 | |
vpermt2w %ymm0, %ymm4, %ymm15 | |
vpmovsxbw .LCPI1_178(%rip), %ymm4 | |
vpermt2w %ymm0, %ymm4, %ymm8 | |
vpmovsxbw .LCPI1_179(%rip), %ymm4 | |
vpermt2w %ymm0, %ymm4, %ymm17 | |
vpmovsxbw .LCPI1_180(%rip), %ymm4 | |
vpermt2w %ymm0, %ymm4, %ymm5 | |
vpmovsxbw .LCPI1_181(%rip), %ymm4 | |
vpermt2w %ymm0, %ymm4, %ymm18 | |
vpmovsxbw .LCPI1_182(%rip), %ymm4 | |
vpermt2w %ymm0, %ymm4, %ymm7 | |
vpmovsxbw .LCPI1_183(%rip), %ymm4 | |
vpermt2w %ymm0, %ymm4, %ymm25 | |
vpmovsxbw .LCPI1_184(%rip), %ymm4 | |
vpermt2w %ymm0, %ymm4, %ymm2 | |
vpmovsxbw .LCPI1_185(%rip), %ymm4 | |
vpermt2w %ymm0, %ymm4, %ymm10 | |
vpmovsxbw .LCPI1_186(%rip), %ymm4 | |
vpermt2w %ymm0, %ymm4, %ymm6 | |
vpmovsxbw .LCPI1_187(%rip), %ymm4 | |
vpermt2w %ymm0, %ymm4, %ymm1 | |
vmovaps 1344(%rsp), %ymm4 | |
vpblendw $128, %ymm0, %ymm11, %ymm0 | |
vpmovsxbw .LCPI1_96(%rip), %ymm11 | |
vpblendd $240, %ymm0, %ymm3, %ymm0 | |
vmovups %ymm4, -307648(%r15) | |
vmovaps 1664(%rsp), %ymm4 | |
vmovups %ymm9, -307616(%r15) | |
vmovaps 1632(%rsp), %ymm9 | |
vmovups %ymm4, -307584(%r15) | |
vmovaps 1600(%rsp), %ymm4 | |
vmovups %ymm9, -307552(%r15) | |
vmovaps 640(%rsp), %ymm9 | |
vmovups %ymm4, -307520(%r15) | |
vmovaps 1568(%rsp), %ymm4 | |
vmovups %ymm9, -307488(%r15) | |
vmovaps 608(%rsp), %ymm9 | |
vmovups %ymm4, -307456(%r15) | |
vmovaps 1536(%rsp), %ymm4 | |
vmovups %ymm9, -307424(%r15) | |
vmovaps 320(%rsp), %ymm9 | |
vmovups %ymm4, -307392(%r15) | |
vmovaps 1504(%rsp), %ymm4 | |
vmovups %ymm9, -307360(%r15) | |
vmovaps 1472(%rsp), %ymm9 | |
vmovups %ymm4, -307328(%r15) | |
vmovaps 1440(%rsp), %ymm4 | |
vmovups %ymm9, -307296(%r15) | |
vmovaps 1408(%rsp), %ymm9 | |
vmovups %ymm4, -307264(%r15) | |
vmovaps 1376(%rsp), %ymm4 | |
vmovups %ymm9, -307232(%r15) | |
vmovaps 1088(%rsp), %ymm9 | |
vmovups %ymm4, -307200(%r15) | |
vmovaps 992(%rsp), %ymm4 | |
vmovups %ymm9, -205280(%r15) | |
vmovaps 576(%rsp), %ymm9 | |
vmovups %ymm4, -205248(%r15) | |
vmovaps 864(%rsp), %ymm4 | |
vmovups %ymm9, -205216(%r15) | |
vmovaps 1312(%rsp), %ymm9 | |
vmovups %ymm4, -205184(%r15) | |
vmovaps 1024(%rsp), %ymm4 | |
vmovups %ymm9, -205152(%r15) | |
vmovaps 544(%rsp), %ymm9 | |
vmovups %ymm4, -205120(%r15) | |
vmovaps 1280(%rsp), %ymm4 | |
vmovups %ymm9, -205088(%r15) | |
vmovaps 512(%rsp), %ymm9 | |
vmovups %ymm4, -205056(%r15) | |
vmovaps 1248(%rsp), %ymm4 | |
vmovups %ymm9, -205024(%r15) | |
vmovaps 288(%rsp), %ymm9 | |
vmovups %ymm4, -204992(%r15) | |
vmovaps 1216(%rsp), %ymm4 | |
vmovups %ymm9, -204960(%r15) | |
vmovaps 1184(%rsp), %ymm9 | |
vmovups %ymm4, -204928(%r15) | |
vmovaps 1152(%rsp), %ymm4 | |
vmovups %ymm9, -204896(%r15) | |
vmovaps 1120(%rsp), %ymm9 | |
vmovups %ymm4, -204864(%r15) | |
vmovaps 1056(%rsp), %ymm4 | |
vmovups %ymm9, -204832(%r15) | |
vmovaps 256(%rsp), %ymm9 | |
vmovups %ymm4, -204800(%r15) | |
vmovaps 224(%rsp), %ymm4 | |
vmovups %ymm9, -102880(%r15) | |
vmovdqu64 %ymm27, -102848(%r15) | |
vmovaps 192(%rsp), %ymm9 | |
vmovups %ymm4, -102816(%r15) | |
vmovaps 160(%rsp), %ymm4 | |
vmovdqu64 %ymm31, -102784(%r15) | |
vmovups %ymm9, -102752(%r15) | |
vmovdqu64 %ymm21, -102720(%r15) | |
vmovaps 480(%rsp), %ymm9 | |
vmovups %ymm4, -102688(%r15) | |
vmovaps 128(%rsp), %ymm4 | |
vmovdqu64 %ymm19, -102656(%r15) | |
vmovups %ymm9, -102624(%r15) | |
vmovdqu64 %ymm20, -102592(%r15) | |
vmovaps 448(%rsp), %ymm9 | |
vmovups %ymm4, -102560(%r15) | |
vmovaps 416(%rsp), %ymm4 | |
vmovdqu64 %ymm23, -102528(%r15) | |
vmovups %ymm9, -102496(%r15) | |
vmovaps 384(%rsp), %ymm9 | |
vmovups %ymm4, -102464(%r15) | |
vmovaps 352(%rsp), %ymm4 | |
vmovups %ymm9, -102432(%r15) | |
vmovups %ymm4, -102400(%r15) | |
vmovdqu %ymm14, -480(%r15) | |
vmovdqu64 %ymm16, -448(%r15) | |
vmovdqu %ymm13, -416(%r15) | |
vmovdqu %ymm12, -384(%r15) | |
vmovdqu %ymm15, -352(%r15) | |
vmovdqu %ymm8, -320(%r15) | |
vmovdqu64 %ymm17, -288(%r15) | |
vmovdqu %ymm5, -256(%r15) | |
vmovdqu64 %ymm18, -224(%r15) | |
vmovdqu %ymm7, -192(%r15) | |
vmovdqu64 %ymm25, -160(%r15) | |
vmovdqu %ymm2, -128(%r15) | |
vmovdqu %ymm10, -96(%r15) | |
vmovdqu %ymm6, -64(%r15) | |
vmovdqu %ymm1, -32(%r15) | |
vmovdqu %ymm0, (%r15) | |
addq $512, %r15 | |
cmpq $48, %r13 | |
jb .LBB1_9 | |
incq %rsi | |
addq $55296000, %r14 | |
cmpq %rdx, %rsi | |
jne .LBB1_8 | |
jmp .LBB1_11 | |
.LBB1_14: | |
xorl %eax, %eax | |
leaq -40(%rbp), %rsp | |
.loc 1 4 3 epilogue_begin | |
popq %rbx | |
popq %r12 | |
popq %r13 | |
popq %r14 | |
popq %r15 | |
popq %rbp | |
.cfi_def_cfa %rsp, 8 | |
vzeroupper | |
retq | |
.Ltmp3: | |
.Lfunc_end1: | |
.size turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack, .Lfunc_end1-turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack | |
.cfi_endproc | |
# ---------------------------------------------------------------------------
# turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32
#
# Compiler-generated x86-64 (AT&T syntax, AVX-512) kernel that accumulates
# 16x16 f32 tiles: every reduction step converts 16 half-precision values to
# f32 and multiplies them by 16 broadcast f32 scalars (see .LBB2_11).
#
# Inputs, as read by the code:
#   %rsi -> record read at +12, +16 (32-bit), +20 (16-bit), +24 (pointer to a
#           table of scalars, called T below) and +32 (pointer to an array of
#           buffer pointers, called B below)
#   %rdx -> record read at +0, +4 (32-bit) and +8 (16-bit)
#   %rdi is never read.
# NOTE(review): this matches the shape of an IREE HAL dispatch entry point
# (environment, dispatch state, workgroup state), which would make 0/4/8(%rdx)
# the workgroup id and 12/16/20(%rsi) the workgroup count -- confirm against
# the IREE executable library header.
#
# Returns 0 in %eax.
#
# Frame: %rbp-based. Five callee-saved registers are pushed and 120 bytes are
# reserved, so %rsp == %rbp-160; the spill slots between -168(%rbp) and
# -288(%rbp) therefore sit in the 128 bytes below %rsp (red zone). The
# function makes no calls.
# ---------------------------------------------------------------------------
.section .text.turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32,"ax",@progbits | |
.p2align 4, 0x90 | |
.type turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32,@function | |
turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32: | |
.Lfunc_begin2: | |
.loc 1 1 0 is_stmt 1 | |
.cfi_startproc | |
pushq %rbp | |
.cfi_def_cfa_offset 16 | |
.cfi_offset %rbp, -16 | |
movq %rsp, %rbp | |
.cfi_def_cfa_register %rbp | |
pushq %r15 | |
pushq %r14 | |
pushq %r13 | |
pushq %r12 | |
pushq %rbx | |
subq $120, %rsp | |
.cfi_offset %rbx, -56 | |
.cfi_offset %r12, -48 | |
.cfi_offset %r13, -40 | |
.cfi_offset %r14, -32 | |
.cfi_offset %r15, -24 | |
.Ltmp4: | |
.loc 1 4 3 prologue_end | |
# Outer-loop range: start = 16-bit value at 8(%rdx) (kept in -48(%rbp)),
# bound = 64-bit value at 24(T) (kept in -160(%rbp)).
# Nothing to do when bound <= start (signed compare).
movq 24(%rsi), %r8 | |
movq %rdx, %rax | |
movzwl 8(%rdx), %edx | |
movq 24(%r8), %rcx | |
movq %rdx, -48(%rbp) | |
movq %rcx, -160(%rbp) | |
cmpq %rdx, %rcx | |
jle .LBB2_15 | |
.loc 1 0 3 is_stmt 0 | |
# Loop-invariant setup. All strides and running pointers are precomputed and
# spilled to %rbp-relative slots:
#   N            = 64-bit value built from 32(T) (low half) and 36(T) (high
#                  half); stored in -104(%rbp) as the middle-loop bound
#   -88(%rbp)    = 32-bit value at 4(%rdx)   (middle-loop start)
#   -112(%rbp)   = 32-bit value at 0(%rdx)   (inner-loop start)
#   -216(%rbp)   = 32-bit value at 12(%rsi)  (inner-loop step)
#   -192(%rbp)   = 32-bit value at 16(%rsi)  (middle-loop step)
#   -144(%rbp)   = 16-bit value at 20(%rsi)  (outer-loop step)
#   -96(%rbp)    = running pointer into B[0] for the half-precision operand
#   -80(%rbp)    = running pointer into B[0] (+60 bytes) for the f32 operand
#   -56(%rbp)    = running pointer into B[1] for the f32 tile being produced
#   %r14         = B[1] + 4 * (8(T) >> 2), base for the accumulator loads/stores
# 593736278999040 == 138240 << 32: together with "imulq $138240" on the low
# half it yields 138240 * N.
movl 32(%r8), %edx | |
movl 36(%r8), %edi | |
movabsq $593736278999040, %r10 | |
movl 4(%rax), %r12d | |
movq -48(%rbp), %rbx | |
movl (%rax), %r14d | |
movq 32(%rsi), %rcx | |
# BEXTR controls: 15361 = 0x3C01 (start bit 1, length 60) and
# 15106 = 0x3B02 (start bit 2, length 59), i.e. 0(T) >> 1 and 8(T) >> 2.
movl $15361, %r9d | |
movl $15106, %r11d | |
movl 12(%rsi), %r15d | |
bextrq %r9, (%r8), %r9 | |
bextrq %r11, 8(%r8), %r8 | |
.loc 1 4 3 | |
imulq %rdi, %r10 | |
imulq $138240, %rdx, %r13 | |
imulq $102400, %r14, %r11 | |
movq %r14, -112(%rbp) | |
# Register-to-itself move emitted by the compiler; it does not change %r14.
movq %r14, %r14 | |
shlq $10, %r14 | |
shlq $32, %rdi | |
movq %r12, -88(%rbp) | |
movq %r15, -216(%rbp) | |
addq %r10, %r13 | |
imulq $552960, %r12, %r10 | |
movq %r13, %rax | |
imulq %rbx, %rax | |
leaq (%r10,%rax,4), %rax | |
imulq $55296000, %rbx, %r10 | |
addq %rax, %r14 | |
movq 8(%rcx), %rax | |
addq %r10, %r11 | |
leaq (%rdi,%rdx), %r10 | |
addq %rdx, %rdi | |
leaq (%r11,%r9,2), %r11 | |
leaq (%r14,%r8,4), %r9 | |
movq %rdi, %rdx | |
imulq %rbx, %rdx | |
movq %r10, -104(%rbp) | |
leaq (%rax,%r8,4), %r14 | |
movl 16(%rsi), %r8d | |
movzwl 20(%rsi), %esi | |
addq %rax, %r9 | |
imulq $204800, %rdx, %rax | |
imulq $204800, %r12, %rdx | |
movq %r9, -56(%rbp) | |
movq (%rcx), %r9 | |
movq %r15, %rcx | |
shlq $10, %rcx | |
movq %rcx, -208(%rbp) | |
imulq $138240, %r10, %rcx | |
addq %rax, %rdx | |
imulq %rsi, %rdi | |
movq %rcx, -120(%rbp) | |
imulq $55296000, %rsi, %rcx | |
imulq %rsi, %r13 | |
movq %rsi, -144(%rbp) | |
movq %r8, -192(%rbp) | |
leaq 60(%r9,%rdx), %rax | |
addq %r9, %r11 | |
movq %r11, -96(%rbp) | |
imulq $204800, %rdi, %rdx | |
movq %rcx, -128(%rbp) | |
imulq $204800, %r8, %rcx | |
shlq $2, %r13 | |
movq %rax, -80(%rbp) | |
movq %r13, -152(%rbp) | |
movq %rdx, -136(%rbp) | |
imulq $552960, %r8, %rdx | |
movq %rcx, -176(%rbp) | |
movq %rdx, -184(%rbp) | |
imulq $102400, %r15, %rdx | |
movq %rdx, -200(%rbp) | |
jmp .LBB2_2 | |
.p2align 4, 0x90 | |
# Outer-loop latch: advance the counter in -48(%rbp) by -144(%rbp) and the
# three running pointers by their precomputed strides; leave once the counter
# reaches the bound in -160(%rbp).
.LBB2_14: | |
.loc 1 0 3 | |
movq -128(%rbp), %rsi | |
movq -136(%rbp), %rdx | |
movq -48(%rbp), %rax | |
movq -56(%rbp), %rcx | |
.loc 1 4 3 | |
addq -144(%rbp), %rax | |
addq -152(%rbp), %rcx | |
addq %rsi, -96(%rbp) | |
addq %rdx, -80(%rbp) | |
movq %rcx, -56(%rbp) | |
movq %rax, -48(%rbp) | |
cmpq -160(%rbp), %rax | |
jge .LBB2_15 | |
# Outer-loop body: skip the middle loop entirely when its start index
# (-88(%rbp)) is not below its bound N (-104(%rbp)).
.LBB2_2: | |
.loc 1 0 3 | |
movq -104(%rbp), %rax | |
cmpq -88(%rbp), %rax | |
.loc 1 4 3 | |
jle .LBB2_14 | |
.loc 1 0 3 | |
# Seed the middle loop: %r12 = f32 operand pointer, -72(%rbp) = output tile
# pointer, -64(%rbp) = middle counter, -168(%rbp) = outer counter * 138240 * N.
movq -48(%rbp), %rax | |
movq -80(%rbp), %r12 | |
movq -56(%rbp), %rdx | |
movq -88(%rbp), %rcx | |
imulq -120(%rbp), %rax | |
movq %rdx, -72(%rbp) | |
movq %rcx, -64(%rbp) | |
movq %rax, -168(%rbp) | |
jmp .LBB2_4 | |
.p2align 4, 0x90 | |
# Middle-loop latch: step the counter by -192(%rbp), %r12 by -176(%rbp) and
# the output pointer by -184(%rbp); exit when the counter reaches N.
.LBB2_13: | |
movq -64(%rbp), %rcx | |
movq -72(%rbp), %rax | |
.loc 1 4 3 | |
addq -176(%rbp), %r12 | |
addq -192(%rbp), %rcx | |
addq -184(%rbp), %rax | |
movq %rax, -72(%rbp) | |
movq %rcx, -64(%rbp) | |
cmpq -104(%rbp), %rcx | |
jge .LBB2_14 | |
# Middle-loop body: the inner loop only runs when its start index is at most
# 539 (unsigned compare on the 32-bit value).
.LBB2_4: | |
.loc 1 0 3 | |
cmpl $539, -112(%rbp) | |
.loc 1 4 3 | |
ja .LBB2_13 | |
.loc 1 0 3 | |
# %rcx = element index of the current output row block:
#        138240 * middle counter + -168(%rbp).
# %r11 = half-precision operand pointer, %rbx = output tile pointer,
# %r8  = inner-loop counter.
imulq $138240, -64(%rbp), %rcx | |
movq -96(%rbp), %r11 | |
movq -72(%rbp), %rbx | |
movq -112(%rbp), %r8 | |
addq -168(%rbp), %rcx | |
.p2align 4, 0x90 | |
# Inner loop, one 16x16 f32 tile per iteration. %rax = counter * 256 is the
# element offset of this tile within the row block.
.LBB2_6: | |
movq %r8, %rax | |
shlq $8, %rax | |
movq %rbx, %rdx | |
xorl %esi, %esi | |
.p2align 4, 0x90 | |
# Zero-fill: 16 rows (.LBB2_7) of 16 dwords (.LBB2_8), 1024 bytes from %rbx.
.LBB2_7: | |
xorl %edi, %edi | |
.p2align 4, 0x90 | |
.LBB2_8: | |
.loc 1 4 3 | |
movl $0, (%rdx,%rdi,4) | |
incq %rdi | |
cmpq $16, %rdi | |
jne .LBB2_8 | |
incq %rsi | |
addq $64, %rdx | |
cmpq $16, %rsi | |
jne .LBB2_7 | |
# Load the 16 accumulator rows from (%r14, index, 4), index = %rcx + %rax +
# 16*row. Nine of the indices are spilled to -288..-224(%rbp); the other
# seven stay in %rsi, %r15, %r10, %rdx, %rdi, %r13 and %r9 until the stores
# after the reduction loop.
# NOTE(review): from the setup arithmetic these addresses appear to be the
# tile that was just zeroed through %rbx -- confirm.
leaq (%rcx,%rax), %rdx | |
leaq 32(%rcx,%rax), %rsi | |
leaq 160(%rcx,%rax), %r15 | |
leaq 176(%rcx,%rax), %r10 | |
leaq 208(%rcx,%rax), %rdi | |
leaq 224(%rcx,%rax), %r13 | |
leaq 240(%rcx,%rax), %r9 | |
movq %rdx, -288(%rbp) | |
vmovups (%r14,%rdx,4), %zmm0 | |
leaq 16(%rcx,%rax), %rdx | |
movq %rsi, -272(%rbp) | |
vmovups (%r14,%rsi,4), %zmm1 | |
leaq 64(%rcx,%rax), %rsi | |
vmovups (%r14,%r15,4), %zmm9 | |
vmovups (%r14,%r10,4), %zmm12 | |
vmovups (%r14,%rdi,4), %zmm14 | |
vmovups (%r14,%r13,4), %zmm13 | |
vmovups (%r14,%r9,4), %zmm15 | |
movq %rdx, -280(%rbp) | |
vmovups (%r14,%rdx,4), %zmm2 | |
leaq 48(%rcx,%rax), %rdx | |
movq %rsi, -256(%rbp) | |
vmovups (%r14,%rsi,4), %zmm3 | |
leaq 96(%rcx,%rax), %rsi | |
movq %rdx, -264(%rbp) | |
vmovups (%r14,%rdx,4), %zmm4 | |
leaq 80(%rcx,%rax), %rdx | |
movq %rsi, -240(%rbp) | |
vmovups (%r14,%rsi,4), %zmm5 | |
leaq 128(%rcx,%rax), %rsi | |
movq %rdx, -248(%rbp) | |
vmovups (%r14,%rdx,4), %zmm6 | |
leaq 112(%rcx,%rax), %rdx | |
movq %rsi, -224(%rbp) | |
vmovups (%r14,%rsi,4), %zmm7 | |
leaq 144(%rcx,%rax), %rsi | |
movq %rdx, -232(%rbp) | |
vmovups (%r14,%rdx,4), %zmm8 | |
leaq 192(%rcx,%rax), %rdx | |
movl $0, %eax | |
vmovups (%r14,%rsi,4), %zmm10 | |
vmovups (%r14,%rdx,4), %zmm11 | |
.p2align 4, 0x90 | |
# Reduction loop: %rax advances 32 bytes per step up to 102400, i.e. 3200
# steps. Each step converts the 16 halves at (%r11,%rax) to f32 in %zmm16 and
# issues 16 FMAs, each broadcasting one of the 16 consecutive f32 values at
# -60 .. 0(%r12,%rax,2) into a different accumulator.
.LBB2_11: | |
.loc 1 0 3 | |
vcvtph2ps (%r11,%rax), %zmm16 | |
vfmadd231ps -60(%r12,%rax,2){1to16}, %zmm16, %zmm0 | |
vfmadd231ps -56(%r12,%rax,2){1to16}, %zmm16, %zmm2 | |
vfmadd231ps -52(%r12,%rax,2){1to16}, %zmm16, %zmm1 | |
vfmadd231ps -48(%r12,%rax,2){1to16}, %zmm16, %zmm4 | |
vfmadd231ps -44(%r12,%rax,2){1to16}, %zmm16, %zmm3 | |
vfmadd231ps -40(%r12,%rax,2){1to16}, %zmm16, %zmm6 | |
vfmadd231ps -36(%r12,%rax,2){1to16}, %zmm16, %zmm5 | |
vfmadd231ps -32(%r12,%rax,2){1to16}, %zmm16, %zmm8 | |
vfmadd231ps -28(%r12,%rax,2){1to16}, %zmm16, %zmm7 | |
vfmadd231ps -24(%r12,%rax,2){1to16}, %zmm16, %zmm10 | |
vfmadd231ps -20(%r12,%rax,2){1to16}, %zmm16, %zmm9 | |
vfmadd231ps -16(%r12,%rax,2){1to16}, %zmm16, %zmm12 | |
vfmadd231ps -12(%r12,%rax,2){1to16}, %zmm16, %zmm11 | |
vfmadd231ps -8(%r12,%rax,2){1to16}, %zmm16, %zmm14 | |
vfmadd231ps -4(%r12,%rax,2){1to16}, %zmm16, %zmm13 | |
vfmadd231ps (%r12,%rax,2){1to16}, %zmm16, %zmm15 | |
.loc 1 4 3 | |
addq $32, %rax | |
cmpq $102400, %rax | |
jne .LBB2_11 | |
.loc 1 0 3 | |
# Advance the inner-loop state (%r8, %rbx, %r11), then write the 16
# accumulators back to the indices they were loaded from.
movq -288(%rbp), %rax | |
.loc 1 4 3 | |
addq -216(%rbp), %r8 | |
addq -208(%rbp), %rbx | |
addq -200(%rbp), %r11 | |
vmovups %zmm0, (%r14,%rax,4) | |
movq -280(%rbp), %rax | |
vmovups %zmm2, (%r14,%rax,4) | |
movq -272(%rbp), %rax | |
vmovups %zmm1, (%r14,%rax,4) | |
movq -264(%rbp), %rax | |
vmovups %zmm4, (%r14,%rax,4) | |
movq -256(%rbp), %rax | |
vmovups %zmm3, (%r14,%rax,4) | |
movq -248(%rbp), %rax | |
vmovups %zmm6, (%r14,%rax,4) | |
movq -240(%rbp), %rax | |
vmovups %zmm5, (%r14,%rax,4) | |
movq -232(%rbp), %rax | |
vmovups %zmm8, (%r14,%rax,4) | |
movq -224(%rbp), %rax | |
vmovups %zmm7, (%r14,%rax,4) | |
vmovups %zmm10, (%r14,%rsi,4) | |
vmovups %zmm9, (%r14,%r15,4) | |
vmovups %zmm12, (%r14,%r10,4) | |
vmovups %zmm11, (%r14,%rdx,4) | |
vmovups %zmm14, (%r14,%rdi,4) | |
vmovups %zmm13, (%r14,%r13,4) | |
vmovups %zmm15, (%r14,%r9,4) | |
# Inner loop continues while the counter is below 540 (signed compare).
cmpq $540, %r8 | |
jl .LBB2_6 | |
jmp .LBB2_13 | |
# Common exit: return 0, restore callee-saved registers, and clear the upper
# vector state before returning.
.LBB2_15: | |
xorl %eax, %eax | |
.loc 1 4 3 epilogue_begin | |
addq $120, %rsp | |
popq %rbx | |
popq %r12 | |
popq %r13 | |
popq %r14 | |
popq %r15 | |
popq %rbp | |
.cfi_def_cfa %rsp, 8 | |
vzeroupper | |
retq | |
.Ltmp5: | |
.Lfunc_end2: | |
.size turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32, .Lfunc_end2-turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32 | |
.cfi_endproc | |
# ---------------------------------------------------------------------------
# turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32
#
# Compiler-generated x86-64 (AT&T syntax) copy kernel. The innermost loops
# move 16-dword rows, one scalar at a time, from a source whose rows are 64
# bytes apart to a destination whose rows are 34560 bytes apart.
#
# Inputs, as read by the code:
#   %rsi -> record read at +12, +16 (32-bit), +20 (16-bit), +24 (pointer to a
#           table of scalars, called T below) and +32 (pointer to an array of
#           buffer pointers, called B below)
#   %rdx -> record read at +0, +4 (32-bit) and +8 (16-bit)
#   %rdi is overwritten before it is ever read.
# NOTE(review): this matches the shape of an IREE HAL dispatch entry point
# (environment, dispatch state, workgroup state) -- confirm against the IREE
# executable library header.
#
# Source data is read through B[0] + 4 * (0(T) >> 2); destination data is
# written through B[1].
# Returns 0 in %eax.
#
# Frame: %rbp-based. Five callee-saved registers are pushed and 128 bytes are
# reserved, so %rsp == %rbp-168; slots between -176(%rbp) and -296(%rbp) sit
# in the 128 bytes below %rsp (red zone). The function makes no calls.
# ---------------------------------------------------------------------------
.section .text.turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32,"ax",@progbits | |
.p2align 4, 0x90 | |
.type turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32,@function | |
turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32: | |
.Lfunc_begin3: | |
.loc 1 1 0 is_stmt 1 | |
.cfi_startproc | |
pushq %rbp | |
.cfi_def_cfa_offset 16 | |
.cfi_offset %rbp, -16 | |
movq %rsp, %rbp | |
.cfi_def_cfa_register %rbp | |
.Ltmp6: | |
pushq %r15 | |
pushq %r14 | |
pushq %r13 | |
pushq %r12 | |
pushq %rbx | |
subq $128, %rsp | |
.cfi_offset %rbx, -56 | |
.cfi_offset %r12, -48 | |
.cfi_offset %r13, -40 | |
.cfi_offset %r14, -32 | |
.cfi_offset %r15, -24 | |
.loc 1 4 3 prologue_end | |
# Outer-loop range: start = (16-bit value at 8(%rdx)) * 64, kept in
# -56(%rbp); bound = 64-bit value built from 16(T) (low) and 20(T) (high),
# kept in -112(%rbp). Return immediately when start >= bound.
movq 24(%rsi), %rcx | |
movzwl 8(%rdx), %edi | |
movl 20(%rcx), %r15d | |
movl 16(%rcx), %r9d | |
movq %rdi, %r12 | |
shll $6, %edi | |
movq %rdi, -56(%rbp) | |
shlq $32, %r15 | |
leaq (%r15,%r9), %rax | |
movq %rax, -112(%rbp) | |
cmpq %rax, %rdi | |
jge .LBB3_22 | |
.loc 1 0 3 is_stmt 0 | |
# Loop-invariant setup; strides and base pointers are spilled to
# %rbp-relative slots. M below is the 64-bit value built from 24(T) (low) and
# 28(T) (high); it is stored in -208(%rbp) as the bound of the second loop.
# 37108517437440 == 8640 << 32 and 148434069749760 == 34560 << 32: combined
# with the matching "imulq $8640" / "imulq $34560" on the low half they give
# 8640 * M and 34560 * M.
movl 12(%rsi), %eax | |
movl 24(%rcx), %r10d | |
movl 28(%rcx), %r11d | |
movl (%rdx), %r13d | |
movl 4(%rdx), %edx | |
movabsq $37108517437440, %rbx | |
movl 16(%rsi), %edi | |
movzwl 20(%rsi), %r8d | |
movq 32(%rsi), %rsi | |
.loc 1 4 3 | |
orq %r9, %r15 | |
movabsq $148434069749760, %r14 | |
# %r15 = outer bound - outer start: elements remaining for the outer loop.
subq -56(%rbp), %r15 | |
movq %rax, -48(%rbp) | |
imulq %r11, %rbx | |
imulq $8640, %r10, %rax | |
imulq $2211840, %rdx, %r9 | |
imulq %r11, %r14 | |
shlq $32, %r11 | |
shlq $6, %rdx | |
movq %r13, -216(%rbp) | |
movq %rdx, -104(%rbp) | |
addq %rbx, %rax | |
imulq $3840, %r13, %rbx | |
addq %r9, %rbx | |
addq 8(%rsi), %rbx | |
movq %rax, %r9 | |
imulq %r12, %r9 | |
movq (%rsi), %rsi | |
imulq %r8, %rax | |
shlq $8, %r9 | |
shlq $8, %rax | |
movq %rax, -160(%rbp) | |
addq %r9, %rbx | |
# BEXTR control 15106 = 0x3B02 (start bit 2, length 59), i.e. 0(T) >> 2.
movl $15106, %r9d | |
# -64(%rbp) = destination pointer for the first outer iteration.
movq %rbx, -64(%rbp) | |
leaq (%r11,%r10), %rbx | |
orq %r10, %r11 | |
imulq $34560, %r10, %r10 | |
bextrq %r9, (%rcx), %r9 | |
subq %rdx, %r11 | |
movq -48(%rbp), %rdx | |
movq %rbx, -208(%rbp) | |
movq %r11, -136(%rbp) | |
addq %r14, %r10 | |
movq %r10, -280(%rbp) | |
movq 8(%rcx), %r10 | |
# -184(%rbp) = source base pointer: B[0] + 4 * (0(T) >> 2).
leaq (%rsi,%r9,4), %rcx | |
imulq $2211840, %rdi, %rsi | |
shlq $6, %rdi | |
imulq $960, %rdx, %rax | |
imulq $3840, %rdx, %rdx | |
movq %rdi, -192(%rbp) | |
movq %rcx, -184(%rbp) | |
imulq $960, %r13, %rcx | |
movq %rsi, -200(%rbp) | |
imulq %r10, %r12 | |
imulq $552960, %r10, %r9 | |
imulq %r8, %r10 | |
shll $6, %r8d | |
movq %rax, -248(%rbp) | |
movq %rdx, -240(%rbp) | |
movq %r8, -144(%rbp) | |
movq %rcx, -176(%rbp) | |
shlq $6, %r12 | |
shlq $6, %r10 | |
movq %r9, -272(%rbp) | |
movq %r12, -120(%rbp) | |
movq %r10, -152(%rbp) | |
jmp .LBB3_2 | |
.p2align 4, 0x90 | |
# Loop 1 latch: step the counter in -56(%rbp) by -144(%rbp), shrink the
# remaining count in %r15 by the same amount, and advance the destination
# pointer (-64(%rbp)) and the source index term (-120(%rbp)).
.LBB3_21: | |
.loc 1 0 3 | |
movq -152(%rbp), %rdx | |
movq -64(%rbp), %rsi | |
movq -56(%rbp), %rax | |
movq -144(%rbp), %rcx | |
movq -168(%rbp), %r15 | |
.loc 1 4 3 | |
addq -160(%rbp), %rsi | |
addq %rdx, -120(%rbp) | |
addq %rcx, %rax | |
subq %rcx, %r15 | |
movq %rsi, -64(%rbp) | |
movq %rax, -56(%rbp) | |
cmpq -112(%rbp), %rax | |
jge .LBB3_22 | |
# Loop 1 body: -288(%rbp) = min(%r15, 64), forced to at least 1; it is the
# trip count of loop 4. Loop 2 is skipped when its start (-104(%rbp)) is not
# below its bound (%rbx).
.LBB3_2: | |
cmpq $64, %r15 | |
movl $64, %ecx | |
movl $1, %eax | |
movq %r15, -168(%rbp) | |
cmovlq %r15, %rcx | |
cmpq $2, %rcx | |
cmovlq %rax, %rcx | |
movq %rcx, -288(%rbp) | |
cmpq %rbx, -104(%rbp) | |
jge .LBB3_21 | |
.loc 1 0 3 | |
movq -64(%rbp), %rcx | |
movq -112(%rbp), %rax | |
movq -104(%rbp), %rdx | |
.loc 1 4 3 | |
# -256(%rbp) = loop 1 bound minus loop 1 counter (elements still to cover).
subq -56(%rbp), %rax | |
movq %rcx, -72(%rbp) | |
movq -136(%rbp), %rcx | |
movq %rax, -256(%rbp) | |
jmp .LBB3_4 | |
.p2align 4, 0x90 | |
# Loop 2 latch: counter %rdx += -192(%rbp), remaining %rcx -= -192(%rbp),
# destination pointer -72(%rbp) += -200(%rbp); %rbx is reloaded with the
# bound from -208(%rbp).
.LBB3_20: | |
.loc 1 0 3 | |
movq -72(%rbp), %rsi | |
movq -192(%rbp), %rax | |
movq -128(%rbp), %rdx | |
movq -224(%rbp), %rcx | |
movq -208(%rbp), %rbx | |
.loc 1 4 3 | |
addq -200(%rbp), %rsi | |
addq %rax, %rdx | |
subq %rax, %rcx | |
movq %rsi, -72(%rbp) | |
cmpq %rbx, %rdx | |
jge .LBB3_21 | |
# Loop 2 body: -264(%rbp) = min(%rcx, 64); -296(%rbp) = bound - counter;
# %rdx = min(bound - counter, 64). Loop 3 is skipped when the 32-bit value
# from 0(%rdx-record), saved in -216(%rbp), is above 8 (unsigned).
.LBB3_4: | |
cmpq $64, %rcx | |
movl $64, %eax | |
movq %rdx, -128(%rbp) | |
movq %rcx, -224(%rbp) | |
cmovlq %rcx, %rax | |
subq %rdx, %rbx | |
movl $64, %edx | |
cmpq $64, %rbx | |
movq %rax, -264(%rbp) | |
movq %rbx, -296(%rbp) | |
cmovlq %rbx, %rdx | |
cmpl $8, -216(%rbp) | |
ja .LBB3_20 | |
.loc 1 0 3 | |
# Divide the loop 2 counter by 16 with the sign handled through an XOR mask
# (x ^ sign, arithmetic shift, ^ sign), add -120(%rbp), scale by 552960 and
# add the source base: -232(%rbp) = source pointer for this loop 2 iteration.
movq -128(%rbp), %rsi | |
movq %rsi, %rax | |
sarq $63, %rax | |
movq %rax, %rcx | |
xorq %rsi, %rcx | |
leaq 15(%rcx), %rsi | |
testq %rcx, %rcx | |
cmovnsq %rcx, %rsi | |
movq -176(%rbp), %rcx | |
sarq $4, %rsi | |
xorq %rax, %rsi | |
.loc 1 4 3 | |
addq -120(%rbp), %rsi | |
movq %rcx, -80(%rbp) | |
imulq $552960, %rsi, %rax | |
addq -184(%rbp), %rax | |
movq -72(%rbp), %rsi | |
movq %rax, -232(%rbp) | |
movq %rsi, -88(%rbp) | |
jmp .LBB3_6 | |
.p2align 4, 0x90 | |
# Loop 3 latch: counter -80(%rbp) += -248(%rbp), destination pointer
# -88(%rbp) += -240(%rbp); runs while the counter is below 8640.
.LBB3_19: | |
.loc 1 0 3 | |
movq -80(%rbp), %rcx | |
movq -88(%rbp), %rax | |
.loc 1 4 3 | |
addq -248(%rbp), %rcx | |
addq -240(%rbp), %rax | |
movq %rax, -88(%rbp) | |
movq %rcx, -80(%rbp) | |
cmpq $8640, %rcx | |
jge .LBB3_20 | |
# Loop 3 body: nothing to copy when -256(%rbp) <= 0.
.LBB3_6: | |
.loc 1 0 3 | |
cmpq $0, -256(%rbp) | |
.loc 1 4 3 | |
jle .LBB3_19 | |
.loc 1 0 3 | |
# Same XOR-mask division by 16 applied to the loop 3 counter (this time with
# a logical shift), scaled by 1024 bytes and added to -232(%rbp):
# -48(%rbp) = source pointer, -96(%rbp) = destination pointer, %rsi = 0.
movq -80(%rbp), %rsi | |
movq %rsi, %rax | |
sarq $63, %rax | |
movq %rax, %rcx | |
xorq %rsi, %rcx | |
leaq 15(%rcx), %rsi | |
testq %rcx, %rcx | |
cmovnsq %rcx, %rsi | |
shrq $4, %rsi | |
xorq %rax, %rsi | |
movq -88(%rbp), %rax | |
.loc 1 4 3 | |
shlq $10, %rsi | |
addq -232(%rbp), %rsi | |
movq %rax, -96(%rbp) | |
movq %rsi, -48(%rbp) | |
xorl %esi, %esi | |
jmp .LBB3_8 | |
.p2align 4, 0x90 | |
# Loop 4 latch: %rsi counts up to -288(%rbp); destination += -280(%rbp),
# source += -272(%rbp).
.LBB3_18: | |
.loc 1 0 3 | |
movq -96(%rbp), %rax | |
movq -48(%rbp), %rcx | |
.loc 1 4 3 | |
incq %rsi | |
addq -280(%rbp), %rax | |
addq -272(%rbp), %rcx | |
movq %rax, -96(%rbp) | |
movq %rcx, -48(%rbp) | |
cmpq -288(%rbp), %rsi | |
je .LBB3_19 | |
# Loop 4 body: skipped when -296(%rbp) <= 0.
.LBB3_8: | |
.loc 1 0 3 | |
cmpq $0, -296(%rbp) | |
.loc 1 4 3 | |
jle .LBB3_18 | |
.loc 1 0 3 | |
# %rbx = source pointer, %r12 = destination pointer, %r9 = rows remaining
# (from -264(%rbp)), %r13 = row offset starting at 0.
movq -48(%rbp), %rbx | |
movq -96(%rbp), %r12 | |
movq -264(%rbp), %r9 | |
xorl %r13d, %r13d | |
jmp .LBB3_10 | |
.p2align 4, 0x90 | |
# Loop 5 latch: 16 rows per step; both pointers advance by 552960 bytes and
# the loop runs while %r13 < %rdx.
.LBB3_17: | |
.loc 1 4 3 | |
addq $16, %r13 | |
addq $-16, %r9 | |
addq $552960, %r12 | |
addq $552960, %rbx | |
cmpq %rdx, %r13 | |
jge .LBB3_18 | |
# Loop 5 body: %r10 = min(%r9, 16), forced to at least 1 = rows copied per
# 16-column group below.
.LBB3_10: | |
cmpq $16, %r9 | |
movl $16, %r10d | |
movl $1, %eax | |
movq %rbx, %r14 | |
movq %r12, %rcx | |
cmovlq %r9, %r10 | |
cmpq $2, %r10 | |
cmovlq %rax, %r10 | |
xorl %eax, %eax | |
jmp .LBB3_11 | |
.p2align 4, 0x90 | |
# Loop 6 latch: %rax takes the values 0, 16, ..., 944 (60 iterations);
# destination += 64 bytes, source += 1024 bytes per iteration.
.LBB3_16: | |
leaq 16(%rax), %rdi | |
addq $64, %rcx | |
addq $1024, %r14 | |
cmpq $944, %rax | |
movq %rdi, %rax | |
jae .LBB3_17 | |
.LBB3_11: | |
.loc 1 0 3 | |
cmpq %r13, %rdx | |
.loc 1 4 3 | |
jle .LBB3_16 | |
.loc 1 0 3 | |
movq %r14, %rdi | |
movq %rcx, %r15 | |
xorl %r11d, %r11d | |
.p2align 4, 0x90 | |
# Loop 7: one row per iteration, %r10 rows in total; destination rows are
# 34560 bytes apart, source rows 64 bytes apart.
.LBB3_13: | |
xorl %r8d, %r8d | |
.p2align 4, 0x90 | |
# Loop 8: copy 16 dwords from (%rdi) to (%r15), one scalar at a time.
.LBB3_14: | |
.loc 1 4 3 | |
vmovss (%rdi,%r8,4), %xmm0 | |
vmovss %xmm0, (%r15,%r8,4) | |
incq %r8 | |
cmpq $16, %r8 | |
jne .LBB3_14 | |
incq %r11 | |
addq $34560, %r15 | |
addq $64, %rdi | |
cmpq %r10, %r11 | |
jne .LBB3_13 | |
jmp .LBB3_16 | |
# Common exit: return 0 and restore callee-saved registers.
.LBB3_22: | |
xorl %eax, %eax | |
.loc 1 4 3 epilogue_begin | |
addq $128, %rsp | |
popq %rbx | |
popq %r12 | |
popq %r13 | |
popq %r14 | |
popq %r15 | |
popq %rbp | |
.cfi_def_cfa %rsp, 8 | |
retq | |
.Ltmp7: | |
.Lfunc_end3: | |
.size turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32, .Lfunc_end3-turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32 | |
.cfi_endproc | |
# ---------------------------------------------------------------------------
# iree_hal_executable_library_query
#   In:   %edi - 32-bit selector
#   Out:  %rax - address of iree_hal_executable_library_query_v0 when the
#                selector equals 4, otherwise 0
#   Clobbers: %rcx, flags
# ---------------------------------------------------------------------------
        .section        .text.iree_hal_executable_library_query,"ax",@progbits
        .globl  iree_hal_executable_library_query
        .p2align        4, 0x90
        .type   iree_hal_executable_library_query,@function
iree_hal_executable_library_query:
.Liree_hal_executable_library_query$local:
        .type   .Liree_hal_executable_library_query$local,@function
.Lfunc_begin4:
        .cfi_startproc
        # Materialise the candidate result first; leaq leaves the flags alone.
        leaq    iree_hal_executable_library_query_v0(%rip), %rcx
        # Default result is 0; select the table only for selector == 4.
        xorl    %eax, %eax
        cmpl    $4, %edi
        cmoveq  %rcx, %rax
        retq
.Lfunc_end4:
        .size   iree_hal_executable_library_query, .Lfunc_end4-iree_hal_executable_library_query
        .size   .Liree_hal_executable_library_query$local, .Lfunc_end4-iree_hal_executable_library_query
        .cfi_endproc
# ---------------------------------------------------------------------------
# iree_h2f_ieee: widen an IEEE binary16 bit pattern to a binary32 value.
#   In:   %edi  - half-precision bits in the low 16 bits
#   Out:  %xmm0 - single-precision result
#   Clobbers: %eax, %ecx, %edx, %edi, %xmm1 (zero-exponent path), flags
#
# Field layout handled below: sign = bit 15, exponent = bits 10..14 (mask
# 31744), mantissa = bits 0..9 (mask 1023).
# ---------------------------------------------------------------------------
        .section        .text.iree_h2f_ieee,"ax",@progbits
        .p2align        4, 0x90
        .type   iree_h2f_ieee,@function
iree_h2f_ieee:
.Lfunc_begin5:
        .cfi_startproc
        # %eax = sign bit, moved from bit 15 up to bit 31.
        movl    %edi, %eax
        andl    $32768, %eax
        shll    $16, %eax
        # %ecx = mantissa field.
        movl    %edi, %ecx
        andl    $1023, %ecx
        # %dx = exponent field, still in place; ZF is set when it is zero.
        movl    %edi, %edx
        andw    $31744, %dx
        je      .Lh2f_zero_exponent
        andl    $31744, %edi
        cmpl    $31744, %edi
        jne     .Lh2f_normal
        # Exponent is all ones: infinity when the mantissa is zero, else NaN.
        testw   %cx, %cx
        je      .Lh2f_infinity
        # NaN: sign | 0x7FC00000.
        orl     $2143289344, %eax
        vmovd   %eax, %xmm0
        retq
.Lh2f_zero_exponent:
        # Zero or subnormal: convert the mantissa to float and scale it by the
        # constant 0x33800000 (2^-24) carrying the sign bit.
        movzwl  %cx, %ecx
        orl     $864026624, %eax
        vcvtsi2ss       %ecx, %xmm0, %xmm0
        vmovd   %eax, %xmm1
        vmulss  %xmm1, %xmm0, %xmm0
        retq
.Lh2f_normal:
        # Normal: shift exponent+mantissa left by 13 and rebias the exponent by
        # adding 0x38000000 (112 << 23); the sign in %eax is added in as well.
        movzwl  %cx, %ecx
        movzwl  %dx, %edx
        addl    %ecx, %edx
        shll    $13, %edx
        leal    939524096(%rdx,%rax), %eax
        vmovd   %eax, %xmm0
        retq
.Lh2f_infinity:
        # Infinity: sign | 0x7F800000.
        orl     $2139095040, %eax
        vmovd   %eax, %xmm0
        retq
.Lfunc_end5:
        .size   iree_h2f_ieee, .Lfunc_end5-iree_h2f_ieee
        .cfi_endproc
# ---------------------------------------------------------------------------
# iree_f2h_ieee: narrow an IEEE binary32 value to a binary16 bit pattern.
#   In:   %xmm0 - single-precision value
#   Out:  %eax  - half-precision bits in the low 16 bits
#   Clobbers: %ecx, %edx, %esi, %edi, %r8d, flags
#
# Working registers after the common prefix:
#   %edi = raw float bits          %esi = biased exponent (bits 23..30)
#   %eax = 23-bit mantissa         %ecx = sign (0 or 1)
#   %edx = exponent - 112 (the half-precision biased exponent)
#
# Paths:
#   exponent 113..141  fast path, result is a normal half
#   bits == 0          returns 0
#   exponent < 102     returns 0 (the sign is not kept)
#   exponent 102..112  result is a subnormal half
#   exponent 255       infinity / NaN
#   exponent 142..254  rounds, then saturates to infinity when the resulting
#                      half exponent would be 31 or more
#
# Fix relative to the generated code: the fast path used to OR the rounded
# mantissa into the exponent field, so a rounding carry into bit 10 vanished
# whenever the half exponent was odd (e.g. 0x1.ffep-14f produced 0x0400
# instead of 0x0800). The exponent is now combined with ADD so the carry
# increments it. The sum stays below bit 15 because the largest exponent on
# this path is 29.
# ---------------------------------------------------------------------------
        .section        .text.iree_f2h_ieee,"ax",@progbits
        .p2align        4, 0x90
        .type   iree_f2h_ieee,@function
iree_f2h_ieee:
.Lfunc_begin6:
        .cfi_startproc
        vmovd   %xmm0, %edi
        # BEXTR control 2071 = 0x0817: start bit 23, length 8 -> exponent.
        movl    $2071, %edx
        bextrl  %edx, %edi, %esi
        movl    %edi, %eax
        movl    %edi, %ecx
        shrl    $31, %ecx
        andl    $8388607, %eax
        leal    -112(%rsi), %edx
        # Unsigned range check: (exponent - 113) <= 28  <=>  113 <= exp <= 141.
        leal    -113(%rsi), %r8d
        cmpl    $28, %r8d
        ja      .Lf2h_slow
        # Fast path: round by adding half of the 13 dropped bits, then pack.
        addl    $4096, %eax
        shll    $15, %ecx
        shll    $10, %edx
        shrl    $13, %eax
        orl     %ecx, %eax
        # ADD, not OR: a mantissa rounding carry (bit 10) must bump the exponent.
        addl    %edx, %eax
        retq
.Lf2h_slow:
        testl   %edi, %edi
        je      .Lf2h_zero
        cmpl    $112, %esi
        ja      .Lf2h_large
        cmpl    $102, %esi
        jae     .Lf2h_subnormal
.Lf2h_zero:
        xorl    %eax, %eax
        retq
.Lf2h_large:
        # %edx == 143 means the float exponent is 255 (infinity or NaN).
        cmpl    $143, %edx
        jne     .Lf2h_round_large
        shll    $15, %ecx
        testl   %eax, %eax
        je      .Lf2h_infinity
        # NaN: keep the top mantissa bits and force the exponent to all ones.
        shrl    $13, %eax
        orl     %eax, %ecx
        orl     $31744, %ecx
        movl    %ecx, %eax
        retq
.Lf2h_subnormal:
        # Restore the implicit leading one, shift right by (113 - exponent),
        # round on bit 12 and drop the low 13 bits; the exponent field stays 0.
        movb    $113, %dl
        orl     $8388608, %eax
        shll    $15, %ecx
        subb    %sil, %dl
        shrxl   %edx, %eax, %eax
        movl    %eax, %edx
        andl    $4096, %edx
        leal    (%rax,%rdx,2), %eax
        shrl    $13, %eax
        orl     %ecx, %eax
        retq
.Lf2h_round_large:
        # Exponent 142..254: round up only when bit 12 of the input is set.
        testl   $4096, %edi
        je      .Lf2h_pack_large
        # Mantissa below 0x7FE000: add 0x2000 and keep the exponent.
        # Otherwise the mantissa wraps to 0 and the exponent grows by one.
        leal    8192(%rax), %edi
        addl    $-111, %esi
        xorl    %r8d, %r8d
        cmpl    $8380416, %eax
        cmovbl  %edx, %esi
        cmovbl  %edi, %r8d
        movl    %r8d, %eax
        movl    %esi, %edx
.Lf2h_pack_large:
        shll    $15, %ecx
        cmpl    $31, %edx
        jb      .Lf2h_pack_finite
.Lf2h_infinity:
        # sign | 0x7C00.
        orl     $31744, %ecx
        movl    %ecx, %eax
        retq
.Lf2h_pack_finite:
        shrl    $13, %eax
        shll    $10, %edx
        orl     %ecx, %eax
        orl     %edx, %eax
        retq
.Lfunc_end6:
        .size   iree_f2h_ieee, .Lfunc_end6-iree_f2h_ieee
        .cfi_endproc
# ---------------------------------------------------------------------------
# __gnu_h2f_ieee: half-to-float conversion entry point.
#   In:   %edi  - half-precision bits in the low 16 bits
#   Out:  %xmm0 - single-precision result
#
# The previous body was an instruction-for-instruction copy of iree_h2f_ieee.
# It is now a tail call to that routine, mirroring how __gnu_f2h_ieee
# forwards to iree_f2h_ieee. Arguments and return value pass through
# untouched, so callers observe the same result.
# ---------------------------------------------------------------------------
        .section        .text.__gnu_h2f_ieee,"ax",@progbits
        .p2align        4, 0x90
        .type   __gnu_h2f_ieee,@function
__gnu_h2f_ieee:
.Lfunc_begin7:
        .cfi_startproc
        jmp     iree_h2f_ieee
.Lfunc_end7:
        .size   __gnu_h2f_ieee, .Lfunc_end7-__gnu_h2f_ieee
        .cfi_endproc
# ---------------------------------------------------------------------------
# __extendhfsf2: widen a half-precision value to single precision.
#   In:   %xmm0 - half-precision bits in the low 16 bits
#   Out:  %xmm0 - single-precision result
#   Clobbers: %eax, %ecx, %edx, %esi, %xmm1 (zero-exponent path), flags
# ---------------------------------------------------------------------------
        .section        .text.__extendhfsf2,"ax",@progbits
        .p2align        4, 0x90
        .type   __extendhfsf2,@function
__extendhfsf2:
.Lfunc_begin8:
        .cfi_startproc
        vmovd   %xmm0, %ecx
        # %eax = sign bit, moved from bit 15 up to bit 31.
        movl    %ecx, %eax
        shll    $16, %eax
        andl    $-2147483648, %eax
        # %edx = 10-bit mantissa field.
        movl    %ecx, %edx
        andl    $1023, %edx
        # %esi = exponent field (bits 10..14); ZF is set when it is zero.
        movl    %ecx, %esi
        andl    $31744, %esi
        je      .Lext_zero_exponent
        cmpl    $31744, %esi
        jne     .Lext_normal
        # Exponent is all ones: infinity when the mantissa is zero, else NaN.
        testw   %dx, %dx
        je      .Lext_infinity
        # NaN: sign | 0x7FC00000.
        orl     $2143289344, %eax
        vmovd   %eax, %xmm0
        retq
.Lext_zero_exponent:
        # Zero or subnormal: mantissa as float times the constant 0x33800000
        # (2^-24) carrying the sign bit.
        movzwl  %dx, %ecx
        orl     $864026624, %eax
        vcvtsi2ss       %ecx, %xmm1, %xmm0
        vmovd   %eax, %xmm1
        vmulss  %xmm1, %xmm0, %xmm0
        retq
.Lext_normal:
        # Normal: drop the sign, shift exponent+mantissa left by 13 and rebias
        # by adding 0x38000000 (112 << 23) together with the sign in %eax.
        andl    $32767, %ecx
        shll    $13, %ecx
        leal    939524096(%rcx,%rax), %eax
        vmovd   %eax, %xmm0
        retq
.Lext_infinity:
        # Infinity: sign | 0x7F800000.
        orl     $2139095040, %eax
        vmovd   %eax, %xmm0
        retq
.Lfunc_end8:
        .size   __extendhfsf2, .Lfunc_end8-__extendhfsf2
        .cfi_endproc
# ---------------------------------------------------------------------------
# __gnu_f2h_ieee: thin alias for iree_f2h_ieee.
# Tail-jumps to iree_f2h_ieee without touching any register or the stack, so
# the argument in %xmm0 and the result in %eax are exactly those of
# iree_f2h_ieee.
# ---------------------------------------------------------------------------
.section .text.__gnu_f2h_ieee,"ax",@progbits | |
.p2align 4, 0x90 | |
.type __gnu_f2h_ieee,@function | |
__gnu_f2h_ieee: | |
.Lfunc_begin9: | |
.cfi_startproc | |
jmp iree_f2h_ieee | |
.Lfunc_end9: | |
.size __gnu_f2h_ieee, .Lfunc_end9-__gnu_f2h_ieee | |
.cfi_endproc | |
# ---------------------------------------------------------------------------
# __truncsfhf2: narrow the float in %xmm0 to half precision by calling
# iree_f2h_ieee and move the 16-bit result from %ax into %xmm0.
#   In:   %xmm0 - single-precision value
#   Out:  %xmm0 - half-precision bits in the low 16 bits
# ---------------------------------------------------------------------------
.section .text.__truncsfhf2,"ax",@progbits | |
.p2align 4, 0x90 | |
.type __truncsfhf2,@function | |
__truncsfhf2: | |
.Lfunc_begin10: | |
.cfi_startproc | |
# The push reserves one 8-byte stack slot; %rax's value is not needed, the
# pop at the end only releases the slot again.
pushq %rax | |
.cfi_def_cfa_offset 16 | |
callq iree_f2h_ieee | |
# Bounce the result through the stack slot to get it into a vector register.
# The store writes bytes 4..5 of the slot, the load reads bytes 4..7, so bits
# 16..31 of %xmm0 hold whatever the push left in bytes 6..7 of the slot.
movw %ax, 4(%rsp) | |
vmovss 4(%rsp), %xmm0 | |
popq %rax | |
.cfi_def_cfa_offset 8 | |
retq | |
.Lfunc_end10: | |
.size __truncsfhf2, .Lfunc_end10-__truncsfhf2 | |
.cfi_endproc | |
# ---------------------------------------------------------------------------
# ceilf: round a single-precision value up to the nearest integral value.
#   In:   %xmm0 - x
#   Out:  %xmm0 - ceil(x)
#   Clobbers: %eax, %ecx, %edx, %esi, %edi, %k1, %xmm1, flags
#   Stack: writes two scratch dwords at -8(%rsp) and -4(%rsp) without
#          adjusting %rsp; the function makes no calls.
#
# Constants: .LCPI11_0 = 0x7b800000 (2^120), .LCPI11_1 = 0x80000000 (-0.0),
#            .LCPI11_2 = 0x3f800000 (1.0).
# NOTE(review): the x + 2^120 sums are only stored to the scratch slots and
# never read back; presumably they exist to raise the inexact exception --
# confirm against the C source this was compiled from.
# ---------------------------------------------------------------------------
        .section        .rodata.cst4,"aM",@progbits,4
        .p2align        2, 0x0
.LCPI11_0:
        .long   0x7b800000
.LCPI11_1:
        .long   0x80000000
.LCPI11_2:
        .long   0x3f800000
        .section        .text.ceilf,"ax",@progbits
        .p2align        4, 0x90
        .type   ceilf,@function
ceilf:
.Lfunc_begin11:
        .cfi_startproc
        vmovd   %xmm0, %eax
        # BEXTR control 2071 = 0x0817: start bit 23, length 8 -> exponent.
        movl    $2071, %ecx
        bextrl  %ecx, %eax, %ecx
        # Exponent above 149: x is returned unchanged.
        cmpl    $149, %ecx
        ja      .Lceilf_done
        # Exponent below 127: |x| < 1, handled separately.
        cmpl    $127, %ecx
        jb      .Lceilf_below_one
        # %ecx = unbiased exponent, %edx = mask of the fractional mantissa bits.
        addl    $-127, %ecx
        movl    $8388607, %edx
        shrxl   %ecx, %edx, %edx
        # No fractional bits set: x is already integral.
        testl   %eax, %edx
        je      .Lceilf_done
        vaddss  .LCPI11_0(%rip), %xmm0, %xmm0
        vmovss  %xmm0, -8(%rsp)
        # Positive x: add the fraction mask so the truncation below rounds up.
        # Negative x: add 0, so the fraction bits are simply cleared.
        xorl    %esi, %esi
        movl    $-8388608, %edi
        testl   %eax, %eax
        cmovsl  %esi, %edx
        sarxl   %ecx, %edi, %ecx
        addl    %eax, %edx
        andl    %ecx, %edx
        vmovd   %edx, %xmm0
        retq
.Lceilf_below_one:
        vaddss  .LCPI11_0(%rip), %xmm0, %xmm1
        vmovss  %xmm1, -4(%rsp)
        testl   %eax, %eax
        js      .Lceilf_negative_fraction
        # Non-negative: the result is 1.0 unless the bits are all zero, in
        # which case the mask in %k1 selects x itself.
        vmovss  .LCPI11_2(%rip), %xmm1
        sete    %al
        kmovd   %eax, %k1
        vmovss  %xmm0, %xmm1, %xmm1 {%k1}
        vmovaps %xmm1, %xmm0
.Lceilf_done:
        retq
.Lceilf_negative_fraction:
        # Sign bit set and |x| < 1: the result is -0.0.
        vmovss  .LCPI11_1(%rip), %xmm0
        retq
.Lfunc_end11:
        .size   ceilf, .Lfunc_end11-ceilf
        .cfi_endproc
# Literal pool for expf: single-precision thresholds and scale factors.
.section .rodata.cst4,"aM",@progbits,4 | |
.p2align 2, 0x0 | |
# 0x42b17217 ~= 88.7228 (upper threshold)
.LCPI12_0: | |
.long 0x42b17217 | |
# 0xc2cff1b4 ~= -103.972 (lower threshold)
.LCPI12_1: | |
.long 0xc2cff1b4 | |
# 0x10000000 = 2^-95
.LCPI12_2: | |
.long 0x10000000 | |
# 0x70000000 = 2^97
.LCPI12_3: | |
.long 0x70000000 | |
# Double-precision constants for the main path of expf.
.section .rodata.cst8,"aM",@progbits,8 | |
.p2align 3, 0x0 | |
# ~46.166 (= 32/ln 2): scales x so that one unit is 1/32 of a power of two.
.LCPI12_4: | |
.quad 0x40471547652b82fe | |
# 1.5 * 2^52: adding it leaves the rounded integer in the low mantissa bits.
.LCPI12_5: | |
.quad 0x4338000000000000 | |
# -1.5 * 2^52
.LCPI12_6: | |
.quad 0xc338000000000000 | |
# Polynomial coefficients (cubic, quadratic, linear term) ...
.LCPI12_7: | |
.quad 0x3ebc6af84b912394 | |
.LCPI12_8: | |
.quad 0x3f2ebfce50fac4f3 | |
.LCPI12_9: | |
.quad 0x3f962e42ff0c52d6 | |
# ... and the constant term 1.0.
.LCPI12_10: | |
.quad 0x3ff0000000000000 | |
# expf: single-precision exponential evaluated in double precision with a
# 32-entry table (__exp2f_data) and a cubic polynomial.
# In:  %xmm0 = x
# Out: %xmm0 = result
# Uses %rax, %rcx, %rdx, %xmm1-%xmm3 and flags as scratch, and stores below %rsp
# without adjusting it (presumably relying on the SysV red zone).
.section .text.expf,"ax",@progbits | |
.p2align 4, 0x90 | |
.type expf,@function | |
expf: | |
.Lfunc_begin12: | |
.cfi_startproc | |
# ecx = raw bits of x.
vmovd %xmm0, %ecx | |
# bextr control 2836 = 0x0B14: start bit 20, length 11 -> eax = top bits of |x|.
movl $2836, %eax | |
bextrl %eax, %ecx, %eax | |
# 1067 = 0x42b, the top bits of 88.0: |x| >= 88 (or Inf/NaN) needs range checks.
cmpl $1067, %eax | |
jae .LBB12_1 | |
# Main path.
.LBB12_8: | |
# z = (double)x * 32/ln 2
vcvtss2sd %xmm0, %xmm0, %xmm0 | |
vmulsd .LCPI12_4(%rip), %xmm0, %xmm0 | |
leaq __exp2f_data(%rip), %rdx | |
# kd = z + 1.5*2^52; rax = raw bits of kd, whose low bits hold round(z).
vaddsd .LCPI12_5(%rip), %xmm0, %xmm1 | |
vmovq %xmm1, %rax | |
# xmm1 = kd - 1.5*2^52 = z rounded to an integer.
vaddsd .LCPI12_6(%rip), %xmm1, %xmm1 | |
# Table index = low 5 bits of the integer.
movl %eax, %ecx | |
andl $31, %ecx | |
# Shifting by 47 and adding to the table entry applies the remaining integer
# bits to the exponent field of the scale factor (52 - 5 = 47).
shlq $47, %rax | |
addq (%rdx,%rcx,8), %rax | |
# r = z - round(z)
vsubsd %xmm1, %xmm0, %xmm0 | |
# Polynomial: (c0*r + c1)*r^2 + (c2*r + 1), then multiplied by the scale s.
vmulsd .LCPI12_7(%rip), %xmm0, %xmm2 | |
# xmm1 = s, reinterpreting the integer in rax as a double.
vmovq %rax, %xmm1 | |
vmulsd %xmm0, %xmm0, %xmm3 | |
vaddsd .LCPI12_8(%rip), %xmm2, %xmm2 | |
vmulsd .LCPI12_9(%rip), %xmm0, %xmm0 | |
vaddsd .LCPI12_10(%rip), %xmm0, %xmm0 | |
vmulsd %xmm2, %xmm3, %xmm2 | |
vaddsd %xmm2, %xmm0, %xmm0 | |
vmulsd %xmm1, %xmm0, %xmm0 | |
vcvtsd2ss %xmm0, %xmm0, %xmm1 | |
# Common exit: result is in xmm1.
.LBB12_9: | |
vmovaps %xmm1, %xmm0 | |
retq | |
# |x| >= 88, Inf or NaN.
.LBB12_1: | |
vxorps %xmm1, %xmm1, %xmm1 | |
# Bits 0xff800000 (-Inf): return 0.0 via the common exit.
cmpl $-8388608, %ecx | |
je .LBB12_9 | |
# 2040 = 0x7f8: exponent field all ones (remaining Inf/NaN) -> return x + x.
cmpl $2040, %eax | |
jae .LBB12_3 | |
# x <= ~88.72: go check the lower bound.
vucomiss .LCPI12_0(%rip), %xmm0 | |
jbe .LBB12_6 | |
# Overflow: 2^97 (bits 0x70000000, bounced through memory) times 2^97.
movl $1879048192, -8(%rsp) | |
vmovss -8(%rsp), %xmm0 | |
vmulss .LCPI12_3(%rip), %xmm0, %xmm0 | |
retq | |
.LBB12_3: | |
vaddss %xmm0, %xmm0, %xmm0 | |
retq | |
.LBB12_6: | |
# Lower threshold <= x: the main path can handle it.
vmovss .LCPI12_1(%rip), %xmm1 | |
vucomiss %xmm0, %xmm1 | |
jbe .LBB12_8 | |
# Underflow: 2^-95 (bits 0x10000000, bounced through memory) times 2^-95.
movl $268435456, -4(%rsp) | |
vmovss -4(%rsp), %xmm0 | |
vmulss .LCPI12_2(%rip), %xmm0, %xmm0 | |
retq | |
.Lfunc_end12: | |
.size expf, .Lfunc_end12-expf | |
.cfi_endproc | |
# Two-entry float table for __math_oflowf: index 0 = -2^97, index 1 = +2^97.
.section .rodata.cst8,"aM",@progbits,8 | |
.p2align 2, 0x0 | |
.LCPI13_0: | |
.long 0xf0000000 | |
.long 0x70000000 | |
# 0x70000000 = 2^97, the second factor of the product.
.section .rodata.cst4,"aM",@progbits,4 | |
.p2align 2, 0x0 | |
.LCPI13_1: | |
.long 0x70000000 | |
# __math_oflowf: produce a float overflow by multiplying +/-2^97 by 2^97
# (magnitude 2^194, beyond the single-precision range).
# In:  %edi = sign selector (zero -> positive operand, non-zero -> negative)
# Out: %xmm0 = product
# Uses %eax, %rcx and flags as scratch, plus one 4-byte store below %rsp.
.section .text.__math_oflowf,"ax",@progbits | |
.p2align 4, 0x90 | |
.type __math_oflowf,@function | |
__math_oflowf: | |
.Lfunc_begin13: | |
.cfi_startproc | |
# eax = (edi == 0) ? 1 : 0, used as the table index. leaq does not alter flags.
xorl %eax, %eax | |
testl %edi, %edi | |
leaq .LCPI13_0(%rip), %rcx | |
sete %al | |
vmovss (%rcx,%rax,4), %xmm0 | |
# The operand is bounced through memory before the multiply.
vmovss %xmm0, -4(%rsp) | |
vmovss -4(%rsp), %xmm0 | |
vmulss .LCPI13_1(%rip), %xmm0, %xmm0 | |
retq | |
.Lfunc_end13: | |
.size __math_oflowf, .Lfunc_end13-__math_oflowf | |
.cfi_endproc | |
# Two-entry float table for __math_uflowf: index 0 = -2^-95, index 1 = +2^-95.
.section .rodata.cst8,"aM",@progbits,8 | |
.p2align 2, 0x0 | |
.LCPI14_0: | |
.long 0x90000000 | |
.long 0x10000000 | |
# 0x10000000 = 2^-95, the second factor of the product.
.section .rodata.cst4,"aM",@progbits,4 | |
.p2align 2, 0x0 | |
.LCPI14_1: | |
.long 0x10000000 | |
# __math_uflowf: produce a float underflow by multiplying +/-2^-95 by 2^-95
# (magnitude 2^-190, below the smallest single-precision subnormal).
# In:  %edi = sign selector (zero -> positive operand, non-zero -> negative)
# Out: %xmm0 = product
# Uses %eax, %rcx and flags as scratch, plus one 4-byte store below %rsp.
.section .text.__math_uflowf,"ax",@progbits | |
.p2align 4, 0x90 | |
.type __math_uflowf,@function | |
__math_uflowf: | |
.Lfunc_begin14: | |
.cfi_startproc | |
# eax = (edi == 0) ? 1 : 0, used as the table index. leaq does not alter flags.
xorl %eax, %eax | |
testl %edi, %edi | |
leaq .LCPI14_0(%rip), %rcx | |
sete %al | |
vmovss (%rcx,%rax,4), %xmm0 | |
# The operand is bounced through memory before the multiply.
vmovss %xmm0, -4(%rsp) | |
vmovss -4(%rsp), %xmm0 | |
vmulss .LCPI14_1(%rip), %xmm0, %xmm0 | |
retq | |
.Lfunc_end14: | |
.size __math_uflowf, .Lfunc_end14-__math_uflowf | |
.cfi_endproc | |
# 0x80000000: single-precision sign-bit mask used by __math_xflowf.
.section .rodata.cst4,"aM",@progbits,4 | |
.p2align 2, 0x0 | |
.LCPI15_0: | |
.long 0x80000000 | |
# __math_xflowf: return (sign ? -y : y) * y.
# In:  %edi = sign selector, %xmm0 = y
# Out: %xmm0 = product
# Uses %al, %k1, %xmm1 and flags as scratch, plus one 4-byte store below %rsp.
.section .text.__math_xflowf,"ax",@progbits | |
.p2align 4, 0x90 | |
.type __math_xflowf,@function | |
__math_xflowf: | |
.Lfunc_begin15: | |
.cfi_startproc | |
# xmm1 = -y (sign bit flipped with a broadcast mask).
vxorps .LCPI15_0(%rip){1to4}, %xmm0, %xmm1 | |
testl %edi, %edi | |
# k1 bit 0 = (edi == 0). Only %al is written, and only bit 0 of the mask
# affects the scalar move below.
sete %al | |
kmovd %eax, %k1 | |
# If edi == 0 the low lane becomes +y, otherwise it stays -y.
vmovss %xmm0, %xmm1, %xmm1 {%k1} | |
# The first factor is bounced through memory before the multiply.
vmovss %xmm1, -4(%rsp) | |
vmulss -4(%rsp), %xmm0, %xmm0 | |
retq | |
.Lfunc_end15: | |
.size __math_xflowf, .Lfunc_end15-__math_xflowf | |
.cfi_endproc | |
# feclearexcept: stub that ignores its arguments and returns 0 in %eax.
# No floating-point state is read or written here.
.section .text.feclearexcept,"ax",@progbits | |
.p2align 4, 0x90 | |
.type feclearexcept,@function | |
feclearexcept: | |
.Lfunc_begin16: | |
.cfi_startproc | |
xorl %eax, %eax | |
retq | |
.Lfunc_end16: | |
.size feclearexcept, .Lfunc_end16-feclearexcept | |
.cfi_endproc | |
# feraiseexcept: stub that ignores its arguments and returns 0 in %eax.
# No floating-point exception is actually raised here.
.section .text.feraiseexcept,"ax",@progbits | |
.p2align 4, 0x90 | |
.type feraiseexcept,@function | |
feraiseexcept: | |
.Lfunc_begin17: | |
.cfi_startproc | |
xorl %eax, %eax | |
retq | |
.Lfunc_end17: | |
.size feraiseexcept, .Lfunc_end17-feraiseexcept | |
.cfi_endproc | |
# fetestexcept: stub that ignores its arguments and always returns 0 in %eax,
# i.e. it never reports a pending exception.
.section .text.fetestexcept,"ax",@progbits | |
.p2align 4, 0x90 | |
.type fetestexcept,@function | |
fetestexcept: | |
.Lfunc_begin18: | |
.cfi_startproc | |
xorl %eax, %eax | |
retq | |
.Lfunc_end18: | |
.size fetestexcept, .Lfunc_end18-fetestexcept | |
.cfi_endproc | |
# fegetround: stub that always returns 0 in %eax; the hardware rounding mode is
# not consulted.
.section .text.fegetround,"ax",@progbits | |
.p2align 4, 0x90 | |
.type fegetround,@function | |
fegetround: | |
.Lfunc_begin19: | |
.cfi_startproc | |
xorl %eax, %eax | |
retq | |
.Lfunc_end19: | |
.size fegetround, .Lfunc_end19-fegetround | |
.cfi_endproc | |
# __fesetround: stub that ignores its arguments and returns 0 in %eax; the
# hardware rounding mode is left untouched.
.section .text.__fesetround,"ax",@progbits | |
.p2align 4, 0x90 | |
.type __fesetround,@function | |
__fesetround: | |
.Lfunc_begin20: | |
.cfi_startproc | |
xorl %eax, %eax | |
retq | |
.Lfunc_end20: | |
.size __fesetround, .Lfunc_end20-__fesetround | |
.cfi_endproc | |
# fegetenv: stub that returns 0 in %eax without writing anything through its
# argument.
.section .text.fegetenv,"ax",@progbits | |
.p2align 4, 0x90 | |
.type fegetenv,@function | |
fegetenv: | |
.Lfunc_begin21: | |
.cfi_startproc | |
xorl %eax, %eax | |
retq | |
.Lfunc_end21: | |
.size fegetenv, .Lfunc_end21-fegetenv | |
.cfi_endproc | |
# fesetenv: stub that returns 0 in %eax without reading its argument or
# changing any floating-point state.
.section .text.fesetenv,"ax",@progbits | |
.p2align 4, 0x90 | |
.type fesetenv,@function | |
fesetenv: | |
.Lfunc_begin22: | |
.cfi_startproc | |
xorl %eax, %eax | |
retq | |
.Lfunc_end22: | |
.size fesetenv, .Lfunc_end22-fesetenv | |
.cfi_endproc | |
# Literal pool for floorf (IEEE-754 single precision bit patterns).
.section .rodata.cst4,"aM",@progbits,4 | |
.p2align 2, 0x0 | |
# 0x7b800000 = 2^120
.LCPI23_0: | |
.long 0x7b800000 | |
# 0xbf800000 = -1.0
.LCPI23_1: | |
.long 0xbf800000 | |
# floorf: round a float down to an integral value by editing its bit pattern.
# In:  %xmm0 = x
# Out: %xmm0 = result
# Uses %eax, %ecx, %edx, %esi, %k1, %xmm1 and flags as scratch, and stores below
# %rsp without adjusting it (presumably relying on the SysV red zone).
.section .text.floorf,"ax",@progbits | |
.p2align 4, 0x90 | |
.type floorf,@function | |
floorf: | |
.Lfunc_begin23: | |
.cfi_startproc | |
# eax = raw bits of x; ecx = biased exponent (bextr control 0x0817: bit 23, 8 bits).
vmovd %xmm0, %eax | |
movl $2071, %ecx | |
bextrl %ecx, %eax, %ecx | |
# Exponent above 149 (|x| >= 2^23, Inf or NaN): return x unchanged.
cmpl $149, %ecx | |
jbe .LBB23_1 | |
retq | |
.LBB23_1: | |
# Exponent below 127 (|x| < 1): handled separately.
cmpl $127, %ecx | |
jb .LBB23_4 | |
# ecx = unbiased exponent; edx = mask of the fraction bits below the binary point.
addl $-127, %ecx | |
movl $8388607, %edx | |
shrxl %ecx, %edx, %edx | |
# No fraction bits set: x is already integral, return it unchanged.
testl %eax, %edx | |
je .LBB23_6 | |
# x + 2^120 is computed and later stored but never read back in this function
# (presumably kept only for its floating-point side effects).
vaddss .LCPI23_0(%rip), %xmm0, %xmm0 | |
# ecx = 0xff800000 arithmetically shifted right = complement of the fraction mask.
movl $-8388608, %esi | |
sarxl %ecx, %esi, %ecx | |
# esi = all ones when x is negative, else zero; ANDed with the mask it yields the
# amount to add so that negative values move away from zero before truncation.
movl %eax, %esi | |
sarl $31, %esi | |
andl %edx, %esi | |
addl %eax, %esi | |
# Clear the fraction bits.
andl %ecx, %esi | |
vmovss %xmm0, -8(%rsp) | |
vmovd %esi, %xmm0 | |
retq | |
# |x| < 1.
.LBB23_4: | |
# x + 2^120 is stored and never read back (see the note above).
vaddss .LCPI23_0(%rip), %xmm0, %xmm1 | |
vmovss %xmm1, -4(%rsp) | |
vxorps %xmm1, %xmm1, %xmm1 | |
# vxorps does not touch the integer flags; SF below reflects the sign bit of x.
testl %eax, %eax | |
js .LBB23_7 | |
# Sign bit clear: result is +0.0.
vmovaps %xmm1, %xmm0 | |
.LBB23_6: | |
retq | |
# Sign bit set: result is -1.0, except that a zero input (-0.0) is returned as is.
.LBB23_7: | |
# k1 bit 0 = (x == 0.0)
vcmpeqss %xmm1, %xmm0, %k1 | |
vmovss .LCPI23_1(%rip), %xmm1 | |
vmovss %xmm0, %xmm1, %xmm1 {%k1} | |
vmovaps %xmm1, %xmm0 | |
retq | |
.Lfunc_end23: | |
.size floorf, .Lfunc_end23-floorf | |
.cfi_endproc | |
# fmaf: x*y + z for floats, computed in double precision and then narrowed, with
# a one-bit correction of the double when it sits exactly halfway between two
# floats and is not an exact sum.
# In:  %xmm0 = x, %xmm1 = y, %xmm2 = z
# Out: %xmm0 = result
# Uses %rax, %rcx, %rdx, %k1, %xmm1-%xmm3 and flags as scratch.
.section .text.fmaf,"ax",@progbits | |
.p2align 4, 0x90 | |
.type fmaf,@function | |
fmaf: | |
.Lfunc_begin24: | |
.cfi_startproc | |
vcvtss2sd %xmm0, %xmm0, %xmm0 | |
# rdx = 0x7ff0000000000000, the double-precision exponent mask.
movabsq $9218868437227405312, %rdx | |
vcvtss2sd %xmm1, %xmm1, %xmm1 | |
vcvtss2sd %xmm2, %xmm2, %xmm2 | |
# xmm1 = xy = x*y; xmm0 = sum = xy + z; rax = raw bits of sum.
vmulsd %xmm1, %xmm0, %xmm1 | |
vaddsd %xmm2, %xmm1, %xmm0 | |
vmovq %xmm0, %rax | |
# cl = 1 unless the low 29 bits of sum are exactly 0x10000000 (the halfway
# pattern for the later double-to-float rounding).
movl %eax, %ecx | |
andl $536870911, %ecx | |
cmpl $268435456, %ecx | |
setne %cl | |
# rdx = ~rax & exponent mask; it is zero only when every exponent bit of sum is
# set (Inf or NaN), which sete records in dl.
andnq %rdx, %rax, %rdx | |
sete %dl | |
# Not a halfway pattern, or Inf/NaN: the plain conversion is enough.
orb %cl, %dl | |
jne .LBB24_4 | |
# Halfway pattern: if (sum - xy == z) and (sum - z == xy) the plain conversion
# is still used. Any mismatch or unordered compare goes to the adjustment.
vsubsd %xmm1, %xmm0, %xmm3 | |
vucomisd %xmm2, %xmm3 | |
jne .LBB24_3 | |
jp .LBB24_3 | |
vsubsd %xmm2, %xmm0, %xmm3 | |
vucomisd %xmm1, %xmm3 | |
jne .LBB24_3 | |
jp .LBB24_3 | |
.LBB24_4: | |
vcvtsd2ss %xmm0, %xmm0, %xmm0 | |
retq | |
# Adjustment: nudge the raw bits of sum by +1 or -1 before narrowing.
.LBB24_3: | |
# cl = sign bit of sum (set below from this testq; the vector ops in between do
# not modify the integer flags).
testq %rax, %rax | |
# xmm3 = xy - sum; xmm0 = z - sum.
vsubsd %xmm0, %xmm1, %xmm3 | |
vsubsd %xmm0, %xmm2, %xmm0 | |
sets %cl | |
# Compare z with xy; the flags stay live until the setbe below.
vucomisd %xmm1, %xmm2 | |
# xmm0 = xy + (z - sum); xmm3 = (xy - sum) + z: two orderings of the error term.
vaddsd %xmm0, %xmm1, %xmm0 | |
vaddsd %xmm2, %xmm3, %xmm3 | |
vxorpd %xmm1, %xmm1, %xmm1 | |
# dl = (z <= xy or unordered) XOR sign; when set, the xmm3 ordering is selected.
setbe %dl | |
xorb %cl, %dl | |
kmovd %edx, %k1 | |
vmovsd %xmm3, %xmm0, %xmm0 {%k1} | |
# dl = (0.0 <= err or unordered) XOR sign.
vucomisd %xmm0, %xmm1 | |
setbe %dl | |
xorb %cl, %dl | |
# rcx = (dl - 1) | 1: +1 when dl is 1, -1 when dl is 0.
movzbl %dl, %ecx | |
decq %rcx | |
orq $1, %rcx | |
# Apply the adjustment to the raw bits, then narrow to float.
addq %rax, %rcx | |
vmovq %rcx, %xmm0 | |
vcvtsd2ss %xmm0, %xmm0, %xmm0 | |
retq | |
.Lfunc_end24: | |
.size fmaf, .Lfunc_end24-fmaf | |
.cfi_endproc | |
# fmodf: float remainder of x / y computed with integer shift-and-subtract on the
# significands; the result takes the sign bit of x.
# In:  %xmm0 = x, %xmm1 = y
# Out: %xmm0 = result
# Register roles after setup: eax = bits of x, edx = bits / significand of y,
# ecx = exponent of x (ex), edi = exponent of y (ey), esi = significand of x.
# Uses %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r9, %k1, %xmm1 and flags as scratch.
.section .text.fmodf,"ax",@progbits | |
.p2align 4, 0x90 | |
.type fmodf,@function | |
fmodf: | |
.Lfunc_begin25: | |
.cfi_startproc | |
# edx = raw bits of y; esi = bits << 1 (sign dropped). Zero means y is +/-0.
vmovd %xmm1, %edx | |
movl %edx, %esi | |
addl %edx, %esi | |
je .LBB25_2 | |
movl %edx, %r8d | |
vmovd %xmm0, %eax | |
# bextr control 0x0817: start bit 23, length 8 -> biased exponent.
movl $2071, %edi | |
andl $2147483647, %r8d | |
bextrl %edi, %eax, %ecx | |
# r8b = |y| bits >= 0x7f800001 (y is NaN); r9b = exponent of x is 255 (Inf/NaN).
cmpl $2139095041, %r8d | |
setae %r8b | |
cmpl $255, %ecx | |
sete %r9b | |
orb %r8b, %r9b | |
cmpb $1, %r9b | |
jne .LBB25_3 | |
# Invalid operands: return (x*y) / (x*y).
.LBB25_2: | |
vmulss %xmm1, %xmm0, %xmm0 | |
vdivss %xmm0, %xmm0, %xmm0 | |
retq | |
.LBB25_3: | |
# Compare magnitudes with the sign bits shifted out; |x| <= |y| is a shortcut.
leal (%rax,%rax), %r8d | |
cmpl %esi, %r8d | |
jbe .LBB25_4 | |
# edi = biased exponent of y.
bextrl %edi, %edx, %edi | |
# ex == 0: x is subnormal and must be normalised by hand.
testl %ecx, %ecx | |
je .LBB25_6 | |
# Normal x: esi = 23-bit fraction with the implicit leading one made explicit.
movl %eax, %esi | |
andl $8388607, %esi | |
orl $8388608, %esi | |
# ey == 0: y is subnormal.
testl %edi, %edi | |
je .LBB25_11 | |
# Normal y: same treatment of the fraction.
.LBB25_14: | |
andl $8388607, %edx | |
orl $8388608, %edx | |
cmpl %edi, %ecx | |
jg .LBB25_16 | |
# ex <= ey: a single trial subtraction; edi is reused for the difference.
.LBB25_21: | |
movl %esi, %edi | |
subl %edx, %edi | |
jns .LBB25_22 | |
jmp .LBB25_23 | |
# |x| <= |y|: return x, or x*0.0 when the magnitudes are equal.
.LBB25_4: | |
vpxor %xmm1, %xmm1, %xmm1 | |
# ZF still comes from the cmpl above (vpxor leaves integer flags alone).
sete %al | |
vmulss %xmm1, %xmm0, %xmm1 | |
kmovd %eax, %k1 | |
vmovss %xmm1, %xmm0, %xmm0 {%k1} | |
retq | |
# Subnormal x: count how far the fraction must shift to reach bit 31, lowering
# ex (ecx) by one per step.
.LBB25_6: | |
movl %eax, %esi | |
xorl %ecx, %ecx | |
shll $9, %esi | |
js .LBB25_8 | |
.p2align 4, 0x90 | |
.LBB25_7: | |
decl %ecx | |
addl %esi, %esi | |
jns .LBB25_7 | |
.LBB25_8: | |
# esi = bits of x shifted left by (1 - ex), placing the leading one at bit 23.
movb $1, %sil | |
subb %cl, %sil | |
shlxl %esi, %eax, %esi | |
testl %edi, %edi | |
jne .LBB25_14 | |
# Subnormal y: same normalisation, lowering ey (edi).
.LBB25_11: | |
movl %edx, %r8d | |
xorl %edi, %edi | |
shll $9, %r8d | |
js .LBB25_13 | |
.p2align 4, 0x90 | |
.LBB25_12: | |
decl %edi | |
addl %r8d, %r8d | |
jns .LBB25_12 | |
.LBB25_13: | |
movb $1, %r8b | |
subb %dil, %r8b | |
shlxl %r8d, %edx, %edx | |
cmpl %edi, %ecx | |
jg .LBB25_16 | |
jmp .LBB25_21 | |
# Main loop, one iteration per exponent step while ex > ey.
.p2align 4, 0x90 | |
.LBB25_19: | |
# Shift the running remainder left and step ex down.
addl %esi, %esi | |
decl %ecx | |
cmpl %edi, %ecx | |
jle .LBB25_20 | |
.LBB25_16: | |
# Trial subtraction: keep the difference only when it is non-negative.
movl %esi, %r8d | |
subl %edx, %r8d | |
js .LBB25_19 | |
movl %r8d, %esi | |
# Flags are still those of the subl; a zero difference means an exact multiple.
jne .LBB25_19 | |
jmp .LBB25_18 | |
# Loop finished: one last trial subtraction at ex == ey.
.LBB25_20: | |
movl %edi, %ecx | |
movl %esi, %edi | |
subl %edx, %edi | |
js .LBB25_23 | |
.LBB25_22: | |
movl %edi, %esi | |
# Flags are still those of the subtraction that produced edi.
je .LBB25_18 | |
# Renormalise: shift left until bit 23 is set, lowering ex for each step.
.LBB25_23: | |
cmpl $8388607, %esi | |
ja .LBB25_24 | |
.p2align 4, 0x90 | |
.LBB25_25: | |
leal (%rsi,%rsi), %edx | |
decl %ecx | |
cmpl $4194304, %esi | |
# movl keeps the flags of the cmpl for the jb below.
movl %edx, %esi | |
jb .LBB25_25 | |
# eax = sign bit of x.
andl $-2147483648, %eax | |
testl %ecx, %ecx | |
jle .LBB25_28 | |
# ex > 0: drop the explicit leading one and insert the exponent field.
.LBB25_27: | |
addl $-8388608, %edx | |
shll $23, %ecx | |
orl %edx, %ecx | |
orl %eax, %ecx | |
vmovd %ecx, %xmm0 | |
retq | |
# Exact multiple: return x * 0.0.
.LBB25_18: | |
vpxor %xmm1, %xmm1, %xmm1 | |
vmulss %xmm1, %xmm0, %xmm0 | |
retq | |
# Remainder was already normalised.
.LBB25_24: | |
movl %esi, %edx | |
andl $-2147483648, %eax | |
testl %ecx, %ecx | |
jg .LBB25_27 | |
# ex <= 0: subnormal result, shift the significand right by (1 - ex).
.LBB25_28: | |
movb $1, %sil | |
subb %cl, %sil | |
shrxl %esi, %edx, %ecx | |
orl %eax, %ecx | |
vmovd %ecx, %xmm0 | |
retq | |
.Lfunc_end25: | |
.size fmodf, .Lfunc_end25-fmodf | |
.cfi_endproc | |
# __math_invalidf: return (x - x) / (x - x).
# In:  %xmm0 = x
# Out: %xmm0 = quotient (0/0 for any finite x)
.section .text.__math_invalidf,"ax",@progbits | |
.p2align 4, 0x90 | |
.type __math_invalidf,@function | |
__math_invalidf: | |
.Lfunc_begin26: | |
.cfi_startproc | |
vsubss %xmm0, %xmm0, %xmm0 | |
vdivss %xmm0, %xmm0, %xmm0 | |
retq | |
.Lfunc_end26: | |
.size __math_invalidf, .Lfunc_end26-__math_invalidf | |
.cfi_endproc | |
# Single-precision literals for powf.
.section .rodata.cst4,"aM",@progbits,4 | |
.p2align 2, 0x0 | |
# 0x3f800000 = 1.0
.LCPI27_0: | |
.long 0x3f800000 | |
# 0x80000000 = sign-bit mask
.LCPI27_1: | |
.long 0x80000000 | |
# 0x4b000000 = 2^23 (used to scale subnormal x into the normal range)
.LCPI27_2: | |
.long 0x4b000000 | |
# 0x10000000 = 2^-95
.LCPI27_12: | |
.long 0x10000000 | |
# 0x70000000 = 2^97
.LCPI27_20: | |
.long 0x70000000 | |
# Double-precision literals and two-entry float tables for powf.
.section .rodata.cst8,"aM",@progbits,8 | |
.p2align 3, 0x0 | |
# -1.0
.LCPI27_3: | |
.quad 0xbff0000000000000 | |
# .LCPI27_4 .. .LCPI27_8: coefficients of the log2 polynomial, in the order they
# are named A0..A4 in the code comments below.
.LCPI27_4: | |
.quad 0x3fd27616c9496e0b | |
.LCPI27_5: | |
.quad 0xbfd71969a075c67a | |
.LCPI27_6: | |
.quad 0x3fdec70a6ca7badd | |
.LCPI27_7: | |
.quad 0xbfe7154748bef6c8 | |
.LCPI27_8: | |
.quad 0x3ff71547652ab82b | |
# Just below 128.0: overflow threshold for y*log2(x).
.LCPI27_9: | |
.quad 0x405fffffffd1d571 | |
# -150.0: underflow threshold for y*log2(x).
.LCPI27_10: | |
.quad 0xc062c00000000000 | |
# Float pair { -2^-95, +2^-95 } for the underflow result.
.LCPI27_11: | |
.long 0x90000000 | |
.long 0x10000000 | |
# 1.5 * 2^47 and its negation: rounding shift that leaves a fixed-point value
# with five fraction bits in the low mantissa bits.
.LCPI27_13: | |
.quad 0x42e8000000000000 | |
.LCPI27_14: | |
.quad 0xc2e8000000000000 | |
# .LCPI27_15 .. .LCPI27_17: coefficients of the exp2 polynomial (C0..C2).
.LCPI27_15: | |
.quad 0x3fac6af84b912394 | |
.LCPI27_16: | |
.quad 0x3fcebfce50fac4f3 | |
.LCPI27_17: | |
.quad 0x3fe62e42ff0c52d6 | |
# 1.0
.LCPI27_18: | |
.quad 0x3ff0000000000000 | |
# Float pair { -2^97, +2^97 } for the overflow result.
.LCPI27_19: | |
.long 0xf0000000 | |
.long 0x70000000 | |
# powf: x raised to the power y, evaluated as exp2(y * log2(x)) in double
# precision using the __powf_log2_data and __exp2f_data tables.
# In:  %xmm0 = x, %xmm1 = y
# Out: %xmm0 = result
# Register roles on the main path: edx = bits of x (ix), eax = bits of y (iy),
# ecx = sign bias (0, or 65536 when the result must be negated).
# Uses %rax, %rcx, %rdx, %rsi, %rdi, %k1, %k2, %xmm1-%xmm6 and flags as scratch,
# and stores below %rsp without adjusting it (presumably the SysV red zone).
.section .text.powf,"ax",@progbits | |
.p2align 4, 0x90 | |
.type powf,@function | |
powf: | |
.Lfunc_begin27: | |
.cfi_startproc | |
vmovd %xmm0, %edx | |
vmovd %xmm1, %eax | |
# ix - 0x7f800000 (mod 2^32) below 0x81000000 means ix lies in
# [0x7f800000, 0xffffffff] or [0, 0x007fffff]: x is negative, Inf, NaN, zero or
# subnormal. Those go to the slow path.
leal -2139095040(%rdx), %ecx | |
cmpl $-2130706432, %ecx | |
jb .LBB27_2 | |
# 2*iy + 0x01000000 <= 0x01000000 (mod 2^32) means y is zero, Inf or NaN.
leal 16777216(%rax,%rax), %esi | |
# Sign bias starts at 0 for the fast path.
xorl %ecx, %ecx | |
cmpl $16777216, %esi | |
jbe .LBB27_2 | |
# ---- log2(x) -----------------------------------------------------------------
.LBB27_24: | |
# tmp = ix - 0x3f330000
leal -1060306944(%rdx), %eax | |
movl %eax, %esi | |
movl %eax, %edi | |
# esi will become the table byte offset ((tmp >> 19) & 15) * 16.
shrl $19, %esi | |
# edi = tmp & 0xff800000 (exponent part); eax = tmp >> 23 arithmetically = k.
andl $-8388608, %edi | |
sarl $23, %eax | |
# edx = ix with the exponent part removed: the reduced argument z as float bits.
subl %edi, %edx | |
shll $4, %esi | |
leaq __powf_log2_data(%rip), %rdi | |
vmovd %edx, %xmm0 | |
movzbl %sil, %esi | |
# rdx = 0x7fff800000000000: mask for bits 47..62, used for the range check below.
movabsq $9223231299366420480, %rdx | |
# r = z * table[i].first - 1.0
vcvtss2sd %xmm0, %xmm0, %xmm0 | |
vmulsd (%rsi,%rdi), %xmm0, %xmm0 | |
vaddsd .LCPI27_3(%rip), %xmm0, %xmm0 | |
# p = A2*r + A3 (xmm5); y = A0*r + A1 (xmm4)
vmulsd .LCPI27_6(%rip), %xmm0, %xmm5 | |
vmulsd .LCPI27_4(%rip), %xmm0, %xmm4 | |
vaddsd .LCPI27_7(%rip), %xmm5, %xmm5 | |
vaddsd .LCPI27_5(%rip), %xmm4, %xmm4 | |
# y0 = (double)k + table[i].second
vcvtsi2sd %eax, %xmm2, %xmm2 | |
vmulsd %xmm0, %xmm0, %xmm3 | |
vaddsd 8(%rsi,%rdi), %xmm2, %xmm2 | |
vmulsd .LCPI27_8(%rip), %xmm0, %xmm0 | |
# rsi = 0x405f000000000001; since the masked value has its low 47 bits clear,
# "masked >= rsi" is the same as "masked >= 0x405f800000000000" (bits of 126.0).
movabsq $4638426141214900225, %rsi | |
# log2(x) = y*r^4 + (p*r^2 + (A4*r + y0))
vmulsd %xmm3, %xmm3, %xmm6 | |
vmulsd %xmm5, %xmm3, %xmm3 | |
vmulsd %xmm6, %xmm4, %xmm4 | |
vaddsd %xmm0, %xmm2, %xmm0 | |
vaddsd %xmm3, %xmm0, %xmm0 | |
vaddsd %xmm0, %xmm4, %xmm0 | |
# ylogx = (double)y * log2(x)
vcvtss2sd %xmm1, %xmm1, %xmm1 | |
vmulsd %xmm1, %xmm0, %xmm0 | |
vmovq %xmm0, %rax | |
# |ylogx| >= 126: check for overflow / underflow before exponentiating.
andq %rax, %rdx | |
cmpq %rsi, %rdx | |
jae .LBB27_25 | |
# ---- exp2(ylogx) -------------------------------------------------------------
.LBB27_29: | |
# kd = ylogx + shift; rax = raw bits of kd (fixed-point ylogx * 32 in low bits).
vaddsd .LCPI27_13(%rip), %xmm0, %xmm1 | |
leaq __exp2f_data(%rip), %rdx | |
vmovq %xmm1, %rax | |
# xmm1 = kd - shift = ylogx rounded to a multiple of 1/32.
vaddsd .LCPI27_14(%rip), %xmm1, %xmm1 | |
# ecx = integer part + sign bias; eax = table index (low 5 bits).
addl %eax, %ecx | |
andl $31, %eax | |
# Shift by 47 so the integer lands in the exponent field and the bias (65536)
# lands in the sign bit; add the table entry to form the scale factor s.
shlq $47, %rcx | |
addq (%rdx,%rax,8), %rcx | |
# r = ylogx - rounded value
vsubsd %xmm1, %xmm0, %xmm0 | |
# result = ((C0*r + C1)*r^2 + (C2*r + 1)) * s
vmulsd .LCPI27_15(%rip), %xmm0, %xmm2 | |
vmovq %rcx, %xmm1 | |
vmulsd %xmm0, %xmm0, %xmm3 | |
vaddsd .LCPI27_16(%rip), %xmm2, %xmm2 | |
vmulsd .LCPI27_17(%rip), %xmm0, %xmm0 | |
vaddsd .LCPI27_18(%rip), %xmm0, %xmm0 | |
vmulsd %xmm2, %xmm3, %xmm2 | |
vaddsd %xmm2, %xmm0, %xmm0 | |
vmulsd %xmm1, %xmm0, %xmm0 | |
vcvtsd2ss %xmm0, %xmm0, %xmm0 | |
.LBB27_30: | |
retq | |
# ---- slow path: classify the special operands --------------------------------
.LBB27_2: | |
# 2*iy - 1 >= 0xfeffffff (unsigned): y is zero, Inf or NaN.
leal (%rax,%rax), %ecx | |
leal -1(%rcx), %esi | |
cmpl $-16777217, %esi | |
jae .LBB27_3 | |
# Same test on x: zero, Inf or NaN.
leal -1(%rdx,%rdx), %ecx | |
cmpl $-16777217, %ecx | |
jae .LBB27_10 | |
# x and y are finite and non-zero from here on.
xorl %ecx, %ecx | |
testl %edx, %edx | |
js .LBB27_16 | |
# Positive x with a normal exponent can use the main path directly.
cmpl $8388607, %edx | |
ja .LBB27_24 | |
# Subnormal x: scale by 2^23, clear the sign and subtract 23 from the exponent
# field so the main path sees a normalised value with a compensated exponent.
.LBB27_23: | |
vmulss .LCPI27_2(%rip), %xmm0, %xmm0 | |
vmovd %xmm0, %edx | |
andl $2147483647, %edx | |
addl $-192937984, %edx | |
jmp .LBB27_24 | |
# |ylogx| >= 126.
.LBB27_25: | |
# Not above the overflow threshold: check the underflow threshold instead.
vucomisd .LCPI27_9(%rip), %xmm0 | |
jbe .LBB27_27 | |
# Overflow: (+/-2^97) * 2^97, sign chosen by whether the sign bias is zero.
xorl %eax, %eax | |
testl %ecx, %ecx | |
leaq .LCPI27_19(%rip), %rcx | |
sete %al | |
vmovss (%rcx,%rax,4), %xmm0 | |
vmovss %xmm0, -8(%rsp) | |
vmovss -8(%rsp), %xmm0 | |
vmulss .LCPI27_20(%rip), %xmm0, %xmm0 | |
retq | |
# Finite negative x: the result depends on whether y is an integer.
.LBB27_16: | |
# ecx = biased exponent of y.
movl $2071, %ecx | |
bextrl %ecx, %eax, %ecx | |
# |y| < 1: not an integer -> invalid.
cmpl $127, %ecx | |
jb .LBB27_31 | |
# Exponent <= 150: the low bits must be inspected.
cmpl $150, %ecx | |
jbe .LBB27_18 | |
# y is an even integer (or too large to be odd): no sign bias.
.LBB27_20: | |
xorl %ecx, %ecx | |
.LBB27_21: | |
# Continue with |x|.
vmovd %xmm0, %edx | |
andl $2147483647, %edx | |
cmpl $8388607, %edx | |
ja .LBB27_24 | |
jmp .LBB27_23 | |
.LBB27_27: | |
# -150 < ylogx: still representable, go exponentiate.
vmovsd .LCPI27_10(%rip), %xmm1 | |
vucomisd %xmm0, %xmm1 | |
jb .LBB27_29 | |
# Underflow: (+/-2^-95) * 2^-95, sign chosen by whether the sign bias is zero.
xorl %eax, %eax | |
testl %ecx, %ecx | |
leaq .LCPI27_11(%rip), %rcx | |
sete %al | |
vmovss (%rcx,%rax,4), %xmm0 | |
vmovss %xmm0, -4(%rsp) | |
vmovss -4(%rsp), %xmm0 | |
vmulss .LCPI27_12(%rip), %xmm0, %xmm0 | |
retq | |
.LBB27_18: | |
# dl = 150 - exponent = number of fraction bits below the units bit of y
# (-106 is 150 as an unsigned byte).
movb $-106, %dl | |
subb %cl, %dl | |
# ecx = iy with bits at and above position dl cleared; zero means y is integral.
bzhil %edx, %eax, %ecx | |
je .LBB27_19 | |
# Negative x with non-integer y: return (x - x) / (x - x).
.LBB27_31: | |
vsubss %xmm0, %xmm0, %xmm0 | |
vdivss %xmm0, %xmm0, %xmm0 | |
retq | |
.LBB27_19: | |
# Test the units bit of y: set means odd, so the result gets the sign bias.
movl $1, %ecx | |
shlxl %edx, %ecx, %edx | |
movl $65536, %ecx | |
testl %eax, %edx | |
jne .LBB27_21 | |
jmp .LBB27_20 | |
# y is zero, Inf or NaN.
.LBB27_3: | |
vmovss .LCPI27_0(%rip), %xmm2 | |
# sil = (y is +/-0); dil = (x is exactly 1.0). Either one returns 1.0.
testl %ecx, %ecx | |
sete %sil | |
cmpl $1065353216, %edx | |
sete %dil | |
orb %sil, %dil | |
je .LBB27_5 | |
vmovaps %xmm2, %xmm0 | |
retq | |
# x is zero, Inf or NaN while y is finite and non-zero.
.LBB27_10: | |
# x2 = x * x
vmulss %xmm0, %xmm0, %xmm0 | |
testl %edx, %edx | |
jns .LBB27_13 | |
# x has its sign bit set: negate x2 only when y is an odd integer.
movl $2071, %ecx | |
bextrl %ecx, %eax, %ecx | |
# Exponent of y outside [127, 150]: cannot be an odd integer.
leal -151(%rcx), %edx | |
cmpl $-24, %edx | |
jb .LBB27_13 | |
# xmm1 = -x2
vxorps .LCPI27_1(%rip){1to4}, %xmm0, %xmm1 | |
movb $-106, %dl | |
subb %cl, %dl | |
bzhil %edx, %eax, %ecx | |
# movzbl leaves the flags of bzhi intact.
movzbl %dl, %edx | |
# cl = fraction bits present (y not an integer).
setne %cl | |
# CF = units bit of y; dl = that bit is clear (y even).
btl %edx, %eax | |
setae %dl | |
kmovd %ecx, %k1 | |
kmovd %edx, %k2 | |
# Either condition puts +x2 back into the result.
vmovss %xmm0, %xmm1, %xmm1 {%k2} | |
vmovss %xmm0, %xmm1, %xmm1 {%k1} | |
vmovaps %xmm1, %xmm0 | |
.LBB27_13: | |
# y non-negative: return x2 (possibly negated). Otherwise return 1.0 / x2,
# with the quotient bounced through memory.
testl %eax, %eax | |
jns .LBB27_30 | |
vmovss .LCPI27_0(%rip), %xmm1 | |
vdivss %xmm0, %xmm1, %xmm0 | |
vmovss %xmm0, -12(%rsp) | |
vmovss -12(%rsp), %xmm0 | |
retq | |
# y is Inf or NaN, x is not exactly 1.0.
.LBB27_5: | |
# Either operand NaN (bits << 1 above 0xff000000): return x + y.
addl %edx, %edx | |
cmpl $-16777215, %edx | |
setae %sil | |
cmpl $-16777215, %ecx | |
setae %cl | |
orb %sil, %cl | |
cmpb $1, %cl | |
jne .LBB27_7 | |
vaddss %xmm1, %xmm0, %xmm0 | |
retq | |
# y is +/-Inf.
.LBB27_7: | |
vmovaps %xmm2, %xmm0 | |
# |x| == 1: return 1.0.
cmpl $2130706432, %edx | |
je .LBB27_30 | |
# cl = (|x| >= 1), still from the cmpl above.
setae %cl | |
testl %eax, %eax | |
# Default result y*y; replaced by 0.0 when (y non-negative) XOR (|x| >= 1).
vmulss %xmm1, %xmm1, %xmm0 | |
vxorps %xmm1, %xmm1, %xmm1 | |
setns %al | |
xorb %cl, %al | |
kmovd %eax, %k1 | |
vmovss %xmm1, %xmm0, %xmm0 {%k1} | |
retq | |
.Lfunc_end27: | |
.size powf, .Lfunc_end27-powf | |
.cfi_endproc | |
# Literal pool for roundf (IEEE-754 single precision bit patterns).
.section .rodata.cst4,"aM",@progbits,4 | |
.p2align 2, 0x0 | |
# 0x7fffffff = mask clearing the sign bit
.LCPI28_0: | |
.long 0x7fffffff | |
# 0x4b000000 = 2^23
.LCPI28_1: | |
.long 0x4b000000 | |
# 0xcb000000 = -2^23
.LCPI28_2: | |
.long 0xcb000000 | |
# 0x3f000000 = 0.5
.LCPI28_3: | |
.long 0x3f000000 | |
# 0xbf000000 = -0.5
.LCPI28_4: | |
.long 0xbf000000 | |
# 0x3f800000 = 1.0
.LCPI28_5: | |
.long 0x3f800000 | |
# 0xbf800000 = -1.0
.LCPI28_6: | |
.long 0xbf800000 | |
# 0x80000000 = sign-bit mask
.LCPI28_7: | |
.long 0x80000000 | |
# roundf: round to the nearest integral value, halfway cases away from zero.
# Works on |x| and restores the sign at the end.
# In:  %xmm0 = x
# Out: %xmm0 = result
# Uses %eax, %ecx, %k1, %xmm1, %xmm2 and flags as scratch, plus one 4-byte
# store below %rsp.
.section .text.roundf,"ax",@progbits | |
.p2align 4, 0x90 | |
.type roundf,@function | |
roundf: | |
.Lfunc_begin28: | |
.cfi_startproc | |
# eax = raw bits of x; ecx = biased exponent (bextr control 0x0817: bit 23, 8 bits).
vmovd %xmm0, %eax | |
movl $2071, %ecx | |
bextrl %ecx, %eax, %ecx | |
# Exponent above 149 (|x| >= 2^23, Inf or NaN): return x unchanged.
cmpl $149, %ecx | |
ja .LBB28_8 | |
# xmm1 = |x|; xmm2 = |x| + 2^23 (rounds |x| to an integer in the low bits).
vpandd .LCPI28_0(%rip){1to4}, %xmm0, %xmm1 | |
vaddss .LCPI28_1(%rip), %xmm1, %xmm2 | |
cmpl $125, %ecx | |
ja .LBB28_3 | |
# Exponent <= 125 (|x| < 0.5): return x * 0.0, which keeps the sign of x.
# The sum in xmm2 is stored and never read back.
vxorps %xmm1, %xmm1, %xmm1 | |
vmovss %xmm2, -4(%rsp) | |
vmulss %xmm1, %xmm0, %xmm0 | |
retq | |
.LBB28_3: | |
# d = (|x| + 2^23 - 2^23) - |x|: the rounding error of the add, in (-1, 1).
vaddss .LCPI28_2(%rip), %xmm2, %xmm0 | |
vsubss %xmm1, %xmm0, %xmm0 | |
vucomiss .LCPI28_3(%rip), %xmm0 | |
jbe .LBB28_5 | |
# d > 0.5: result magnitude = |x| + d - 1.
vaddss %xmm0, %xmm1, %xmm0 | |
vaddss .LCPI28_6(%rip), %xmm0, %xmm0 | |
jmp .LBB28_7 | |
.LBB28_5: | |
vmovss .LCPI28_4(%rip), %xmm2 | |
# Compare -0.5 with d; the vaddss below does not disturb the flags.
vucomiss %xmm0, %xmm2 | |
vaddss %xmm0, %xmm1, %xmm0 | |
# -0.5 < d: result magnitude = |x| + d.
jb .LBB28_7 | |
# d <= -0.5: result magnitude = |x| + d + 1.
vaddss .LCPI28_5(%rip), %xmm0, %xmm0 | |
.LBB28_7: | |
# Negate the result when the sign bit of the original x was set.
vxorps .LCPI28_7(%rip){1to4}, %xmm0, %xmm1 | |
testl %eax, %eax | |
sets %al | |
kmovd %eax, %k1 | |
vmovss %xmm1, %xmm0, %xmm0 {%k1} | |
.LBB28_8: | |
retq | |
.Lfunc_end28: | |
.size roundf, .Lfunc_end28-roundf | |
.cfi_endproc | |
# __unnamed_1: NUL-terminated library name (28 characters + terminator = 29 bytes),
# referenced from iree_hal_executable_library_query_v0_header.
.type __unnamed_1,@object | |
.section .rodata.__unnamed_1,"a",@progbits | |
__unnamed_1: | |
.asciz "mmt3d_kernel_linked_llvm_cpu" | |
.size __unnamed_1, 29 | |
# iree_hal_executable_library_query_v0_header: 24-byte header record.
# Layout as emitted: 32-bit value 4, 4 bytes of zero padding, pointer to the
# library name string, then two 32-bit zero fields.
# NOTE(review): the first field is presumably a version tag and the trailing two
# are feature/flag words - confirm against the IREE executable library header.
.type iree_hal_executable_library_query_v0_header,@object | |
.section .data.rel.ro.iree_hal_executable_library_query_v0_header,"aw",@progbits | |
.p2align 4, 0x0 | |
iree_hal_executable_library_query_v0_header: | |
.long 4 | |
.zero 4 | |
.quad __unnamed_1 | |
.long 0 | |
.long 0 | |
.size iree_hal_executable_library_query_v0_header, 24 | |
# iree_hal_executable_library_query_v0_funcs: table of the four dispatch entry
# point addresses (4 x 8 bytes), in dispatch order 0..3. The name and source
# location tables in this file list their entries in the same order.
.type iree_hal_executable_library_query_v0_funcs,@object | |
.section .data.rel.ro.iree_hal_executable_library_query_v0_funcs,"aw",@progbits | |
.p2align 4, 0x0 | |
iree_hal_executable_library_query_v0_funcs: | |
.quad turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32 | |
.quad turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack | |
.quad turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32 | |
.quad turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32 | |
.size iree_hal_executable_library_query_v0_funcs, 32 | |
# iree_hal_executable_library_query_v0_attrs: 16 bytes of zeros.
# NOTE(review): presumably four 4-byte per-entry-point attribute records, all
# left at their defaults - confirm against the IREE executable library header.
.type iree_hal_executable_library_query_v0_attrs,@object | |
.section .rodata.iree_hal_executable_library_query_v0_attrs,"a",@progbits | |
.p2align 3, 0x0 | |
iree_hal_executable_library_query_v0_attrs: | |
.zero 16 | |
.size iree_hal_executable_library_query_v0_attrs, 16 | |
# __unnamed_2 .. __unnamed_5: NUL-terminated entry point names, one per dispatch,
# referenced from iree_hal_executable_library_query_v0_names. Each .size counts
# the terminating NUL.
.type __unnamed_2,@object | |
.section .rodata.__unnamed_2,"a",@progbits | |
__unnamed_2: | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32" | |
.size __unnamed_2, 58 | |
.type __unnamed_3,@object | |
.section .rodata.__unnamed_3,"a",@progbits | |
__unnamed_3: | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack" | |
.size __unnamed_3, 78 | |
.type __unnamed_4,@object | |
.section .rodata.__unnamed_4,"a",@progbits | |
__unnamed_4: | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32" | |
.size __unnamed_4, 94 | |
.type __unnamed_5,@object | |
.section .rodata.__unnamed_5,"a",@progbits | |
__unnamed_5: | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32" | |
.size __unnamed_5, 60 | |
# iree_hal_executable_library_query_v0_names: four pointers to the entry point
# name strings __unnamed_2 .. __unnamed_5 (4 x 8 bytes), in dispatch order.
.type iree_hal_executable_library_query_v0_names,@object | |
.section .data.rel.ro.iree_hal_executable_library_query_v0_names,"aw",@progbits | |
.p2align 4, 0x0 | |
iree_hal_executable_library_query_v0_names: | |
.quad __unnamed_2 | |
.quad __unnamed_3 | |
.quad __unnamed_4 | |
.quad __unnamed_5 | |
.size iree_hal_executable_library_query_v0_names, 32 | |
# __unnamed_6 .. __unnamed_9: four separate copies of the same NUL-terminated
# source file name (17 characters + terminator = 18 bytes each), one per entry
# in iree_hal_executable_library_query_v0_source_locations.
.type __unnamed_6,@object | |
.section .rodata.__unnamed_6,"a",@progbits | |
__unnamed_6: | |
.asciz "mmt3d_kernel.mlir" | |
.size __unnamed_6, 18 | |
.type __unnamed_7,@object | |
.section .rodata.__unnamed_7,"a",@progbits | |
__unnamed_7: | |
.asciz "mmt3d_kernel.mlir" | |
.size __unnamed_7, 18 | |
.type __unnamed_8,@object | |
.section .rodata.__unnamed_8,"a",@progbits | |
__unnamed_8: | |
.asciz "mmt3d_kernel.mlir" | |
.size __unnamed_8, 18 | |
.type __unnamed_9,@object | |
.section .rodata.__unnamed_9,"a",@progbits | |
__unnamed_9: | |
.asciz "mmt3d_kernel.mlir" | |
.size __unnamed_9, 18 | |
# iree_hal_executable_library_query_v0_source_locations: four 16-byte records,
# one per dispatch, each laid out as { 32-bit 4, 32-bit 17, pointer to a path
# string }. The value 17 equals the length of "mmt3d_kernel.mlir" without its
# NUL terminator.
# NOTE(review): the leading 4 is presumably the source line number - confirm.
.type iree_hal_executable_library_query_v0_source_locations,@object | |
.section .data.rel.ro.iree_hal_executable_library_query_v0_source_locations,"aw",@progbits | |
.p2align 4, 0x0 | |
iree_hal_executable_library_query_v0_source_locations: | |
.long 4 | |
.long 17 | |
.quad __unnamed_6 | |
.long 4 | |
.long 17 | |
.quad __unnamed_7 | |
.long 4 | |
.long 17 | |
.quad __unnamed_8 | |
.long 4 | |
.long 17 | |
.quad __unnamed_9 | |
.size iree_hal_executable_library_query_v0_source_locations, 64 | |
# Per-dispatch stage tables: for each of the four dispatches there is a
# *_stage_names symbol and a *_stage_source_locations symbol. All eight are
# zero-sized objects (a label with no data), aligned to 8 bytes; they only
# provide addresses for iree_hal_executable_library_query_v0_stage_location_tables.
.type iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32_stage_names,@object | |
.section .rodata.iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32_stage_names,"a",@progbits | |
.p2align 3, 0x0 | |
iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32_stage_names: | |
.size iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32_stage_names, 0 | |
.type iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32_stage_source_locations,@object | |
.section .rodata.iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32_stage_source_locations,"a",@progbits | |
.p2align 3, 0x0 | |
iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32_stage_source_locations: | |
.size iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32_stage_source_locations, 0 | |
.type iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack_stage_names,@object | |
.section .rodata.iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack_stage_names,"a",@progbits | |
.p2align 3, 0x0 | |
iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack_stage_names: | |
.size iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack_stage_names, 0 | |
.type iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack_stage_source_locations,@object | |
.section .rodata.iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack_stage_source_locations,"a",@progbits | |
.p2align 3, 0x0 | |
iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack_stage_source_locations: | |
.size iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack_stage_source_locations, 0 | |
.type iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32_stage_names,@object | |
.section .rodata.iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32_stage_names,"a",@progbits | |
.p2align 3, 0x0 | |
iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32_stage_names: | |
.size iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32_stage_names, 0 | |
.type iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32_stage_source_locations,@object | |
.section .rodata.iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32_stage_source_locations,"a",@progbits | |
.p2align 3, 0x0 | |
iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32_stage_source_locations: | |
.size iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32_stage_source_locations, 0 | |
.type iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32_stage_names,@object | |
.section .rodata.iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32_stage_names,"a",@progbits | |
.p2align 3, 0x0 | |
iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32_stage_names: | |
.size iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32_stage_names, 0 | |
.type iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32_stage_source_locations,@object | |
.section .rodata.iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32_stage_source_locations,"a",@progbits | |
.p2align 3, 0x0 | |
iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32_stage_source_locations: | |
.size iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32_stage_source_locations, 0 | |
# iree_hal_executable_library_query_v0_stage_location_tables: four 24-byte
# records, one per dispatch, each laid out as { 32-bit 0, 4 bytes padding,
# pointer to *_stage_names, pointer to *_stage_source_locations }.
# The leading 0 matches the zero size of the tables it points at
# (presumably an entry count - confirm).
.type iree_hal_executable_library_query_v0_stage_location_tables,@object | |
.section .data.rel.ro.iree_hal_executable_library_query_v0_stage_location_tables,"aw",@progbits | |
.p2align 4, 0x0 | |
iree_hal_executable_library_query_v0_stage_location_tables: | |
# dispatch 0
.long 0 | |
.zero 4 | |
.quad iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32_stage_names | |
.quad iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32_stage_source_locations | |
# dispatch 1
.long 0 | |
.zero 4 | |
.quad iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack_stage_names | |
.quad iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack_stage_source_locations | |
# dispatch 2
.long 0 | |
.zero 4 | |
.quad iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32_stage_names | |
.quad iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32_stage_source_locations | |
# dispatch 3
.long 0 | |
.zero 4 | |
.quad iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32_stage_names | |
.quad iree_hal_executable_library_query_v0_turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32_stage_source_locations | |
.size iree_hal_executable_library_query_v0_stage_location_tables, 96 | |
# iree_hal_executable_library_query_v0: top-level 104-byte library descriptor
# that ties the tables above together.
# Layout as emitted (byte offsets):
#     0  pointer to the header record
#     8  16 zero bytes
#    24  32-bit 4 (matches the four entries in each table below) + 4 bytes padding
#    32  pointer to the entry point address table
#    40  pointer to the attribute table
#    48  pointer to the name table
#    56  null pointer
#    64  pointer to the source location table
#    72  pointer to the stage location tables
#    80  24 zero bytes
# NOTE(review): the meaning of the zeroed regions and the null pointer is not
# visible here (presumably unused import/tag/constant fields) - confirm against
# the IREE executable library header.
.type iree_hal_executable_library_query_v0,@object | |
.section .data.rel.ro.iree_hal_executable_library_query_v0,"aw",@progbits | |
.p2align 4, 0x0 | |
iree_hal_executable_library_query_v0: | |
.quad iree_hal_executable_library_query_v0_header | |
.zero 16 | |
.long 4 | |
.zero 4 | |
.quad iree_hal_executable_library_query_v0_funcs | |
.quad iree_hal_executable_library_query_v0_attrs | |
.quad iree_hal_executable_library_query_v0_names | |
.quad 0 | |
.quad iree_hal_executable_library_query_v0_source_locations | |
.quad iree_hal_executable_library_query_v0_stage_location_tables | |
.zero 4 | |
.zero 4 | |
.zero 16 | |
.size iree_hal_executable_library_query_v0, 104 | |
# __powf_log2_data: lookup data for the log2 step of powf (37 doubles, 296 bytes).
# The first 32 quads are 16 pairs; powf indexes them with a byte offset of
# i * 16, multiplying the reduced argument by the first member of the pair and
# adding the second member (at offset +8) to the exponent term. The pair
# { 1.0, 0.0 } sits at index 9.
# The last 5 quads hold the same bit patterns as powf's .LCPI27_4 .. .LCPI27_8
# polynomial literals; powf in this file reads them from the literals rather
# than from this table.
.type __powf_log2_data,@object | |
.section .rodata.__powf_log2_data,"a",@progbits | |
.p2align 3, 0x0 | |
__powf_log2_data: | |
.quad 0x3ff661ec79f8f3be | |
.quad 0xbfdefec65b963019 | |
.quad 0x3ff571ed4aaf883d | |
.quad 0xbfdb0b6832d4fca4 | |
.quad 0x3ff49539f0f010b0 | |
.quad 0xbfd7418b0a1fb77b | |
.quad 0x3ff3c995b0b80385 | |
.quad 0xbfd39de91a6dcf7b | |
.quad 0x3ff30d190c8864a5 | |
.quad 0xbfd01d9bf3f2b631 | |
.quad 0x3ff25e227b0b8ea0 | |
.quad 0xbfc97c1d1b3b7af0 | |
.quad 0x3ff1bb4a4a1a343f | |
.quad 0xbfc2f9e393af3c9f | |
.quad 0x3ff12358f08ae5ba | |
.quad 0xbfb960cbbf788d5c | |
.quad 0x3ff0953f419900a7 | |
.quad 0xbfaa6f9db6475fce | |
.quad 0x3ff0000000000000 | |
.quad 0x0000000000000000 | |
.quad 0x3fee608cfd9a47ac | |
.quad 0x3fb338ca9f24f53d | |
.quad 0x3feca4b31f026aa0 | |
.quad 0x3fc476a9543891ba | |
.quad 0x3feb2036576afce6 | |
.quad 0x3fce840b4ac4e4d2 | |
.quad 0x3fe9c2d163a1aa2d | |
.quad 0x3fd40645f0c6651c | |
.quad 0x3fe886e6037841ed | |
.quad 0x3fd88e9c2c1b9ff8 | |
.quad 0x3fe767dcf5534862 | |
.quad 0x3fdce0a44eb17bcc | |
# Polynomial coefficients (duplicates of .LCPI27_4 .. .LCPI27_8).
.quad 0x3fd27616c9496e0b | |
.quad 0xbfd71969a075c67a | |
.quad 0x3fdec70a6ca7badd | |
.quad 0xbfe7154748bef6c8 | |
.quad 0x3ff71547652ab82b | |
.size __powf_log2_data, 296 | |
# __exp2f_data: lookup data for the exponential step of expf and powf
# (41 quads, 328 bytes).
# The first 32 quads form the table both functions index with the low 5 bits of
# the rounded fixed-point exponent; the loaded entry is added as an integer to
# (exponent << 47) to build the scale factor. Entry 0 is 4607182418800017408,
# i.e. 0x3ff0000000000000, the bit pattern of 1.0.
# The trailing 9 quads hold the same bit patterns as literals used elsewhere in
# this file (.LCPI27_13, .LCPI27_15 .. .LCPI27_17, .LCPI12_5, .LCPI12_4,
# .LCPI12_7 .. .LCPI12_9); expf and powf here read the literals instead.
.type __exp2f_data,@object | |
.section .rodata.__exp2f_data,"a",@progbits | |
.p2align 3, 0x0 | |
__exp2f_data: | |
.quad 4607182418800017408 | |
.quad 4607140297302181236 | |
.quad 4607100335213349135 | |
.quad 4607062579818421073 | |
.quad 4607027079437701499 | |
.quad 4606993883449571754 | |
.quad 4606963042313658936 | |
.quad 4606934607594512097 | |
.quad 4606908631985796885 | |
.quad 4606885169335019979 | |
.quad 4606864274668794914 | |
.quad 4606846004218661165 | |
.quad 4606830415447468583 | |
.quad 4606817567076339586 | |
.quad 4606807519112221737 | |
.quad 4606800332876043653 | |
.quad 4606796071031487437 | |
.quad 4606794797614391156 | |
.quad 4606796578062795143 | |
.quad 4606801479247646227 | |
.quad 4606809569504174299 | |
.quad 4606820918663955941 | |
.quad 4606835598087680144 | |
.quad 4606853680698631517 | |
.quad 4606875241016906669 | |
.quad 4606900355194379847 | |
.quad 4606929101050434204 | |
.quad 4606961558108475497 | |
.quad 4606997807633245319 | |
.quad 4607037932668951391 | |
.quad 4607082018078232794 | |
.quad 4607130150581978432 | |
# Constants matching powf's exp2 step: rounding shift, then three coefficients.
.quad 0x42e8000000000000 | |
.quad 0x3fac6af84b912394 | |
.quad 0x3fcebfce50fac4f3 | |
.quad 0x3fe62e42ff0c52d6 | |
# Constants matching expf: rounding shift, input scale, then three coefficients.
.quad 0x4338000000000000 | |
.quad 0x40471547652b82fe | |
.quad 0x3ebc6af84b912394 | |
.quad 0x3f2ebfce50fac4f3 | |
.quad 0x3f962e42ff0c52d6 | |
.size __exp2f_data, 328 | |
# DWARF abbreviation table shared by all compile units in .debug_info
# (each unit header below references .debug_abbrev at offset 0).
# Every declaration is: abbreviation code, tag, has-children flag, then
# (attribute, form) pairs terminated by a 0, 0 pair. Numeric names below are
# taken from the DWARF v4 encoding tables.
.section .debug_abbrev,"",@progbits | |
# Abbreviation 1: DW_TAG_compile_unit (17), has children (1).
#   DW_AT_producer (37)            / DW_FORM_strp (14)
#   DW_AT_language (19)            / DW_FORM_data2 (5)
#   DW_AT_name (3)                 / DW_FORM_strp (14)
#   DW_AT_stmt_list (16)           / DW_FORM_sec_offset (23)
#   DW_AT_GNU_pubnames (0x2134)    / DW_FORM_flag_present (25)
#   DW_AT_low_pc (17)              / DW_FORM_addr (1)
#   DW_AT_high_pc (18)             / DW_FORM_data4 (6)
.byte 1 | |
.byte 17 | |
.byte 1 | |
.byte 37 | |
.byte 14 | |
.byte 19 | |
.byte 5 | |
.byte 3 | |
.byte 14 | |
.byte 16 | |
.byte 23 | |
# "\264B" is the two bytes 0xb4 0x42, the ULEB128 encoding of 0x2134.
.ascii "\264B" | |
.byte 25 | |
.byte 17 | |
.byte 1 | |
.byte 18 | |
.byte 6 | |
.byte 0 | |
.byte 0 | |
# Abbreviation 2: DW_TAG_subprogram (46), no children (0).
#   DW_AT_low_pc (17)              / DW_FORM_addr (1)
#   DW_AT_high_pc (18)             / DW_FORM_data4 (6)
#   DW_AT_frame_base (64)          / DW_FORM_exprloc (24)
#   DW_AT_linkage_name (110)       / DW_FORM_strp (14)
#   DW_AT_name (3)                 / DW_FORM_strp (14)
#   DW_AT_decl_file (58)           / DW_FORM_data1 (11)
#   DW_AT_decl_line (59)           / DW_FORM_data1 (11)
#   DW_AT_type (73)                / DW_FORM_ref4 (19)  -- unit-relative
#   DW_AT_external (63)            / DW_FORM_flag_present (25)
.byte 2 | |
.byte 46 | |
.byte 0 | |
.byte 17 | |
.byte 1 | |
.byte 18 | |
.byte 6 | |
.byte 64 | |
.byte 24 | |
.byte 110 | |
.byte 14 | |
.byte 3 | |
.byte 14 | |
.byte 58 | |
.byte 11 | |
.byte 59 | |
.byte 11 | |
.byte 73 | |
.byte 19 | |
.byte 63 | |
.byte 25 | |
.byte 0 | |
.byte 0 | |
# Abbreviation 3: DW_TAG_base_type (36), no children (0).
#   DW_AT_name (3)                 / DW_FORM_strp (14)
#   DW_AT_encoding (62)            / DW_FORM_data1 (11)
#   DW_AT_byte_size (11)           / DW_FORM_data1 (11)
.byte 3 | |
.byte 36 | |
.byte 0 | |
.byte 3 | |
.byte 14 | |
.byte 62 | |
.byte 11 | |
.byte 11 | |
.byte 11 | |
.byte 0 | |
.byte 0 | |
# Abbreviation 4: DW_TAG_subprogram (46), no children (0).
# Identical to abbreviation 2 except that DW_AT_type uses
# DW_FORM_ref_addr (16), a section-relative reference, so a subprogram in
# one unit can point at a type DIE that lives in another unit.
.byte 4 | |
.byte 46 | |
.byte 0 | |
.byte 17 | |
.byte 1 | |
.byte 18 | |
.byte 6 | |
.byte 64 | |
.byte 24 | |
.byte 110 | |
.byte 14 | |
.byte 3 | |
.byte 14 | |
.byte 58 | |
.byte 11 | |
.byte 59 | |
.byte 11 | |
.byte 73 | |
.byte 16 | |
.byte 63 | |
.byte 25 | |
.byte 0 | |
.byte 0 | |
# Final 0 terminates the abbreviation table.
.byte 0 | |
# Compile unit 0 in .debug_info. Besides the unit DIE it holds one
# subprogram DIE and the "int" base-type DIE at unit offset 67.
.section .debug_info,"",@progbits | |
.Lcu_begin0: | |
# Unit header: length (excluding this field), DWARF version 4, offset of the
# abbreviation table, address size 8.
.long .Ldebug_info_end0-.Ldebug_info_start0 | |
.Ldebug_info_start0: | |
.short 4 | |
.long .debug_abbrev | |
.byte 8 | |
# DIE using abbreviation 1 (compile unit): producer string, language code 44,
# name string, line-table offset, low_pc, and high_pc given as a byte length.
# NOTE(review): language code 44 (0x2c) is presumably DW_LANG_C17 -- confirm
# against the current DWARF language-code list.
.byte 1 | |
.long .Linfo_string0 | |
.short 44 | |
.long .Linfo_string1 | |
.long .Lline_table_start0 | |
.quad .Lfunc_begin0 | |
.long .Lfunc_end0-.Lfunc_begin0 | |
# DIE using abbreviation 2 (subprogram): low_pc and length of the function.
.byte 2 | |
.quad .Lfunc_begin0 | |
.long .Lfunc_end0-.Lfunc_begin0 | |
# Frame base: a 1-byte location expression, opcode 86 (0x56 = DW_OP_reg6).
.byte 1 | |
.byte 86 | |
# Linkage name and name both point at the same string.
.long .Linfo_string2 | |
.long .Linfo_string2 | |
# decl_file = 1, decl_line = 1.
.byte 1 | |
.byte 1 | |
# Type: unit-relative offset 67, i.e. the base-type DIE that follows.
.long 67 | |
# DIE using abbreviation 3 (base type): name string, encoding 5
# (DW_ATE_signed), byte size 4.
.byte 3 | |
.long .Linfo_string3 | |
.byte 5 | |
.byte 4 | |
# End of the compile unit DIE's children.
.byte 0 | |
.Ldebug_info_end0: | |
# Compile unit 1 in .debug_info: one compile-unit DIE with a single
# subprogram child describing the code between .Lfunc_begin1 and .Lfunc_end1.
.Lcu_begin1: | |
# Unit header: length (excluding this field), DWARF version 4, offset of the
# abbreviation table, address size 8.
.long .Ldebug_info_end1-.Ldebug_info_start1 | |
.Ldebug_info_start1: | |
.short 4 | |
.long .debug_abbrev | |
.byte 8 | |
# DIE using abbreviation 1 (compile unit). It references the same
# .Lline_table_start0 label as the other units in this file.
.byte 1 | |
.long .Linfo_string0 | |
.short 44 | |
.long .Linfo_string1 | |
.long .Lline_table_start0 | |
.quad .Lfunc_begin1 | |
.long .Lfunc_end1-.Lfunc_begin1 | |
# DIE using abbreviation 4 (subprogram with a section-relative type ref).
.byte 4 | |
.quad .Lfunc_begin1 | |
.long .Lfunc_end1-.Lfunc_begin1 | |
# Frame base: a 1-byte location expression, opcode 86 (0x56 = DW_OP_reg6).
.byte 1 | |
.byte 86 | |
# Linkage name and name both point at the same string.
.long .Linfo_string4 | |
.long .Linfo_string4 | |
# decl_file = 1, decl_line = 1.
.byte 1 | |
.byte 1 | |
# Type: byte 67 from the start of .debug_info, which is the base-type DIE
# inside the first compile unit.
.long .debug_info+67 | |
# End of the compile unit DIE's children.
.byte 0 | |
.Ldebug_info_end1: | |
# Compile unit 2 in .debug_info: one compile-unit DIE with a single
# subprogram child describing the code between .Lfunc_begin2 and .Lfunc_end2.
.Lcu_begin2: | |
# Unit header: length (excluding this field), DWARF version 4, offset of the
# abbreviation table, address size 8.
.long .Ldebug_info_end2-.Ldebug_info_start2 | |
.Ldebug_info_start2: | |
.short 4 | |
.long .debug_abbrev | |
.byte 8 | |
# DIE using abbreviation 1 (compile unit). It references the same
# .Lline_table_start0 label as the other units in this file.
.byte 1 | |
.long .Linfo_string0 | |
.short 44 | |
.long .Linfo_string1 | |
.long .Lline_table_start0 | |
.quad .Lfunc_begin2 | |
.long .Lfunc_end2-.Lfunc_begin2 | |
# DIE using abbreviation 4 (subprogram with a section-relative type ref).
.byte 4 | |
.quad .Lfunc_begin2 | |
.long .Lfunc_end2-.Lfunc_begin2 | |
# Frame base: a 1-byte location expression, opcode 86 (0x56 = DW_OP_reg6).
.byte 1 | |
.byte 86 | |
# Linkage name and name both point at the same string.
.long .Linfo_string5 | |
.long .Linfo_string5 | |
# decl_file = 1, decl_line = 1.
.byte 1 | |
.byte 1 | |
# Type: byte 67 from the start of .debug_info, which is the base-type DIE
# inside the first compile unit.
.long .debug_info+67 | |
# End of the compile unit DIE's children.
.byte 0 | |
.Ldebug_info_end2: | |
# Compile unit 3 in .debug_info: one compile-unit DIE with a single
# subprogram child describing the code between .Lfunc_begin3 and .Lfunc_end3.
.Lcu_begin3: | |
# Unit header: length (excluding this field), DWARF version 4, offset of the
# abbreviation table, address size 8.
.long .Ldebug_info_end3-.Ldebug_info_start3 | |
.Ldebug_info_start3: | |
.short 4 | |
.long .debug_abbrev | |
.byte 8 | |
# DIE using abbreviation 1 (compile unit). It references the same
# .Lline_table_start0 label as the other units in this file.
.byte 1 | |
.long .Linfo_string0 | |
.short 44 | |
.long .Linfo_string1 | |
.long .Lline_table_start0 | |
.quad .Lfunc_begin3 | |
.long .Lfunc_end3-.Lfunc_begin3 | |
# DIE using abbreviation 4 (subprogram with a section-relative type ref).
.byte 4 | |
.quad .Lfunc_begin3 | |
.long .Lfunc_end3-.Lfunc_begin3 | |
# Frame base: a 1-byte location expression, opcode 86 (0x56 = DW_OP_reg6).
.byte 1 | |
.byte 86 | |
# Linkage name and name both point at the same string.
.long .Linfo_string6 | |
.long .Linfo_string6 | |
# decl_file = 1, decl_line = 1.
.byte 1 | |
.byte 1 | |
# Type: byte 67 from the start of .debug_info, which is the base-type DIE
# inside the first compile unit.
.long .debug_info+67 | |
# End of the compile unit DIE's children.
.byte 0 | |
.Ldebug_info_end3: | |
# DWARF string table. The section is flagged "MS" with entry size 1, i.e.
# mergeable NUL-terminated strings. The .Linfo_stringN labels are the targets
# of the DW_FORM_strp references emitted in .debug_info.
.section .debug_str,"MS",@progbits,1 | |
# Producer string of every compile unit.
.Linfo_string0: | |
.asciz "IREE" | |
# Compile-unit name of every compile unit.
.Linfo_string1: | |
.asciz "-" | |
# Name / linkage name of the subprogram in compile unit 0.
.Linfo_string2: | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32" | |
# Name of the base type DIE in compile unit 0.
.Linfo_string3: | |
.asciz "int" | |
# Name / linkage name of the subprogram in compile unit 1.
.Linfo_string4: | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack" | |
# Name / linkage name of the subprogram in compile unit 2.
.Linfo_string5: | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32" | |
# Name / linkage name of the subprogram in compile unit 3.
.Linfo_string6: | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32" | |
# Name lookup set for compile unit 0.
# Header: set length (excluding this field), version 2, offset of the unit in
# .debug_info, and the size of that unit (75 bytes).
# Body: (DIE offset within the unit, NUL-terminated name) entries, closed by
# a zero offset.
.section .debug_pubnames,"",@progbits | |
.long .LpubNames_end0-.LpubNames_start0 | |
.LpubNames_start0: | |
.short 2 | |
.long .Lcu_begin0 | |
.long 75 | |
# Unit offset 38 is the subprogram DIE.
.long 38 | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32" | |
.long 0 | |
.LpubNames_end0: | |
# Type lookup set for compile unit 0; same layout as the name set above.
.section .debug_pubtypes,"",@progbits | |
.long .LpubTypes_end0-.LpubTypes_start0 | |
.LpubTypes_start0: | |
.short 2 | |
.long .Lcu_begin0 | |
.long 75 | |
# Unit offset 67 is the "int" base-type DIE.
.long 67 | |
.asciz "int" | |
.long 0 | |
.LpubTypes_end0: | |
# Name lookup set for compile unit 1.
# Header: set length (excluding this field), version 2, offset of the unit in
# .debug_info, and the size of that unit (68 bytes).
# Body: (DIE offset within the unit, NUL-terminated name) entries, closed by
# a zero offset.
.section .debug_pubnames,"",@progbits | |
.long .LpubNames_end1-.LpubNames_start1 | |
.LpubNames_start1: | |
.short 2 | |
.long .Lcu_begin1 | |
.long 68 | |
# Unit offset 38 is the subprogram DIE.
.long 38 | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_1_generic_Dx8640x3200_f16_pack" | |
.long 0 | |
.LpubNames_end1: | |
# Type lookup set for compile unit 1. It has a header only: the unit defines
# no types, so the terminating zero offset follows immediately.
.section .debug_pubtypes,"",@progbits | |
.long .LpubTypes_end1-.LpubTypes_start1 | |
.LpubTypes_start1: | |
.short 2 | |
.long .Lcu_begin1 | |
.long 68 | |
.long 0 | |
.LpubTypes_end1: | |
# Name lookup set for compile unit 2.
# Header: set length (excluding this field), version 2, offset of the unit in
# .debug_info, and the size of that unit (68 bytes).
# Body: (DIE offset within the unit, NUL-terminated name) entries, closed by
# a zero offset.
.section .debug_pubnames,"",@progbits | |
.long .LpubNames_end2-.LpubNames_start2 | |
.LpubNames_start2: | |
.short 2 | |
.long .Lcu_begin2 | |
.long 68 | |
# Unit offset 38 is the subprogram DIE.
.long 38 | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32" | |
.long 0 | |
.LpubNames_end2: | |
# Type lookup set for compile unit 2. It has a header only: the unit defines
# no types, so the terminating zero offset follows immediately.
.section .debug_pubtypes,"",@progbits | |
.long .LpubTypes_end2-.LpubTypes_start2 | |
.LpubTypes_start2: | |
.short 2 | |
.long .Lcu_begin2 | |
.long 68 | |
.long 0 | |
.LpubTypes_end2: | |
# Name lookup set for compile unit 3.
# Header: set length (excluding this field), version 2, offset of the unit in
# .debug_info, and the size of that unit (68 bytes).
# Body: (DIE offset within the unit, NUL-terminated name) entries, closed by
# a zero offset.
.section .debug_pubnames,"",@progbits | |
.long .LpubNames_end3-.LpubNames_start3 | |
.LpubNames_start3: | |
.short 2 | |
.long .Lcu_begin3 | |
.long 68 | |
# Unit offset 38 is the subprogram DIE.
.long 38 | |
.asciz "turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_3_unpack_f32" | |
.long 0 | |
.LpubNames_end3: | |
# Type lookup set for compile unit 3. It has a header only: the unit defines
# no types, so the terminating zero offset follows immediately.
.section .debug_pubtypes,"",@progbits | |
.long .LpubTypes_end3-.LpubTypes_start3 | |
.LpubTypes_start3: | |
.short 2 | |
.long .Lcu_begin3 | |
.long 68 | |
.long 0 | |
.LpubTypes_end3: | |
# Empty .note.GNU-stack section with no flags. By GNU toolchain convention
# this marks the object as not needing an executable stack.
.section ".note.GNU-stack","",@progbits | |
# Start of the line-number table. .Lline_table_start0 is the label every
# compile unit's DW_AT_stmt_list refers to. No data is emitted here
# explicitly; presumably the assembler fills the section from the .loc
# directives in the code -- confirm.
.section .debug_line,"",@progbits | |
.Lline_table_start0: |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment