# Source: GitHub Gist pashu123/5e8bd947aac028c636bd79b0d66c6d4e
# Author: @pashu123. Created April 24, 2024 11:25.
.text
.intel_syntax noprefix
.file "pack_dispatch_0"
# Lane-index tables for the two-source word permutes (vpermt2w) in
# pack_dispatch_0_pack_f16. An index below 16 selects a word of the first
# source, an index of 16 or more selects word (index - 16) of the second.
#
# .LCPI0_0 / .LCPI0_1 hold the indices as 16-bit words. No instruction visible
# in this file references them: the function loads the byte-sized copies
# (.LCPI0_2 / .LCPI0_3) and widens them with vpmovsxbw instead.
.section .rodata.cst32,"aM",@progbits,32
.p2align 5, 0x0
# Interleave word pairs {0,1},{4,5},{8,9},{12,13} of the two sources.
.LCPI0_0:
.short 0
.short 16
.short 1
.short 17
.short 4
.short 20
.short 5
.short 21
.short 8
.short 24
.short 9
.short 25
.short 12
.short 28
.short 13
.short 29
# Interleave word pairs {2,3},{6,7},{10,11},{14,15} of the two sources.
.LCPI0_1:
.short 2
.short 18
.short 3
.short 19
.short 6
.short 22
.short 7
.short 23
.short 10
.short 26
.short 11
.short 27
.short 14
.short 30
.short 15
.short 31
.section .rodata.cst16,"aM",@progbits,16
# Byte-sized copy of the .LCPI0_0 pattern; sign-extended into ymm0.
.LCPI0_2:
.byte 0
.byte 16
.byte 1
.byte 17
.byte 4
.byte 20
.byte 5
.byte 21
.byte 8
.byte 24
.byte 9
.byte 25
.byte 12
.byte 28
.byte 13
.byte 29
# Byte-sized copy of the .LCPI0_1 pattern; sign-extended into ymm1.
.LCPI0_3:
.byte 2
.byte 18
.byte 3
.byte 19
.byte 6
.byte 22
.byte 7
.byte 23
.byte 10
.byte 26
.byte 11
.byte 27
.byte 14
.byte 30
.byte 15
.byte 31
.section .text.pack_dispatch_0_pack_f16,"ax",@progbits
.p2align 4, 0x90
.type pack_dispatch_0_pack_f16,@function
# ---------------------------------------------------------------------------
# pack_dispatch_0_pack_f16
#
# Compiler-generated tile-repacking kernel. Reads 16 rows of 16 16-bit
# elements at a time from one buffer (row stride 6400 bytes), permutes them in
# registers and writes each result as one contiguous 512-byte tile to a second
# buffer.
#
# In:    rsi = pointer to a parameter block (fields at +12, +16, +20, +24, +32
#              are read), rdx = pointer to a second block (fields at +0, +4,
#              +8 are read). rdi is overwritten before being read.
#        NOTE(review): the offsets look like IREE's dispatch-state and
#        workgroup-state structs (workgroup counts / ids, push constants,
#        binding pointers) - confirm against the IREE HAL executable ABI.
# Out:   eax = 0
# Uses:  ymm16..ymm23 and vpermt2w, so AVX-512VL/BW encodings are required.
# Stack: leaf function (no calls). Only 16 bytes are reserved below the saved
#        registers; the spill slots from [rbp-64] down to [rbp-184] lie below
#        rsp, i.e. in the red zone.
#
# Spill slots (all qword):
#   [rbp-48]  current outer index (starts at word[rdx+8] * 64)
#   [rbp-72]  outer bound, assembled from two dwords at [[rsi+24]]
#   [rbp-56]  source cursor       [rbp-64]  destination cursor
#   [rbp-88]  source (mid loop)   [rbp-80]  destination (mid loop)
#   [rbp-104] outer index step    [rbp-112] outer byte step
#   [rbp-120] dword[rdx+4]        [rbp-160] dword[rdx]
#   [rbp-96]  mid-loop start      [rbp-152] mid-loop step
#   [rbp-144] mid-loop byte step  [rbp-168] mid-loop counter
#   [rbp-136] inner start         [rbp-184] inner step
#   [rbp-176] inner destination byte step (r8 holds the source byte step)
#   [rbp-128] remaining outer count
# ---------------------------------------------------------------------------
pack_dispatch_0_pack_f16:
.Lfunc_begin0:
.file 1 "-"
.loc 1 1 0
.cfi_startproc
push rbp
.cfi_def_cfa_offset 16
.cfi_offset rbp, -16
mov rbp, rsp
.cfi_def_cfa_register rbp
.Ltmp0:
push r15
push r14
push r13
push r12
push rbx
sub rsp, 16
.cfi_offset rbx, -56
.cfi_offset r12, -48
.cfi_offset r13, -40
.cfi_offset r14, -32
.cfi_offset r15, -24
.loc 1 4 3 prologue_end
# Build the outer loop range: start = word[rdx+8] * 64, bound = 64-bit value
# formed from the two dwords at [[rsi+24]] (high dword shifted left by 32).
mov rcx, qword ptr [rsi + 24]
movzx edi, word ptr [rdx + 8]
mov r10d, dword ptr [rcx + 4]
mov eax, dword ptr [rcx]
mov r8d, edi
shl r8d, 6
mov qword ptr [rbp - 48], r8
shl r10, 32
lea rcx, [r10 + rax]
mov qword ptr [rbp - 72], rcx
# Nothing to do when start >= bound (signed compare).
cmp r8, rcx
jge .LBB0_16
.loc 1 0 3 is_stmt 0
# Load the remaining parameters and precompute every loop step and base
# pointer. Constants: 6144000 = 60 * 102400, 55296000 = 9 * 6144000 and
# 3538944000 = 64 * 55296000.
mov ebx, dword ptr [rdx]
mov edx, dword ptr [rdx + 4]
mov r8, qword ptr [rsi + 32]
movzx r9d, word ptr [rsi + 20]
mov r14d, 3538944000
.loc 1 4 3
or r10, rax
mov r11d, dword ptr [rsi + 12]
mov ecx, dword ptr [rsi + 16]
# ymm0 / ymm1 = word permute indices widened from the byte tables.
vpmovsxbw ymm0, xmmword ptr [rip + .LCPI0_2]
vpmovsxbw ymm1, xmmword ptr [rip + .LCPI0_3]
# r10 = bound - start = number of outer indices still to cover.
sub r10, qword ptr [rbp - 48]
imul rdi, r14
imul rax, rdx, 6144000
# rsi = first pointer of the table at [rsi+32] (the buffer that is loaded
# from), r8 = second pointer (the buffer that is stored to).
mov rsi, qword ptr [r8]
mov r8, qword ptr [r8 + 8]
imul r14, r9
shl r9d, 6
mov qword ptr [rbp - 120], rdx
mov qword ptr [rbp - 160], rbx
mov qword ptr [rbp - 104], r9
imul r9, rdx, 60
mov rdx, rbx
shl rdx, 12
add rax, rdi
mov rdi, r11
shl rdi, 7
mov qword ptr [rbp - 112], r14
add rdx, rax
mov qword ptr [rbp - 96], r9
imul r9, rcx, 60
imul rcx, rcx, 6144000
mov qword ptr [rbp - 184], rdi
# Destination cursor, biased by +480 so the 16 stores below can address
# [r15-480] .. [r15].
lea rdx, [r8 + rdx + 480]
mov qword ptr [rbp - 64], rdx
mov rdx, rbx
shl rdx, 8
mov qword ptr [rbp - 152], r9
mov r9, rbx
shl r9, 7
mov qword ptr [rbp - 144], rcx
mov rcx, r11
shl rcx, 12
shl r11, 8
add rdx, rax
mov r8, r11
mov qword ptr [rbp - 136], r9
mov qword ptr [rbp - 176], rcx
# Source cursor, biased by +96000 so the 16 row loads below can use
# displacements from -95968 up to +32.
lea rdx, [rsi + rdx + 96000]
mov qword ptr [rbp - 56], rdx
jmp .LBB0_2
.p2align 4, 0x90
# Outer-loop latch: advance both cursors and the index, shrink the remaining
# count, and exit once the index reaches the bound.
.LBB0_15:
.loc 1 0 3
mov rdx, qword ptr [rbp - 112]
mov rax, qword ptr [rbp - 48]
mov rcx, qword ptr [rbp - 104]
mov r10, qword ptr [rbp - 128]
.loc 1 4 3
add qword ptr [rbp - 64], rdx
add qword ptr [rbp - 56], rdx
add rax, rcx
sub r10, rcx
mov qword ptr [rbp - 48], rax
cmp rax, qword ptr [rbp - 72]
jge .LBB0_16
# Outer-loop body: rdx = clamp(remaining, 1, 64) is the trip count of the
# .LBB0_8 loop. The whole body is skipped when dword[rbp-120] > 8 (unsigned).
.LBB0_2:
cmp r10, 64
mov edx, 64
mov eax, 1
mov qword ptr [rbp - 128], r10
cmovl rdx, r10
cmp rdx, 2
cmovl rdx, rax
cmp dword ptr [rbp - 120], 8
ja .LBB0_15
.loc 1 0 3
mov rcx, qword ptr [rbp - 64]
mov r9, qword ptr [rbp - 72]
mov rax, qword ptr [rbp - 56]
.loc 1 4 3
# r9 = bound - current index; the tile loops run only when it is positive.
sub r9, qword ptr [rbp - 48]
mov qword ptr [rbp - 80], rcx
mov rcx, qword ptr [rbp - 96]
mov qword ptr [rbp - 88], rax
jmp .LBB0_4
.p2align 4, 0x90
# Middle-loop latch: counter advances by [rbp-152] until it reaches 540.
.LBB0_14:
.loc 1 0 3
mov rax, qword ptr [rbp - 144]
mov rcx, qword ptr [rbp - 168]
.loc 1 4 3
add rcx, qword ptr [rbp - 152]
add qword ptr [rbp - 80], rax
add qword ptr [rbp - 88], rax
cmp rcx, 540
jge .LBB0_15
# Middle-loop body: skipped when dword[rbp-160] > 24 (unsigned).
.LBB0_4:
.loc 1 0 3
cmp dword ptr [rbp - 160], 24
mov qword ptr [rbp - 168], rcx
.loc 1 4 3
ja .LBB0_14
.loc 1 0 3
mov rsi, qword ptr [rbp - 88]
mov r12, qword ptr [rbp - 80]
mov r14, qword ptr [rbp - 136]
jmp .LBB0_6
.p2align 4, 0x90
# Inner-loop latch: r14 advances by [rbp-184] until it reaches 3200; the
# destination moves by [rbp-176] bytes and the source by r8 bytes.
.LBB0_13:
.loc 1 4 3
add r14, qword ptr [rbp - 184]
add r12, qword ptr [rbp - 176]
add rsi, r8
cmp r14, 3200
jge .LBB0_14
.LBB0_6:
.loc 1 0 3
test r9, r9
.loc 1 4 3
jle .LBB0_13
.loc 1 0 3
mov rdi, rsi
mov rbx, r12
xor r13d, r13d
.p2align 4, 0x90
# Loop over r13 = 0 .. rdx-1; both pointers step by 55296000 bytes.
.LBB0_8:
mov r11, rdi
mov rax, rbx
xor r10d, r10d
.p2align 4, 0x90
# Loop over r10 = 0 .. 59; both pointers step by 102400 bytes (16 source rows).
.LBB0_9:
mov rcx, -16
mov r15, rax
.p2align 4, 0x90
# Tile loop: eight iterations (rcx = -16, 0, ..., 96). Each one loads 16 ymm
# vectors from 16 consecutive source rows (6400 bytes apart), permutes them and
# stores 16 ymm vectors to 512 contiguous destination bytes.
# NOTE(review): the shuffle network looks like a 16x16 transpose of 16-bit
# elements; it was not verified lane by lane.
# The aligned moves (vmovdqa / vmovapd) fault unless both buffers keep these
# addresses 32-byte aligned.
.LBB0_10:
.loc 1 4 3
vmovdqa ymm3, ymmword ptr [r11 + 2*rcx - 95968]
vmovdqa ymm5, ymmword ptr [r11 + 2*rcx - 83168]
vmovdqa ymm4, ymmword ptr [r11 + 2*rcx - 89568]
vmovdqa ymm6, ymmword ptr [r11 + 2*rcx - 76768]
vmovdqa ymm7, ymmword ptr [r11 + 2*rcx - 70368]
vmovdqa ymm8, ymmword ptr [r11 + 2*rcx - 63968]
vmovdqa ymm9, ymmword ptr [r11 + 2*rcx - 57568]
vmovdqa ymm10, ymmword ptr [r11 + 2*rcx - 51168]
vmovdqa ymm11, ymmword ptr [r11 + 2*rcx - 44768]
vmovdqa ymm12, ymmword ptr [r11 + 2*rcx - 38368]
vmovdqa ymm13, ymmword ptr [r11 + 2*rcx - 31968]
vmovdqa ymm14, ymmword ptr [r11 + 2*rcx - 25568]
vmovdqa ymm2, ymmword ptr [r11 + 2*rcx - 19168]
vmovdqa ymm15, ymmword ptr [r11 + 2*rcx - 12768]
vmovdqa64 ymm16, ymmword ptr [r11 + 2*rcx - 6368]
vmovdqa64 ymm17, ymmword ptr [r11 + 2*rcx + 32]
add rcx, 16
# Stage 1: interleave word pairs of adjacent rows (indices in ymm0 / ymm1).
vmovdqa64 ymm18, ymm3
vmovdqa64 ymm20, ymm5
vpermt2w ymm18, ymm0, ymm4
vpermt2w ymm3, ymm1, ymm4
vpermt2w ymm20, ymm0, ymm6
vpermt2w ymm5, ymm1, ymm6
vmovdqa ymm6, ymm7
vpermt2w ymm6, ymm0, ymm8
vpermt2w ymm7, ymm1, ymm8
vmovdqa ymm8, ymm9
vpermt2w ymm8, ymm0, ymm10
vpermt2w ymm9, ymm1, ymm10
vmovdqa ymm10, ymm11
vpermt2w ymm10, ymm0, ymm12
vpermt2w ymm11, ymm1, ymm12
vmovdqa ymm12, ymm13
vpermt2w ymm12, ymm0, ymm14
vpermt2w ymm13, ymm1, ymm14
vmovdqa ymm14, ymm2
vpermt2w ymm14, ymm0, ymm15
vpermt2w ymm2, ymm1, ymm15
vmovdqa64 ymm15, ymm16
vpermt2w ymm16, ymm1, ymm17
vpermt2w ymm15, ymm0, ymm17
# Stage 2: 32-bit, 64-bit and 128-bit shuffles that regroup the interleaved
# data into the 16 output vectors.
vshufps ymm17, ymm18, ymm20, 136
vshufps ymm18, ymm18, ymm20, 221
vshufps ymm20, ymm3, ymm5, 136
vshufps ymm5, ymm3, ymm5, 221
vshufps ymm3, ymm6, ymm8, 136
vshufps ymm21, ymm7, ymm9, 136
vshufps ymm22, ymm7, ymm9, 221
vshufps ymm7, ymm11, ymm13, 136
vshufps ymm4, ymm11, ymm13, 221
vshufps ymm11, ymm2, ymm16, 136
vshufps ymm9, ymm2, ymm16, 221
vshufps ymm6, ymm6, ymm8, 221
vshufps ymm23, ymm10, ymm12, 136
vshufps ymm10, ymm10, ymm12, 221
vshufps ymm12, ymm14, ymm15, 136
vshufps ymm13, ymm14, ymm15, 221
vshufps ymm19, ymm17, ymm17, 216
vshufps ymm2, ymm3, ymm3, 216
vinsertf64x4 zmm8, zmm17, ymm3, 1
vinsertf64x4 zmm15, zmm18, ymm6, 1
vshufps ymm14, ymm18, ymm18, 216
vshufps ymm6, ymm6, ymm6, 216
vshufps ymm17, ymm21, ymm21, 216
vinsertf64x4 zmm18, zmm5, ymm22, 1
vinsertf64x4 zmm16, zmm20, ymm21, 1
vshuff64x2 ymm3, ymm19, ymm2, 3
vinsertf32x4 ymm2, ymm19, xmm2, 1
vshufps ymm19, ymm22, ymm22, 216
vpmovqd ymm8, zmm8
vpmovqd ymm15, zmm15
vpmovqd ymm16, zmm16
vunpckhpd ymm2, ymm2, ymm3
vperm2f128 ymm3, ymm14, ymm6, 49
vinsertf128 ymm6, ymm14, xmm6, 1
vshufps ymm14, ymm20, ymm20, 216
vunpckhpd ymm3, ymm6, ymm3
vshuff64x2 ymm6, ymm14, ymm17, 3
vinsertf32x4 ymm14, ymm14, xmm17, 1
vshufps ymm17, ymm5, ymm5, 216
vunpckhpd ymm5, ymm14, ymm6
vshuff64x2 ymm6, ymm17, ymm19, 3
vinsertf32x4 ymm14, ymm17, xmm19, 1
vshufps ymm17, ymm23, ymm23, 216
vunpckhpd ymm6, ymm14, ymm6
vshufps ymm14, ymm12, ymm12, 216
vinsertf64x4 zmm12, zmm23, ymm12, 1
vshuff64x2 ymm19, ymm17, ymm14, 3
vinsertf32x4 ymm14, ymm17, xmm14, 1
vshufps ymm17, ymm10, ymm10, 216
vinsertf64x4 zmm10, zmm10, ymm13, 1
vshufps ymm13, ymm13, ymm13, 216
vpmovqd ymm12, zmm12
vunpckhpd ymm14, ymm14, ymm19
vshuff64x2 ymm19, ymm17, ymm13, 3
vinsertf32x4 ymm13, ymm17, xmm13, 1
vshufps ymm17, ymm7, ymm7, 216
vinsertf64x4 zmm7, zmm7, ymm11, 1
vshufps ymm11, ymm11, ymm11, 216
vpmovqd ymm10, zmm10
vunpckhpd ymm13, ymm13, ymm19
vshuff64x2 ymm19, ymm17, ymm11, 3
vinsertf32x4 ymm11, ymm17, xmm11, 1
vshufps ymm17, ymm4, ymm4, 216
vinsertf64x4 zmm4, zmm4, ymm9, 1
vshufps ymm9, ymm9, ymm9, 216
vpmovqd ymm7, zmm7
vunpckhpd ymm11, ymm11, ymm19
vshuff64x2 ymm19, ymm17, ymm9, 3
vinsertf32x4 ymm9, ymm17, xmm9, 1
vpmovqd ymm17, zmm18
vpmovqd ymm4, zmm4
vshuff64x2 ymm18, ymm8, ymm12, 3
vinsertf128 ymm8, ymm8, xmm12, 1
vperm2f128 ymm12, ymm15, ymm10, 49
vinsertf128 ymm10, ymm15, xmm10, 1
vunpckhpd ymm9, ymm9, ymm19
vshuff64x2 ymm15, ymm16, ymm7, 3
vinsertf32x4 ymm7, ymm16, xmm7, 1
vshuff64x2 ymm16, ymm17, ymm4, 3
vinsertf32x4 ymm4, ymm17, xmm4, 1
vshuff64x2 ymm17, ymm2, ymm14, 3
vinsertf128 ymm2, ymm2, xmm14, 1
vperm2f128 ymm14, ymm3, ymm13, 49
vinsertf128 ymm3, ymm3, xmm13, 1
vperm2f128 ymm13, ymm5, ymm11, 49
vinsertf128 ymm5, ymm5, xmm11, 1
vperm2f128 ymm11, ymm6, ymm9, 49
vinsertf128 ymm6, ymm6, xmm9, 1
vunpcklpd ymm9, ymm8, ymm18
vunpckhpd ymm8, ymm8, ymm18
vunpcklpd ymm18, ymm10, ymm12
vunpckhpd ymm10, ymm10, ymm12
vunpcklpd ymm12, ymm7, ymm15
vunpckhpd ymm7, ymm7, ymm15
# Stage 3: write the 16 result vectors to [r15-480] .. [r15] (512 bytes).
vmovapd ymmword ptr [r15 - 480], ymm9
vmovapd ymmword ptr [r15 - 448], ymm18
vmovapd ymmword ptr [r15 - 416], ymm12
vunpcklpd ymm15, ymm4, ymm16
vunpckhpd ymm4, ymm4, ymm16
vunpcklpd ymm16, ymm2, ymm17
vunpckhpd ymm2, ymm2, ymm17
vunpcklpd ymm17, ymm3, ymm14
vunpckhpd ymm3, ymm3, ymm14
vunpcklpd ymm14, ymm5, ymm13
vunpckhpd ymm5, ymm5, ymm13
vunpcklpd ymm13, ymm6, ymm11
vunpckhpd ymm6, ymm6, ymm11
vmovapd ymmword ptr [r15 - 384], ymm15
vmovapd ymmword ptr [r15 - 352], ymm16
vmovapd ymmword ptr [r15 - 320], ymm17
vmovapd ymmword ptr [r15 - 288], ymm14
vmovapd ymmword ptr [r15 - 256], ymm13
vmovapd ymmword ptr [r15 - 224], ymm8
vmovapd ymmword ptr [r15 - 192], ymm10
vmovapd ymmword ptr [r15 - 160], ymm7
vmovapd ymmword ptr [r15 - 128], ymm4
vmovapd ymmword ptr [r15 - 96], ymm2
vmovapd ymmword ptr [r15 - 64], ymm3
vmovapd ymmword ptr [r15 - 32], ymm5
vmovapd ymmword ptr [r15], ymm6
# Next destination tile; rcx was already advanced by 16 elements above.
add r15, 512
cmp rcx, 112
jb .LBB0_10
inc r10
add rax, 102400
add r11, 102400
cmp r10, 60
jne .LBB0_9
inc r13
add rbx, 55296000
add rdi, 55296000
cmp r13, rdx
jne .LBB0_8
jmp .LBB0_13
# Common exit: return 0, restore callee-saved registers, and clear the upper
# vector state before returning.
.LBB0_16:
xor eax, eax
.loc 1 4 3 epilogue_begin
add rsp, 16
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
.cfi_def_cfa rsp, 8
vzeroupper
ret
.Ltmp1:
.Lfunc_end0:
.size pack_dispatch_0_pack_f16, .Lfunc_end0-pack_dispatch_0_pack_f16
.cfi_endproc
.section .text.iree_hal_executable_library_query,"ax",@progbits
.globl iree_hal_executable_library_query
.p2align 4, 0x90
.type iree_hal_executable_library_query,@function
# ---------------------------------------------------------------------------
# iree_hal_executable_library_query
# In:    edi = requested value; only the value 4 is accepted.
#        Any further arguments are ignored.
# Out:   rax = address of iree_hal_executable_library_query_v0 when edi == 4,
#        otherwise 0.
# Clobb: rdx, flags
# ---------------------------------------------------------------------------
iree_hal_executable_library_query:
.Liree_hal_executable_library_query$local:
.type .Liree_hal_executable_library_query$local,@function
.Lfunc_begin1:
.cfi_startproc
# Assume success, then replace the result with null on a mismatch.
lea rax, [rip + iree_hal_executable_library_query_v0]
xor edx, edx
cmp edi, 4
cmovne rax, rdx
ret
.Lfunc_end1:
.size iree_hal_executable_library_query, .Lfunc_end1-iree_hal_executable_library_query
.size .Liree_hal_executable_library_query$local, .Lfunc_end1-iree_hal_executable_library_query
.cfi_endproc
.section .text.iree_h2f_ieee,"ax",@progbits
.p2align 4, 0x90
.type iree_h2f_ieee,@function
# ---------------------------------------------------------------------------
# iree_h2f_ieee: widen a 16-bit half-precision bit pattern to a 32-bit float.
# In:    edi = half bits in the low 16 bits (bit 15 sign, 0x7C00 exponent,
#              0x03FF mantissa)
# Out:   xmm0 = float value
# Clobb: eax, ecx, edx, edi, xmm1, flags
# ---------------------------------------------------------------------------
iree_h2f_ieee:
.Lfunc_begin2:
.cfi_startproc
# eax = sign moved to bit 31, ecx = mantissa, dx = exponent field.
mov eax, edi
and eax, 32768
mov edx, edi
mov ecx, edi
and ecx, 1023
shl eax, 16
and dx, 31744
# Exponent field 0: zero or subnormal.
je .LBB2_6
and edi, 31744
cmp edi, 31744
# Exponent field not all ones: normal number.
jne .LBB2_5
test cx, cx
# All-ones exponent with zero mantissa: infinity.
je .LBB2_4
# NaN: always returns sign | 0x7FC00000, so the input payload is dropped.
or eax, 2143289344
vmovd xmm0, eax
ret
# Zero / subnormal: value = mantissa * (+/-)2^-24 (0x33800000 is 2^-24).
.LBB2_6:
movzx ecx, cx
or eax, 864026624
vcvtsi2ss xmm0, xmm0, ecx
vmovd xmm1, eax
vmulss xmm0, xmm0, xmm1
ret
# Normal: shift exponent and mantissa into float position and rebias the
# exponent by adding 0x38000000 ((127 - 15) << 23).
.LBB2_5:
movzx ecx, cx
movzx edx, dx
add edx, ecx
shl edx, 13
lea eax, [rdx + rax + 939524096]
vmovd xmm0, eax
ret
# Infinity: sign | 0x7F800000.
.LBB2_4:
or eax, 2139095040
vmovd xmm0, eax
ret
.Lfunc_end2:
.size iree_h2f_ieee, .Lfunc_end2-iree_h2f_ieee
.cfi_endproc
.section .text.iree_f2h_ieee,"ax",@progbits
.p2align 4, 0x90
.type iree_f2h_ieee,@function
# ---------------------------------------------------------------------------
# iree_f2h_ieee: narrow a 32-bit float to a 16-bit half-precision bit pattern.
# In:    xmm0 = float value
# Out:   eax  = half bits in the low 16 bits
# Clobb: ecx, edx, esi, edi, r8d, flags
#
# Rounding adds half a unit in the last place to the magnitude and truncates,
# so exact ties round away from zero rather than to even.
#
# Fixes relative to the previous code:
#  * Normal range: the rounded mantissa is now ADDED to the exponent field.
#    It used to be OR-ed in, which lost the rounding carry whenever the half
#    exponent was odd (1.9998f encoded as 0x3C00 = 1.0 instead of 0x4000).
#  * NaN: the quiet bit (0x0200) is always set, so a NaN whose payload lives
#    only in the low 13 mantissa bits no longer encodes as infinity.
# ---------------------------------------------------------------------------
iree_f2h_ieee:
.Lfunc_begin3:
.cfi_startproc
vmovd edi, xmm0
# bextr control 0x0817: extract 8 bits starting at bit 23 (biased exponent).
mov edx, 2071
bextr esi, edi, edx
mov eax, edi
mov ecx, edi
# ecx = sign, eax = 23-bit mantissa, edx = half exponent (rebias by 127 - 15).
shr ecx, 31
and eax, 8388607
lea edx, [rsi - 112]
lea r8d, [rsi - 113]
# Fast path only for half exponents 1..29.
cmp r8d, 28
ja .LBB3_2
add eax, 4096
shl ecx, 15
shl edx, 10
shr eax, 13
# The mantissa can be 0x400 after rounding; adding lets that carry bump the
# exponent. The maximum is (29 << 10) + 0x400 = 30 << 10, still finite.
add eax, edx
or eax, ecx
ret
.LBB3_2:
# All-zero input bits (+0.0) return 0.
test edi, edi
je .LBB3_6
cmp esi, 112
ja .LBB3_7
# Exponents 102..112 produce a half subnormal; anything smaller flushes to 0.
cmp esi, 102
jae .LBB3_10
.LBB3_6:
xor eax, eax
ret
.LBB3_7:
# Half exponent 143 corresponds to float exponent 255: infinity or NaN.
cmp edx, 143
jne .LBB3_11
shl ecx, 15
test eax, eax
je .LBB3_14
# NaN: keep the top payload bits and force exponent 0x7C00 plus quiet bit
# 0x0200 (combined 0x7E00) so the result can never collapse to infinity.
shr eax, 13
or ecx, eax
or ecx, 32256
mov eax, ecx
ret
# Subnormal result: restore the implicit one, shift right by (113 - exponent),
# round on bit 12, then drop the low 13 bits. A rounding carry lands in the
# exponent field, which yields the smallest normal half.
.LBB3_10:
mov dl, 113
or eax, 8388608
shl ecx, 15
sub dl, sil
shrx eax, eax, edx
mov edx, eax
and edx, 4096
lea eax, [rax + 2*rdx]
shr eax, 13
or eax, ecx
ret
# Large exponents (half exponent 30 or more): round on bit 12. When the
# mantissa would overflow (>= 0x7FE000) it is cleared and the exponent is
# incremented instead.
.LBB3_11:
test edi, 4096
je .LBB3_13
lea edi, [rax + 8192]
add esi, -111
xor r8d, r8d
cmp eax, 8380416
cmovb esi, edx
cmovb r8d, edi
mov eax, r8d
mov edx, esi
.LBB3_13:
shl ecx, 15
cmp edx, 31
jb .LBB3_15
# Overflow or infinity: sign | 0x7C00.
.LBB3_14:
or ecx, 31744
mov eax, ecx
ret
# Largest finite exponent: fields cannot overlap here, so OR is exact.
.LBB3_15:
shr eax, 13
shl edx, 10
or eax, ecx
or eax, edx
ret
.Lfunc_end3:
.size iree_f2h_ieee, .Lfunc_end3-iree_f2h_ieee
.cfi_endproc
.section .text.__gnu_h2f_ieee,"ax",@progbits
.p2align 4, 0x90
.type __gnu_h2f_ieee,@function
# ---------------------------------------------------------------------------
# __gnu_h2f_ieee: widen a 16-bit half-precision bit pattern to a 32-bit float.
# The instruction sequence is the same as iree_h2f_ieee in this file.
# In:    edi = half bits in the low 16 bits
# Out:   xmm0 = float value
# Clobb: eax, ecx, edx, edi, xmm1, flags
# ---------------------------------------------------------------------------
__gnu_h2f_ieee:
.Lfunc_begin4:
.cfi_startproc
# eax = sign moved to bit 31, ecx = mantissa, dx = exponent field.
mov eax, edi
and eax, 32768
mov edx, edi
mov ecx, edi
and ecx, 1023
shl eax, 16
and dx, 31744
# Exponent field 0: zero or subnormal.
je .LBB4_6
and edi, 31744
cmp edi, 31744
# Exponent field not all ones: normal number.
jne .LBB4_5
test cx, cx
# All-ones exponent with zero mantissa: infinity.
je .LBB4_4
# NaN: always returns sign | 0x7FC00000, so the input payload is dropped.
or eax, 2143289344
vmovd xmm0, eax
ret
# Zero / subnormal: value = mantissa * (+/-)2^-24.
.LBB4_6:
movzx ecx, cx
or eax, 864026624
vcvtsi2ss xmm0, xmm0, ecx
vmovd xmm1, eax
vmulss xmm0, xmm0, xmm1
ret
# Normal: reposition the fields and rebias the exponent (+0x38000000).
.LBB4_5:
movzx ecx, cx
movzx edx, dx
add edx, ecx
shl edx, 13
lea eax, [rdx + rax + 939524096]
vmovd xmm0, eax
ret
# Infinity: sign | 0x7F800000.
.LBB4_4:
or eax, 2139095040
vmovd xmm0, eax
ret
.Lfunc_end4:
.size __gnu_h2f_ieee, .Lfunc_end4-__gnu_h2f_ieee
.cfi_endproc
.section .text.__extendhfsf2,"ax",@progbits
.p2align 4, 0x90
.type __extendhfsf2,@function
# ---------------------------------------------------------------------------
# __extendhfsf2: widen a half-precision value to a 32-bit float.
# In:    xmm0 = half bits in the low 16 bits of the register
# Out:   xmm0 = float value
# Clobb: eax, ecx, edx, esi, xmm1, flags
# ---------------------------------------------------------------------------
__extendhfsf2:
.Lfunc_begin5:
.cfi_startproc
vmovd ecx, xmm0
# eax = sign at bit 31, edx = mantissa, esi = exponent field.
mov eax, ecx
shl eax, 16
mov edx, ecx
and edx, 1023
mov esi, ecx
and eax, -2147483648
and esi, 31744
# Exponent field 0: zero or subnormal.
je .LBB5_6
cmp esi, 31744
# Exponent field not all ones: normal number.
jne .LBB5_5
test dx, dx
# All-ones exponent with zero mantissa: infinity.
je .LBB5_4
# NaN: always returns sign | 0x7FC00000, so the input payload is dropped.
or eax, 2143289344
vmovd xmm0, eax
ret
# Zero / subnormal: value = mantissa * (+/-)2^-24.
# NOTE(review): vcvtsi2ss takes its upper lanes from xmm1, which is not an
# input of this function. Only lanes 1..3 are affected and the scalar result
# is unchanged, but the instruction depends on the caller's xmm1.
.LBB5_6:
movzx ecx, dx
or eax, 864026624
vcvtsi2ss xmm0, xmm1, ecx
vmovd xmm1, eax
vmulss xmm0, xmm0, xmm1
ret
# Normal: drop the sign, reposition the fields and rebias (+0x38000000).
.LBB5_5:
and ecx, 32767
shl ecx, 13
lea eax, [rcx + rax + 939524096]
vmovd xmm0, eax
ret
# Infinity: sign | 0x7F800000.
.LBB5_4:
or eax, 2139095040
vmovd xmm0, eax
ret
.Lfunc_end5:
.size __extendhfsf2, .Lfunc_end5-__extendhfsf2
.cfi_endproc
.section .text.__gnu_f2h_ieee,"ax",@progbits
.p2align 4, 0x90
.type __gnu_f2h_ieee,@function
# ---------------------------------------------------------------------------
# __gnu_f2h_ieee: narrow a 32-bit float to a 16-bit half-precision bit pattern.
# In:    xmm0 = float value
# Out:   eax  = half bits in the low 16 bits
# Clobb: ecx, edx, esi, edi, r8d, flags
#
# Rounding adds half a unit in the last place to the magnitude and truncates,
# so exact ties round away from zero rather than to even.
#
# Fixes relative to the previous code:
#  * Normal range: the rounded mantissa is now ADDED to the exponent field.
#    It used to be OR-ed in, which lost the rounding carry whenever the half
#    exponent was odd (1.9998f encoded as 0x3C00 = 1.0 instead of 0x4000).
#  * NaN: the quiet bit (0x0200) is always set, so a NaN whose payload lives
#    only in the low 13 mantissa bits no longer encodes as infinity.
# ---------------------------------------------------------------------------
__gnu_f2h_ieee:
.Lfunc_begin6:
.cfi_startproc
vmovd edi, xmm0
# bextr control 0x0817: extract 8 bits starting at bit 23 (biased exponent).
mov edx, 2071
bextr esi, edi, edx
mov eax, edi
mov ecx, edi
# ecx = sign, eax = 23-bit mantissa, edx = half exponent (rebias by 127 - 15).
shr ecx, 31
and eax, 8388607
lea edx, [rsi - 112]
lea r8d, [rsi - 113]
# Fast path only for half exponents 1..29.
cmp r8d, 28
ja .LBB6_2
add eax, 4096
shl ecx, 15
shl edx, 10
shr eax, 13
# The mantissa can be 0x400 after rounding; adding lets that carry bump the
# exponent. The maximum is (29 << 10) + 0x400 = 30 << 10, still finite.
add eax, edx
or eax, ecx
ret
.LBB6_2:
# All-zero input bits (+0.0) return 0.
test edi, edi
je .LBB6_6
cmp esi, 112
ja .LBB6_7
# Exponents 102..112 produce a half subnormal; anything smaller flushes to 0.
cmp esi, 102
jae .LBB6_10
.LBB6_6:
xor eax, eax
ret
.LBB6_7:
# Half exponent 143 corresponds to float exponent 255: infinity or NaN.
cmp edx, 143
jne .LBB6_11
shl ecx, 15
test eax, eax
je .LBB6_14
# NaN: keep the top payload bits and force exponent 0x7C00 plus quiet bit
# 0x0200 (combined 0x7E00) so the result can never collapse to infinity.
shr eax, 13
or ecx, eax
or ecx, 32256
mov eax, ecx
ret
# Subnormal result: restore the implicit one, shift right by (113 - exponent),
# round on bit 12, then drop the low 13 bits.
.LBB6_10:
mov dl, 113
or eax, 8388608
shl ecx, 15
sub dl, sil
shrx eax, eax, edx
mov edx, eax
and edx, 4096
lea eax, [rax + 2*rdx]
shr eax, 13
or eax, ecx
ret
# Large exponents (half exponent 30 or more): round on bit 12. When the
# mantissa would overflow (>= 0x7FE000) it is cleared and the exponent is
# incremented instead.
.LBB6_11:
test edi, 4096
je .LBB6_13
lea edi, [rax + 8192]
add esi, -111
xor r8d, r8d
cmp eax, 8380416
cmovb esi, edx
cmovb r8d, edi
mov eax, r8d
mov edx, esi
.LBB6_13:
shl ecx, 15
cmp edx, 31
jb .LBB6_15
# Overflow or infinity: sign | 0x7C00.
.LBB6_14:
or ecx, 31744
mov eax, ecx
ret
# Largest finite exponent: fields cannot overlap here, so OR is exact.
.LBB6_15:
shr eax, 13
shl edx, 10
or eax, ecx
or eax, edx
ret
.Lfunc_end6:
.size __gnu_f2h_ieee, .Lfunc_end6-__gnu_f2h_ieee
.cfi_endproc
.section .text.__truncsfhf2,"ax",@progbits
.p2align 4, 0x90
.type __truncsfhf2,@function
# ---------------------------------------------------------------------------
# __truncsfhf2: narrow a 32-bit float to half precision.
# In:    xmm0 = float value
# Out:   xmm0 = half bits in the low 16 bits. The result goes through a 2-byte
#        store and 4-byte reload at [rsp-4] (red zone), so bits 16..31 of xmm0
#        are whatever was in that stack slot.
# Clobb: eax, ecx, edx, esi, edi, r8d, flags
#
# Rounding adds half a unit in the last place to the magnitude and truncates,
# so exact ties round away from zero rather than to even.
#
# Fixes relative to the previous code:
#  * Normal range: the exponent field is now ADDED to the rounded mantissa.
#    It used to be OR-ed in, which lost the rounding carry whenever the half
#    exponent was odd (1.9998f encoded as 0x3C00 = 1.0 instead of 0x4000).
#  * NaN: the quiet bit (0x0200) is always set, so a NaN whose payload lives
#    only in the low 13 mantissa bits no longer encodes as infinity.
# ---------------------------------------------------------------------------
__truncsfhf2:
.Lfunc_begin7:
.cfi_startproc
vmovd edi, xmm0
# bextr control 0x0817: extract 8 bits starting at bit 23 (biased exponent).
mov edx, 2071
bextr esi, edi, edx
mov eax, edi
mov ecx, edi
# ecx = sign, eax = 23-bit mantissa, edx = half exponent (rebias by 127 - 15).
shr ecx, 31
and eax, 8388607
lea edx, [rsi - 112]
lea r8d, [rsi - 113]
# Fast path only for half exponents 1..29.
cmp r8d, 28
ja .LBB7_2
add eax, 4096
shl ecx, 15
shl edx, 10
shr eax, 13
# Shared tail for the fast path and .LBB7_15. Adding the exponent field lets a
# mantissa rounding carry (0x400) bump the exponent; when the fields do not
# overlap (.LBB7_15) the add equals the former OR. The sum never reaches bit
# 15, so the sign merged first is not disturbed.
.LBB7_16:
or eax, ecx
add eax, edx
mov word ptr [rsp - 4], ax
vmovss xmm0, dword ptr [rsp - 4]
ret
.LBB7_2:
# All-zero input bits (+0.0) return 0.
test edi, edi
je .LBB7_6
cmp esi, 112
ja .LBB7_7
# Exponents 102..112 produce a half subnormal; anything smaller flushes to 0.
cmp esi, 102
jae .LBB7_10
.LBB7_6:
xor eax, eax
mov word ptr [rsp - 4], ax
vmovss xmm0, dword ptr [rsp - 4]
ret
.LBB7_7:
# Half exponent 143 corresponds to float exponent 255: infinity or NaN.
cmp edx, 143
jne .LBB7_11
shl ecx, 15
test eax, eax
je .LBB7_14
# NaN: keep the top payload bits and set the quiet bit (0x0200) so the result
# can never collapse to infinity; .LBB7_14 then adds the 0x7C00 exponent.
shr eax, 13
or ecx, eax
or ecx, 512
jmp .LBB7_14
# Subnormal result: restore the implicit one, shift right by (113 - exponent),
# round on bit 12, then drop the low 13 bits.
.LBB7_10:
mov dl, 113
or eax, 8388608
shl ecx, 15
sub dl, sil
shrx eax, eax, edx
mov edx, eax
and edx, 4096
lea eax, [rax + 2*rdx]
shr eax, 13
or eax, ecx
mov word ptr [rsp - 4], ax
vmovss xmm0, dword ptr [rsp - 4]
ret
# Large exponents (half exponent 30 or more): round on bit 12. When the
# mantissa would overflow (>= 0x7FE000) it is cleared and the exponent is
# incremented instead.
.LBB7_11:
test edi, 4096
je .LBB7_13
lea edi, [rax + 8192]
add esi, -111
xor r8d, r8d
cmp eax, 8380416
cmovb esi, edx
cmovb r8d, edi
mov eax, r8d
mov edx, esi
.LBB7_13:
shl ecx, 15
cmp edx, 31
jb .LBB7_15
# Overflow, infinity, or NaN tail: OR in the all-ones exponent 0x7C00.
.LBB7_14:
or ecx, 31744
mov eax, ecx
mov word ptr [rsp - 4], ax
vmovss xmm0, dword ptr [rsp - 4]
ret
# Largest finite exponent: finish in the shared tail.
.LBB7_15:
shr eax, 13
shl edx, 10
jmp .LBB7_16
.Lfunc_end7:
.size __truncsfhf2, .Lfunc_end7-__truncsfhf2
.cfi_endproc
.section .rodata.cst4,"aM",@progbits,4
.p2align 2, 0x0
# 0x7b800000 = 2^120, added to x only to force a floating-point evaluation.
.LCPI8_0:
.long 0x7b800000
# 0x80000000 = -0.0f
.LCPI8_1:
.long 0x80000000
# 0x3f800000 = 1.0f
.LCPI8_2:
.long 0x3f800000
.section .text.ceilf,"ax",@progbits
.p2align 4, 0x90
.type ceilf,@function
# ---------------------------------------------------------------------------
# ceilf: round a float up to an integral value using integer bit operations.
# In:    xmm0 = x
# Out:   xmm0 = result
# Clobb: eax, ecx, edx, esi, edi, xmm1, k1, flags; writes [rsp-8] / [rsp-4]
#        (red zone) to keep the otherwise unused sums alive.
# ---------------------------------------------------------------------------
ceilf:
.Lfunc_begin8:
.cfi_startproc
vmovd eax, xmm0
# ecx = biased exponent (bextr control 0x0817: 8 bits from bit 23).
mov ecx, 2071
bextr ecx, eax, ecx
# Exponent above 149: no fraction bits remain (also inf / NaN); return x.
cmp ecx, 149
ja .LBB8_7
# Exponent below 127: |x| < 1.
cmp ecx, 127
jb .LBB8_4
add ecx, -127
# edx = mask of the fraction bits; already integral when none are set.
mov edx, 8388607
shrx edx, edx, ecx
test edx, eax
je .LBB8_7
vaddss xmm0, xmm0, dword ptr [rip + .LCPI8_0]
xor esi, esi
test eax, eax
mov edi, -8388608
# Positive x: add the fraction mask before truncating; negative x: just
# truncate (the mask is replaced by 0).
cmovs edx, esi
sarx ecx, edi, ecx
add edx, eax
and edx, ecx
vmovss dword ptr [rsp - 8], xmm0
vmovd xmm0, edx
ret
# |x| < 1: negative inputs give -0.0, positive non-zero inputs give 1.0, and
# +0.0 is returned unchanged.
.LBB8_4:
vaddss xmm1, xmm0, dword ptr [rip + .LCPI8_0]
vmovss dword ptr [rsp - 4], xmm1
test eax, eax
js .LBB8_5
vmovss xmm1, dword ptr [rip + .LCPI8_2]
# ZF still comes from the test above: al = 1 only when the input bits are 0.
sete al
kmovd k1, eax
vmovss xmm1 {k1}, xmm1, xmm0
vmovaps xmm0, xmm1
.LBB8_7:
ret
.LBB8_5:
vmovss xmm0, dword ptr [rip + .LCPI8_1]
ret
.Lfunc_end8:
.size ceilf, .Lfunc_end8-ceilf
.cfi_endproc
.section .rodata.cst4,"aM",@progbits,4
.p2align 2, 0x0
# Upper input threshold (about 88.72): larger inputs take the overflow path.
.LCPI9_0:
.long 0x42b17217
# Lower input threshold (about -103.97): smaller inputs take the underflow path.
.LCPI9_1:
.long 0xc2cff1b4
# 2^-95, squared to produce an underflowing result.
.LCPI9_2:
.long 0x10000000
# 2^97, squared to produce an overflowing result.
.LCPI9_3:
.long 0x70000000
.section .rodata.cst8,"aM",@progbits,8
.p2align 3, 0x0
# Scale applied to x before splitting it into integer and fraction parts.
.LCPI9_4:
.quad 0x40471547652b82fe
# +/- 1.5 * 2^52: adding then subtracting rounds to an integer and leaves that
# integer in the low bits of the intermediate sum.
.LCPI9_5:
.quad 0x4338000000000000
.LCPI9_6:
.quad 0xc338000000000000
# Polynomial coefficients applied to the fractional remainder.
.LCPI9_7:
.quad 0x3ebc6af84b912394
.LCPI9_8:
.quad 0x3f2ebfce50fac4f3
.LCPI9_9:
.quad 0x3f962e42ff0c52d6
.LCPI9_10:
.quad 0x3ff0000000000000
.section .text.expf,"ax",@progbits
.p2align 4, 0x90
.type expf,@function
# ---------------------------------------------------------------------------
# expf: single-precision exponential, evaluated in double precision with a
# 32-entry lookup table (__exp2f_data, defined outside this view).
# In:    xmm0 = x
# Out:   xmm0 = result
# Clobb: rax, rcx, rdx, xmm1..xmm3, flags; writes [rsp-8] / [rsp-4] (red zone)
# ---------------------------------------------------------------------------
expf:
.Lfunc_begin9:
.cfi_startproc
vmovd ecx, xmm0
# eax = bits 20..30 of x (bextr control 0x0B14: 11 bits from bit 20); large
# magnitudes, infinities and NaNs are handled out of line.
mov eax, 2836
bextr eax, ecx, eax
cmp eax, 1067
jae .LBB9_1
# Main path: scale x, split off the rounded integer part k, index the table
# with the low 5 bits of k, add k shifted left by 47 into the table entry, and
# multiply by a polynomial in the remainder.
.LBB9_8:
vcvtss2sd xmm0, xmm0, xmm0
vmulsd xmm0, xmm0, qword ptr [rip + .LCPI9_4]
lea rdx, [rip + __exp2f_data]
vaddsd xmm1, xmm0, qword ptr [rip + .LCPI9_5]
vmovq rax, xmm1
vaddsd xmm1, xmm1, qword ptr [rip + .LCPI9_6]
mov ecx, eax
and ecx, 31
shl rax, 47
add rax, qword ptr [rdx + 8*rcx]
vsubsd xmm0, xmm0, xmm1
vmulsd xmm2, xmm0, qword ptr [rip + .LCPI9_7]
vmovq xmm1, rax
vmulsd xmm3, xmm0, xmm0
vaddsd xmm2, xmm2, qword ptr [rip + .LCPI9_8]
vmulsd xmm0, xmm0, qword ptr [rip + .LCPI9_9]
vaddsd xmm0, xmm0, qword ptr [rip + .LCPI9_10]
vmulsd xmm2, xmm3, xmm2
vaddsd xmm0, xmm0, xmm2
vmulsd xmm0, xmm0, xmm1
vcvtsd2ss xmm1, xmm0, xmm0
.LBB9_9:
vmovaps xmm0, xmm1
ret
# Special cases.
.LBB9_1:
vxorps xmm1, xmm1, xmm1
# x == -inf (bits 0xFF800000): return +0.0.
cmp ecx, -8388608
je .LBB9_9
# Exponent field all ones (other infinities, NaN): return x + x.
cmp eax, 2040
jae .LBB9_3
vucomiss xmm0, dword ptr [rip + .LCPI9_0]
jbe .LBB9_6
# Overflow: 2^97 * 2^97, with one factor reloaded from the stack.
mov dword ptr [rsp - 8], 1879048192
vmovss xmm0, dword ptr [rsp - 8]
vmulss xmm0, xmm0, dword ptr [rip + .LCPI9_3]
ret
.LBB9_3:
vaddss xmm0, xmm0, xmm0
ret
.LBB9_6:
vmovss xmm1, dword ptr [rip + .LCPI9_1]
vucomiss xmm1, xmm0
# Not below the lower threshold: go back to the main path.
jbe .LBB9_8
# Underflow: 2^-95 * 2^-95, with one factor reloaded from the stack.
mov dword ptr [rsp - 4], 268435456
vmovss xmm0, dword ptr [rsp - 4]
vmulss xmm0, xmm0, dword ptr [rip + .LCPI9_2]
ret
.Lfunc_end9:
.size expf, .Lfunc_end9-expf
.cfi_endproc
.section .rodata.cst8,"aM",@progbits,8
.p2align 2, 0x0
# Two-entry float table: [0] = -2^97 (0xf0000000), [1] = +2^97 (0x70000000).
.LCPI10_0:
.long 0xf0000000
.long 0x70000000
.section .rodata.cst4,"aM",@progbits,4
.p2align 2, 0x0
# +2^97, the second factor of the overflowing product.
.LCPI10_1:
.long 0x70000000
.section .text.__math_oflowf,"ax",@progbits
.p2align 4, 0x90
.type __math_oflowf,@function
# ---------------------------------------------------------------------------
# __math_oflowf: produce an overflowed float result with the requested sign.
# In:    edi  = sign selector (0 selects the positive factor, anything else
#               the negative one)
# Out:   xmm0 = (+/-)2^97 * 2^97, computed at run time
# Clobb: rcx, rdx, xmm1, flags; writes [rsp-4] (red zone)
# ---------------------------------------------------------------------------
__math_oflowf:
.Lfunc_begin10:
.cfi_startproc
lea rdx, [rip + .LCPI10_0]
# rcx = 1 when the selector is zero, so entry 1 is the positive factor.
xor ecx, ecx
test edi, edi
sete cl
vmovss xmm1, dword ptr [rdx + 4*rcx]
# Round-trip through memory so the product is computed from a reloaded value.
vmovss dword ptr [rsp - 4], xmm1
vmovss xmm0, dword ptr [rsp - 4]
vmulss xmm0, xmm0, dword ptr [rip + .LCPI10_1]
ret
.Lfunc_end10:
.size __math_oflowf, .Lfunc_end10-__math_oflowf
.cfi_endproc
.section .rodata.cst8,"aM",@progbits,8
.p2align 2, 0x0
# Two-entry float table: [0] = -2^-95 (0x90000000), [1] = +2^-95 (0x10000000).
.LCPI11_0:
.long 0x90000000
.long 0x10000000
.section .rodata.cst4,"aM",@progbits,4
.p2align 2, 0x0
# +2^-95, the second factor of the underflowing product.
.LCPI11_1:
.long 0x10000000
.section .text.__math_uflowf,"ax",@progbits
.p2align 4, 0x90
.type __math_uflowf,@function
# ---------------------------------------------------------------------------
# __math_uflowf: produce an underflowed float result with the requested sign.
# In:    edi  = sign selector (0 selects the positive factor, anything else
#               the negative one)
# Out:   xmm0 = (+/-)2^-95 * 2^-95, computed at run time
# Clobb: rcx, rdx, xmm1, flags; writes [rsp-4] (red zone)
# ---------------------------------------------------------------------------
__math_uflowf:
.Lfunc_begin11:
.cfi_startproc
lea rdx, [rip + .LCPI11_0]
# rcx = 1 when the selector is zero, so entry 1 is the positive factor.
xor ecx, ecx
test edi, edi
sete cl
vmovss xmm1, dword ptr [rdx + 4*rcx]
# Round-trip through memory so the product is computed from a reloaded value.
vmovss dword ptr [rsp - 4], xmm1
vmovss xmm0, dword ptr [rsp - 4]
vmulss xmm0, xmm0, dword ptr [rip + .LCPI11_1]
ret
.Lfunc_end11:
.size __math_uflowf, .Lfunc_end11-__math_uflowf
.cfi_endproc
.section .rodata.cst4,"aM",@progbits,4
.p2align 2, 0x0
# Float sign-bit mask.
.LCPI12_0:
.long 0x80000000
.section .text.__math_xflowf,"ax",@progbits
.p2align 4, 0x90
.type __math_xflowf,@function
# ---------------------------------------------------------------------------
# __math_xflowf: return (sign ? -y : y) * y.
# In:    edi  = sign selector (zero keeps y, non-zero negates it)
#        xmm0 = y
# Out:   xmm0 = product
# Clobb: eax, xmm1, k1, flags; writes [rsp-4] (red zone)
# ---------------------------------------------------------------------------
__math_xflowf:
.Lfunc_begin12:
.cfi_startproc
# xmm1 = -y; replaced by y again when the selector is zero.
vxorps xmm1, xmm0, dword ptr [rip + .LCPI12_0]{1to4}
test edi, edi
sete al
kmovd k1, eax
vmovss xmm1 {k1}, xmm1, xmm0
# One factor is stored and used from memory, so the multiply happens at run
# time.
vmovss dword ptr [rsp - 4], xmm1
vmulss xmm0, xmm0, dword ptr [rsp - 4]
ret
.Lfunc_end12:
.size __math_xflowf, .Lfunc_end12-__math_xflowf
.cfi_endproc
.section .text.feclearexcept,"ax",@progbits
.p2align 4, 0x90
.type feclearexcept,@function
# feclearexcept: stub. Ignores its argument, touches no floating-point state
# and returns 0 in eax.
feclearexcept:
.Lfunc_begin13:
.cfi_startproc
xor eax, eax
ret
.Lfunc_end13:
.size feclearexcept, .Lfunc_end13-feclearexcept
.cfi_endproc
.section .text.feraiseexcept,"ax",@progbits
.p2align 4, 0x90
.type feraiseexcept,@function
# feraiseexcept: stub. Ignores its argument, raises nothing and returns 0 in
# eax.
feraiseexcept:
.Lfunc_begin14:
.cfi_startproc
xor eax, eax
ret
.Lfunc_end14:
.size feraiseexcept, .Lfunc_end14-feraiseexcept
.cfi_endproc
.section .text.fetestexcept,"ax",@progbits
.p2align 4, 0x90
.type fetestexcept,@function
# fetestexcept: stub. Ignores its argument and always returns 0 in eax, i.e.
# it never reports a raised exception flag.
fetestexcept:
.Lfunc_begin15:
.cfi_startproc
xor eax, eax
ret
.Lfunc_end15:
.size fetestexcept, .Lfunc_end15-fetestexcept
.cfi_endproc
.section .text.fegetround,"ax",@progbits
.p2align 4, 0x90
.type fegetround,@function
# fegetround: stub. Does not read the hardware rounding mode; always returns 0
# in eax.
fegetround:
.Lfunc_begin16:
.cfi_startproc
xor eax, eax
ret
.Lfunc_end16:
.size fegetround, .Lfunc_end16-fegetround
.cfi_endproc
.section .text.__fesetround,"ax",@progbits
.p2align 4, 0x90
.type __fesetround,@function
# __fesetround: stub. Ignores its argument, leaves the rounding mode untouched
# and returns 0 in eax.
__fesetround:
.Lfunc_begin17:
.cfi_startproc
xor eax, eax
ret
.Lfunc_end17:
.size __fesetround, .Lfunc_end17-__fesetround
.cfi_endproc
.section .text.fegetenv,"ax",@progbits
.p2align 4, 0x90
.type fegetenv,@function
# fegetenv: stub. Writes nothing through its argument and returns 0 in eax.
fegetenv:
.Lfunc_begin18:
.cfi_startproc
xor eax, eax
ret
.Lfunc_end18:
.size fegetenv, .Lfunc_end18-fegetenv
.cfi_endproc
.section .text.fesetenv,"ax",@progbits
.p2align 4, 0x90
.type fesetenv,@function
# fesetenv: stub. Ignores its argument, changes no floating-point state and
# returns 0 in eax.
fesetenv:
.Lfunc_begin19:
.cfi_startproc
xor eax, eax
ret
.Lfunc_end19:
.size fesetenv, .Lfunc_end19-fesetenv
.cfi_endproc
.section .rodata.cst4,"aM",@progbits,4
.p2align 2, 0x0
# 0x7b800000 = 2^120, added to x only to force a floating-point evaluation.
.LCPI20_0:
.long 0x7b800000
# 0xbf800000 = -1.0f
.LCPI20_1:
.long 0xbf800000
.section .text.floorf,"ax",@progbits
.p2align 4, 0x90
.type floorf,@function
# ---------------------------------------------------------------------------
# floorf: round a float down to an integral value using integer bit
# operations.
# In:    xmm0 = x
# Out:   xmm0 = result
# Clobb: eax, ecx, edx, esi, xmm1, k1, flags; writes [rsp-8] / [rsp-4]
#        (red zone) to keep the otherwise unused sums alive.
# ---------------------------------------------------------------------------
floorf:
.Lfunc_begin20:
.cfi_startproc
vmovd eax, xmm0
# ecx = biased exponent (bextr control 0x0817: 8 bits from bit 23).
mov ecx, 2071
bextr ecx, eax, ecx
# Exponent above 149: no fraction bits remain (also inf / NaN); return x.
cmp ecx, 149
jbe .LBB20_1
ret
.LBB20_1:
# Exponent below 127: |x| < 1.
cmp ecx, 127
jb .LBB20_4
add ecx, -127
# edx = mask of the fraction bits; already integral when none are set.
mov edx, 8388607
shrx edx, edx, ecx
test edx, eax
je .LBB20_6
vaddss xmm0, xmm0, dword ptr [rip + .LCPI20_0]
mov esi, -8388608
sarx ecx, esi, ecx
# Negative x: add the fraction mask before truncating (esi = all ones when the
# sign bit is set, otherwise 0); positive x: just truncate.
mov esi, eax
sar esi, 31
and esi, edx
add esi, eax
and esi, ecx
vmovss dword ptr [rsp - 8], xmm0
vmovd xmm0, esi
ret
# |x| < 1: non-negative inputs give +0.0; negative inputs give -1.0, except
# -0.0 which is returned unchanged.
.LBB20_4:
vaddss xmm1, xmm0, dword ptr [rip + .LCPI20_0]
vmovss dword ptr [rsp - 4], xmm1
vxorps xmm1, xmm1, xmm1
test eax, eax
js .LBB20_7
vmovaps xmm0, xmm1
.LBB20_6:
ret
.LBB20_7:
# k1 = (x == 0.0), which is true for -0.0.
vcmpeqss k1, xmm0, xmm1
vmovss xmm1, dword ptr [rip + .LCPI20_1]
vmovss xmm1 {k1}, xmm1, xmm0
vmovaps xmm0, xmm1
ret
.Lfunc_end20:
.size floorf, .Lfunc_end20-floorf
.cfi_endproc
.section .text.fmaf,"ax",@progbits
.p2align 4, 0x90
.type fmaf,@function
# ---------------------------------------------------------------------------
# fmaf: x * y + z computed in double precision and then narrowed to float,
# with a correction step for the case where narrowing would round twice.
# In:    xmm0 = x, xmm1 = y, xmm2 = z
# Out:   xmm0 = result
# Clobb: rax, rcx, rdx, xmm1, xmm3, k1, flags
# ---------------------------------------------------------------------------
fmaf:
.Lfunc_begin21:
.cfi_startproc
vcvtss2sd xmm0, xmm0, xmm0
# rdx = double exponent mask 0x7FF0000000000000.
movabs rdx, 9218868437227405312
vcvtss2sd xmm1, xmm1, xmm1
vcvtss2sd xmm2, xmm2, xmm2
# xmm1 = x*y, xmm0 = x*y + z, rax = bits of the sum.
vmulsd xmm1, xmm0, xmm1
vaddsd xmm0, xmm1, xmm2
vmovq rax, xmm0
# The sum can be narrowed directly unless its low 29 bits are exactly
# 0x10000000 (halfway between two floats) and its exponent is not all ones.
mov ecx, eax
and ecx, 536870911
cmp ecx, 268435456
setne cl
andn rdx, rax, rdx
sete dl
or dl, cl
jne .LBB21_4
# It can also be narrowed directly when the double addition was exact:
# (sum - xy) == z and (sum - z) == xy, with neither comparison unordered.
vsubsd xmm3, xmm0, xmm1
vucomisd xmm3, xmm2
jne .LBB21_3
jp .LBB21_3
vsubsd xmm3, xmm0, xmm2
vucomisd xmm3, xmm1
jne .LBB21_3
jp .LBB21_3
.LBB21_4:
vcvtsd2ss xmm0, xmm0, xmm0
ret
# Inexact halfway case: compute the rounding error of the addition and move
# the double one unit in the last place in the matching direction (rcx ends up
# as +1 or -1) before narrowing.
.LBB21_3:
test rax, rax
vsubsd xmm3, xmm1, xmm0
vsubsd xmm0, xmm2, xmm0
sets cl
vucomisd xmm2, xmm1
vaddsd xmm0, xmm1, xmm0
vaddsd xmm3, xmm3, xmm2
vxorpd xmm1, xmm1, xmm1
setbe dl
xor dl, cl
kmovd k1, edx
vmovsd xmm0 {k1}, xmm0, xmm3
vucomisd xmm1, xmm0
setbe dl
xor dl, cl
movzx ecx, dl
dec rcx
or rcx, 1
add rcx, rax
vmovq xmm0, rcx
vcvtsd2ss xmm0, xmm0, xmm0
ret
.Lfunc_end21:
.size fmaf, .Lfunc_end21-fmaf
.cfi_endproc
.section .text.fmodf,"ax",@progbits
.p2align 4, 0x90
.type fmodf,@function
# ---------------------------------------------------------------------------
# fmodf: floating-point remainder of x / y computed with integer
# shift-and-subtract on the mantissas.
# In:    xmm0 = x, xmm1 = y
# Out:   xmm0 = result
# Clobb: eax, ecx, edx, esi, edi, r8, r9, xmm1, k1, flags
# Registers in the main path: eax = bits of x, edx = bits / mantissa of y,
# esi = mantissa of x, ecx / edi = exponents of x / y.
# ---------------------------------------------------------------------------
fmodf:
.Lfunc_begin22:
.cfi_startproc
vmovd edx, xmm1
# esi = 2 * bits(y); zero means y is +0.0 or -0.0.
mov esi, edx
add esi, edx
je .LBB22_2
mov r8d, edx
vmovd eax, xmm0
# bextr control 0x0817: 8 bits from bit 23 (biased exponent).
mov edi, 2071
and r8d, 2147483647
bextr ecx, eax, edi
# y is NaN (|bits| > 0x7F800000) or x has an all-ones exponent (inf / NaN).
cmp r8d, 2139095041
setae r8b
cmp ecx, 255
sete r9b
or r9b, r8b
cmp r9b, 1
jne .LBB22_3
# Invalid operands: return (x*y) / (x*y).
.LBB22_2:
vmulss xmm0, xmm0, xmm1
vdivss xmm0, xmm0, xmm0
ret
.LBB22_3:
# Compare magnitudes via the sign-stripped bit patterns; |x| <= |y| is handled
# at .LBB22_4.
lea r8d, [rax + rax]
cmp r8d, esi
jbe .LBB22_4
bextr edi, edx, edi
# Exponent field 0: x is subnormal and needs normalizing first.
test ecx, ecx
je .LBB22_6
# Normal x: mantissa with the implicit leading one restored.
mov esi, eax
and esi, 8388607
or esi, 8388608
test edi, edi
je .LBB22_11
# Normal y: mantissa with the implicit leading one restored.
.LBB22_14:
and edx, 8388607
or edx, 8388608
cmp ecx, edi
jg .LBB22_16
.LBB22_21:
mov edi, esi
sub edi, edx
jns .LBB22_22
jmp .LBB22_23
# |x| <= |y|: return 0*x when the magnitudes are equal, otherwise x unchanged.
# ZF still comes from the compare above (vpxor does not modify flags).
.LBB22_4:
vpxor xmm1, xmm1, xmm1
sete al
vmulss xmm1, xmm0, xmm1
kmovd k1, eax
vmovss xmm0 {k1}, xmm0, xmm1
ret
# Subnormal x: count the shift needed to normalize (ecx becomes <= 0), then
# shift the mantissa into place.
.LBB22_6:
mov esi, eax
xor ecx, ecx
shl esi, 9
js .LBB22_8
.p2align 4, 0x90
.LBB22_7:
dec ecx
add esi, esi
jns .LBB22_7
.LBB22_8:
mov sil, 1
sub sil, cl
shlx esi, eax, esi
test edi, edi
jne .LBB22_14
# Subnormal y: same normalization for the divisor.
.LBB22_11:
mov r8d, edx
xor edi, edi
shl r8d, 9
js .LBB22_13
.p2align 4, 0x90
.LBB22_12:
dec edi
add r8d, r8d
jns .LBB22_12
.LBB22_13:
mov r8b, 1
sub r8b, dil
shlx edx, edx, r8d
cmp ecx, edi
jg .LBB22_16
jmp .LBB22_21
.p2align 4, 0x90
# Division loop: while the exponent of x exceeds that of y, subtract the
# divisor mantissa whenever it fits and shift the remainder left by one.
.LBB22_19:
add esi, esi
dec ecx
cmp ecx, edi
jle .LBB22_20
.LBB22_16:
mov r8d, esi
sub r8d, edx
js .LBB22_19
mov esi, r8d
jne .LBB22_19
jmp .LBB22_18
# Final subtraction step at equal exponents.
.LBB22_20:
mov ecx, edi
mov edi, esi
sub edi, edx
js .LBB22_23
.LBB22_22:
mov esi, edi
je .LBB22_18
# Renormalize the remainder: shift left until bit 23 is set.
.LBB22_23:
cmp esi, 8388607
ja .LBB22_24
.p2align 4, 0x90
.LBB22_25:
lea edx, [rsi + rsi]
dec ecx
cmp esi, 4194304
mov esi, edx
jb .LBB22_25
and eax, -2147483648
test ecx, ecx
jle .LBB22_28
# Normal result: drop the implicit one, insert the exponent, restore the sign
# of x.
.LBB22_27:
add edx, -8388608
shl ecx, 23
or ecx, edx
or ecx, eax
vmovd xmm0, ecx
ret
# Remainder is exactly zero: return 0*x, which keeps the sign of x.
.LBB22_18:
vpxor xmm1, xmm1, xmm1
vmulss xmm0, xmm0, xmm1
ret
.LBB22_24:
mov edx, esi
and eax, -2147483648
test ecx, ecx
jg .LBB22_27
# Subnormal result: shift the mantissa right by (1 - exponent) and restore the
# sign of x.
.LBB22_28:
mov sil, 1
sub sil, cl
shrx ecx, edx, esi
or ecx, eax
vmovd xmm0, ecx
ret
.Lfunc_end22:
.size fmodf, .Lfunc_end22-fmodf
.cfi_endproc
.section .text.__math_invalidf,"ax",@progbits
.p2align 4, 0x90
.type __math_invalidf,@function
# __math_invalidf: return (x - x) / (x - x) for the float x in xmm0. For a
# finite x this is 0/0, i.e. a NaN produced by an actual division.
__math_invalidf:
.Lfunc_begin23:
.cfi_startproc
vsubss xmm0, xmm0, xmm0
vdivss xmm0, xmm0, xmm0
ret
.Lfunc_end23:
.size __math_invalidf, .Lfunc_end23-__math_invalidf
.cfi_endproc
.section .rodata.cst4,"aM",@progbits,4
.p2align 2, 0x0
# 1.0f
.LCPI24_0:
.long 0x3f800000
# Float sign-bit mask.
.LCPI24_1:
.long 0x80000000
# 2^23, used to scale a subnormal x into the normal range.
.LCPI24_2:
.long 0x4b000000
# 2^-95, second factor of the underflowing product.
.LCPI24_12:
.long 0x10000000
# 2^97, second factor of the overflowing product.
.LCPI24_20:
.long 0x70000000
.section .rodata.cst8,"aM",@progbits,8
.p2align 3, 0x0
# -1.0, added after the table multiply in the log2 step.
.LCPI24_3:
.quad 0xbff0000000000000
# .LCPI24_4 .. .LCPI24_8: polynomial coefficients of the log2 step.
.LCPI24_4:
.quad 0x3fd27616c9496e0b
.LCPI24_5:
.quad 0xbfd71969a075c67a
.LCPI24_6:
.quad 0x3fdec70a6ca7badd
.LCPI24_7:
.quad 0xbfe7154748bef6c8
.LCPI24_8:
.quad 0x3ff71547652ab82b
# Upper / lower bounds on y*log2(x) beyond which the result over- / underflows.
.LCPI24_9:
.quad 0x405fffffffd1d571
.LCPI24_10:
.quad 0xc062c00000000000
# Underflow factors: [0] = -2^-95, [1] = +2^-95.
.LCPI24_11:
.long 0x90000000
.long 0x10000000
# +/- 1.5 * 2^47: adding then subtracting rounds to a multiple of 1/32 and
# leaves the scaled integer in the low bits of the intermediate sum.
.LCPI24_13:
.quad 0x42e8000000000000
.LCPI24_14:
.quad 0xc2e8000000000000
# .LCPI24_15 .. .LCPI24_18: polynomial coefficients of the exp2 step.
.LCPI24_15:
.quad 0x3fac6af84b912394
.LCPI24_16:
.quad 0x3fcebfce50fac4f3
.LCPI24_17:
.quad 0x3fe62e42ff0c52d6
.LCPI24_18:
.quad 0x3ff0000000000000
# Overflow factors: [0] = -2^97, [1] = +2^97.
.LCPI24_19:
.long 0xf0000000
.long 0x70000000
.section .text.powf,"ax",@progbits
.p2align 4, 0x90
.type powf,@function
# ---------------------------------------------------------------------------
# powf: x raised to the power y, computed as exp2(y * log2(x)) in double
# precision with two lookup tables (__powf_log2_data and __exp2f_data, both
# defined outside this view).
# In:    xmm0 = x, xmm1 = y
# Out:   xmm0 = result
# Clobb: rax, rcx, rdx, rsi, rdi, xmm1..xmm6, k1, k2, flags; writes
#        [rsp-12] / [rsp-8] / [rsp-4] (red zone)
# Registers: edx = bits of x, eax = bits of y, ecx = sign bias for the result
# (0, or 65536 when x is negative and y is an odd integer).
# ---------------------------------------------------------------------------
powf:
.Lfunc_begin24:
.cfi_startproc
vmovd edx, xmm0
vmovd eax, xmm1
# Fast check: x is a positive normal number and y is neither zero, infinity
# nor NaN. Everything else is classified at .LBB24_2.
lea ecx, [rdx - 2139095040]
cmp ecx, -2130706432
jb .LBB24_2
lea esi, [rax + rax + 16777216]
xor ecx, ecx
cmp esi, 16777216
jbe .LBB24_2
# log2 step: split x into a table index (4 bits), an exponent and a reduced
# mantissa, then evaluate a polynomial around the table entry.
.LBB24_24:
lea eax, [rdx - 1060306944]
mov esi, eax
mov edi, eax
shr esi, 19
and edi, -8388608
sar eax, 23
sub edx, edi
shl esi, 4
lea rdi, [rip + __powf_log2_data]
vmovd xmm0, edx
movzx esi, sil
movabs rdx, 9223231299366420480
vcvtss2sd xmm0, xmm0, xmm0
vmulsd xmm0, xmm0, qword ptr [rsi + rdi]
vaddsd xmm0, xmm0, qword ptr [rip + .LCPI24_3]
vmulsd xmm5, xmm0, qword ptr [rip + .LCPI24_6]
vmulsd xmm4, xmm0, qword ptr [rip + .LCPI24_4]
vaddsd xmm5, xmm5, qword ptr [rip + .LCPI24_7]
vaddsd xmm4, xmm4, qword ptr [rip + .LCPI24_5]
vcvtsi2sd xmm2, xmm2, eax
vmulsd xmm3, xmm0, xmm0
vaddsd xmm2, xmm2, qword ptr [rsi + rdi + 8]
vmulsd xmm0, xmm0, qword ptr [rip + .LCPI24_8]
movabs rsi, 4638426141214900225
vmulsd xmm6, xmm3, xmm3
vmulsd xmm3, xmm3, xmm5
vmulsd xmm4, xmm4, xmm6
vaddsd xmm0, xmm2, xmm0
vaddsd xmm0, xmm0, xmm3
vaddsd xmm0, xmm4, xmm0
# xmm0 = y * log2(x); a large magnitude is range-checked at .LBB24_25.
vcvtss2sd xmm1, xmm1, xmm1
vmulsd xmm0, xmm0, xmm1
vmovq rax, xmm0
and rdx, rax
cmp rdx, rsi
jae .LBB24_25
# exp2 step: round to a multiple of 1/32, index the table with the low 5 bits,
# add the integer part (plus the sign bias in ecx) shifted left by 47 into the
# table entry, and multiply by a polynomial in the remainder.
.LBB24_29:
vaddsd xmm1, xmm0, qword ptr [rip + .LCPI24_13]
lea rdx, [rip + __exp2f_data]
vmovq rax, xmm1
vaddsd xmm1, xmm1, qword ptr [rip + .LCPI24_14]
add ecx, eax
and eax, 31
shl rcx, 47
add rcx, qword ptr [rdx + 8*rax]
vsubsd xmm0, xmm0, xmm1
vmulsd xmm2, xmm0, qword ptr [rip + .LCPI24_15]
vmovq xmm1, rcx
vmulsd xmm3, xmm0, xmm0
vaddsd xmm2, xmm2, qword ptr [rip + .LCPI24_16]
vmulsd xmm0, xmm0, qword ptr [rip + .LCPI24_17]
vaddsd xmm0, xmm0, qword ptr [rip + .LCPI24_18]
vmulsd xmm2, xmm3, xmm2
vaddsd xmm0, xmm0, xmm2
vmulsd xmm0, xmm0, xmm1
vcvtsd2ss xmm0, xmm0, xmm0
.LBB24_30:
ret
# Classification of special operands.
.LBB24_2:
# y is zero, infinity or NaN.
lea ecx, [rax + rax]
lea esi, [rcx - 1]
cmp esi, -16777217
jae .LBB24_3
# x is zero, infinity or NaN.
lea ecx, [rdx + rdx - 1]
cmp ecx, -16777217
jae .LBB24_10
xor ecx, ecx
# Negative finite x: y must be an integer.
test edx, edx
js .LBB24_16
cmp edx, 8388607
ja .LBB24_24
# Subnormal x: scale by 2^23, then subtract 23 from the exponent field.
.LBB24_23:
vmulss xmm0, xmm0, dword ptr [rip + .LCPI24_2]
vmovd edx, xmm0
and edx, 2147483647
add edx, -192937984
jmp .LBB24_24
# y*log2(x) has a large magnitude: check for overflow.
.LBB24_25:
vucomisd xmm0, qword ptr [rip + .LCPI24_9]
jbe .LBB24_27
# Overflow: (+/-)2^97 * 2^97, sign chosen by the bias in ecx.
xor eax, eax
test ecx, ecx
lea rcx, [rip + .LCPI24_19]
sete al
vmovss xmm0, dword ptr [rcx + 4*rax]
vmovss dword ptr [rsp - 8], xmm0
vmovss xmm0, dword ptr [rsp - 8]
vmulss xmm0, xmm0, dword ptr [rip + .LCPI24_20]
ret
# Negative x: classify y by its exponent. Below 127 it has a fractional part,
# above 150 it is an even integer, otherwise its low bits are inspected.
.LBB24_16:
mov ecx, 2071
bextr ecx, eax, ecx
cmp ecx, 127
jb .LBB24_31
cmp ecx, 150
jbe .LBB24_18
.LBB24_20:
xor ecx, ecx
# Continue with |x|.
.LBB24_21:
vmovd edx, xmm0
and edx, 2147483647
cmp edx, 8388607
ja .LBB24_24
jmp .LBB24_23
# Check for underflow.
.LBB24_27:
vmovsd xmm1, qword ptr [rip + .LCPI24_10]
vucomisd xmm1, xmm0
jb .LBB24_29
# Underflow: (+/-)2^-95 * 2^-95, sign chosen by the bias in ecx.
xor eax, eax
test ecx, ecx
lea rcx, [rip + .LCPI24_11]
sete al
vmovss xmm0, dword ptr [rcx + 4*rax]
vmovss dword ptr [rsp - 4], xmm0
vmovss xmm0, dword ptr [rsp - 4]
vmulss xmm0, xmm0, dword ptr [rip + .LCPI24_12]
ret
# dl = 150 - exponent = position of the units bit of y. Any set bit below that
# position means y is not an integer.
.LBB24_18:
mov dl, -106
sub dl, cl
bzhi ecx, eax, edx
je .LBB24_19
# Negative x with non-integer y: return (x - x) / (x - x).
.LBB24_31:
vsubss xmm0, xmm0, xmm0
vdivss xmm0, xmm0, xmm0
ret
# Integer y: a set units bit means y is odd, so the result is negated via the
# sign bias 65536.
.LBB24_19:
mov ecx, 1
shlx edx, ecx, edx
mov ecx, 65536
test edx, eax
jne .LBB24_21
jmp .LBB24_20
# y is zero, infinity or NaN. Return 1.0 when y is +/-0 or x is exactly 1.0.
.LBB24_3:
vmovss xmm2, dword ptr [rip + .LCPI24_0]
test ecx, ecx
sete sil
cmp edx, 1065353216
sete dil
or dil, sil
je .LBB24_5
vmovaps xmm0, xmm2
ret
# x is zero, infinity or NaN: start from x*x, negate it when x is negative and
# y is an odd integer, and take the reciprocal when y is negative.
.LBB24_10:
vmulss xmm0, xmm0, xmm0
test edx, edx
jns .LBB24_13
mov ecx, 2071
bextr ecx, eax, ecx
lea edx, [rcx - 151]
cmp edx, -24
jb .LBB24_13
vxorps xmm1, xmm0, dword ptr [rip + .LCPI24_1]{1to4}
mov dl, -106
sub dl, cl
bzhi ecx, eax, edx
movzx edx, dl
setne cl
bt eax, edx
setae dl
kmovd k1, ecx
kmovd k2, edx
vmovss xmm1 {k2}, xmm1, xmm0
vmovss xmm1 {k1}, xmm1, xmm0
vmovaps xmm0, xmm1
.LBB24_13:
test eax, eax
jns .LBB24_30
vmovss xmm1, dword ptr [rip + .LCPI24_0]
vdivss xmm0, xmm1, xmm0
vmovss dword ptr [rsp - 12], xmm0
vmovss xmm0, dword ptr [rsp - 12]
ret
# Either operand is NaN: return x + y.
.LBB24_5:
add edx, edx
cmp edx, -16777215
setae sil
cmp ecx, -16777215
setae cl
or cl, sil
cmp cl, 1
jne .LBB24_7
vaddss xmm0, xmm0, xmm1
ret
# y is infinite: |x| == 1 gives 1.0; otherwise the result is y*y or 0.0
# depending on whether |x| is above 1 and on the sign of y.
.LBB24_7:
vmovaps xmm0, xmm2
cmp edx, 2130706432
je .LBB24_30
setae cl
test eax, eax
vmulss xmm0, xmm1, xmm1
vxorps xmm1, xmm1, xmm1
setns al
xor al, cl
kmovd k1, eax
vmovss xmm0 {k1}, xmm0, xmm1
ret
.Lfunc_end24:
.size powf, .Lfunc_end24-powf
.cfi_endproc
# Constant pool referenced by roundf: eight 32-bit values (IEEE-754
# single-precision bit patterns and bit masks), placed in a mergeable
# read-only section with 4-byte entries and 4-byte alignment.
.section .rodata.cst4,"aM",@progbits,4
.p2align 2, 0x0
# Mask with every bit except the sign bit set; ANDed with x to form |x|.
.LCPI25_0:
.long 0x7fffffff
# Bit pattern of 8388608.0f (2^23).
.LCPI25_1:
.long 0x4b000000
# Bit pattern of -8388608.0f (-2^23).
.LCPI25_2:
.long 0xcb000000
# Bit pattern of 0.5f.
.LCPI25_3:
.long 0x3f000000
# Bit pattern of -0.5f.
.LCPI25_4:
.long 0xbf000000
# Bit pattern of 1.0f.
.LCPI25_5:
.long 0x3f800000
# Bit pattern of -1.0f.
.LCPI25_6:
.long 0xbf800000
# Sign-bit mask; XORed with a float to negate it.
.LCPI25_7:
.long 0x80000000
# roundf: rounds the single-precision value in xmm0 to an integral value,
# with halfway cases moving away from zero, and returns it in xmm0.
#   In:    xmm0 = x
#   Out:   xmm0 = rounded x
#   Uses:  eax, ecx, xmm1, xmm2, k1, flags, and 4 bytes at [rsp - 4]
#          (written below rsp without adjusting rsp; leaf function, no calls).
# NOTE(review): the register usage matches the SysV AMD64 float calling
# convention and the store below rsp relies on a red zone - confirm target ABI.
.section .text.roundf,"ax",@progbits
.p2align 4, 0x90
.type roundf,@function
roundf:
.Lfunc_begin25:
.cfi_startproc
# eax = raw bits of x; kept intact until the final sign test.
vmovd eax, xmm0
# BEXTR control 2071 = 0x0817: start bit 23, length 8 -> ecx = biased exponent.
mov ecx, 2071
bextr ecx, eax, ecx
# Biased exponent >= 150 means |x| >= 2^23, infinity or NaN: return x unchanged.
cmp ecx, 149
ja .LBB25_8
# xmm1 = |x|, xmm2 = |x| + 2^23.
vpandd xmm1, xmm0, dword ptr [rip + .LCPI25_0]{1to4}
vaddss xmm2, xmm1, dword ptr [rip + .LCPI25_1]
# Biased exponent >= 126 means |x| >= 0.5: take the general path.
cmp ecx, 125
ja .LBB25_3
# |x| < 0.5: the result is x * 0.0, i.e. a zero carrying the sign of x.
vxorps xmm1, xmm1, xmm1
# The sum is stored but never reloaded here; presumably this only forces the
# addition to be evaluated (e.g. for its floating-point exception side effect).
vmovss dword ptr [rsp - 4], xmm2
vmulss xmm0, xmm0, xmm1
ret
.LBB25_3:
# xmm0 = y = ((|x| + 2^23) - 2^23) - |x|, the adjustment that the
# add/subtract of 2^23 applied to |x|.
vaddss xmm0, xmm2, dword ptr [rip + .LCPI25_2]
vsubss xmm0, xmm0, xmm1
# If y > 0.5 the result magnitude is |x| + y - 1.
vucomiss xmm0, dword ptr [rip + .LCPI25_3]
jbe .LBB25_5
vaddss xmm0, xmm1, xmm0
vaddss xmm0, xmm0, dword ptr [rip + .LCPI25_6]
jmp .LBB25_7
.LBB25_5:
# Compare -0.5 against y. The vaddss below does not modify EFLAGS, so the
# jb still tests this comparison.
vmovss xmm2, dword ptr [rip + .LCPI25_4]
vucomiss xmm2, xmm0
# xmm0 = |x| + y.
vaddss xmm0, xmm1, xmm0
# -0.5 < y: keep |x| + y. Otherwise (y <= -0.5) add 1.
jb .LBB25_7
vaddss xmm0, xmm0, dword ptr [rip + .LCPI25_5]
.LBB25_7:
# xmm1 = negated result. Select it when the sign bit of the original x is set.
vxorps xmm1, xmm0, dword ptr [rip + .LCPI25_7]{1to4}
test eax, eax
sets al
# Only bit 0 of k1 matters for the scalar masked move; stale upper bits of
# eax are harmless.
kmovd k1, eax
vmovss xmm0 {k1}, xmm0, xmm1
.LBB25_8:
ret
.Lfunc_end25:
.size roundf, .Lfunc_end25-roundf
.cfi_endproc
# NUL-terminated string "pack_dispatch_0" (15 characters + NUL = 16 bytes).
# Referenced by iree_hal_executable_library_query_v0_header.
.type __unnamed_1,@object
.section .rodata.__unnamed_1,"a",@progbits
__unnamed_1:
.asciz "pack_dispatch_0"
.size __unnamed_1, 16
# 24-byte, 16-byte-aligned header record: a 32-bit value 4, 4 bytes of zero
# padding, a pointer to the "pack_dispatch_0" string, and two zero 32-bit fields.
# Lives in .data.rel.ro because it contains a relocated pointer.
# NOTE(review): presumably {version, name, features, sanitizer} of IREE's
# executable library header struct - confirm against the IREE runtime headers.
.type iree_hal_executable_library_query_v0_header,@object
.section .data.rel.ro.iree_hal_executable_library_query_v0_header,"aw",@progbits
.p2align 4, 0x0
iree_hal_executable_library_query_v0_header:
.long 4
.zero 4
.quad __unnamed_1
.long 0
.long 0
.size iree_hal_executable_library_query_v0_header, 24
# One-entry table of function pointers: the address of
# pack_dispatch_0_pack_f16 (8 bytes, relocated, hence .data.rel.ro).
.type iree_hal_executable_library_query_v0_funcs,@object
.section .data.rel.ro.iree_hal_executable_library_query_v0_funcs,"aw",@progbits
.p2align 3, 0x0
iree_hal_executable_library_query_v0_funcs:
.quad pack_dispatch_0_pack_f16
.size iree_hal_executable_library_query_v0_funcs, 8
# Four zero bytes of read-only data, 8-byte aligned.
# NOTE(review): presumably the per-export attribute record for the single
# exported function, with all attributes left at zero - confirm.
.type iree_hal_executable_library_query_v0_attrs,@object
.section .rodata.iree_hal_executable_library_query_v0_attrs,"a",@progbits
.p2align 3, 0x0
iree_hal_executable_library_query_v0_attrs:
.zero 4
.size iree_hal_executable_library_query_v0_attrs, 4
# NUL-terminated string "pack_dispatch_0_pack_f16" (24 characters + NUL =
# 25 bytes). Referenced by iree_hal_executable_library_query_v0_names.
.type __unnamed_2,@object
.section .rodata.__unnamed_2,"a",@progbits
__unnamed_2:
.asciz "pack_dispatch_0_pack_f16"
.size __unnamed_2, 25
# One-entry table of name pointers: the address of the
# "pack_dispatch_0_pack_f16" string (8 bytes, relocated).
.type iree_hal_executable_library_query_v0_names,@object
.section .data.rel.ro.iree_hal_executable_library_query_v0_names,"aw",@progbits
.p2align 3, 0x0
iree_hal_executable_library_query_v0_names:
.quad __unnamed_2
.size iree_hal_executable_library_query_v0_names, 8
# NUL-terminated string "pack.mlir" (9 characters + NUL = 10 bytes).
# Referenced by iree_hal_executable_library_query_v0_source_locations.
.type __unnamed_3,@object
.section .rodata.__unnamed_3,"a",@progbits
__unnamed_3:
.asciz "pack.mlir"
.size __unnamed_3, 10
# One 16-byte record: 32-bit value 4, 32-bit value 9, then a pointer to the
# "pack.mlir" string. The value 9 equals the length of "pack.mlir" without
# its NUL terminator.
# NOTE(review): presumably {line, path_length, path} for the single exported
# function - confirm against the IREE runtime headers.
.type iree_hal_executable_library_query_v0_source_locations,@object
.section .data.rel.ro.iree_hal_executable_library_query_v0_source_locations,"aw",@progbits
.p2align 3, 0x0
iree_hal_executable_library_query_v0_source_locations:
.long 4
.long 9
.quad __unnamed_3
.size iree_hal_executable_library_query_v0_source_locations, 16
# Empty (size 0) object: the symbol only provides an address for the first
# pointer in iree_hal_executable_library_query_v0_stage_location_tables,
# whose count field is 0.
.type iree_hal_executable_library_query_v0_pack_dispatch_0_pack_f16_stage_names,@object
.section .rodata.iree_hal_executable_library_query_v0_pack_dispatch_0_pack_f16_stage_names,"a",@progbits
.p2align 3, 0x0
iree_hal_executable_library_query_v0_pack_dispatch_0_pack_f16_stage_names:
.size iree_hal_executable_library_query_v0_pack_dispatch_0_pack_f16_stage_names, 0
# Empty (size 0) object: the symbol only provides an address for the second
# pointer in iree_hal_executable_library_query_v0_stage_location_tables,
# whose count field is 0.
.type iree_hal_executable_library_query_v0_pack_dispatch_0_pack_f16_stage_source_locations,@object
.section .rodata.iree_hal_executable_library_query_v0_pack_dispatch_0_pack_f16_stage_source_locations,"a",@progbits
.p2align 3, 0x0
iree_hal_executable_library_query_v0_pack_dispatch_0_pack_f16_stage_source_locations:
.size iree_hal_executable_library_query_v0_pack_dispatch_0_pack_f16_stage_source_locations, 0
# One 24-byte record: a 32-bit zero, 4 bytes of padding, then pointers to the
# (empty) stage-names and stage-source-locations objects for
# pack_dispatch_0_pack_f16.
# NOTE(review): the leading zero is presumably the entry count for the two
# pointed-to arrays - confirm against the IREE runtime headers.
.type iree_hal_executable_library_query_v0_stage_location_tables,@object
.section .data.rel.ro.iree_hal_executable_library_query_v0_stage_location_tables,"aw",@progbits
.p2align 4, 0x0
iree_hal_executable_library_query_v0_stage_location_tables:
.long 0
.zero 4
.quad iree_hal_executable_library_query_v0_pack_dispatch_0_pack_f16_stage_names
.quad iree_hal_executable_library_query_v0_pack_dispatch_0_pack_f16_stage_source_locations
.size iree_hal_executable_library_query_v0_stage_location_tables, 24
# Top-level 104-byte, 16-byte-aligned descriptor that ties the tables in this
# file together. Byte layout as emitted:
#   0   pointer to the header record
#   8   16 zero bytes
#   24  32-bit value 1, followed by 4 bytes of padding
#   32  pointers: funcs, attrs, names, null, source_locations,
#       stage_location_tables
#   80  24 zero bytes (emitted as 4 + 4 + 16)
# NOTE(review): presumably IREE's v0 executable library struct, i.e. header,
# empty import table, export table with count 1, then empty trailing tables -
# confirm field names against the IREE runtime headers.
.type iree_hal_executable_library_query_v0,@object
.section .data.rel.ro.iree_hal_executable_library_query_v0,"aw",@progbits
.p2align 4, 0x0
iree_hal_executable_library_query_v0:
.quad iree_hal_executable_library_query_v0_header
.zero 16
# 1 matches the single entry in each of the funcs/names/source_locations tables.
.long 1
.zero 4
.quad iree_hal_executable_library_query_v0_funcs
.quad iree_hal_executable_library_query_v0_attrs
.quad iree_hal_executable_library_query_v0_names
.quad 0
.quad iree_hal_executable_library_query_v0_source_locations
.quad iree_hal_executable_library_query_v0_stage_location_tables
.zero 4
.zero 4
.zero 16
.size iree_hal_executable_library_query_v0, 104
# Read-only table of 37 64-bit values (296 bytes, 8-byte aligned), written as
# IEEE-754 double-precision bit patterns in hex.
# NOTE(review): presumably the log2 lookup data used by powf: 16 pairs of
# {reciprocal, log2 value} followed by 5 polynomial coefficients, as in
# musl-style libm - confirm against the libm source this was compiled from.
.type __powf_log2_data,@object
.section .rodata.__powf_log2_data,"a",@progbits
.p2align 3, 0x0
__powf_log2_data:
.quad 0x3ff661ec79f8f3be
.quad 0xbfdefec65b963019
.quad 0x3ff571ed4aaf883d
.quad 0xbfdb0b6832d4fca4
.quad 0x3ff49539f0f010b0
.quad 0xbfd7418b0a1fb77b
.quad 0x3ff3c995b0b80385
.quad 0xbfd39de91a6dcf7b
.quad 0x3ff30d190c8864a5
.quad 0xbfd01d9bf3f2b631
.quad 0x3ff25e227b0b8ea0
.quad 0xbfc97c1d1b3b7af0
.quad 0x3ff1bb4a4a1a343f
.quad 0xbfc2f9e393af3c9f
.quad 0x3ff12358f08ae5ba
.quad 0xbfb960cbbf788d5c
.quad 0x3ff0953f419900a7
.quad 0xbfaa6f9db6475fce
# The next two entries are exactly 1.0 and 0.0.
.quad 0x3ff0000000000000
.quad 0x0000000000000000
.quad 0x3fee608cfd9a47ac
.quad 0x3fb338ca9f24f53d
.quad 0x3feca4b31f026aa0
.quad 0x3fc476a9543891ba
.quad 0x3feb2036576afce6
.quad 0x3fce840b4ac4e4d2
.quad 0x3fe9c2d163a1aa2d
.quad 0x3fd40645f0c6651c
.quad 0x3fe886e6037841ed
.quad 0x3fd88e9c2c1b9ff8
.quad 0x3fe767dcf5534862
.quad 0x3fdce0a44eb17bcc
# Final 5 entries (presumably the polynomial coefficients - TODO confirm).
.quad 0x3fd27616c9496e0b
.quad 0xbfd71969a075c67a
.quad 0x3fdec70a6ca7badd
.quad 0xbfe7154748bef6c8
.quad 0x3ff71547652ab82b
.size __powf_log2_data, 296
# Read-only table of 41 64-bit values (328 bytes, 8-byte aligned): 32 entries
# written as decimal integers followed by 9 entries written as hex
# double-precision bit patterns.
# NOTE(review): presumably the exp2f lookup data used by powf (a 32-entry
# table followed by shift constants and polynomial coefficients, as in
# musl-style libm) - confirm against the libm source this was compiled from.
.type __exp2f_data,@object
.section .rodata.__exp2f_data,"a",@progbits
.p2align 3, 0x0
__exp2f_data:
# 32 table entries. The first equals 0x3ff0000000000000, the bit pattern of 1.0.
.quad 4607182418800017408
.quad 4607140297302181236
.quad 4607100335213349135
.quad 4607062579818421073
.quad 4607027079437701499
.quad 4606993883449571754
.quad 4606963042313658936
.quad 4606934607594512097
.quad 4606908631985796885
.quad 4606885169335019979
.quad 4606864274668794914
.quad 4606846004218661165
.quad 4606830415447468583
.quad 4606817567076339586
.quad 4606807519112221737
.quad 4606800332876043653
.quad 4606796071031487437
.quad 4606794797614391156
.quad 4606796578062795143
.quad 4606801479247646227
.quad 4606809569504174299
.quad 4606820918663955941
.quad 4606835598087680144
.quad 4606853680698631517
.quad 4606875241016906669
.quad 4606900355194379847
.quad 4606929101050434204
.quad 4606961558108475497
.quad 4606997807633245319
.quad 4607037932668951391
.quad 4607082018078232794
.quad 4607130150581978432
# Remaining 9 entries are double bit patterns. 0x42e8000000000000 is
# 1.5 * 2^47 and 0x4338000000000000 is 1.5 * 2^52 (presumably the rounding
# "shift" constants; the rest presumably scale factors and polynomial
# coefficients - TODO confirm).
.quad 0x42e8000000000000
.quad 0x3fac6af84b912394
.quad 0x3fcebfce50fac4f3
.quad 0x3fe62e42ff0c52d6
.quad 0x4338000000000000
.quad 0x40471547652b82fe
.quad 0x3ebc6af84b912394
.quad 0x3f2ebfce50fac4f3
.quad 0x3f962e42ff0c52d6
.size __exp2f_data, 328
# DWARF abbreviation table: three abbreviation declarations used by the DIEs
# in .debug_info. Each declaration is: code, tag, has-children flag, then
# (attribute, form) pairs terminated by a (0, 0) pair.
.section .debug_abbrev,"",@progbits
# Abbreviation 1: tag 17 (0x11, DW_TAG_compile_unit), has children.
.byte 1
.byte 17
.byte 1
# 37 (0x25, DW_AT_producer), form 14 (0x0e, DW_FORM_strp)
.byte 37
.byte 14
# 19 (0x13, DW_AT_language), form 5 (DW_FORM_data2)
.byte 19
.byte 5
# 3 (DW_AT_name), form 14 (DW_FORM_strp)
.byte 3
.byte 14
# 16 (0x10, DW_AT_stmt_list), form 23 (0x17, DW_FORM_sec_offset)
.byte 16
.byte 23
# Octal \264 followed by 'B' = bytes 0xb4 0x42 = ULEB128 0x2134
# (DW_AT_GNU_pubnames), form 25 (0x19, DW_FORM_flag_present)
.ascii "\264B"
.byte 25
# 17 (0x11, DW_AT_low_pc), form 1 (DW_FORM_addr)
.byte 17
.byte 1
# 18 (0x12, DW_AT_high_pc), form 6 (DW_FORM_data4)
.byte 18
.byte 6
# End of attribute list for abbreviation 1.
.byte 0
.byte 0
# Abbreviation 2: tag 46 (0x2e, DW_TAG_subprogram), no children.
.byte 2
.byte 46
.byte 0
# 17 (DW_AT_low_pc), form 1 (DW_FORM_addr)
.byte 17
.byte 1
# 18 (DW_AT_high_pc), form 6 (DW_FORM_data4)
.byte 18
.byte 6
# 64 (0x40, DW_AT_frame_base), form 24 (0x18, DW_FORM_exprloc)
.byte 64
.byte 24
# 110 (0x6e, DW_AT_linkage_name), form 14 (DW_FORM_strp)
.byte 110
.byte 14
# 3 (DW_AT_name), form 14 (DW_FORM_strp)
.byte 3
.byte 14
# 58 (0x3a, DW_AT_decl_file), form 11 (0x0b, DW_FORM_data1)
.byte 58
.byte 11
# 59 (0x3b, DW_AT_decl_line), form 11 (DW_FORM_data1)
.byte 59
.byte 11
# 73 (0x49, DW_AT_type), form 19 (0x13, DW_FORM_ref4)
.byte 73
.byte 19
# 63 (0x3f, DW_AT_external), form 25 (DW_FORM_flag_present)
.byte 63
.byte 25
# End of attribute list for abbreviation 2.
.byte 0
.byte 0
# Abbreviation 3: tag 36 (0x24, DW_TAG_base_type), no children.
.byte 3
.byte 36
.byte 0
# 3 (DW_AT_name), form 14 (DW_FORM_strp)
.byte 3
.byte 14
# 62 (0x3e, DW_AT_encoding), form 11 (DW_FORM_data1)
.byte 62
.byte 11
# 11 (0x0b, DW_AT_byte_size), form 11 (DW_FORM_data1)
.byte 11
.byte 11
# End of attribute list for abbreviation 3.
.byte 0
.byte 0
# Abbreviation code 0 terminates the table.
.byte 0
# DWARF compilation unit: one compile-unit DIE with two children (a subprogram
# DIE for pack_dispatch_0_pack_f16 and a base-type DIE for "int"). Fields
# follow the abbreviations declared in .debug_abbrev. Offsets in comments are
# relative to .Lcu_begin0.
.section .debug_info,"",@progbits
.Lcu_begin0:
# Unit header: length (excluding this field), DWARF version 4, offset of the
# abbreviation table, address size 8.
.long .Ldebug_info_end0-.Ldebug_info_start0
.Ldebug_info_start0:
.short 4
.long .debug_abbrev
.byte 8
# Offset 11: abbreviation 1 (compile unit).
.byte 1
# Producer string ("IREE").
.long .Linfo_string0
# Language code 44 (0x2c); presumably DW_LANG_C17 - confirm against the
# DWARF language code list.
.short 44
# Unit name string ("-").
.long .Linfo_string1
# Offset of this unit's line table in .debug_line.
.long .Lline_table_start0
# low_pc and high_pc (high_pc is a length): covers only function 0.
.quad .Lfunc_begin0
.long .Lfunc_end0-.Lfunc_begin0
# Offset 38: abbreviation 2 (subprogram).
.byte 2
.quad .Lfunc_begin0
.long .Lfunc_end0-.Lfunc_begin0
# Frame base: 1-byte expression, opcode 86 (0x56, DW_OP_reg6; DWARF register 6
# is rbp on x86-64).
.byte 1
.byte 86
# Linkage name and name both point at "pack_dispatch_0_pack_f16".
.long .Linfo_string2
.long .Linfo_string2
# decl_file 1, decl_line 1.
.byte 1
.byte 1
# Type reference: unit-relative offset 67, the base-type DIE below.
.long 67
# Offset 67: abbreviation 3 (base type): name "int", encoding 5
# (DW_ATE_signed), byte size 4.
.byte 3
.long .Linfo_string3
.byte 5
.byte 4
# Null entry ends the compile unit's children; total unit size is 75 bytes.
.byte 0
.Ldebug_info_end0:
# DWARF string table (mergeable NUL-terminated strings, 1-byte characters).
# Entries are referenced from .debug_info through the .Linfo_string labels.
.section .debug_str,"MS",@progbits,1
# Producer string of the compile unit.
.Linfo_string0:
.asciz "IREE"
# Name string of the compile unit.
.Linfo_string1:
.asciz "-"
# Name and linkage name of the subprogram DIE.
.Linfo_string2:
.asciz "pack_dispatch_0_pack_f16"
# Name of the base-type DIE.
.Linfo_string3:
.asciz "int"
# DWARF public-names lookup table for the compile unit at .Lcu_begin0.
.section .debug_pubnames,"",@progbits
# Table length (excluding this field).
.long .LpubNames_end0-.LpubNames_start0
.LpubNames_start0:
# Table version 2.
.short 2
# Offset of the compile unit in .debug_info, then its total size (75 bytes).
.long .Lcu_begin0
.long 75
# Entry: unit-relative DIE offset 38 (the subprogram DIE) and its name.
.long 38
.asciz "pack_dispatch_0_pack_f16"
# Zero offset terminates the entry list.
.long 0
.LpubNames_end0:
# DWARF public-types lookup table for the compile unit at .Lcu_begin0.
.section .debug_pubtypes,"",@progbits
# Table length (excluding this field).
.long .LpubTypes_end0-.LpubTypes_start0
.LpubTypes_start0:
# Table version 2.
.short 2
# Offset of the compile unit in .debug_info, then its total size (75 bytes).
.long .Lcu_begin0
.long 75
# Entry: unit-relative DIE offset 67 (the base-type DIE) and its name.
.long 67
.asciz "int"
# Zero offset terminates the entry list.
.long 0
.LpubTypes_end0:
# Empty .note.GNU-stack section with no flags. By GNU toolchain convention
# this marks the object as not requiring an executable stack.
.section ".note.GNU-stack","",@progbits
# Start of this unit's line table, referenced from the compile-unit DIE in
# .debug_info. No explicit data follows in this file; the contents are
# presumably generated by the assembler from the .file/.loc directives.
.section .debug_line,"",@progbits
.Lline_table_start0:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment