Skip to content

Instantly share code, notes, and snippets.

@pashu123
Created April 24, 2024 13:03
Show Gist options
  • Save pashu123/9145c2b2859bf31fff9e55243821f31a to your computer and use it in GitHub Desktop.
Save pashu123/9145c2b2859bf31fff9e55243821f31a to your computer and use it in GitHub Desktop.
.text
.intel_syntax noprefix
.file "broadcast_dispatch_0"
.section .text.broadcast_dispatch_0_generic_Dx8640x3200_f16,"ax",@progbits
.p2align 4, 0x90
.type broadcast_dispatch_0_generic_Dx8640x3200_f16,@function
# broadcast_dispatch_0_generic_Dx8640x3200_f16
#
# Copies tiles of 64 rows by 64 two-byte elements from the buffer at
# [[rsi+32]+0] to the buffer at [[rsi+32]+8]. For each outer index the same
# source tile is read again while the destination pointer advances by
# 55296000 bytes, so the source is replicated along the outer dimension.
#
# Inputs, as used by this function (rdi is overwritten before being read):
#   rsi -> block holding: dword @12, dword @16, word @20 (loop strides in
#          tiles, presumably workgroup counts x/y/z); pointer @24 to two
#          dwords forming the 64-bit outer bound (low, high); pointer @32 to
#          the {source, destination} pointer pair.
#   rdx -> block holding: dword @0, dword @4, word @8 (loop start positions
#          in tiles, presumably workgroup ids x/y/z).
#   NOTE(review): field meanings are inferred from how they are used here;
#   confirm against the IREE executable library dispatch/workgroup structs.
# Output: eax = 0.
# Stack: rbp, r15, r14, r13, r12, rbx are pushed; locals live at
#   [rbp-48]..[rbp-160] without moving rsp, and the function makes no calls.
# Alignment: all tile accesses use vmovaps on ymm registers, which faults
#   unless the address is 32-byte aligned.
broadcast_dispatch_0_generic_Dx8640x3200_f16:
.Lfunc_begin0:
.file 1 "-"
.loc 1 1 0
.cfi_startproc
push rbp
.cfi_def_cfa_offset 16
.cfi_offset rbp, -16
mov rbp, rsp
.cfi_def_cfa_register rbp
.Ltmp0:
push r15
push r14
push r13
push r12
push rbx
.cfi_offset rbx, -56
.cfi_offset r12, -48
.cfi_offset r13, -40
.cfi_offset r14, -32
.cfi_offset r15, -24
.loc 1 4 3 prologue_end
# Build the 64-bit outer bound: rbx = high dword << 32, rcx = low dword.
mov rax, qword ptr [rsi + 24]
mov ebx, dword ptr [rax + 4]
mov ecx, dword ptr [rax]
# rax = word at [rdx+8]; r8 = rax * 64 is the first outer index handled here.
movzx eax, word ptr [rdx + 8]
shl rbx, 32
mov r8d, eax
shl r8d, 6
lea rdi, [rbx + rcx]
# Spill: [rbp-48] = current outer index, [rbp-64] = outer bound.
mov qword ptr [rbp - 48], r8
mov qword ptr [rbp - 64], rdi
# Nothing to do when the start index is already at or past the bound (signed).
cmp r8, rdi
jge .LBB0_16
.loc 1 0 3 is_stmt 0
# r11 = dword [rdx], edx = dword [rdx+4]: column-tile and row-tile start.
mov r11d, dword ptr [rdx]
mov edx, dword ptr [rdx + 4]
mov rdi, qword ptr [rsi + 32]
# r10 = word [rsi+20], r9 = dword [rsi+16], r8 = dword [rsi+12]: loop strides.
movzx r10d, word ptr [rsi + 20]
mov r9d, dword ptr [rsi + 16]
.loc 1 4 3
or rbx, rcx
mov r8d, dword ptr [rsi + 12]
# r14 = 3538944000 = 64 * 55296000: destination bytes spanned by 64 outer slices.
mov r14d, 3538944000
# rbx = bound - start = outer indices still to process.
sub rbx, qword ptr [rbp - 48]
imul rax, r14
# rsi = source base pointer, rdi = destination base pointer.
mov rsi, qword ptr [rdi]
mov rdi, qword ptr [rdi + 8]
# rcx = row-tile start * 409600 bytes (64 rows of 6400 bytes each).
imul rcx, rdx, 409600
# r14 = destination byte step per outer-loop iteration; r10 = outer index step.
imul r14, r10
shl r10d, 6
mov qword ptr [rbp - 120], rdx
mov r13, r8
mov qword ptr [rbp - 160], r11
# r13 = column step in elements (stride * 64); r8 = same step in bytes (stride * 128).
shl r13, 6
shl r8, 7
mov qword ptr [rbp - 104], r10
mov r10, rdx
mov rdx, r11
# rdx = column-tile start * 128 bytes; r10 = first row; r11 = first column.
shl rdx, 7
shl r10, 6
shl r11, 6
mov qword ptr [rbp - 96], r10
# r10 = row-loop byte step (stride * 409600); r9 = row-loop step in rows (stride * 64).
imul r10, r9, 409600
shl r9, 6
mov qword ptr [rbp - 136], r11
add rdx, rcx
mov qword ptr [rbp - 112], r14
mov qword ptr [rbp - 144], r9
add rax, rdx
# Both pointers carry a +96000 bias (15 rows of 6400 bytes); the inner loop's
# negative displacements undo it.
lea rax, [rdi + rax + 96000]
lea rcx, [rsi + rdx + 96000]
mov qword ptr [rbp - 152], r10
# Spill: [rbp-56] = destination pointer for this outer step, [rbp-88] = source pointer.
mov qword ptr [rbp - 56], rax
mov qword ptr [rbp - 88], rcx
jmp .LBB0_2
.p2align 4, 0x90
# Outer-loop latch: advance the outer index and destination pointer, reload
# the remaining count (rbx is reused as the inner-loop counter) and re-test.
.LBB0_15:
.loc 1 0 3
mov rdx, qword ptr [rbp - 56]
mov rax, qword ptr [rbp - 48]
mov rcx, qword ptr [rbp - 104]
mov rbx, qword ptr [rbp - 128]
.loc 1 4 3
add rdx, qword ptr [rbp - 112]
add rax, rcx
sub rbx, rcx
mov qword ptr [rbp - 56], rdx
mov qword ptr [rbp - 48], rax
cmp rax, qword ptr [rbp - 64]
jge .LBB0_16
# Outer-loop head: r11 = number of outer slices in this step, i.e. the
# remaining count clamped to the range 1..64.
.LBB0_2:
cmp rbx, 64
mov r11d, 64
mov eax, 1
mov qword ptr [rbp - 128], rbx
cmovl r11, rbx
cmp r11, 2
cmovl r11, rax
# Skip the row loop when the row-tile start exceeds 134 (135 * 64 = 8640 rows).
cmp dword ptr [rbp - 120], 134
ja .LBB0_15
.loc 1 0 3
mov rcx, qword ptr [rbp - 64]
mov rax, qword ptr [rbp - 88]
mov rdx, qword ptr [rbp - 56]
mov r10, qword ptr [rbp - 96]
.loc 1 4 3
# rcx = bound - current outer index; tested for > 0 before each tile copy.
sub rcx, qword ptr [rbp - 48]
mov qword ptr [rbp - 80], rax
mov qword ptr [rbp - 72], rdx
jmp .LBB0_4
.p2align 4, 0x90
# Row-loop latch: step the row index and both working pointers; stop at row 8640.
.LBB0_14:
.loc 1 0 3
mov rax, qword ptr [rbp - 152]
.loc 1 4 3
add r10, qword ptr [rbp - 144]
add qword ptr [rbp - 72], rax
add qword ptr [rbp - 80], rax
cmp r10, 8640
jge .LBB0_15
# Row-loop head: skip the column loop when the column-tile start exceeds 49
# (50 * 64 = 3200 columns).
.LBB0_4:
.loc 1 0 3
cmp dword ptr [rbp - 160], 49
.loc 1 4 3
ja .LBB0_14
.loc 1 0 3
# rdi = source tile pointer, rax = destination tile pointer, r15 = column index.
mov rdi, qword ptr [rbp - 80]
mov rax, qword ptr [rbp - 72]
mov r15, qword ptr [rbp - 136]
jmp .LBB0_6
.p2align 4, 0x90
# Column-loop latch: step the column index and both pointers; stop at column 3200.
.LBB0_13:
.loc 1 4 3
add r15, r13
add rax, r8
add rdi, r8
cmp r15, 3200
jge .LBB0_14
.LBB0_6:
.loc 1 0 3
test rcx, rcx
.loc 1 4 3
jle .LBB0_13
.loc 1 0 3
mov r9, rax
xor esi, esi
.p2align 4, 0x90
# Per-slice loop: rsi counts 0..r11-1. The source pointer (rdx) restarts from
# rdi every time while the destination (r9) advances by one slice.
.LBB0_8:
mov rdx, rdi
mov r12, r9
xor r14d, r14d
.p2align 4, 0x90
# Row-group loop: four passes (r14 = 0, 16, 32, 48), each covering 16 rows.
.LBB0_9:
mov rbx, -16
.p2align 4, 0x90
# Column-chunk loop: four passes (rbx = -16, 0, 16, 32). Each pass copies one
# 32-byte vector from each of 16 consecutive rows (row stride 6400 bytes).
.LBB0_10:
.loc 1 4 3
vmovaps ymm0, ymmword ptr [rdx + 2*rbx - 95968]
vmovaps ymm1, ymmword ptr [rdx + 2*rbx - 89568]
vmovaps ymm2, ymmword ptr [rdx + 2*rbx - 83168]
vmovaps ymm3, ymmword ptr [rdx + 2*rbx - 76768]
vmovaps ymm4, ymmword ptr [rdx + 2*rbx - 70368]
vmovaps ymm5, ymmword ptr [rdx + 2*rbx - 63968]
vmovaps ymm6, ymmword ptr [rdx + 2*rbx - 57568]
vmovaps ymm7, ymmword ptr [rdx + 2*rbx - 51168]
vmovaps ymm8, ymmword ptr [rdx + 2*rbx - 44768]
vmovaps ymm9, ymmword ptr [rdx + 2*rbx - 38368]
vmovaps ymm10, ymmword ptr [rdx + 2*rbx - 31968]
vmovaps ymm11, ymmword ptr [rdx + 2*rbx - 25568]
vmovaps ymm12, ymmword ptr [rdx + 2*rbx - 19168]
vmovaps ymm13, ymmword ptr [rdx + 2*rbx - 12768]
vmovaps ymm14, ymmword ptr [rdx + 2*rbx - 6368]
vmovaps ymm15, ymmword ptr [rdx + 2*rbx + 32]
vmovaps ymmword ptr [r12 + 2*rbx - 95968], ymm0
vmovaps ymmword ptr [r12 + 2*rbx - 89568], ymm1
vmovaps ymmword ptr [r12 + 2*rbx - 83168], ymm2
vmovaps ymmword ptr [r12 + 2*rbx - 76768], ymm3
vmovaps ymmword ptr [r12 + 2*rbx - 70368], ymm4
vmovaps ymmword ptr [r12 + 2*rbx - 63968], ymm5
vmovaps ymmword ptr [r12 + 2*rbx - 57568], ymm6
vmovaps ymmword ptr [r12 + 2*rbx - 51168], ymm7
vmovaps ymmword ptr [r12 + 2*rbx - 44768], ymm8
vmovaps ymmword ptr [r12 + 2*rbx - 38368], ymm9
vmovaps ymmword ptr [r12 + 2*rbx - 31968], ymm10
vmovaps ymmword ptr [r12 + 2*rbx - 25568], ymm11
vmovaps ymmword ptr [r12 + 2*rbx - 19168], ymm12
vmovaps ymmword ptr [r12 + 2*rbx - 12768], ymm13
vmovaps ymmword ptr [r12 + 2*rbx - 6368], ymm14
vmovaps ymmword ptr [r12 + 2*rbx + 32], ymm15
add rbx, 16
cmp rbx, 48
jb .LBB0_10
# Move both pointers down 16 rows (16 * 6400 = 102400 bytes).
lea rbx, [r14 + 16]
add r12, 102400
add rdx, 102400
cmp r14, 48
mov r14, rbx
jb .LBB0_9
# Next outer slice: only the destination moves (55296000 bytes per slice).
inc rsi
add r9, 55296000
cmp rsi, r11
jne .LBB0_8
jmp .LBB0_13
# Common exit: return 0 and restore callee-saved registers.
.LBB0_16:
xor eax, eax
.loc 1 4 3 epilogue_begin
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
.cfi_def_cfa rsp, 8
# Clear the upper halves of the ymm registers before returning to the caller.
vzeroupper
ret
.Ltmp1:
.Lfunc_end0:
.size broadcast_dispatch_0_generic_Dx8640x3200_f16, .Lfunc_end0-broadcast_dispatch_0_generic_Dx8640x3200_f16
.cfi_endproc
.section .text.iree_hal_executable_library_query,"ax",@progbits
        .globl  iree_hal_executable_library_query
        .p2align 4, 0x90
        .type   iree_hal_executable_library_query,@function
#-----------------------------------------------------------------------
# iree_hal_executable_library_query
#   In:    edi = 32-bit selector (presumably the requested library
#                version - confirm against the IREE executable library ABI)
#   Out:   rax = address of iree_hal_executable_library_query_v0 when
#                edi == 4, otherwise 0
#   Clobb: rcx, flags
#-----------------------------------------------------------------------
iree_hal_executable_library_query:
.Liree_hal_executable_library_query$local:
        .type   .Liree_hal_executable_library_query$local,@function
.Lfunc_begin1:
        .cfi_startproc
        lea     rcx, [rip + iree_hal_executable_library_query_v0]
        xor     eax, eax                # default result: null
        cmp     edi, 4
        cmove   rax, rcx                # selector matched: hand out the table
        ret
.Lfunc_end1:
        .size   iree_hal_executable_library_query, .Lfunc_end1-iree_hal_executable_library_query
        .size   .Liree_hal_executable_library_query$local, .Lfunc_end1-iree_hal_executable_library_query
        .cfi_endproc
.section .text.iree_h2f_ieee,"ax",@progbits
        .p2align 4, 0x90
        .type   iree_h2f_ieee,@function
#-----------------------------------------------------------------------
# iree_h2f_ieee: widen a binary16 bit pattern to a binary32 value.
#   In:    edi  = half-precision bits in the low 16 bits
#   Out:   xmm0 = single-precision result
#   Clobb: eax, ecx, edx, edi, xmm1, flags
#-----------------------------------------------------------------------
iree_h2f_ieee:
.Lfunc_begin2:
        .cfi_startproc
        mov     eax, edi
        and     eax, 32768              # sign bit of the half (bit 15)
        shl     eax, 16                 # ... moved to bit 31
        mov     ecx, edi
        and     ecx, 1023               # ecx = 10-bit mantissa
        mov     edx, edi
        and     dx, 31744               # dx = exponent field (bits 10..14)
        je      .Liree_h2f_subnormal    # exponent field is zero
        and     edi, 31744
        cmp     edi, 31744
        jne     .Liree_h2f_normal       # exponent field is not all ones
        test    cx, cx
        je      .Liree_h2f_inf          # all-ones exponent, zero mantissa
        or      eax, 2143289344         # 0x7FC00000: NaN, sign kept
        vmovd   xmm0, eax
        ret
.Liree_h2f_inf:
        or      eax, 2139095040         # 0x7F800000: infinity, sign kept
        vmovd   xmm0, eax
        ret
.Liree_h2f_normal:
        movzx   ecx, cx
        movzx   edx, dx
        add     edx, ecx                # exponent | mantissa (fields do not overlap)
        shl     edx, 13                 # align with the binary32 fields
        lea     eax, [rdx + rax + 939524096]    # add 112 << 23 to rebias, plus sign
        vmovd   xmm0, eax
        ret
.Liree_h2f_subnormal:
        movzx   ecx, cx
        or      eax, 864026624          # 0x33800000 = 2^-24, carrying the sign
        vcvtsi2ss xmm0, xmm0, ecx       # mantissa as a float
        vmovd   xmm1, eax
        vmulss  xmm0, xmm0, xmm1        # mantissa * (+/-)2^-24
        ret
.Lfunc_end2:
        .size   iree_h2f_ieee, .Lfunc_end2-iree_h2f_ieee
        .cfi_endproc
.section .text.iree_f2h_ieee,"ax",@progbits
        .p2align 4, 0x90
        .type   iree_f2h_ieee,@function
#-----------------------------------------------------------------------
# iree_f2h_ieee: narrow a binary32 value to a binary16 bit pattern.
#   In:    xmm0 = single-precision input
#   Out:   eax  = half-precision bits (low 16 bits)
#   Clobb: ecx, edx, esi, edi, r8d, flags
#
# Fix: in the fast path the rounded mantissa, (m + 0x1000) >> 13, can reach
# 0x400 when rounding carries out of the 10-bit field. The old code ORed it
# into the exponent field, so for an odd half exponent the carry was lost
# (1.9999999f encoded as 0x3C00 = 1.0). It is now ADDed, so the carry bumps
# the exponent (result 0x4000 = 2.0). The exponent is at most 29 on this
# path, so the sum cannot reach the sign bit.
#-----------------------------------------------------------------------
iree_f2h_ieee:
.Lfunc_begin3:
        .cfi_startproc
        vmovd   edi, xmm0
        mov     edx, 2071               # bextr control 0x0817: start 23, length 8
        bextr   esi, edi, edx           # esi = biased binary32 exponent
        mov     eax, edi
        mov     ecx, edi
        shr     ecx, 31                 # ecx = sign
        and     eax, 8388607            # eax = 23-bit mantissa
        lea     edx, [rsi - 112]        # edx = exponent rebiased for binary16
        lea     r8d, [rsi - 113]
        cmp     r8d, 28
        ja      .LBB3_2                 # rebiased exponent outside 1..29
        add     eax, 4096               # add half of the 13 dropped bits
        shl     ecx, 15
        shl     edx, 10
        shr     eax, 13                 # 0..0x400
        add     eax, edx                # add, not or: keep the rounding carry
        or      eax, ecx
        ret
.LBB3_2:
        test    edi, edi
        je      .LBB3_6                 # all bits zero
        cmp     esi, 112
        ja      .LBB3_7                 # rebiased exponent >= 30
        cmp     esi, 102
        jae     .LBB3_10                # rebiased exponent in -10..0
.LBB3_6:
        xor     eax, eax                # too small for binary16: return 0
        ret
.LBB3_7:
        cmp     edx, 143
        jne     .LBB3_11                # not an all-ones binary32 exponent
        shl     ecx, 15
        test    eax, eax
        je      .LBB3_14                # zero mantissa: infinity
        shr     eax, 13                 # NaN: keep the top mantissa bits
        or      ecx, eax
        or      ecx, 31744
        mov     eax, ecx
        ret
.LBB3_10:
        mov     dl, 113
        or      eax, 8388608            # restore the implicit leading one
        shl     ecx, 15
        sub     dl, sil                 # shift count = 113 - biased exponent
        shrx    eax, eax, edx
        mov     edx, eax
        and     edx, 4096
        lea     eax, [rax + 2*rdx]      # add 0x2000 when bit 12 is set
        shr     eax, 13
        or      eax, ecx                # exponent field stays zero
        ret
.LBB3_11:
        test    edi, 4096
        je      .LBB3_13                # bit 12 clear: no rounding increment
        lea     edi, [rax + 8192]
        add     esi, -111               # candidate exponent + 1
        xor     r8d, r8d
        cmp     eax, 8380416            # 0x7FE000: would the increment overflow?
        cmovb   esi, edx                # no: keep the exponent
        cmovb   r8d, edi                # no: use the incremented mantissa
        mov     eax, r8d
        mov     edx, esi
.LBB3_13:
        shl     ecx, 15
        cmp     edx, 31
        jb      .LBB3_15
.LBB3_14:
        or      ecx, 31744              # 0x7C00: infinity with sign
        mov     eax, ecx
        ret
.LBB3_15:
        shr     eax, 13
        shl     edx, 10
        or      eax, ecx
        or      eax, edx
        ret
.Lfunc_end3:
        .size   iree_f2h_ieee, .Lfunc_end3-iree_f2h_ieee
        .cfi_endproc
.section .text.__gnu_h2f_ieee,"ax",@progbits
        .p2align 4, 0x90
        .type   __gnu_h2f_ieee,@function
#-----------------------------------------------------------------------
# __gnu_h2f_ieee: widen a binary16 bit pattern to a binary32 value.
#   In:    edi  = half-precision bits in the low 16 bits
#   Out:   xmm0 = single-precision result
#   Clobb: eax, ecx, edx, edi, xmm1, flags
#-----------------------------------------------------------------------
__gnu_h2f_ieee:
.Lfunc_begin4:
        .cfi_startproc
        mov     eax, edi
        and     eax, 32768              # sign bit of the half (bit 15)
        shl     eax, 16                 # ... moved to bit 31
        mov     ecx, edi
        and     ecx, 1023               # ecx = 10-bit mantissa
        mov     edx, edi
        and     dx, 31744               # dx = exponent field (bits 10..14)
        je      .Lgnu_h2f_subnormal     # exponent field is zero
        and     edi, 31744
        cmp     edi, 31744
        jne     .Lgnu_h2f_normal        # exponent field is not all ones
        test    cx, cx
        je      .Lgnu_h2f_inf           # all-ones exponent, zero mantissa
        or      eax, 2143289344         # 0x7FC00000: NaN, sign kept
        vmovd   xmm0, eax
        ret
.Lgnu_h2f_inf:
        or      eax, 2139095040         # 0x7F800000: infinity, sign kept
        vmovd   xmm0, eax
        ret
.Lgnu_h2f_normal:
        movzx   ecx, cx
        movzx   edx, dx
        add     edx, ecx                # exponent | mantissa (fields do not overlap)
        shl     edx, 13                 # align with the binary32 fields
        lea     eax, [rdx + rax + 939524096]    # add 112 << 23 to rebias, plus sign
        vmovd   xmm0, eax
        ret
.Lgnu_h2f_subnormal:
        movzx   ecx, cx
        or      eax, 864026624          # 0x33800000 = 2^-24, carrying the sign
        vcvtsi2ss xmm0, xmm0, ecx       # mantissa as a float
        vmovd   xmm1, eax
        vmulss  xmm0, xmm0, xmm1        # mantissa * (+/-)2^-24
        ret
.Lfunc_end4:
        .size   __gnu_h2f_ieee, .Lfunc_end4-__gnu_h2f_ieee
        .cfi_endproc
.section .text.__extendhfsf2,"ax",@progbits
        .p2align 4, 0x90
        .type   __extendhfsf2,@function
#-----------------------------------------------------------------------
# __extendhfsf2: widen the binary16 value held in the low 16 bits of xmm0
# to binary32.
#   In:    xmm0 = half-precision bits (low 16 bits of lane 0)
#   Out:   xmm0 = single-precision result
#   Clobb: eax, ecx, edx, esi, xmm1, flags
#-----------------------------------------------------------------------
__extendhfsf2:
.Lfunc_begin5:
        .cfi_startproc
        vmovd   ecx, xmm0               # ecx = raw input bits
        mov     eax, ecx
        shl     eax, 16
        and     eax, -2147483648        # eax = sign moved to bit 31
        mov     edx, ecx
        and     edx, 1023               # edx = 10-bit mantissa
        mov     esi, ecx
        and     esi, 31744              # esi = exponent field (bits 10..14)
        je      .Lextendhfsf2_subnormal # exponent field is zero
        cmp     esi, 31744
        jne     .Lextendhfsf2_normal    # exponent field is not all ones
        test    dx, dx
        je      .Lextendhfsf2_inf       # all-ones exponent, zero mantissa
        or      eax, 2143289344         # 0x7FC00000: NaN, sign kept
        vmovd   xmm0, eax
        ret
.Lextendhfsf2_inf:
        or      eax, 2139095040         # 0x7F800000: infinity, sign kept
        vmovd   xmm0, eax
        ret
.Lextendhfsf2_normal:
        and     ecx, 32767              # exponent and mantissa, sign dropped
        shl     ecx, 13                 # align with the binary32 fields
        lea     eax, [rcx + rax + 939524096]    # add 112 << 23 to rebias, plus sign
        vmovd   xmm0, eax
        ret
.Lextendhfsf2_subnormal:
        movzx   ecx, dx
        or      eax, 864026624          # 0x33800000 = 2^-24, carrying the sign
        vcvtsi2ss xmm0, xmm1, ecx       # mantissa as a float
        vmovd   xmm1, eax
        vmulss  xmm0, xmm0, xmm1        # mantissa * (+/-)2^-24
        ret
.Lfunc_end5:
        .size   __extendhfsf2, .Lfunc_end5-__extendhfsf2
        .cfi_endproc
.section .text.__gnu_f2h_ieee,"ax",@progbits
        .p2align 4, 0x90
        .type   __gnu_f2h_ieee,@function
#-----------------------------------------------------------------------
# __gnu_f2h_ieee: narrow a binary32 value to a binary16 bit pattern.
#   In:    xmm0 = single-precision input
#   Out:   eax  = half-precision bits (low 16 bits)
#   Clobb: ecx, edx, esi, edi, r8d, flags
#
# Fix: in the fast path the rounded mantissa, (m + 0x1000) >> 13, can reach
# 0x400 when rounding carries out of the 10-bit field. The old code ORed it
# into the exponent field, so for an odd half exponent the carry was lost
# (1.9999999f encoded as 0x3C00 = 1.0). It is now ADDed, so the carry bumps
# the exponent (result 0x4000 = 2.0). The exponent is at most 29 on this
# path, so the sum cannot reach the sign bit.
#-----------------------------------------------------------------------
__gnu_f2h_ieee:
.Lfunc_begin6:
        .cfi_startproc
        vmovd   edi, xmm0
        mov     edx, 2071               # bextr control 0x0817: start 23, length 8
        bextr   esi, edi, edx           # esi = biased binary32 exponent
        mov     eax, edi
        mov     ecx, edi
        shr     ecx, 31                 # ecx = sign
        and     eax, 8388607            # eax = 23-bit mantissa
        lea     edx, [rsi - 112]        # edx = exponent rebiased for binary16
        lea     r8d, [rsi - 113]
        cmp     r8d, 28
        ja      .LBB6_2                 # rebiased exponent outside 1..29
        add     eax, 4096               # add half of the 13 dropped bits
        shl     ecx, 15
        shl     edx, 10
        shr     eax, 13                 # 0..0x400
        add     eax, edx                # add, not or: keep the rounding carry
        or      eax, ecx
        ret
.LBB6_2:
        test    edi, edi
        je      .LBB6_6                 # all bits zero
        cmp     esi, 112
        ja      .LBB6_7                 # rebiased exponent >= 30
        cmp     esi, 102
        jae     .LBB6_10                # rebiased exponent in -10..0
.LBB6_6:
        xor     eax, eax                # too small for binary16: return 0
        ret
.LBB6_7:
        cmp     edx, 143
        jne     .LBB6_11                # not an all-ones binary32 exponent
        shl     ecx, 15
        test    eax, eax
        je      .LBB6_14                # zero mantissa: infinity
        shr     eax, 13                 # NaN: keep the top mantissa bits
        or      ecx, eax
        or      ecx, 31744
        mov     eax, ecx
        ret
.LBB6_10:
        mov     dl, 113
        or      eax, 8388608            # restore the implicit leading one
        shl     ecx, 15
        sub     dl, sil                 # shift count = 113 - biased exponent
        shrx    eax, eax, edx
        mov     edx, eax
        and     edx, 4096
        lea     eax, [rax + 2*rdx]      # add 0x2000 when bit 12 is set
        shr     eax, 13
        or      eax, ecx                # exponent field stays zero
        ret
.LBB6_11:
        test    edi, 4096
        je      .LBB6_13                # bit 12 clear: no rounding increment
        lea     edi, [rax + 8192]
        add     esi, -111               # candidate exponent + 1
        xor     r8d, r8d
        cmp     eax, 8380416            # 0x7FE000: would the increment overflow?
        cmovb   esi, edx                # no: keep the exponent
        cmovb   r8d, edi                # no: use the incremented mantissa
        mov     eax, r8d
        mov     edx, esi
.LBB6_13:
        shl     ecx, 15
        cmp     edx, 31
        jb      .LBB6_15
.LBB6_14:
        or      ecx, 31744              # 0x7C00: infinity with sign
        mov     eax, ecx
        ret
.LBB6_15:
        shr     eax, 13
        shl     edx, 10
        or      eax, ecx
        or      eax, edx
        ret
.Lfunc_end6:
        .size   __gnu_f2h_ieee, .Lfunc_end6-__gnu_f2h_ieee
        .cfi_endproc
.section .text.__truncsfhf2,"ax",@progbits
        .p2align 4, 0x90
        .type   __truncsfhf2,@function
#-----------------------------------------------------------------------
# __truncsfhf2: narrow a binary32 value to binary16, returned in xmm0.
#   In:    xmm0 = single-precision input
#   Out:   xmm0 = half-precision bits in the low 16 bits of lane 0 (the
#                 result is bounced through the word at [rsp-4])
#   Clobb: eax, ecx, edx, esi, edi, r8d, flags
#   Stack: uses 4 bytes below rsp without adjusting it; makes no calls.
#
# Fix: in the fast path the rounded mantissa, (m + 0x1000) >> 13, can reach
# 0x400 when rounding carries out of the 10-bit field. The shared tail
# .LBB7_16 used to OR it into the exponent field, so for an odd half
# exponent the carry was lost (1.9999999f became 1.0). The tail now ADDs
# the exponent field. On the other path into .LBB7_16 (.LBB7_15) the
# mantissa is below 0x400, so add and or give the same result there.
#-----------------------------------------------------------------------
__truncsfhf2:
.Lfunc_begin7:
        .cfi_startproc
        vmovd   edi, xmm0
        mov     edx, 2071               # bextr control 0x0817: start 23, length 8
        bextr   esi, edi, edx           # esi = biased binary32 exponent
        mov     eax, edi
        mov     ecx, edi
        shr     ecx, 31                 # ecx = sign
        and     eax, 8388607            # eax = 23-bit mantissa
        lea     edx, [rsi - 112]        # edx = exponent rebiased for binary16
        lea     r8d, [rsi - 113]
        cmp     r8d, 28
        ja      .LBB7_2                 # rebiased exponent outside 1..29
        add     eax, 4096               # add half of the 13 dropped bits
        shl     ecx, 15
        shl     edx, 10
        shr     eax, 13                 # 0..0x400
.LBB7_16:
        add     eax, edx                # add, not or: keep the rounding carry
        or      eax, ecx
        mov     word ptr [rsp - 4], ax
        vmovss  xmm0, dword ptr [rsp - 4]
        ret
.LBB7_2:
        test    edi, edi
        je      .LBB7_6                 # all bits zero
        cmp     esi, 112
        ja      .LBB7_7                 # rebiased exponent >= 30
        cmp     esi, 102
        jae     .LBB7_10                # rebiased exponent in -10..0
.LBB7_6:
        xor     eax, eax                # too small for binary16: return 0
        mov     word ptr [rsp - 4], ax
        vmovss  xmm0, dword ptr [rsp - 4]
        ret
.LBB7_7:
        cmp     edx, 143
        jne     .LBB7_11                # not an all-ones binary32 exponent
        shl     ecx, 15
        test    eax, eax
        je      .LBB7_14                # zero mantissa: infinity
        shr     eax, 13                 # NaN: keep the top mantissa bits
        or      ecx, eax
        jmp     .LBB7_14
.LBB7_10:
        mov     dl, 113
        or      eax, 8388608            # restore the implicit leading one
        shl     ecx, 15
        sub     dl, sil                 # shift count = 113 - biased exponent
        shrx    eax, eax, edx
        mov     edx, eax
        and     edx, 4096
        lea     eax, [rax + 2*rdx]      # add 0x2000 when bit 12 is set
        shr     eax, 13
        or      eax, ecx                # exponent field stays zero
        mov     word ptr [rsp - 4], ax
        vmovss  xmm0, dword ptr [rsp - 4]
        ret
.LBB7_11:
        test    edi, 4096
        je      .LBB7_13                # bit 12 clear: no rounding increment
        lea     edi, [rax + 8192]
        add     esi, -111               # candidate exponent + 1
        xor     r8d, r8d
        cmp     eax, 8380416            # 0x7FE000: would the increment overflow?
        cmovb   esi, edx                # no: keep the exponent
        cmovb   r8d, edi                # no: use the incremented mantissa
        mov     eax, r8d
        mov     edx, esi
.LBB7_13:
        shl     ecx, 15
        cmp     edx, 31
        jb      .LBB7_15
.LBB7_14:
        or      ecx, 31744              # 0x7C00: all-ones exponent with sign
        mov     eax, ecx
        mov     word ptr [rsp - 4], ax
        vmovss  xmm0, dword ptr [rsp - 4]
        ret
.LBB7_15:
        shr     eax, 13
        shl     edx, 10
        jmp     .LBB7_16
.Lfunc_end7:
        .size   __truncsfhf2, .Lfunc_end7-__truncsfhf2
        .cfi_endproc
.section .rodata.cst4,"aM",@progbits,4
        .p2align 2, 0x0
.LCPI8_0:
        .long   0x7b800000              # 2^120
.LCPI8_1:
        .long   0x80000000              # -0.0
.LCPI8_2:
        .long   0x3f800000              # 1.0
.section .text.ceilf,"ax",@progbits
        .p2align 4, 0x90
        .type   ceilf,@function
#-----------------------------------------------------------------------
# ceilf: round xmm0 up to an integral value by editing its bit pattern.
#   In:    xmm0 = x
#   Out:   xmm0 = result
#   Clobb: eax, ecx, edx, esi, edi, xmm1, k1, flags
#   Stack: scratch stores at [rsp-4] / [rsp-8]; rsp is not moved.
# The x + 2^120 sums are only stored, never read back (presumably a forced
# evaluation for its floating-point exception side effect).
#-----------------------------------------------------------------------
ceilf:
.Lfunc_begin8:
        .cfi_startproc
        vmovd   eax, xmm0               # eax = bits of x
        mov     ecx, 2071               # bextr control 0x0817: start 23, length 8
        bextr   ecx, eax, ecx           # ecx = biased exponent
        cmp     ecx, 149
        ja      .Lceilf_ret             # exponent >= 150: no fraction bits, return x
        cmp     ecx, 127
        jb      .Lceilf_below_one       # magnitude below 1
        add     ecx, -127               # unbiased exponent, 0..22
        mov     edx, 8388607
        shrx    edx, edx, ecx           # edx = mask of the fraction bits
        test    edx, eax
        je      .Lceilf_ret             # no fraction bits set: return x
        vaddss  xmm0, xmm0, dword ptr [rip + .LCPI8_0]
        vmovss  dword ptr [rsp - 8], xmm0
        mov     edi, -8388608
        xor     esi, esi
        test    eax, eax
        cmovs   edx, esi                # negative x: just truncate
        sarx    ecx, edi, ecx           # ecx = complement of the fraction mask
        add     edx, eax                # positive x: bump past the fraction
        and     edx, ecx                # clear the fraction bits
        vmovd   xmm0, edx
        ret
.Lceilf_below_one:
        vaddss  xmm1, xmm0, dword ptr [rip + .LCPI8_0]
        vmovss  dword ptr [rsp - 4], xmm1
        test    eax, eax
        js      .Lceilf_neg_zero
        vmovss  xmm1, dword ptr [rip + .LCPI8_2]
        sete    al                      # flags still from the test above: x is +0
        kmovd   k1, eax
        vmovss  xmm1 {k1}, xmm1, xmm0   # +0 stays +0, anything else becomes 1.0
        vmovaps xmm0, xmm1
.Lceilf_ret:
        ret
.Lceilf_neg_zero:
        vmovss  xmm0, dword ptr [rip + .LCPI8_1]
        ret
.Lfunc_end8:
        .size   ceilf, .Lfunc_end8-ceilf
        .cfi_endproc
.section .rodata.cst4,"aM",@progbits,4
        .p2align 2, 0x0
.LCPI9_0:
        .long   0x42b17217              # upper threshold (about 88.72)
.LCPI9_1:
        .long   0xc2cff1b4              # lower threshold (about -103.97)
.LCPI9_2:
        .long   0x10000000              # 2^-95
.LCPI9_3:
        .long   0x70000000              # 2^97
.section .rodata.cst8,"aM",@progbits,8
        .p2align 3, 0x0
.LCPI9_4:
        .quad   0x40471547652b82fe      # 32 / ln(2)
.LCPI9_5:
        .quad   0x4338000000000000      # 1.5 * 2^52 (round-to-integer shift)
.LCPI9_6:
        .quad   0xc338000000000000      # -(1.5 * 2^52)
.LCPI9_7:
        .quad   0x3ebc6af84b912394      # polynomial coefficient
.LCPI9_8:
        .quad   0x3f2ebfce50fac4f3      # polynomial coefficient
.LCPI9_9:
        .quad   0x3f962e42ff0c52d6      # polynomial coefficient
.LCPI9_10:
        .quad   0x3ff0000000000000      # 1.0
.section .text.expf,"ax",@progbits
        .p2align 4, 0x90
        .type   expf,@function
#-----------------------------------------------------------------------
# expf: single-precision exponential, evaluated in double precision with
# the 32-entry table __exp2f_data.
#   In:    xmm0 = x
#   Out:   xmm0 = result
#   Clobb: rax, rcx, rdx, xmm1-xmm3, flags
#   Stack: scratch stores at [rsp-4] / [rsp-8]; rsp is not moved.
#-----------------------------------------------------------------------
expf:
.Lfunc_begin9:
        .cfi_startproc
        vmovd   ecx, xmm0               # ecx = bits of x
        mov     eax, 2836               # bextr control 0x0B14: start 20, length 11
        bextr   eax, ecx, eax           # eax = exponent plus top mantissa bits, sign dropped
        cmp     eax, 1067
        jae     .Lexpf_special          # large magnitude, infinity or NaN
.Lexpf_core:
        lea     rdx, [rip + __exp2f_data]
        vcvtss2sd xmm0, xmm0, xmm0
        vmulsd  xmm0, xmm0, qword ptr [rip + .LCPI9_4]  # z = x * 32/ln2
        vaddsd  xmm1, xmm0, qword ptr [rip + .LCPI9_5]  # kd = z + shift
        vmovq   rax, xmm1               # low bits of kd hold round(z)
        vaddsd  xmm1, xmm1, qword ptr [rip + .LCPI9_6]  # kd -= shift
        mov     ecx, eax
        and     ecx, 31                 # table index
        shl     rax, 47                 # remaining bits go into the exponent
        add     rax, qword ptr [rdx + 8*rcx]
        vsubsd  xmm0, xmm0, xmm1        # r = z - kd
        vmulsd  xmm2, xmm0, qword ptr [rip + .LCPI9_7]
        vmovq   xmm1, rax               # s = scale factor from the table
        vmulsd  xmm3, xmm0, xmm0        # r * r
        vaddsd  xmm2, xmm2, qword ptr [rip + .LCPI9_8]
        vmulsd  xmm0, xmm0, qword ptr [rip + .LCPI9_9]
        vaddsd  xmm0, xmm0, qword ptr [rip + .LCPI9_10]
        vmulsd  xmm2, xmm3, xmm2
        vaddsd  xmm0, xmm0, xmm2
        vmulsd  xmm0, xmm0, xmm1        # polynomial * s
        vcvtsd2ss xmm1, xmm0, xmm0
.Lexpf_ret_xmm1:
        vmovaps xmm0, xmm1
        ret
.Lexpf_special:
        vxorps  xmm1, xmm1, xmm1
        cmp     ecx, -8388608           # bits 0xFF800000: negative infinity
        je      .Lexpf_ret_xmm1         # ... returns +0
        cmp     eax, 2040
        jae     .Lexpf_inf_nan          # all-ones exponent
        vucomiss xmm0, dword ptr [rip + .LCPI9_0]
        jbe     .Lexpf_check_underflow
        mov     dword ptr [rsp - 8], 1879048192 # 0x70000000
        vmovss  xmm0, dword ptr [rsp - 8]
        vmulss  xmm0, xmm0, dword ptr [rip + .LCPI9_3]  # 2^97 * 2^97 overflows
        ret
.Lexpf_check_underflow:
        vmovss  xmm1, dword ptr [rip + .LCPI9_1]
        vucomiss xmm1, xmm0
        jbe     .Lexpf_core             # x is at or above the lower threshold
        mov     dword ptr [rsp - 4], 268435456  # 0x10000000
        vmovss  xmm0, dword ptr [rsp - 4]
        vmulss  xmm0, xmm0, dword ptr [rip + .LCPI9_2]  # 2^-95 * 2^-95 underflows
        ret
.Lexpf_inf_nan:
        vaddss  xmm0, xmm0, xmm0        # return x + x
        ret
.Lfunc_end9:
        .size   expf, .Lfunc_end9-expf
        .cfi_endproc
.section .rodata.cst8,"aM",@progbits,8
        .p2align 2, 0x0
.LCPI10_0:
        .long   0xf0000000              # index 0: -2^97
        .long   0x70000000              # index 1: +2^97
.section .rodata.cst4,"aM",@progbits,4
        .p2align 2, 0x0
.LCPI10_1:
        .long   0x70000000              # 2^97
.section .text.__math_oflowf,"ax",@progbits
        .p2align 4, 0x90
        .type   __math_oflowf,@function
#-----------------------------------------------------------------------
# __math_oflowf: produce an overflowed product (+/-2^97 times 2^97).
#   In:    edi  = sign selector (zero picks the positive factor,
#                 non-zero the negative one)
#   Out:   xmm0 = product
#   Clobb: rax, rcx, flags
#   Stack: the factor is bounced through [rsp-4]; rsp is not moved.
#-----------------------------------------------------------------------
__math_oflowf:
.Lfunc_begin10:
        .cfi_startproc
        lea     rcx, [rip + .LCPI10_0]
        xor     eax, eax
        test    edi, edi
        sete    al                      # rax = 1 when edi == 0
        vmovss  xmm0, dword ptr [rcx + 4*rax]
        vmovss  dword ptr [rsp - 4], xmm0
        vmovss  xmm0, dword ptr [rsp - 4]
        vmulss  xmm0, xmm0, dword ptr [rip + .LCPI10_1]
        ret
.Lfunc_end10:
        .size   __math_oflowf, .Lfunc_end10-__math_oflowf
        .cfi_endproc
.section .rodata.cst8,"aM",@progbits,8
        .p2align 2, 0x0
.LCPI11_0:
        .long   0x90000000              # index 0: -2^-95
        .long   0x10000000              # index 1: +2^-95
.section .rodata.cst4,"aM",@progbits,4
        .p2align 2, 0x0
.LCPI11_1:
        .long   0x10000000              # 2^-95
.section .text.__math_uflowf,"ax",@progbits
        .p2align 4, 0x90
        .type   __math_uflowf,@function
#-----------------------------------------------------------------------
# __math_uflowf: produce an underflowed product (+/-2^-95 times 2^-95).
#   In:    edi  = sign selector (zero picks the positive factor,
#                 non-zero the negative one)
#   Out:   xmm0 = product
#   Clobb: rax, rcx, flags
#   Stack: the factor is bounced through [rsp-4]; rsp is not moved.
#-----------------------------------------------------------------------
__math_uflowf:
.Lfunc_begin11:
        .cfi_startproc
        lea     rcx, [rip + .LCPI11_0]
        xor     eax, eax
        test    edi, edi
        sete    al                      # rax = 1 when edi == 0
        vmovss  xmm0, dword ptr [rcx + 4*rax]
        vmovss  dword ptr [rsp - 4], xmm0
        vmovss  xmm0, dword ptr [rsp - 4]
        vmulss  xmm0, xmm0, dword ptr [rip + .LCPI11_1]
        ret
.Lfunc_end11:
        .size   __math_uflowf, .Lfunc_end11-__math_uflowf
        .cfi_endproc
.section .rodata.cst4,"aM",@progbits,4
        .p2align 2, 0x0
.LCPI12_0:
        .long   0x80000000              # sign-bit mask
.section .text.__math_xflowf,"ax",@progbits
        .p2align 4, 0x90
        .type   __math_xflowf,@function
#-----------------------------------------------------------------------
# __math_xflowf: return (edi != 0 ? -y : y) * y.
#   In:    edi  = sign selector, xmm0 = y
#   Out:   xmm0 = product
#   Clobb: al, xmm1, k1, flags
#   Stack: the first factor is bounced through [rsp-4]; rsp is not moved.
#-----------------------------------------------------------------------
__math_xflowf:
.Lfunc_begin12:
        .cfi_startproc
        test    edi, edi
        sete    al
        kmovd   k1, eax                 # mask bit 0 set when edi == 0
        vxorps  xmm1, xmm0, dword ptr [rip + .LCPI12_0]{1to4}  # xmm1 = -y
        vmovss  xmm1 {k1}, xmm1, xmm0   # edi == 0: use +y instead
        vmovss  dword ptr [rsp - 4], xmm1
        vmulss  xmm0, xmm0, dword ptr [rsp - 4]
        ret
.Lfunc_end12:
        .size   __math_xflowf, .Lfunc_end12-__math_xflowf
        .cfi_endproc
.section .text.feclearexcept,"ax",@progbits
        .p2align 4, 0x90
        .type   feclearexcept,@function
# feclearexcept: stub; ignores its arguments and returns 0 in eax.
feclearexcept:
.Lfunc_begin13:
        .cfi_startproc
        xor     eax, eax                # return 0
        ret
.Lfunc_end13:
        .size   feclearexcept, .Lfunc_end13-feclearexcept
        .cfi_endproc
.section .text.feraiseexcept,"ax",@progbits
        .p2align 4, 0x90
        .type   feraiseexcept,@function
# feraiseexcept: stub; ignores its arguments and returns 0 in eax.
feraiseexcept:
.Lfunc_begin14:
        .cfi_startproc
        xor     eax, eax                # return 0
        ret
.Lfunc_end14:
        .size   feraiseexcept, .Lfunc_end14-feraiseexcept
        .cfi_endproc
.section .text.fetestexcept,"ax",@progbits
        .p2align 4, 0x90
        .type   fetestexcept,@function
# fetestexcept: stub; ignores its arguments and returns 0 in eax.
fetestexcept:
.Lfunc_begin15:
        .cfi_startproc
        xor     eax, eax                # return 0
        ret
.Lfunc_end15:
        .size   fetestexcept, .Lfunc_end15-fetestexcept
        .cfi_endproc
.section .text.fegetround,"ax",@progbits
        .p2align 4, 0x90
        .type   fegetround,@function
# fegetround: stub; always returns 0 in eax.
fegetround:
.Lfunc_begin16:
        .cfi_startproc
        xor     eax, eax                # return 0
        ret
.Lfunc_end16:
        .size   fegetround, .Lfunc_end16-fegetround
        .cfi_endproc
.section .text.__fesetround,"ax",@progbits
        .p2align 4, 0x90
        .type   __fesetround,@function
# __fesetround: stub; ignores its arguments and returns 0 in eax.
__fesetround:
.Lfunc_begin17:
        .cfi_startproc
        xor     eax, eax                # return 0
        ret
.Lfunc_end17:
        .size   __fesetround, .Lfunc_end17-__fesetround
        .cfi_endproc
.section .text.fegetenv,"ax",@progbits
        .p2align 4, 0x90
        .type   fegetenv,@function
# fegetenv: stub; ignores its arguments, writes nothing, returns 0 in eax.
fegetenv:
.Lfunc_begin18:
        .cfi_startproc
        xor     eax, eax                # return 0
        ret
.Lfunc_end18:
        .size   fegetenv, .Lfunc_end18-fegetenv
        .cfi_endproc
.section .text.fesetenv,"ax",@progbits
        .p2align 4, 0x90
        .type   fesetenv,@function
# fesetenv: stub; ignores its arguments and returns 0 in eax.
fesetenv:
.Lfunc_begin19:
        .cfi_startproc
        xor     eax, eax                # return 0
        ret
.Lfunc_end19:
        .size   fesetenv, .Lfunc_end19-fesetenv
        .cfi_endproc
.section .rodata.cst4,"aM",@progbits,4
        .p2align 2, 0x0
.LCPI20_0:
        .long   0x7b800000              # 2^120
.LCPI20_1:
        .long   0xbf800000              # -1.0
.section .text.floorf,"ax",@progbits
        .p2align 4, 0x90
        .type   floorf,@function
#-----------------------------------------------------------------------
# floorf: round xmm0 down to an integral value by editing its bit pattern.
#   In:    xmm0 = x
#   Out:   xmm0 = result
#   Clobb: eax, ecx, edx, esi, xmm1, k1, flags
#   Stack: scratch stores at [rsp-4] / [rsp-8]; rsp is not moved.
# The x + 2^120 sums are only stored, never read back (presumably a forced
# evaluation for its floating-point exception side effect).
#-----------------------------------------------------------------------
floorf:
.Lfunc_begin20:
        .cfi_startproc
        vmovd   eax, xmm0               # eax = bits of x
        mov     ecx, 2071               # bextr control 0x0817: start 23, length 8
        bextr   ecx, eax, ecx           # ecx = biased exponent
        cmp     ecx, 149
        ja      .Lfloorf_ret            # exponent >= 150: no fraction bits, return x
        cmp     ecx, 127
        jb      .Lfloorf_below_one      # magnitude below 1
        add     ecx, -127               # unbiased exponent, 0..22
        mov     edx, 8388607
        shrx    edx, edx, ecx           # edx = mask of the fraction bits
        test    edx, eax
        je      .Lfloorf_ret            # no fraction bits set: return x
        vaddss  xmm0, xmm0, dword ptr [rip + .LCPI20_0]
        vmovss  dword ptr [rsp - 8], xmm0
        mov     esi, -8388608
        sarx    ecx, esi, ecx           # ecx = complement of the fraction mask
        mov     esi, eax
        sar     esi, 31                 # all ones when x is negative
        and     esi, edx                # negative x: bump past the fraction
        add     esi, eax
        and     esi, ecx                # clear the fraction bits
        vmovd   xmm0, esi
        ret
.Lfloorf_below_one:
        vaddss  xmm1, xmm0, dword ptr [rip + .LCPI20_0]
        vmovss  dword ptr [rsp - 4], xmm1
        vxorps  xmm1, xmm1, xmm1
        test    eax, eax
        js      .Lfloorf_negative
        vmovaps xmm0, xmm1              # non-negative and below 1: +0
.Lfloorf_ret:
        ret
.Lfloorf_negative:
        vcmpeqss k1, xmm0, xmm1         # mask set when x compares equal to zero
        vmovss  xmm1, dword ptr [rip + .LCPI20_1]
        vmovss  xmm1 {k1}, xmm1, xmm0   # -0 stays -0, anything else becomes -1.0
        vmovaps xmm0, xmm1
        ret
.Lfunc_end20:
        .size   floorf, .Lfunc_end20-floorf
        .cfi_endproc
.section .text.fmaf,"ax",@progbits
.p2align 4, 0x90
.type fmaf,@function
# fmaf: computes xmm0 * xmm1 + xmm2 for single-precision inputs.
#   In:    xmm0 = x, xmm1 = y, xmm2 = z
#   Out:   xmm0 = result, narrowed from a double-precision intermediate
#   Clobb: rax, rcx, rdx, xmm1, xmm3, k1, flags
# The product and sum are formed in double precision. When the double result
# looks like an exact halfway case for the final narrowing (low 29 bits equal
# 0x10000000), the rounding error of the addition is recomputed and the double
# is nudged by one unit in the last place before converting.
fmaf:
.Lfunc_begin21:
.cfi_startproc
vcvtss2sd xmm0, xmm0, xmm0
# rdx = 0x7FF0000000000000, the double exponent mask.
movabs rdx, 9218868437227405312
vcvtss2sd xmm1, xmm1, xmm1
vcvtss2sd xmm2, xmm2, xmm2
# xmm1 = xy = x * y, xmm0 = result = xy + z, rax = bits of result.
vmulsd xmm1, xmm0, xmm1
vaddsd xmm0, xmm1, xmm2
vmovq rax, xmm0
mov ecx, eax
and ecx, 536870911
cmp ecx, 268435456
# cl = 1 when the low 29 bits are not 0x10000000 (not a halfway pattern).
setne cl
# andn leaves zero (ZF set) only when every exponent bit of result is set.
andn rdx, rax, rdx
sete dl
or dl, cl
jne .LBB21_4
# Exactness check: result - xy == z and result - z == xy (unordered fails).
vsubsd xmm3, xmm0, xmm1
vucomisd xmm3, xmm2
jne .LBB21_3
jp .LBB21_3
vsubsd xmm3, xmm0, xmm2
vucomisd xmm3, xmm1
jne .LBB21_3
jp .LBB21_3
# Common exit: the double result narrows correctly as-is.
.LBB21_4:
vcvtsd2ss xmm0, xmm0, xmm0
ret
# Halfway and inexact: pick the error term and adjust result by one ulp.
.LBB21_3:
test rax, rax
vsubsd xmm3, xmm1, xmm0
vsubsd xmm0, xmm2, xmm0
# cl = sign of result (flags still from the test above; vsubsd leaves them alone).
sets cl
vucomisd xmm2, xmm1
# xmm0 = (z - result) + xy, xmm3 = (xy - result) + z.
vaddsd xmm0, xmm1, xmm0
vaddsd xmm3, xmm3, xmm2
vxorpd xmm1, xmm1, xmm1
# dl = not (z > xy); after the xor, dl = 1 when sign == (z > xy).
setbe dl
xor dl, cl
kmovd k1, edx
# Select xmm3 as the error term when the mask bit is set.
vmovsd xmm0 {k1}, xmm0, xmm3
vucomisd xmm1, xmm0
# dl = not (err < 0); after the xor, dl = 1 when sign == (err < 0).
setbe dl
xor dl, cl
# rcx = +1 when dl is 1, otherwise -1; added to the bits of result.
movzx ecx, dl
dec rcx
or rcx, 1
add rcx, rax
vmovq xmm0, rcx
vcvtsd2ss xmm0, xmm0, xmm0
ret
.Lfunc_end21:
.size fmaf, .Lfunc_end21-fmaf
.cfi_endproc
.section .text.fmodf,"ax",@progbits
.p2align 4, 0x90
.type fmodf,@function
# fmodf: remainder of xmm0 / xmm1, computed on the integer bit patterns by
# shift-and-subtract.
#   In:    xmm0 = x, xmm1 = y
#   Out:   xmm0 = result, carrying the sign bit of x
#   Clobb: eax, ecx, edx, esi, edi, r8, r9b, xmm1, k1, flags
# Register roles in the main path: eax = bits of x, edx = bits of y then its
# mantissa, esi = working mantissa of x, ecx = exponent of x, edi = exponent
# of y.
fmodf:
.Lfunc_begin22:
.cfi_startproc
vmovd edx, xmm1
mov esi, edx
# esi = bits of y shifted left by one; zero means y is +0 or -0.
add esi, edx
je .LBB22_2
mov r8d, edx
vmovd eax, xmm0
# edi = bextr control 0x0817: start bit 23, length 8 (the exponent field).
mov edi, 2071
and r8d, 2147483647
bextr ecx, eax, edi
# r8b = y is NaN (magnitude bits above 0x7F800000); r9b = x has an all-ones exponent.
cmp r8d, 2139095041
setae r8b
cmp ecx, 255
sete r9b
or r9b, r8b
cmp r9b, 1
jne .LBB22_3
# Invalid operands: return (x * y) / (x * y).
.LBB22_2:
vmulss xmm0, xmm0, xmm1
vdivss xmm0, xmm0, xmm0
ret
.LBB22_3:
# Compare magnitudes (bits shifted left by one): |x| <= |y| is handled apart.
lea r8d, [rax + rax]
cmp r8d, esi
jbe .LBB22_4
bextr edi, edx, edi
test ecx, ecx
je .LBB22_6
# x is normal: esi = mantissa with the implicit leading one restored.
mov esi, eax
and esi, 8388607
or esi, 8388608
test edi, edi
je .LBB22_11
# y is normal: edx = mantissa with the implicit leading one restored.
.LBB22_14:
and edx, 8388607
or edx, 8388608
cmp ecx, edi
jg .LBB22_16
# Exponents are level: one final trial subtraction.
.LBB22_21:
mov edi, esi
sub edi, edx
jns .LBB22_22
jmp .LBB22_23
# Magnitude of x is at most that of y: return 0 * x when equal, else x unchanged.
.LBB22_4:
vpxor xmm1, xmm1, xmm1
sete al
vmulss xmm1, xmm0, xmm1
kmovd k1, eax
vmovss xmm0 {k1}, xmm0, xmm1
ret
# x has a zero exponent field: normalise by counting leading zeros (ecx goes negative).
.LBB22_6:
mov esi, eax
xor ecx, ecx
shl esi, 9
js .LBB22_8
.p2align 4, 0x90
.LBB22_7:
dec ecx
add esi, esi
jns .LBB22_7
.LBB22_8:
# esi = bits of x shifted left by (1 - ecx).
mov sil, 1
sub sil, cl
shlx esi, eax, esi
test edi, edi
jne .LBB22_14
# y has a zero exponent field: normalise it the same way (edi goes negative).
.LBB22_11:
mov r8d, edx
xor edi, edi
shl r8d, 9
js .LBB22_13
.p2align 4, 0x90
.LBB22_12:
dec edi
add r8d, r8d
jns .LBB22_12
.LBB22_13:
mov r8b, 1
sub r8b, dil
shlx edx, edx, r8d
cmp ecx, edi
jg .LBB22_16
jmp .LBB22_21
.p2align 4, 0x90
# Reduction loop: while ecx > edi, subtract y's mantissa when it fits, then
# double the remainder and lower the exponent.
.LBB22_19:
add esi, esi
dec ecx
cmp ecx, edi
jle .LBB22_20
.LBB22_16:
mov r8d, esi
sub r8d, edx
js .LBB22_19
mov esi, r8d
jne .LBB22_19
jmp .LBB22_18
.LBB22_20:
mov ecx, edi
mov edi, esi
sub edi, edx
js .LBB22_23
.LBB22_22:
mov esi, edi
je .LBB22_18
# Renormalise: shift left until bit 23 is set, lowering the exponent each time.
.LBB22_23:
cmp esi, 8388607
ja .LBB22_24
.p2align 4, 0x90
.LBB22_25:
lea edx, [rsi + rsi]
dec ecx
cmp esi, 4194304
mov esi, edx
jb .LBB22_25
# eax = sign bit of x.
and eax, -2147483648
test ecx, ecx
jle .LBB22_28
# Positive exponent: drop the implicit one and pack exponent, mantissa, sign.
.LBB22_27:
add edx, -8388608
shl ecx, 23
or ecx, edx
or ecx, eax
vmovd xmm0, ecx
ret
# Remainder is exactly zero: return 0 * x.
.LBB22_18:
vpxor xmm1, xmm1, xmm1
vmulss xmm0, xmm0, xmm1
ret
.LBB22_24:
mov edx, esi
and eax, -2147483648
test ecx, ecx
jg .LBB22_27
# Non-positive exponent: shift right by (1 - ecx) and attach the sign.
.LBB22_28:
mov sil, 1
sub sil, cl
shrx ecx, edx, esi
or ecx, eax
vmovd xmm0, ecx
ret
.Lfunc_end22:
.size fmodf, .Lfunc_end22-fmodf
.cfi_endproc
.section .text.__math_invalidf,"ax",@progbits
        .p2align 4, 0x90
        .type   __math_invalidf,@function
# __math_invalidf: returns (x - x) / (x - x) for the value in xmm0.
#   In:  xmm0 = x
#   Out: xmm0 = quotient
__math_invalidf:
.Lfunc_begin23:
        .cfi_startproc
        vsubss  xmm0, xmm0, xmm0        # x - x
        vdivss  xmm0, xmm0, xmm0        # divided by itself
        ret
.Lfunc_end23:
        .size   __math_invalidf, .Lfunc_end23-__math_invalidf
        .cfi_endproc
# Constant pool for powf (4-byte entries).
.section .rodata.cst4,"aM",@progbits,4
.p2align 2, 0x0
.LCPI24_0:
.long 0x3f800000
.LCPI24_1:
.long 0x80000000
.LCPI24_2:
.long 0x4b000000
.LCPI24_12:
.long 0x10000000
.LCPI24_20:
.long 0x70000000
# Constant pool for powf (8-byte entries): polynomial coefficients, range
# thresholds, rounding shifts and the sign-selected overflow/underflow pairs.
.section .rodata.cst8,"aM",@progbits,8
.p2align 3, 0x0
.LCPI24_3:
.quad 0xbff0000000000000
.LCPI24_4:
.quad 0x3fd27616c9496e0b
.LCPI24_5:
.quad 0xbfd71969a075c67a
.LCPI24_6:
.quad 0x3fdec70a6ca7badd
.LCPI24_7:
.quad 0xbfe7154748bef6c8
.LCPI24_8:
.quad 0x3ff71547652ab82b
.LCPI24_9:
.quad 0x405fffffffd1d571
.LCPI24_10:
.quad 0xc062c00000000000
.LCPI24_11:
.long 0x90000000
.long 0x10000000
.LCPI24_13:
.quad 0x42e8000000000000
.LCPI24_14:
.quad 0xc2e8000000000000
.LCPI24_15:
.quad 0x3fac6af84b912394
.LCPI24_16:
.quad 0x3fcebfce50fac4f3
.LCPI24_17:
.quad 0x3fe62e42ff0c52d6
.LCPI24_18:
.quad 0x3ff0000000000000
.LCPI24_19:
.long 0xf0000000
.long 0x70000000
.section .text.powf,"ax",@progbits
.p2align 4, 0x90
.type powf,@function
# powf: raises xmm0 to the power xmm1 in single precision.
#   In:    xmm0 = x (base), xmm1 = y (exponent)
#   Out:   xmm0 = result
#   Clobb: rax, rcx, rdx, rsi, rdi, xmm1-xmm6, k1, k2, flags
#   Stack: scratch stores at [rsp-4], [rsp-8], [rsp-12]; rsp is not moved.
# Main path: a table-driven log2 of x (__powf_log2_data, 16-byte entries) is
# multiplied by y in double precision and fed to a table-driven exp2
# (__exp2f_data). ecx carries a bias (0 or 65536) that is added to the exp2
# index; shifted left by 47 it lands on bit 63, i.e. the sign of the scale.
powf:
.Lfunc_begin24:
.cfi_startproc
# edx = bits of x, eax = bits of y.
vmovd edx, xmm0
vmovd eax, xmm1
# Fast check: x is a positive normal number and y is not zero, infinity or NaN.
lea ecx, [rdx - 2139095040]
cmp ecx, -2130706432
jb .LBB24_2
lea esi, [rax + rax + 16777216]
xor ecx, ecx
cmp esi, 16777216
jbe .LBB24_2
# log2 stage. edx holds the (possibly renormalised) bits of x.
.LBB24_24:
lea eax, [rdx - 1060306944]
mov esi, eax
mov edi, eax
# esi becomes a byte offset into the table: a 4-bit index times 16.
shr esi, 19
and edi, -8388608
# eax = exponent part as a signed integer; edx = bits with that exponent removed.
sar eax, 23
sub edx, edi
shl esi, 4
lea rdi, [rip + __powf_log2_data]
vmovd xmm0, edx
movzx esi, sil
movabs rdx, 9223231299366420480
vcvtss2sd xmm0, xmm0, xmm0
# xmm0 = z * table[i].first - 1.0, the polynomial argument.
vmulsd xmm0, xmm0, qword ptr [rsi + rdi]
vaddsd xmm0, xmm0, qword ptr [rip + .LCPI24_3]
vmulsd xmm5, xmm0, qword ptr [rip + .LCPI24_6]
vmulsd xmm4, xmm0, qword ptr [rip + .LCPI24_4]
vaddsd xmm5, xmm5, qword ptr [rip + .LCPI24_7]
vaddsd xmm4, xmm4, qword ptr [rip + .LCPI24_5]
# xmm2 = exponent as a double + table[i].second.
vcvtsi2sd xmm2, xmm2, eax
vmulsd xmm3, xmm0, xmm0
vaddsd xmm2, xmm2, qword ptr [rsi + rdi + 8]
vmulsd xmm0, xmm0, qword ptr [rip + .LCPI24_8]
movabs rsi, 4638426141214900225
vmulsd xmm6, xmm3, xmm3
vmulsd xmm3, xmm3, xmm5
vmulsd xmm4, xmm4, xmm6
vaddsd xmm0, xmm2, xmm0
vaddsd xmm0, xmm0, xmm3
vaddsd xmm0, xmm4, xmm0
# xmm0 = y * log2(x); its magnitude bits are range-checked before exp2.
vcvtss2sd xmm1, xmm1, xmm1
vmulsd xmm0, xmm0, xmm1
vmovq rax, xmm0
and rdx, rax
cmp rdx, rsi
jae .LBB24_25
# exp2 stage: round to a multiple of 1/32, look up the scale, apply a polynomial.
.LBB24_29:
vaddsd xmm1, xmm0, qword ptr [rip + .LCPI24_13]
lea rdx, [rip + __exp2f_data]
vmovq rax, xmm1
vaddsd xmm1, xmm1, qword ptr [rip + .LCPI24_14]
# ecx = index plus bias; the low 5 bits of eax pick the table entry.
add ecx, eax
and eax, 31
shl rcx, 47
add rcx, qword ptr [rdx + 8*rax]
vsubsd xmm0, xmm0, xmm1
vmulsd xmm2, xmm0, qword ptr [rip + .LCPI24_15]
vmovq xmm1, rcx
vmulsd xmm3, xmm0, xmm0
vaddsd xmm2, xmm2, qword ptr [rip + .LCPI24_16]
vmulsd xmm0, xmm0, qword ptr [rip + .LCPI24_17]
vaddsd xmm0, xmm0, qword ptr [rip + .LCPI24_18]
vmulsd xmm2, xmm3, xmm2
vaddsd xmm0, xmm0, xmm2
vmulsd xmm0, xmm0, xmm1
vcvtsd2ss xmm0, xmm0, xmm0
.LBB24_30:
ret
# Slow entry: x is not a positive normal number, or y is zero/infinity/NaN.
.LBB24_2:
lea ecx, [rax + rax]
lea esi, [rcx - 1]
cmp esi, -16777217
# y is zero, infinity or NaN.
jae .LBB24_3
lea ecx, [rdx + rdx - 1]
cmp ecx, -16777217
# x is zero, infinity or NaN.
jae .LBB24_10
xor ecx, ecx
test edx, edx
# x is negative.
js .LBB24_16
cmp edx, 8388607
ja .LBB24_24
# x has a zero exponent field: scale by 2^23 and subtract 23 from the exponent.
.LBB24_23:
vmulss xmm0, xmm0, dword ptr [rip + .LCPI24_2]
vmovd edx, xmm0
and edx, 2147483647
add edx, -192937984
jmp .LBB24_24
# y * log2(x) is large in magnitude: check the overflow threshold first.
.LBB24_25:
vucomisd xmm0, qword ptr [rip + .LCPI24_9]
jbe .LBB24_27
# Overflow: (+/-2^97) * 2^97, the sign chosen by whether ecx is zero.
xor eax, eax
test ecx, ecx
lea rcx, [rip + .LCPI24_19]
sete al
vmovss xmm0, dword ptr [rcx + 4*rax]
vmovss dword ptr [rsp - 8], xmm0
vmovss xmm0, dword ptr [rsp - 8]
vmulss xmm0, xmm0, dword ptr [rip + .LCPI24_20]
ret
# Negative x: classify y by its exponent (ecx = biased exponent of y).
.LBB24_16:
mov ecx, 2071
bextr ecx, eax, ecx
cmp ecx, 127
# Magnitude of y below 1, so y is not an integer: invalid.
jb .LBB24_31
cmp ecx, 150
jbe .LBB24_18
# No sign bias for the result.
.LBB24_20:
xor ecx, ecx
# Continue with the magnitude of x.
.LBB24_21:
vmovd edx, xmm0
and edx, 2147483647
cmp edx, 8388607
ja .LBB24_24
jmp .LBB24_23
.LBB24_27:
vmovsd xmm1, qword ptr [rip + .LCPI24_10]
vucomisd xmm1, xmm0
jb .LBB24_29
# Underflow: (+/-2^-95) * 2^-95, the sign chosen by whether ecx is zero.
xor eax, eax
test ecx, ecx
lea rcx, [rip + .LCPI24_11]
sete al
vmovss xmm0, dword ptr [rcx + 4*rax]
vmovss dword ptr [rsp - 4], xmm0
vmovss xmm0, dword ptr [rsp - 4]
vmulss xmm0, xmm0, dword ptr [rip + .LCPI24_12]
ret
# dl = 150 - exponent = number of fraction bits in y; bzhi sets ZF when they
# are all zero, i.e. y is an integer.
.LBB24_18:
mov dl, -106
sub dl, cl
bzhi ecx, eax, edx
je .LBB24_19
# Invalid operation: return (x - x) / (x - x).
.LBB24_31:
vsubss xmm0, xmm0, xmm0
vdivss xmm0, xmm0, xmm0
ret
# y is an integer: test its lowest integer bit; odd y selects the 65536 bias.
.LBB24_19:
mov ecx, 1
shlx edx, ecx, edx
mov ecx, 65536
test edx, eax
jne .LBB24_21
jmp .LBB24_20
# y is zero, infinity or NaN.
.LBB24_3:
vmovss xmm2, dword ptr [rip + .LCPI24_0]
test ecx, ecx
sete sil
cmp edx, 1065353216
sete dil
or dil, sil
je .LBB24_5
# y is zero or x is exactly 1.0: return 1.0.
vmovaps xmm0, xmm2
ret
# x is zero, infinity or NaN (y is finite and non-zero): start from x * x.
.LBB24_10:
vmulss xmm0, xmm0, xmm0
test edx, edx
jns .LBB24_13
mov ecx, 2071
bextr ecx, eax, ecx
lea edx, [rcx - 151]
cmp edx, -24
jb .LBB24_13
# Negative x with y's exponent in 127..150: negate the square only when y has
# no fraction bits (k1 clear) and its lowest integer bit is set (k2 clear).
vxorps xmm1, xmm0, dword ptr [rip + .LCPI24_1]{1to4}
mov dl, -106
sub dl, cl
bzhi ecx, eax, edx
movzx edx, dl
setne cl
bt eax, edx
setae dl
kmovd k1, ecx
kmovd k2, edx
vmovss xmm1 {k2}, xmm1, xmm0
vmovss xmm1 {k1}, xmm1, xmm0
vmovaps xmm0, xmm1
.LBB24_13:
test eax, eax
jns .LBB24_30
# Negative y: return the reciprocal, bounced through [rsp-12].
vmovss xmm1, dword ptr [rip + .LCPI24_0]
vdivss xmm0, xmm1, xmm0
vmovss dword ptr [rsp - 12], xmm0
vmovss xmm0, dword ptr [rsp - 12]
ret
# y is infinity or NaN and x is not 1.0.
.LBB24_5:
add edx, edx
cmp edx, -16777215
setae sil
cmp ecx, -16777215
setae cl
or cl, sil
cmp cl, 1
jne .LBB24_7
# Either operand is NaN: return x + y.
vaddss xmm0, xmm0, xmm1
ret
# y is infinite: return 1.0 when |x| is 1, otherwise y * y or 0.
.LBB24_7:
vmovaps xmm0, xmm2
cmp edx, 2130706432
je .LBB24_30
setae cl
test eax, eax
vmulss xmm0, xmm1, xmm1
vxorps xmm1, xmm1, xmm1
setns al
xor al, cl
kmovd k1, eax
vmovss xmm0 {k1}, xmm0, xmm1
ret
.Lfunc_end24:
.size powf, .Lfunc_end24-powf
.cfi_endproc
.section .rodata.cst4,"aM",@progbits,4
        .p2align 2, 0x0
.LCPI25_0:
        .long   0x7fffffff              # magnitude mask
.LCPI25_1:
        .long   0x4b000000              # 2^23
.LCPI25_2:
        .long   0xcb000000              # -(2^23)
.LCPI25_3:
        .long   0x3f000000              # 0.5
.LCPI25_4:
        .long   0xbf000000              # -0.5
.LCPI25_5:
        .long   0x3f800000              # 1.0
.LCPI25_6:
        .long   0xbf800000              # -1.0
.LCPI25_7:
        .long   0x80000000              # sign-bit mask
.section .text.roundf,"ax",@progbits
        .p2align 4, 0x90
        .type   roundf,@function
#-----------------------------------------------------------------------
# roundf: round xmm0 to the nearest integral value, halfway cases away
# from zero, working on the magnitude and restoring the sign at the end.
#   In:    xmm0 = x
#   Out:   xmm0 = result
#   Clobb: eax, ecx, xmm1, xmm2, k1, flags
#   Stack: one scratch store at [rsp-4]; rsp is not moved.
#-----------------------------------------------------------------------
roundf:
.Lfunc_begin25:
        .cfi_startproc
        vmovd   eax, xmm0               # eax = bits of x
        mov     ecx, 2071               # bextr control 0x0817: start 23, length 8
        bextr   ecx, eax, ecx           # ecx = biased exponent
        cmp     ecx, 149
        ja      .Lroundf_ret            # exponent >= 150: already integral
        vpandd  xmm1, xmm0, dword ptr [rip + .LCPI25_0]{1to4}  # xmm1 = |x|
        vaddss  xmm2, xmm1, dword ptr [rip + .LCPI25_1]        # |x| + 2^23
        cmp     ecx, 125
        ja      .Lroundf_nearest        # magnitude is at least 0.5
        vmovss  dword ptr [rsp - 4], xmm2       # sum is stored, never read back
        vxorps  xmm1, xmm1, xmm1
        vmulss  xmm0, xmm0, xmm1        # magnitude below 0.5: return 0 * x
        ret
.Lroundf_nearest:
        vaddss  xmm0, xmm2, dword ptr [rip + .LCPI25_2] # (|x| + 2^23) - 2^23
        vsubss  xmm0, xmm0, xmm1        # d = rounded - |x|
        vucomiss xmm0, dword ptr [rip + .LCPI25_3]
        jbe     .Lroundf_not_above_half
        vaddss  xmm0, xmm1, xmm0        # d > 0.5: d + |x| - 1
        vaddss  xmm0, xmm0, dword ptr [rip + .LCPI25_6]
        jmp     .Lroundf_apply_sign
.Lroundf_not_above_half:
        vmovss  xmm2, dword ptr [rip + .LCPI25_4]
        vucomiss xmm2, xmm0
        vaddss  xmm0, xmm1, xmm0        # d + |x| (flags are unaffected)
        jb      .Lroundf_apply_sign     # d > -0.5: done
        vaddss  xmm0, xmm0, dword ptr [rip + .LCPI25_5] # d <= -0.5: add 1
.Lroundf_apply_sign:
        vxorps  xmm1, xmm0, dword ptr [rip + .LCPI25_7]{1to4}  # negated result
        test    eax, eax
        sets    al
        kmovd   k1, eax
        vmovss  xmm0 {k1}, xmm0, xmm1   # negative input: use the negated value
.Lroundf_ret:
        ret
.Lfunc_end25:
        .size   roundf, .Lfunc_end25-roundf
        .cfi_endproc
.type __unnamed_1,@object
.section .rodata.__unnamed_1,"a",@progbits
__unnamed_1:
.asciz "broadcast_dispatch_0"
.size __unnamed_1, 21
.type iree_hal_executable_library_query_v0_header,@object
.section .data.rel.ro.iree_hal_executable_library_query_v0_header,"aw",@progbits
.p2align 4, 0x0
iree_hal_executable_library_query_v0_header:
.long 4
.zero 4
.quad __unnamed_1
.long 0
.long 0
.size iree_hal_executable_library_query_v0_header, 24
.type iree_hal_executable_library_query_v0_funcs,@object
.section .data.rel.ro.iree_hal_executable_library_query_v0_funcs,"aw",@progbits
.p2align 3, 0x0
iree_hal_executable_library_query_v0_funcs:
.quad broadcast_dispatch_0_generic_Dx8640x3200_f16
.size iree_hal_executable_library_query_v0_funcs, 8
.type iree_hal_executable_library_query_v0_attrs,@object
.section .rodata.iree_hal_executable_library_query_v0_attrs,"a",@progbits
.p2align 3, 0x0
iree_hal_executable_library_query_v0_attrs:
.zero 4
.size iree_hal_executable_library_query_v0_attrs, 4
.type __unnamed_2,@object
.section .rodata.__unnamed_2,"a",@progbits
__unnamed_2:
.asciz "broadcast_dispatch_0_generic_Dx8640x3200_f16"
.size __unnamed_2, 45
.type iree_hal_executable_library_query_v0_names,@object
.section .data.rel.ro.iree_hal_executable_library_query_v0_names,"aw",@progbits
.p2align 3, 0x0
iree_hal_executable_library_query_v0_names:
.quad __unnamed_2
.size iree_hal_executable_library_query_v0_names, 8
.type __unnamed_3,@object
.section .rodata.__unnamed_3,"a",@progbits
__unnamed_3:
.asciz "broadcast.mlir"
.size __unnamed_3, 15
.type iree_hal_executable_library_query_v0_source_locations,@object
.section .data.rel.ro.iree_hal_executable_library_query_v0_source_locations,"aw",@progbits
.p2align 3, 0x0
iree_hal_executable_library_query_v0_source_locations:
.long 4
.long 14
.quad __unnamed_3
.size iree_hal_executable_library_query_v0_source_locations, 16
.type iree_hal_executable_library_query_v0_broadcast_dispatch_0_generic_Dx8640x3200_f16_stage_names,@object
.section .rodata.iree_hal_executable_library_query_v0_broadcast_dispatch_0_generic_Dx8640x3200_f16_stage_names,"a",@progbits
.p2align 3, 0x0
iree_hal_executable_library_query_v0_broadcast_dispatch_0_generic_Dx8640x3200_f16_stage_names:
.size iree_hal_executable_library_query_v0_broadcast_dispatch_0_generic_Dx8640x3200_f16_stage_names, 0
.type iree_hal_executable_library_query_v0_broadcast_dispatch_0_generic_Dx8640x3200_f16_stage_source_locations,@object
.section .rodata.iree_hal_executable_library_query_v0_broadcast_dispatch_0_generic_Dx8640x3200_f16_stage_source_locations,"a",@progbits
.p2align 3, 0x0
iree_hal_executable_library_query_v0_broadcast_dispatch_0_generic_Dx8640x3200_f16_stage_source_locations:
.size iree_hal_executable_library_query_v0_broadcast_dispatch_0_generic_Dx8640x3200_f16_stage_source_locations, 0
# One 24-byte stage-location table record, 16-byte aligned:
#   +0   u32  0       presumably the entry count; both tables it points at
#                     are zero-length in this file (TODO confirm field meaning)
#   +4   4 bytes of zero padding so the pointers are 8-byte aligned
#   +8   ptr  ..._stage_names
#   +16  ptr  ..._stage_source_locations
.type iree_hal_executable_library_query_v0_stage_location_tables,@object
.section .data.rel.ro.iree_hal_executable_library_query_v0_stage_location_tables,"aw",@progbits
.balign 16, 0
iree_hal_executable_library_query_v0_stage_location_tables:
    .int 0
    .space 4, 0
    .quad iree_hal_executable_library_query_v0_broadcast_dispatch_0_generic_Dx8640x3200_f16_stage_names
    .quad iree_hal_executable_library_query_v0_broadcast_dispatch_0_generic_Dx8640x3200_f16_stage_source_locations
.size iree_hal_executable_library_query_v0_stage_location_tables, . - iree_hal_executable_library_query_v0_stage_location_tables    # = 24
# Top-level 104-byte library descriptor, 16-byte aligned. It ties together the
# header, function, attribute, name and source-location tables of this module.
# NOTE(review): field meanings below are inferred from the symbol names only;
# confirm against the IREE executable-library v0 struct definition.
#   +0    ptr  ..._header
#   +8    16 zero bytes
#   +24   u32  1            presumably the number of exported functions
#   +28   4 zero bytes (padding before the pointer fields)
#   +32   ptr  ..._funcs
#   +40   ptr  ..._attrs
#   +48   ptr  ..._names
#   +56   ptr  NULL
#   +64   ptr  ..._source_locations
#   +72   ptr  ..._stage_location_tables
#   +80   u32  0
#   +84   u32  0
#   +88   16 zero bytes
.type iree_hal_executable_library_query_v0,@object
.section .data.rel.ro.iree_hal_executable_library_query_v0,"aw",@progbits
.balign 16, 0
iree_hal_executable_library_query_v0:
    .quad iree_hal_executable_library_query_v0_header
    .space 16, 0
    .int 1
    .space 4, 0
    .quad iree_hal_executable_library_query_v0_funcs
    .quad iree_hal_executable_library_query_v0_attrs
    .quad iree_hal_executable_library_query_v0_names
    .quad 0
    .quad iree_hal_executable_library_query_v0_source_locations
    .quad iree_hal_executable_library_query_v0_stage_location_tables
    .space 4, 0
    .space 4, 0
    .space 16, 0
.size iree_hal_executable_library_query_v0, . - iree_hal_executable_library_query_v0    # = 104
# Read-only table of 37 64-bit constants (37 * 8 = 296 bytes), 8-byte aligned.
# The values are written as raw 64-bit patterns; the leading 0x3ff/0xbfd/...
# bytes are consistent with IEEE-754 doubles (e.g. 0x3ff0000000000000 is 1.0).
# NOTE(review): the symbol name and the 16-pairs-plus-5 shape match the powf
# log2 lookup table of musl libc ({invc, logc} pairs, then polynomial
# coefficients) -- presumed, confirm against the library this was linked from.
.type __powf_log2_data,@object
.section .rodata.__powf_log2_data,"a",@progbits
.balign 8, 0
__powf_log2_data:
    # 16 two-element entries (32 quads).
    .quad 0x3ff661ec79f8f3be, 0xbfdefec65b963019
    .quad 0x3ff571ed4aaf883d, 0xbfdb0b6832d4fca4
    .quad 0x3ff49539f0f010b0, 0xbfd7418b0a1fb77b
    .quad 0x3ff3c995b0b80385, 0xbfd39de91a6dcf7b
    .quad 0x3ff30d190c8864a5, 0xbfd01d9bf3f2b631
    .quad 0x3ff25e227b0b8ea0, 0xbfc97c1d1b3b7af0
    .quad 0x3ff1bb4a4a1a343f, 0xbfc2f9e393af3c9f
    .quad 0x3ff12358f08ae5ba, 0xbfb960cbbf788d5c
    .quad 0x3ff0953f419900a7, 0xbfaa6f9db6475fce
    .quad 0x3ff0000000000000, 0x0000000000000000
    .quad 0x3fee608cfd9a47ac, 0x3fb338ca9f24f53d
    .quad 0x3feca4b31f026aa0, 0x3fc476a9543891ba
    .quad 0x3feb2036576afce6, 0x3fce840b4ac4e4d2
    .quad 0x3fe9c2d163a1aa2d, 0x3fd40645f0c6651c
    .quad 0x3fe886e6037841ed, 0x3fd88e9c2c1b9ff8
    .quad 0x3fe767dcf5534862, 0x3fdce0a44eb17bcc
    # 5 trailing constants (presumably polynomial coefficients).
    .quad 0x3fd27616c9496e0b
    .quad 0xbfd71969a075c67a
    .quad 0x3fdec70a6ca7badd
    .quad 0xbfe7154748bef6c8
    .quad 0x3ff71547652ab82b
.size __powf_log2_data, . - __powf_log2_data    # = 296
# Read-only table of 41 64-bit constants (41 * 8 = 328 bytes), 8-byte aligned.
# The first 32 entries are written in decimal, the last 9 in hex; both forms
# are raw 64-bit patterns (the first entry, 4607182418800017408, is
# 0x3ff0000000000000, the bit pattern of the double 1.0).
# NOTE(review): the symbol name and the 32 + 1 + 3 + 1 + 1 + 3 shape match the
# exp2f/expf data table of musl libc -- presumed; the group labels below follow
# that layout and should be confirmed against the originating library.
.type __exp2f_data,@object
.section .rodata.__exp2f_data,"a",@progbits
.balign 8, 0
__exp2f_data:
    # 32-entry lookup table.
    .quad 4607182418800017408, 4607140297302181236, 4607100335213349135, 4607062579818421073
    .quad 4607027079437701499, 4606993883449571754, 4606963042313658936, 4606934607594512097
    .quad 4606908631985796885, 4606885169335019979, 4606864274668794914, 4606846004218661165
    .quad 4606830415447468583, 4606817567076339586, 4606807519112221737, 4606800332876043653
    .quad 4606796071031487437, 4606794797614391156, 4606796578062795143, 4606801479247646227
    .quad 4606809569504174299, 4606820918663955941, 4606835598087680144, 4606853680698631517
    .quad 4606875241016906669, 4606900355194379847, 4606929101050434204, 4606961558108475497
    .quad 4606997807633245319, 4607037932668951391, 4607082018078232794, 4607130150581978432
    # 1 constant (presumably the scaled rounding shift).
    .quad 0x42e8000000000000
    # 3 constants (presumably polynomial coefficients).
    .quad 0x3fac6af84b912394, 0x3fcebfce50fac4f3, 0x3fe62e42ff0c52d6
    # 2 constants (presumably rounding shift and scaled 1/ln2).
    .quad 0x4338000000000000
    .quad 0x40471547652b82fe
    # 3 constants (presumably the scaled polynomial coefficients).
    .quad 0x3ebc6af84b912394, 0x3f2ebfce50fac4f3, 0x3f962e42ff0c52d6
.size __exp2f_data, . - __exp2f_data            # = 328
# DWARF abbreviation table. Declares three abbreviations used by the DIEs in
# .debug_info: 1 = compile unit, 2 = subprogram, 3 = base type.
# Each declaration is: code, tag, has-children flag, then (attribute, form)
# pairs terminated by a (0, 0) pair. A final 0 byte ends the table.
# The byte order is the encoding itself; do not reorder.
.section .debug_abbrev,"",@progbits
.byte 1                 # abbreviation code 1
.byte 17                # DW_TAG_compile_unit
.byte 1                 # DW_CHILDREN_yes
.byte 37                # DW_AT_producer
.byte 14                # DW_FORM_strp
.byte 19                # DW_AT_language
.byte 5                 # DW_FORM_data2
.byte 3                 # DW_AT_name
.byte 14                # DW_FORM_strp
.byte 16                # DW_AT_stmt_list
.byte 23                # DW_FORM_sec_offset
.ascii "\264B"          # bytes 0xb4 0x42 = ULEB128 0x2134, DW_AT_GNU_pubnames
.byte 25                # DW_FORM_flag_present
.byte 17                # DW_AT_low_pc
.byte 1                 # DW_FORM_addr
.byte 18                # DW_AT_high_pc
.byte 6                 # DW_FORM_data4
.byte 0                 # end of abbreviation 1 (attribute 0)
.byte 0                 # end of abbreviation 1 (form 0)
.byte 2                 # abbreviation code 2
.byte 46                # DW_TAG_subprogram
.byte 0                 # DW_CHILDREN_no
.byte 17                # DW_AT_low_pc
.byte 1                 # DW_FORM_addr
.byte 18                # DW_AT_high_pc
.byte 6                 # DW_FORM_data4
.byte 64                # DW_AT_frame_base
.byte 24                # DW_FORM_exprloc
.byte 110               # DW_AT_linkage_name
.byte 14                # DW_FORM_strp
.byte 3                 # DW_AT_name
.byte 14                # DW_FORM_strp
.byte 58                # DW_AT_decl_file
.byte 11                # DW_FORM_data1
.byte 59                # DW_AT_decl_line
.byte 11                # DW_FORM_data1
.byte 73                # DW_AT_type
.byte 19                # DW_FORM_ref4
.byte 63                # DW_AT_external
.byte 25                # DW_FORM_flag_present
.byte 0                 # end of abbreviation 2 (attribute 0)
.byte 0                 # end of abbreviation 2 (form 0)
.byte 3                 # abbreviation code 3
.byte 36                # DW_TAG_base_type
.byte 0                 # DW_CHILDREN_no
.byte 3                 # DW_AT_name
.byte 14                # DW_FORM_strp
.byte 62                # DW_AT_encoding
.byte 11                # DW_FORM_data1
.byte 11                # DW_AT_byte_size
.byte 11                # DW_FORM_data1
.byte 0                 # end of abbreviation 3 (attribute 0)
.byte 0                 # end of abbreviation 3 (form 0)
.byte 0                 # end of abbreviation table
# DWARF v4 compilation unit: one compile-unit DIE with two children, the
# subprogram DIE for the dispatch function and the base type DIE "int".
# Attribute order and sizes follow the abbreviations in .debug_abbrev.
# Offsets quoted below are relative to .Lcu_begin0; the whole unit is 75 bytes.
.section .debug_info,"",@progbits
.Lcu_begin0:
.long .Ldebug_info_end0-.Ldebug_info_start0     # unit_length (excludes this 4-byte field)
.Ldebug_info_start0:
.short 4                        # DWARF version
.long .debug_abbrev             # offset into .debug_abbrev
.byte 8                         # address size in bytes
.byte 1                         # [offset 11] abbrev 1: DW_TAG_compile_unit
.long .Linfo_string0            # DW_AT_producer -> "IREE"
.short 44                       # DW_AT_language = 0x2c (DW_LANG_C17 in the DWARF language-code list -- TODO confirm)
.long .Linfo_string1            # DW_AT_name -> "-"
.long .Lline_table_start0       # DW_AT_stmt_list: this unit's line table in .debug_line
.quad .Lfunc_begin0             # DW_AT_low_pc
.long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc (data4 form: length from low_pc)
.byte 2                         # [offset 38] abbrev 2: DW_TAG_subprogram
.quad .Lfunc_begin0             # DW_AT_low_pc
.long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc (data4 form: length from low_pc)
.byte 1                         # DW_AT_frame_base: expression length = 1 byte
.byte 86                        # 0x56 = DW_OP_reg6 (rbp in the x86-64 DWARF register numbering)
.long .Linfo_string2            # DW_AT_linkage_name -> function name string
.long .Linfo_string2            # DW_AT_name -> same string as the linkage name
.byte 1                         # DW_AT_decl_file
.byte 1                         # DW_AT_decl_line
.long 67                        # DW_AT_type: unit-relative offset of the base type DIE below
.byte 3                         # [offset 67] abbrev 3: DW_TAG_base_type
.long .Linfo_string3            # DW_AT_name -> "int"
.byte 5                         # DW_AT_encoding = DW_ATE_signed
.byte 4                         # DW_AT_byte_size
.byte 0                         # end of the compile unit's children
.Ldebug_info_end0:
# DWARF string table. Flags "MS" with entry size 1 mark the section as
# mergeable NUL-terminated strings. Each label is the target of a
# DW_FORM_strp attribute in .debug_info.
.section .debug_str,"MS",@progbits,1
.Linfo_string0:
.asciz "IREE"           # DW_AT_producer of the compile unit
.Linfo_string1:
.asciz "-"              # DW_AT_name of the compile unit
.Linfo_string2:
.asciz "broadcast_dispatch_0_generic_Dx8640x3200_f16"   # name and linkage name of the subprogram
.Linfo_string3:
.asciz "int"            # name of the base type
# DWARF public-names lookup table for the compile unit at .Lcu_begin0.
# Holds one (DIE offset, name) entry, followed by a zero offset terminator.
.section .debug_pubnames,"",@progbits
.long .LpubNames_end0-.LpubNames_start0     # length of this table (excludes this field)
.LpubNames_start0:
.short 2                # table version
.long .Lcu_begin0       # offset of the compile unit in .debug_info
.long 75                # size of that compile unit; matches the 75 bytes laid out from .Lcu_begin0 to .Ldebug_info_end0
.long 38                # unit-relative offset of the subprogram DIE
.asciz "broadcast_dispatch_0_generic_Dx8640x3200_f16"   # name for that DIE
.long 0                 # end of table
.LpubNames_end0:
# DWARF public-types lookup table for the compile unit at .Lcu_begin0.
# Holds one (DIE offset, name) entry, followed by a zero offset terminator.
.section .debug_pubtypes,"",@progbits
.long .LpubTypes_end0-.LpubTypes_start0     # length of this table (excludes this field)
.LpubTypes_start0:
.short 2                # table version
.long .Lcu_begin0       # offset of the compile unit in .debug_info
.long 75                # size of that compile unit; matches the 75 bytes laid out from .Lcu_begin0 to .Ldebug_info_end0
.long 67                # unit-relative offset of the base type DIE
.asciz "int"            # name for that DIE
.long 0                 # end of table
.LpubTypes_end0:
# Empty .note.GNU-stack section with no "x" flag. By GNU toolchain convention
# this marks the object as not requiring an executable stack.
.section ".note.GNU-stack","",@progbits
# Start of this unit's line table; DW_AT_stmt_list in .debug_info points here.
# No table bytes follow the label in this listing -- presumably the assembler
# generates them from the .loc directives in the code (TODO confirm).
.section .debug_line,"",@progbits
.Lline_table_start0:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment