Created
November 23, 2023 16:14
-
-
Save MasonProtter/0cb4971cce58e9929e075dd1717c74aa to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
julia> code_native(Tuple{Vector{Int}}) do v | |
sum(v; init=0) | |
end | |
; Native code that Julia printed for the anonymous function `v -> sum(v; init=0)`
; requested for Tuple{Vector{Int}} (operands are in Intel order: op dst, src).
; Register use, as read from the instructions below:
;   rdi        on entry: pointer to the array object; [rdi] is loaded as the data
;              pointer and [rdi + 8] as the length. Later reused as an element offset.
;   rcx        length, later the trip count of the scalar loop
;   r9         data pointer
;   r8         length - 1 (elements left after the first one)
;   rsi        r8 rounded down to a multiple of 16, later the scalar look-ahead index
;   rax        running sum and return value (also a temporary during vector setup)
;   ymm0-ymm3  four independent vector accumulators
.text | |
.file "#58" | |
.globl "julia_#58_2730" # -- Begin function julia_#58_2730 | |
.p2align 4, 0x90 | |
.type "julia_#58_2730",@function | |
"julia_#58_2730": # @"julia_#58_2730" | |
; ┌ @ REPL[24]:2 within `#58` | |
# %bb.0: # %top | |
; Frame-pointer prologue; nothing below addresses memory through rsp or rbp.
push rbp | |
; │┌ @ reducedim.jl:996 within `sum` | |
; ││┌ @ reducedim.jl:996 within `#sum#828` | |
; │││┌ @ reducedim.jl:1000 within `_sum` | |
; ││││┌ @ reducedim.jl:1000 within `#_sum#830` | |
; │││││┌ @ reducedim.jl:1001 within `_sum` | |
; ││││││┌ @ reducedim.jl:1001 within `#_sum#831` | |
; │││││││┌ @ reducedim.jl:357 within `mapreduce` | |
; ││││││││┌ @ reducedim.jl:357 within `#mapreduce#821` | |
; │││││││││┌ @ reducedim.jl:362 within `_mapreduce_dim` | |
; ││││││││││┌ @ reduce.jl:44 within `mapfoldl_impl` | |
; │││││││││││┌ @ reduce.jl:48 within `foldl_impl` | |
; ││││││││││││┌ @ reduce.jl:56 within `_foldl_impl` | |
; │││││││││││││┌ @ array.jl:943 within `iterate` @ array.jl:943 | |
; ││││││││││││││┌ @ essentials.jl:10 within `length` | |
; rcx = length of the array
mov rcx, qword ptr [rdi + 8] | |
mov rbp, rsp | |
; ││││││││││││││└ | |
; ││││││││││││││┌ @ int.jl:520 within `<` @ int.jl:513 | |
; Empty array: branch to the block that returns 0.
test rcx, rcx | |
; ││││││││││││││└ | |
je .LBB0_1 | |
# %bb.2: # %L19 | |
; ││││││││││││││┌ @ essentials.jl:13 within `getindex` | |
; r9 = data pointer, rax = first element (the sum starts from it).
mov r9, qword ptr [rdi] | |
mov rax, qword ptr [r9] | |
; │││││││││││││└└ | |
; │││││││││││││ @ reduce.jl:60 within `_foldl_impl` | |
; │││││││││││││┌ @ array.jl:943 within `iterate` | |
; ││││││││││││││┌ @ int.jl:520 within `<` @ int.jl:513 | |
; Exactly one element: rax already holds the result.
cmp rcx, 1 | |
; ││││││││││││││└ | |
je .LBB0_15 | |
# %bb.3: # %L40.preheader | |
; r8 = elements still to add. With 16 or more, take the vector path.
lea r8, [rcx - 1] | |
cmp r8, 16 | |
jae .LBB0_5 | |
# %bb.4: | |
; Fewer than 16 remaining: scalar loop only. rdx = offset of the next element
; (1, in qwords from r9), rsi = the offset after that (2).
mov esi, 2 | |
mov edx, 1 | |
jmp .LBB0_13 | |
.LBB0_1: | |
; Empty-array exit: return 0.
xor eax, eax | |
; │└└└└└└└└└└└└└ | |
pop rbp | |
ret | |
.LBB0_5: # %vector.ph | |
; │┌ @ reducedim.jl:996 within `sum` | |
; ││┌ @ reducedim.jl:996 within `#sum#828` | |
; │││┌ @ reducedim.jl:1000 within `_sum` | |
; ││││┌ @ reducedim.jl:1000 within `#_sum#830` | |
; │││││┌ @ reducedim.jl:1001 within `_sum` | |
; ││││││┌ @ reducedim.jl:1001 within `#_sum#831` | |
; │││││││┌ @ reducedim.jl:357 within `mapreduce` | |
; ││││││││┌ @ reducedim.jl:357 within `#mapreduce#821` | |
; │││││││││┌ @ reducedim.jl:362 within `_mapreduce_dim` | |
; ││││││││││┌ @ reduce.jl:44 within `mapfoldl_impl` | |
; │││││││││││┌ @ reduce.jl:48 within `foldl_impl` | |
; ││││││││││││┌ @ reduce.jl:60 within `_foldl_impl` | |
; │││││││││││││┌ @ array.jl:943 within `iterate` | |
; Vector setup:
;   rsi = r8 rounded down to a multiple of 16 (elements the vector code will consume)
;   xmm0 = first element in the low lane, so the vector total already includes it
;   rax = (rsi - 16) / 16 + 1 = number of 16-element steps
;   r10 = steps mod 8, left over for the remainder loop at .LBB0_10
; Fewer than 8 steps (rsi - 16 < 112) skips the 8x-unrolled loop.
mov rsi, r8 | |
and rsi, -16 | |
vmovq xmm0, rax | |
lea rdx, [rsi - 16] | |
mov rax, rdx | |
shr rax, 4 | |
inc rax | |
mov r10d, eax | |
and r10d, 7 | |
cmp rdx, 112 | |
jae .LBB0_7 | |
# %bb.6: | |
; No unrolled iterations: clear accumulators 1-3 and start at element offset 0.
vpxor xmm1, xmm1, xmm1 | |
xor edi, edi | |
vpxor xmm2, xmm2, xmm2 | |
vpxor xmm3, xmm3, xmm3 | |
jmp .LBB0_9 | |
.LBB0_7: # %vector.ph.new | |
; rax = step count rounded down to a multiple of 8 (loop counter, decremented by 8).
; rdx = r9 + 1000 is a biased base: with displacements -992..0 the first load
; reads r9 + 8, i.e. the element right after the one already held in xmm0.
and rax, -8 | |
lea rdx, [r9 + 1000] | |
vpxor xmm1, xmm1, xmm1 | |
xor edi, edi | |
vpxor xmm2, xmm2, xmm2 | |
vpxor xmm3, xmm3, xmm3 | |
.p2align 4, 0x90 | |
.LBB0_8: # %vector.body | |
# =>This Inner Loop Header: Depth=1 | |
; │││││││││││││└ | |
; │││││││││││││ @ reduce.jl:62 within `_foldl_impl` | |
; │││││││││││││┌ @ reduce.jl:86 within `BottomRF` | |
; ││││││││││││││┌ @ reduce.jl:27 within `add_sum` | |
; │││││││││││││││┌ @ int.jl:87 within `+` | |
; Unrolled body: 32 loads of 32 bytes (128 qwords) per iteration, rotated over
; ymm0..ymm3 so consecutive adds do not depend on each other.
vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 992] | |
vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 960] | |
vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 928] | |
vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 896] | |
vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 864] | |
vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 832] | |
vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 800] | |
vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 768] | |
vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 736] | |
vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 704] | |
vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 672] | |
vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 640] | |
vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 608] | |
vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 576] | |
vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 544] | |
vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 512] | |
vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 480] | |
vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 448] | |
vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 416] | |
vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 384] | |
vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 352] | |
vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 320] | |
vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 288] | |
vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 256] | |
vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 224] | |
vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 192] | |
vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 160] | |
vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 128] | |
vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 96] | |
vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 64] | |
vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 32] | |
vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi] | |
; rdi += 128 elements (written as a subtraction of -128); rax -= 8 steps.
sub rdi, -128 | |
add rax, -8 | |
jne .LBB0_8 | |
.LBB0_9: # %middle.block.unr-lcssa | |
; r10 = leftover 16-element steps; none left means go straight to the reduction.
test r10, r10 | |
je .LBB0_11 | |
.p2align 4, 0x90 | |
.LBB0_10: # %vector.body.epil | |
# =>This Inner Loop Header: Depth=1 | |
; │││││││││││││└└└ | |
; │││││││││││││ @ reduce.jl:60 within `_foldl_impl` | |
; │││││││││││││┌ @ array.jl:943 within `iterate` | |
; Remainder loop: one 16-element step per iteration.
; rax = byte offset 8*rdi + 8. rdi only ever grows by 128 or 16 from 0, so 8*rdi
; has its low bits clear and the `or` below acts as an add of 8.
lea rax, [8*rdi] | |
add rdi, 16 | |
; ││││││││││││││┌ @ essentials.jl:13 within `getindex` | |
or rax, 8 | |
; The flags set by this dec are the ones tested by the jne after the vector adds,
; so nothing that writes flags may be placed in between.
dec r10 | |
; │││││││││││││└└ | |
; │││││││││││││ @ reduce.jl:62 within `_foldl_impl` | |
; │││││││││││││┌ @ reduce.jl:86 within `BottomRF` | |
; ││││││││││││││┌ @ reduce.jl:27 within `add_sum` | |
; │││││││││││││││┌ @ int.jl:87 within `+` | |
vpaddq ymm0, ymm0, ymmword ptr [r9 + rax] | |
vpaddq ymm1, ymm1, ymmword ptr [r9 + rax + 32] | |
vpaddq ymm2, ymm2, ymmword ptr [r9 + rax + 64] | |
vpaddq ymm3, ymm3, ymmword ptr [r9 + rax + 96] | |
jne .LBB0_10 | |
.LBB0_11: # %middle.block | |
; │││││││││││││└└└ | |
; │││││││││││││ @ reduce.jl:60 within `_foldl_impl` | |
; │││││││││││││┌ @ array.jl:943 within `iterate` | |
; Horizontal reduction: merge the four accumulators into ymm0, add its upper
; 128 bits onto the lower, then the upper 64 bits onto the lower, and move the
; resulting scalar into rax.
vpaddq ymm1, ymm1, ymm3 | |
vpaddq ymm0, ymm0, ymm2 | |
vpaddq ymm0, ymm0, ymm1 | |
vextracti128 xmm1, ymm0, 1 | |
vpaddq xmm0, xmm0, xmm1 | |
vpshufd xmm1, xmm0, 238 # xmm1 = xmm0[2,3,2,3] | |
vpaddq xmm0, xmm0, xmm1 | |
vmovq rax, xmm0 | |
; If the vector code consumed every remaining element (r8 == rsi), we are done.
cmp r8, rsi | |
je .LBB0_15 | |
# %bb.12: | |
; Scalar tail starts right after the vectorised part: rdx = rsi + 1 is the next
; element offset, and rsi becomes rsi + 2 (`or` equals add because rsi is a
; multiple of 16).
lea rdx, [rsi + 1] | |
or rsi, 2 | |
.LBB0_13: # %scalar.ph | |
; rcx = length - rsi + 1 = number of scalar iterations still to run.
sub rcx, rsi | |
inc rcx | |
.p2align 4, 0x90 | |
.LBB0_14: # %L40 | |
# =>This Inner Loop Header: Depth=1 | |
; │││││││││││││└ | |
; │││││││││││││ @ reduce.jl:62 within `_foldl_impl` | |
; │││││││││││││┌ @ reduce.jl:86 within `BottomRF` | |
; ││││││││││││││┌ @ reduce.jl:27 within `add_sum` | |
; │││││││││││││││┌ @ int.jl:87 within `+` | |
; Scalar loop: add element rdx, then advance (rdx takes rsi's value, rsi moves
; one ahead) until the rcx countdown reaches zero.
add rax, qword ptr [r9 + 8*rdx] | |
mov rdx, rsi | |
; │││││││││││││└└└ | |
; │││││││││││││ @ reduce.jl:60 within `_foldl_impl` | |
; │││││││││││││┌ @ array.jl:943 within `iterate` | |
; ││││││││││││││┌ @ int.jl:87 within `+` | |
inc rsi | |
; ││││││││││││││└ | |
; ││││││││││││││┌ @ int.jl:520 within `<` @ int.jl:513 | |
dec rcx | |
; ││││││││││││││└ | |
jne .LBB0_14 | |
.LBB0_15: # %L55 | |
; │└└└└└└└└└└└└└ | |
; Common exit with the sum in rax. vzeroupper clears the upper halves of the ymm
; registers before returning; this exit is also taken by the single-element
; case, which never touched them.
pop rbp | |
vzeroupper | |
ret | |
.Lfunc_end0: | |
.size "julia_#58_2730", .Lfunc_end0-"julia_#58_2730" | |
; └ | |
# -- End function | |
.section ".note.GNU-stack","",@progbits |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment