Skip to content

Instantly share code, notes, and snippets.

@MasonProtter
Created November 23, 2023 16:14
Show Gist options
  • Save MasonProtter/0cb4971cce58e9929e075dd1717c74aa to your computer and use it in GitHub Desktop.
Save MasonProtter/0cb4971cce58e9929e075dd1717c74aa to your computer and use it in GitHub Desktop.
julia> code_native(Tuple{Vector{Int}}) do v
# Function under inspection: sum the elements of `v`, starting from init=0.
# The assembly listing that follows is the native code printed for this
# anonymous function when its argument type is Vector{Int}.
sum(v; init=0)
end
# ----------------------------------------------------------------------
# julia_#58_2730 -- native code for the anonymous function `#58`, whose
# body is `sum(v; init=0)` (see the `; ... within` debug-info lines).
# Syntax: Intel operand order, GAS-style directives, `#` comments.
#
# In:    rdi = pointer to the array object. [rdi] is read as the data
#              pointer (under the `getindex` annotation) and [rdi + 8]
#              as the element count (under the `length` annotation).
# Out:   rax = sum of all elements as 64-bit integers; 0 when empty.
#        NOTE(review): presumably System V AMD64 (arg in rdi, result in
#        rax) -- confirm against the target ABI.
# Uses:  rcx, rdx, rsi, rdi, r8, r9, r10, ymm0-ymm3, flags.
#
# Register roles once set up:
#   rcx = element count (later reused as the scalar-loop trip count)
#   r9  = data pointer
#   rax = running scalar sum (starts as the first element)
#   r8  = count - 1 = elements remaining after the first
#   rsi = r8 rounded down to a multiple of 16 (elements done by vector code)
#   r10 = number of leftover 16-element groups (group count mod 8)
#   rdi = element index used by the vector loops (the argument is dead by then)
#   ymm0-ymm3 = four independent partial-sum accumulators (4 x i64 each)
#
# Shape: empty -> 0; one element -> that element; fewer than 17 elements
# -> scalar loop only; otherwise an 8x-unrolled vector loop, a vector
# remainder loop, a horizontal reduction, then a scalar tail.
# ----------------------------------------------------------------------
.text
.file "#58"
.globl "julia_#58_2730" # -- Begin function julia_#58_2730
# Align the entry point to 16 bytes, padding with 0x90 (NOP).
.p2align 4, 0x90
.type "julia_#58_2730",@function
"julia_#58_2730": # @"julia_#58_2730"
; ┌ @ REPL[24]:2 within `#58`
# %bb.0: # %top
# Frame-pointer prologue (push rbp / mov rbp, rsp); the length load below
# is scheduled in between the two halves.
push rbp
; │┌ @ reducedim.jl:996 within `sum`
; ││┌ @ reducedim.jl:996 within `#sum#828`
; │││┌ @ reducedim.jl:1000 within `_sum`
; ││││┌ @ reducedim.jl:1000 within `#_sum#830`
; │││││┌ @ reducedim.jl:1001 within `_sum`
; ││││││┌ @ reducedim.jl:1001 within `#_sum#831`
; │││││││┌ @ reducedim.jl:357 within `mapreduce`
; ││││││││┌ @ reducedim.jl:357 within `#mapreduce#821`
; │││││││││┌ @ reducedim.jl:362 within `_mapreduce_dim`
; ││││││││││┌ @ reduce.jl:44 within `mapfoldl_impl`
; │││││││││││┌ @ reduce.jl:48 within `foldl_impl`
; ││││││││││││┌ @ reduce.jl:56 within `_foldl_impl`
; │││││││││││││┌ @ array.jl:943 within `iterate` @ array.jl:943
; ││││││││││││││┌ @ essentials.jl:10 within `length`
# rcx = element count
mov rcx, qword ptr [rdi + 8]
mov rbp, rsp
; ││││││││││││││└
; ││││││││││││││┌ @ int.jl:520 within `<` @ int.jl:513
# Empty array: nothing to iterate, return 0 via .LBB0_1.
test rcx, rcx
; ││││││││││││││└
je .LBB0_1
# %bb.2: # %L19
; ││││││││││││││┌ @ essentials.jl:13 within `getindex`
# r9 = data pointer; rax = first element, which seeds the running sum.
mov r9, qword ptr [rdi]
mov rax, qword ptr [r9]
; │││││││││││││└└
; │││││││││││││ @ reduce.jl:60 within `_foldl_impl`
; │││││││││││││┌ @ array.jl:943 within `iterate`
; ││││││││││││││┌ @ int.jl:520 within `<` @ int.jl:513
# Exactly one element: the sum is already in rax, go straight to the exit.
cmp rcx, 1
; ││││││││││││││└
je .LBB0_15
# %bb.3: # %L40.preheader
# r8 = count - 1 = elements still to add. With at least 16 of them
# (unsigned compare) take the vector path at .LBB0_5.
lea r8, [rcx - 1]
cmp r8, 16
jae .LBB0_5
# %bb.4:
# Scalar-only path: rdx = 1 is the index of the next element to add
# (the one after the first) and rsi = rdx + 1, as the scalar loop expects.
mov esi, 2
mov edx, 1
jmp .LBB0_13
.LBB0_1:
# Empty-input exit: result is 0. No ymm register has been written on this
# path, which is why there is no vzeroupper here.
xor eax, eax
; │└└└└└└└└└└└└└
pop rbp
ret
.LBB0_5: # %vector.ph
; │┌ @ reducedim.jl:996 within `sum`
; ││┌ @ reducedim.jl:996 within `#sum#828`
; │││┌ @ reducedim.jl:1000 within `_sum`
; ││││┌ @ reducedim.jl:1000 within `#_sum#830`
; │││││┌ @ reducedim.jl:1001 within `_sum`
; ││││││┌ @ reducedim.jl:1001 within `#_sum#831`
; │││││││┌ @ reducedim.jl:357 within `mapreduce`
; ││││││││┌ @ reducedim.jl:357 within `#mapreduce#821`
; │││││││││┌ @ reducedim.jl:362 within `_mapreduce_dim`
; ││││││││││┌ @ reduce.jl:44 within `mapfoldl_impl`
; │││││││││││┌ @ reduce.jl:48 within `foldl_impl`
; ││││││││││││┌ @ reduce.jl:60 within `_foldl_impl`
; │││││││││││││┌ @ array.jl:943 within `iterate`
# Vector preheader.
#   rsi = r8 & -16          -> elements the vector code will consume
#   xmm0 = first element    -> carried into accumulator ymm0 (the
#                              VEX-encoded vmovq zeroes the other lanes)
#   rax = (rsi - 16)/16 + 1 -> number of 16-element groups
#   r10 = rax & 7           -> groups left over after the 8x-unrolled loop
# rsi - 16 >= 112 means there are at least 8 groups, so the unrolled
# loop runs at least once.
mov rsi, r8
and rsi, -16
vmovq xmm0, rax
lea rdx, [rsi - 16]
mov rax, rdx
shr rax, 4
inc rax
mov r10d, eax
and r10d, 7
cmp rdx, 112
jae .LBB0_7
# %bb.6:
# Fewer than 8 groups: zero the remaining accumulators and the element
# index (rdi), then skip the unrolled loop.
vpxor xmm1, xmm1, xmm1
xor edi, edi
vpxor xmm2, xmm2, xmm2
vpxor xmm3, xmm3, xmm3
jmp .LBB0_9
.LBB0_7: # %vector.ph.new
# rax = group count rounded down to a multiple of 8 (unrolled-loop budget).
# rdx = data + 1000 is a biased base: the displacements -992 .. 0 used
# below then address byte offsets 8 .. 1000 past 8*rdi, i.e. the loads
# start at the element after the first.
and rax, -8
lea rdx, [r9 + 1000]
vpxor xmm1, xmm1, xmm1
xor edi, edi
vpxor xmm2, xmm2, xmm2
vpxor xmm3, xmm3, xmm3
.p2align 4, 0x90
.LBB0_8: # %vector.body
# =>This Inner Loop Header: Depth=1
; │││││││││││││└
; │││││││││││││ @ reduce.jl:62 within `_foldl_impl`
; │││││││││││││┌ @ reduce.jl:86 within `BottomRF`
; ││││││││││││││┌ @ reduce.jl:27 within `add_sum`
; │││││││││││││││┌ @ int.jl:87 within `+`
# Main loop: 32 loads of 32 bytes = 128 elements per iteration, added
# round-robin into ymm0..ymm3 (8 groups of 16 elements).
vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 992]
vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 960]
vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 928]
vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 896]
vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 864]
vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 832]
vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 800]
vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 768]
vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 736]
vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 704]
vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 672]
vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 640]
vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 608]
vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 576]
vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 544]
vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 512]
vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 480]
vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 448]
vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 416]
vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 384]
vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 352]
vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 320]
vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 288]
vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 256]
vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 224]
vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 192]
vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 160]
vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi - 128]
vpaddq ymm0, ymm0, ymmword ptr [rdx + 8*rdi - 96]
vpaddq ymm1, ymm1, ymmword ptr [rdx + 8*rdi - 64]
vpaddq ymm2, ymm2, ymmword ptr [rdx + 8*rdi - 32]
vpaddq ymm3, ymm3, ymmword ptr [rdx + 8*rdi]
# rdi += 128 elements (written as a subtraction of -128, presumably so the
# immediate fits in a signed byte); rax -= 8 groups, and the flags from
# that add drive the loop branch.
sub rdi, -128
add rax, -8
jne .LBB0_8
.LBB0_9: # %middle.block.unr-lcssa
# Any leftover 16-element groups (r10 = group count mod 8)?
test r10, r10
je .LBB0_11
.p2align 4, 0x90
.LBB0_10: # %vector.body.epil
# =>This Inner Loop Header: Depth=1
; │││││││││││││└└└
; │││││││││││││ @ reduce.jl:60 within `_foldl_impl`
; │││││││││││││┌ @ array.jl:943 within `iterate`
# Remainder loop: one 16-element group (4 x 32 bytes) per iteration.
# rax = 8*rdi | 8 is the byte offset 8*rdi + 8; the OR behaves as an add
# because rdi only ever advances from 0 in steps of 128 or 16, so bit 3 of
# 8*rdi is clear.
lea rax, [8*rdi]
add rdi, 16
; ││││││││││││││┌ @ essentials.jl:13 within `getindex`
or rax, 8
# The jne at the bottom consumes the flags set by this dec; the vpaddq
# instructions in between do not modify flags.
dec r10
; │││││││││││││└└
; │││││││││││││ @ reduce.jl:62 within `_foldl_impl`
; │││││││││││││┌ @ reduce.jl:86 within `BottomRF`
; ││││││││││││││┌ @ reduce.jl:27 within `add_sum`
; │││││││││││││││┌ @ int.jl:87 within `+`
vpaddq ymm0, ymm0, ymmword ptr [r9 + rax]
vpaddq ymm1, ymm1, ymmword ptr [r9 + rax + 32]
vpaddq ymm2, ymm2, ymmword ptr [r9 + rax + 64]
vpaddq ymm3, ymm3, ymmword ptr [r9 + rax + 96]
jne .LBB0_10
.LBB0_11: # %middle.block
; │││││││││││││└└└
; │││││││││││││ @ reduce.jl:60 within `_foldl_impl`
; │││││││││││││┌ @ array.jl:943 within `iterate`
# Horizontal reduction: fold the four accumulators into ymm0, add its
# upper 128 bits onto the lower 128, then the high qword onto the low
# qword, and move the scalar result to rax.
vpaddq ymm1, ymm1, ymm3
vpaddq ymm0, ymm0, ymm2
vpaddq ymm0, ymm0, ymm1
vextracti128 xmm1, ymm0, 1
vpaddq xmm0, xmm0, xmm1
vpshufd xmm1, xmm0, 238 # xmm1 = xmm0[2,3,2,3]
vpaddq xmm0, xmm0, xmm1
vmovq rax, xmm0
# If the vector code consumed every remaining element (r8 == rsi), done.
cmp r8, rsi
je .LBB0_15
# %bb.12:
# Set up the scalar tail: rdx = rsi + 1 is the index of the first element
# not yet added; rsi | 2 equals rsi + 2 because rsi is a multiple of 16.
lea rdx, [rsi + 1]
or rsi, 2
.LBB0_13: # %scalar.ph
# rcx = count - rsi + 1 = number of scalar iterations to run
# (rsi is always one past the index held in rdx on entry).
sub rcx, rsi
inc rcx
.p2align 4, 0x90
.LBB0_14: # %L40
# =>This Inner Loop Header: Depth=1
; │││││││││││││└
; │││││││││││││ @ reduce.jl:62 within `_foldl_impl`
; │││││││││││││┌ @ reduce.jl:86 within `BottomRF`
; ││││││││││││││┌ @ reduce.jl:27 within `add_sum`
; │││││││││││││││┌ @ int.jl:87 within `+`
# Scalar loop: add element rdx to the sum; rdx then takes the value of
# rsi, which stays one step ahead.
add rax, qword ptr [r9 + 8*rdx]
mov rdx, rsi
; │││││││││││││└└└
; │││││││││││││ @ reduce.jl:60 within `_foldl_impl`
; │││││││││││││┌ @ array.jl:943 within `iterate`
; ││││││││││││││┌ @ int.jl:87 within `+`
inc rsi
; ││││││││││││││└
; ││││││││││││││┌ @ int.jl:520 within `<` @ int.jl:513
dec rcx
; ││││││││││││││└
jne .LBB0_14
.LBB0_15: # %L55
; │└└└└└└└└└└└└└
# Common exit with the sum in rax. vzeroupper clears the upper halves of
# the ymm registers, which the vector path may have written.
pop rbp
vzeroupper
ret
.Lfunc_end0:
.size "julia_#58_2730", .Lfunc_end0-"julia_#58_2730"
; └
# -- End function
.section ".note.GNU-stack","",@progbits
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment