Skip to content

Instantly share code, notes, and snippets.

@ArchRobison
Created June 18, 2014 18:29
Show Gist options
  • Select an option

  • Save ArchRobison/74dd2a89010eef5a1bf8 to your computer and use it in GitHub Desktop.

Select an option

Save ArchRobison/74dd2a89010eef5a1bf8 to your computer and use it in GitHub Desktop.
$ cat y.jl
cat: y.jl: No such file or directory
$ cat /tmp/y.jl
function saxpy(a, x, y)  # in-place update y[i] = y[i] + a*x[i] for i in 1:length(x); mutates y
    @simd for i in 1:length(x)  # @simd marks the loop as a candidate for vectorization
        @inbounds y[i] += a*x[i]  # bounds checks are elided, so y must hold at least length(x) elements
    end
end
code_native( saxpy, (Float32,Array{Float32,1},Array{Float32,1}) ) # print the native code generated for saxpy with a::Float32 and x, y::Array{Float32,1}
$ julia /tmp/y.jl
# Disassembly printed by code_native for saxpy(a, x, y): x86-64, AVX, Intel operand order.
# Register roles, read off the data flow below:
#   XMM0 = a, the scalar multiplier (broadcast into YMM1 for the vector loop)
#   RDI  = array object whose data is only read and multiplied by a  -> x
#   RSI  = array object whose data is read, added to, and written    -> y
# NOTE(review): offset +16 of the array object is used as the element count and
# offset +8 as the data pointer; confirm against the array layout of this Julia build.
# NOTE(review): jump operands are relative byte offsets as printed by the
# disassembler; the targets named in the comments are inferred from the code shape.
.text
Filename: /tmp/y.jl
Source line: 33
# Frame-pointer prologue.
push RBP
mov RBP, RSP
Source line: 33
# RAX = max([RDI + 16], 0): the loop trip count, clamped at zero.
mov RCX, QWORD PTR [RDI + 16]
xor EAX, EAX
test RCX, RCX
cmovg RAX, RCX
# Overflow-checked range arithmetic: cmp sets OF for RAX - 1 (lea leaves the flags
# alone, so the first jo tests that cmp); the add then re-checks (RAX - 1) + 1.
# Both jo branches presumably land on the error path at the end of the listing.
cmp RAX, 1
lea RCX, QWORD PTR [RAX - 1]
jo 254
lea RAX, QWORD PTR [RCX + 1]
add RCX, 1
jo 240
# Trip count <= 0: skip all loop code (presumably straight to the epilogue).
test RAX, RAX
jle 226
Source line: 3
# R8 = data pointer of x, R9 = data pointer of y.
mov R8, QWORD PTR [RDI + 8]
mov R9, QWORD PTR [RSI + 8]
# RDX = 0: number of elements already handled if the vector loop is skipped
# (presumably the je below lands on the `sub RAX, RDX` after the vector loop).
xor EDX, EDX
# R10 = trip count rounded down to a multiple of 16 = elements for the vector loop.
mov R10, RAX
and R10, -16
je 157
# YMM1 = a replicated into all 8 single-precision lanes.
vpshufd XMM1, XMM0, 0 # xmm1 = xmm0[0,0,0,0]
vinsertf128 YMM1, YMM1, XMM1, 1
# RDI is reused as the vector-loop counter: starts at 0 and steps by -16.
xor EDI, EDI
# ---- Vector loop body: 16 floats per iteration, as two 8-lane halves ----
# RCX = R8 - 4*RDI and RDX = R9 - 4*RDI: addresses of the current x and y chunks
# (RDI <= 0, so the subtraction moves forward through the arrays).
lea RSI, QWORD PTR [4*RDI]
mov RCX, R8
sub RCX, RSI
mov RDX, R9
sub RDX, RSI
# Load 16 floats of y as four unaligned 128-bit pieces.
vmovups XMM4, XMMWORD PTR [RDX]
vmovups XMM5, XMMWORD PTR [RDX + 16]
vmovups XMM2, XMMWORD PTR [RDX + 32]
vmovups XMM3, XMMWORD PTR [RDX + 48]
# YMM4 = first 8 floats of the y chunk.
vinsertf128 YMM4, YMM4, XMM5, 1
# Load the x chunk; XMM5 is free again after the insert above.
vmovups XMM6, XMMWORD PTR [RCX]
vmovups XMM7, XMMWORD PTR [RCX + 16]
vmovups XMM5, XMMWORD PTR [RCX + 32]
# YMM6 = first 8 floats of x, then YMM6 = a * x.
vinsertf128 YMM6, YMM6, XMM7, 1
vmulps YMM6, YMM6, YMM1
# Advance the counter; the flags set here are overwritten by `add RCX, RDI` below.
add RDI, -16
# YMM4 = y + a*x for the first 8 floats.
vaddps YMM4, YMM4, YMM6
# Second half: YMM2 = next 8 floats of y, YMM3 = next 8 floats of x (upper 128 bits
# come straight from memory), then YMM2 = y + a*x.
vinsertf128 YMM2, YMM2, XMM3, 1
vinsertf128 YMM3, YMM5, XMMWORD PTR [RCX + 48], 1
vmulps YMM3, YMM3, YMM1
vaddps YMM2, YMM2, YMM3
# Split both 256-bit results back into 128-bit halves for the stores.
vextractf128 XMM3, YMM2, 1
vextractf128 XMM5, YMM4, 1
# RCX = R10 + RDI: reaches zero once R10 elements are done. This add sets the flags
# the jne below tests; the vmovups stores in between do not modify flags.
mov RCX, R10
add RCX, RDI
# Store the 16 updated floats back into the y chunk.
vmovups XMMWORD PTR [RDX + 16], XMM5
vmovups XMMWORD PTR [RDX], XMM4
vmovups XMMWORD PTR [RDX + 48], XMM3
vmovups XMMWORD PTR [RDX + 32], XMM2
# Repeat while vector elements remain (presumably back to the `lea RSI` above).
jne -141
# RAX = trip count - elements done by the vector loop = leftover element count;
# skip the scalar loop when nothing is left.
mov RDX, R10
sub RAX, RDX
je 37
Source line: 41
# RCX = address of the first leftover element of y, RDX = same for x (4-byte elements).
lea RCX, QWORD PTR [R9 + 4*RDX]
lea RDX, QWORD PTR [R8 + 4*RDX]
Source line: 3
# ---- Scalar remainder loop: one float per iteration ----
# XMM1 = a * x element, plus y element, stored back to y.
vmulss XMM1, XMM0, DWORD PTR [RDX]
vaddss XMM1, XMM1, DWORD PTR [RCX]
vmovss DWORD PTR [RCX], XMM1
Source line: 41
# Step both pointers to the next float and count down the leftover elements.
add RCX, 4
add RDX, 4
dec RAX
jne -29
Source line: 48
# Epilogue: restore RBP, clear the upper YMM halves after the AVX code, return.
pop RBP
vzeroupper
ret
Source line: 33
# Out-of-line path: placed after ret, so it is reached only by a jump.
# Loads a value from one absolute address into RDI, puts 33 in ESI, and calls a
# second absolute address; no instruction follows the call.
# NOTE(review): presumably the runtime's error-raising routine for the overflow
# checks above, with 33 being the reported source line; confirm.
movabs RAX, 139833603693952
mov RDI, QWORD PTR [RAX]
movabs RAX, 139833588617104
mov ESI, 33
call RAX
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment