Created
June 18, 2014 18:29
-
-
Save ArchRobison/74dd2a89010eef5a1bf8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| $ cat y.jl | |
| cat: y.jl: No such file or directory | |
| $ cat /tmp/y.jl | |
| function saxpy( a, x, y ) | |
| @simd for i=1:length(x) | |
| @inbounds y[i] = y[i]+a*x[i] | |
| end | |
| end | |
| code_native( saxpy, (Float32,Array{Float32,1},Array{Float32,1}) ) | |
| $ julia /tmp/y.jl | |
| .text | |
| Filename: /tmp/y.jl | |
| Source line: 33 | |
| push RBP | |
| mov RBP, RSP | |
| Source line: 33 | |
| mov RCX, QWORD PTR [RDI + 16] | |
| xor EAX, EAX | |
| test RCX, RCX | |
| cmovg RAX, RCX | |
| cmp RAX, 1 | |
| lea RCX, QWORD PTR [RAX - 1] | |
| jo 254 | |
| lea RAX, QWORD PTR [RCX + 1] | |
| add RCX, 1 | |
| jo 240 | |
| test RAX, RAX | |
| jle 226 | |
| Source line: 3 | |
| mov R8, QWORD PTR [RDI + 8] | |
| mov R9, QWORD PTR [RSI + 8] | |
| xor EDX, EDX | |
| mov R10, RAX | |
| and R10, -16 | |
| je 157 | |
| vpshufd XMM1, XMM0, 0 # xmm1 = xmm0[0,0,0,0] | |
| vinsertf128 YMM1, YMM1, XMM1, 1 | |
| xor EDI, EDI | |
| lea RSI, QWORD PTR [4*RDI] | |
| mov RCX, R8 | |
| sub RCX, RSI | |
| mov RDX, R9 | |
| sub RDX, RSI | |
| vmovups XMM4, XMMWORD PTR [RDX] | |
| vmovups XMM5, XMMWORD PTR [RDX + 16] | |
| vmovups XMM2, XMMWORD PTR [RDX + 32] | |
| vmovups XMM3, XMMWORD PTR [RDX + 48] | |
| vinsertf128 YMM4, YMM4, XMM5, 1 | |
| vmovups XMM6, XMMWORD PTR [RCX] | |
| vmovups XMM7, XMMWORD PTR [RCX + 16] | |
| vmovups XMM5, XMMWORD PTR [RCX + 32] | |
| vinsertf128 YMM6, YMM6, XMM7, 1 | |
| vmulps YMM6, YMM6, YMM1 | |
| add RDI, -16 | |
| vaddps YMM4, YMM4, YMM6 | |
| vinsertf128 YMM2, YMM2, XMM3, 1 | |
| vinsertf128 YMM3, YMM5, XMMWORD PTR [RCX + 48], 1 | |
| vmulps YMM3, YMM3, YMM1 | |
| vaddps YMM2, YMM2, YMM3 | |
| vextractf128 XMM3, YMM2, 1 | |
| vextractf128 XMM5, YMM4, 1 | |
| mov RCX, R10 | |
| add RCX, RDI | |
| vmovups XMMWORD PTR [RDX + 16], XMM5 | |
| vmovups XMMWORD PTR [RDX], XMM4 | |
| vmovups XMMWORD PTR [RDX + 48], XMM3 | |
| vmovups XMMWORD PTR [RDX + 32], XMM2 | |
| jne -141 | |
| mov RDX, R10 | |
| sub RAX, RDX | |
| je 37 | |
| Source line: 41 | |
| lea RCX, QWORD PTR [R9 + 4*RDX] | |
| lea RDX, QWORD PTR [R8 + 4*RDX] | |
| Source line: 3 | |
| vmulss XMM1, XMM0, DWORD PTR [RDX] | |
| vaddss XMM1, XMM1, DWORD PTR [RCX] | |
| vmovss DWORD PTR [RCX], XMM1 | |
| Source line: 41 | |
| add RCX, 4 | |
| add RDX, 4 | |
| dec RAX | |
| jne -29 | |
| Source line: 48 | |
| pop RBP | |
| vzeroupper | |
| ret | |
| Source line: 33 | |
| movabs RAX, 139833603693952 | |
| mov RDI, QWORD PTR [RAX] | |
| movabs RAX, 139833588617104 | |
| mov ESI, 33 | |
| call RAX | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment