Last active
October 6, 2017 00:29
-
-
Save yoffy/72e2f6fedca8de5651a5be48ed723393 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # clang++-4.0 -I/home/yoffy/src/libiqo/include -I/home/yoffy/src/libiqo/src/../include -Wall -Wextra -Wconversion -Wno-sign-conversion -std=c++98 -fno-exceptions -Wall -Wextra -Wconversion -Wno-sign-conversion -Ofast -march=core2 -msse4.1 -mtune=westmere -S -mllvm --x86-asm-syntax=intel /home/yoffy/src/libiqo/src/IQOLanczosResizerImpl_SSE4_1.cpp | |
# ---------------------------------------------------------------------------
# Excerpt of clang output (Intel operand order): one outer loop (.LBB7_11)
# wrapping one inner loop (.LBB7_12). The function prologue/epilogue are not
# part of this excerpt, so the meaning of the stack slots and of rsi / rbx /
# r12 / r15 on entry is only what their uses below show.
#
# Per outer iteration, as visible in this listing:
#   1. load 8 dword values from [slot(rsp-16) + 4*r15] into xmm1 / xmm2
#   2. inner loop (r13d counts down from ebx): add a running offset (ebp) to
#      those 8 values, use the sums as element indices into the table of
#      single-precision values at rsi, multiply the 8 gathered values by the
#      8 values read through rdi, and accumulate into xmm0 / xmm3
#   3. round, convert and saturate the 8 sums to unsigned bytes and store
#      8 bytes at [slot(rsp-48) + r15]
#   4. r15 += 8; repeat while r15 < slot(rsp-32) (signed compare)
# NOTE(review): the inner loop is bottom-tested and no zero-trip guard for ebx
# appears in this excerpt -- presumably it is checked before the loop; confirm.
# ---------------------------------------------------------------------------
| .LBB7_11: # =>This Loop Header: Depth=1 | |
| # Child Loop BB7_12 Depth 2 | |
# xmm1 / xmm2 = the 8 base dword indices for this group of 8 outputs
# (unaligned loads).
| mov rax, qword ptr [rsp - 16] # 8-byte Reload | |
| movdqu xmm1, xmmword ptr [rax + 4*r15] | |
| movdqu xmm2, xmmword ptr [rax + 4*r15 + 16] | |
# rdi = slot(rsp-8) + 4*r12: cursor for the mulps memory operands below,
# which are read at [rdi - 16] and [rdi].
| mov rax, qword ptr [rsp - 8] # 8-byte Reload | |
| lea rdi, [rax + 4*r12] | |
# xmm0 / xmm3 = accumulators for the low / high four outputs, cleared here.
| xorps xmm0, xmm0 | |
# ebp = starting offset that the inner loop adds to every index.
| mov ebp, dword ptr [rsp - 36] # 4-byte Reload | |
# rbx is overwritten inside the inner loop (movq rbx, xmm4), so keep a copy
# in r11; it is restored just before the outer-loop branch.
| mov r11, rbx | |
# r13d = inner-loop trip counter, starts at ebx.
| mov r13d, ebx | |
| xorps xmm3, xmm3 | |
| .p2align 4, 0x90 | |
| .LBB7_12: # Parent Loop BB7_11 Depth=1 | |
| # => This Inner Loop Header: Depth=2 | |
# Broadcast the current offset and add it to the 8 base indices:
# xmm5 = offset + xmm1, xmm4 = offset + xmm2.
| movd xmm4, ebp | |
| pshufd xmm4, xmm4, 0 # xmm4 = xmm4[0,0,0,0] | |
| movdqa xmm5, xmm4 | |
| paddd xmm5, xmm1 | |
| paddd xmm4, xmm2 | |
# Split xmm5 into four sign-extended 64-bit indices:
# r10 = lane 0, r8 = lane 1, r14 = lane 2, r9 = lane 3.
| movq r8, xmm5 | |
| pextrq r9, xmm5, 1 | |
| movsxd r10, r8d | |
| sar r8, 32 | |
| movsxd r14, r9d | |
| sar r9, 32 | |
# Gather four single-precision values from the table at rsi into xmm5.
| movss xmm5, dword ptr [rsi + 4*r10] # xmm5 = mem[0],zero,zero,zero | |
| insertps xmm5, dword ptr [rsi + 4*r8], 16 # xmm5 = xmm5[0],mem[0],xmm5[2,3] | |
| insertps xmm5, dword ptr [rsi + 4*r14], 32 # xmm5 = xmm5[0,1],mem[0],xmm5[3] | |
| insertps xmm5, dword ptr [rsi + 4*r9], 48 # xmm5 = xmm5[0,1,2],mem[0] | |
# Same for xmm4: rdx = lane 0, rbx = lane 1, rax = lane 2, rcx = lane 3.
| movq rbx, xmm4 | |
| pextrq rcx, xmm4, 1 | |
| movsxd rdx, ebx | |
| sar rbx, 32 | |
| movsxd rax, ecx | |
| sar rcx, 32 | |
| movss xmm4, dword ptr [rsi + 4*rdx] # xmm4 = mem[0],zero,zero,zero | |
| insertps xmm4, dword ptr [rsi + 4*rbx], 16 # xmm4 = xmm4[0],mem[0],xmm4[2,3] | |
| insertps xmm4, dword ptr [rsi + 4*rax], 32 # xmm4 = xmm4[0,1],mem[0],xmm4[3] | |
| insertps xmm4, dword ptr [rsi + 4*rcx], 48 # xmm4 = xmm4[0,1,2],mem[0] | |
# Multiply the gathered values by the 8 values at [rdi - 16 .. rdi + 15] and
# accumulate. Non-VEX mulps with a memory operand requires that operand to be
# 16-byte aligned.
| mulps xmm5, xmmword ptr [rdi - 16] | |
| mulps xmm4, xmmword ptr [rdi] | |
| addps xmm0, xmm5 | |
| addps xmm3, xmm4 | |
# Next step: advance rdi by 8 floats, bump the index offset, count down.
| add rdi, 32 | |
| inc ebp | |
| dec r13d | |
| jne .LBB7_12 | |
| # BB#13: # in Loop: Header=BB7_11 Depth=1 | |
# Advance r12 (the element index used to form rdi) by slot(rsp-24).
| add r12, qword ptr [rsp - 24] # 8-byte Folded Reload | |
# roundps imm8 = 8: round to nearest (even), precision exception suppressed.
# xmm1 is free to reuse here; it is reloaded at the top of the outer loop.
| roundps xmm0, xmm0, 8 | |
| roundps xmm1, xmm3, 8 | |
| cvtps2dq xmm0, xmm0 | |
| cvtps2dq xmm1, xmm1 | |
# 8 x int32 -> 8 x uint16 -> 8 x uint8, both steps with unsigned saturation;
# the 8 result bytes end up in the low qword of xmm0.
| packusdw xmm0, xmm1 | |
| packuswb xmm0, xmm0 | |
# Store the 8 output bytes at [slot(rsp-48) + r15].
| mov rax, qword ptr [rsp - 48] # 8-byte Reload | |
| movq qword ptr [rax + r15], xmm0 | |
# Wrap r12 back to 0 when it equals slot(rsp-56). `mov eax, 0` is used rather
# than `xor` because the flags from the cmp must survive until the cmove.
| cmp r12, qword ptr [rsp - 56] # 8-byte Folded Reload | |
| mov eax, 0 | |
| cmove r12, rax | |
# Next group of 8 outputs; loop while r15 < slot(rsp-32) (signed).
| add r15, 8 | |
| cmp r15, qword ptr [rsp - 32] # 8-byte Folded Reload | |
# Restore rbx from the copy saved in r11 (mov does not modify flags).
| mov rbx, r11 | |
| jl .LBB7_11 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ; "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.11.25503\bin\HostX86\x64\CL.exe" /c /IC:\Users\yoffy\src\libiqo\include /IC:\Users\yoffy\src\libiqo\src\..\include /Zi /nologo /W3 /WX- /Ox /Ob2 /D WIN32 /D _WINDOWS /D NDEBUG /D "CMAKE_INTDIR=\"Release\"" /D _MBCS /Gm- /EHsc /MT /GS- /fp:fast /Zc:wchar_t /Zc:forScope /Zc:inline /GR /Gd /TP /FAsu C:\Users\yoffy\src\libiqo\src\IQOLanczosResizerImpl_SSE4_1.cpp | |
; ---------------------------------------------------------------------------
; Excerpt of MSVC output: one outer loop ($LL4) wrapping one inner loop
; ($LL7). The function prologue/epilogue are not part of this excerpt, so the
; values of xmm7, r13, r14, ebx, rcx, rdi, r11, r15 and the spill slots
; coefs$1$ / tv965 on entry are only what their uses below show.
;
; Per outer iteration, as visible in this listing:
;   1. load 8 dword values from [r11] and [r11 + rax] into xmm8 / xmm9
;   2. if ebx > 0, inner loop for ebp = 0 .. ebx-1: add (ebp - ebx/2) to
;      those 8 values, use the sums as element indices into the table of
;      single-precision values at r14, multiply the 8 gathered values by the
;      8 values at [rsi .. rsi+31], and accumulate into xmm5 / xmm6
;   3. round, convert and saturate the 8 sums to unsigned bytes and store
;      8 bytes at [rdi]
;   4. advance r11 by 32 and rdi by 8; repeat while --r15 != 0
; ---------------------------------------------------------------------------
| $LL4@resizeXmai: | |
; xmm8 = first four base dword indices (unaligned load).
| movdqu xmm8, XMMWORD PTR [r11] | |
; ebp = inner-loop counter, starts at 0.
| xor ebp, ebp | |
; xmm5 / xmm6 = accumulators, initialised from xmm7.
; NOTE(review): xmm7 is set outside this excerpt -- presumably zero; confirm.
| movaps xmm5, xmm7 | |
| movaps xmm6, xmm7 | |
; xmm9 = next four base dword indices; rax holds the byte offset reloaded
; from tv965 at the bottom of the outer loop.
; NOTE(review): presumably 16 given r11 advances by 32 per iteration -- confirm.
| movdqu xmm9, XMMWORD PTR [r11+rax] | |
; Skip the inner loop entirely when ebx <= 0 (signed).
| test ebx, ebx | |
| jle $LN6@resizeXmai | |
; rsi = r12 + rcx*4: cursor for the mulps memory operands ([rsi], [rsi+16]).
; r12 is overwritten below and reloaded from coefs$1$ after the inner loop.
| lea rsi, QWORD PTR [r12+rcx*4] | |
; eax = -(ebx / 2), signed division truncating toward zero
; (cdq / sub / sar idiom), kept in r12d as the first index offset.
| mov eax, ebx | |
| cdq | |
; rcx += 8*rbx: the inner loop consumes 32 bytes (8 floats) through rsi per
; iteration, ebx iterations in total, so rcx is advanced up front.
| lea rcx, QWORD PTR [rcx+rbx*8] | |
| sub eax, edx | |
| sar eax, 1 | |
| neg eax | |
| mov r12d, eax | |
| npad 1 | |
| $LL7@resizeXmai: | |
; edx = current index offset (r12d + ebp); broadcast it and add it to the
; base indices: xmm0 = offset + xmm8, xmm1 = offset + xmm9.
| lea edx, DWORD PTR [r12+rbp] | |
| inc ebp | |
| movd xmm1, edx | |
| pshufd xmm1, xmm1, 0 | |
| movdqa xmm0, xmm1 | |
| paddd xmm1, xmm9 | |
| paddd xmm0, xmm8 | |
; Pull the index lanes out as qwords: r8 / rdx = low / high half of xmm0,
; r10 / r9 = low / high half of xmm1.
| movq r10, xmm1 | |
| pextrq rdx, xmm0, 1 | |
| movq r8, xmm0 | |
; First group (xmm0 lanes). Each odd lane is isolated with shr 32 and then
; sign-extended with cdqe; each even lane is sign-extended with movsxd.
; xmm4 = table[lane 3]
| mov rax, rdx | |
| shr rax, 32 ; 00000020H | |
| cdqe | |
| pextrq r9, xmm1, 1 | |
| movss xmm4, DWORD PTR [r14+rax*4] | |
; xmm3 = table[lane 2]
| movsxd rax, edx | |
| movss xmm3, DWORD PTR [r14+rax*4] | |
; xmm0 = table[lane 1]
| mov rax, r8 | |
| shr rax, 32 ; 00000020H | |
| cdqe | |
| movss xmm0, DWORD PTR [r14+rax*4] | |
; xmm2 = table[lane 0]
| movsxd rax, r8d | |
| movss xmm2, DWORD PTR [r14+rax*4] | |
; Assemble xmm2 = [lane0, lane1, lane2, lane3] with register-form insertps
; (imm8 16 / 32 / 48 = source element 0 into destination element 1 / 2 / 3),
; interleaved with the start of the second group's lane-3 index (r9 >> 32).
| mov rax, r9 | |
| insertps xmm2, xmm0, 16 | |
| insertps xmm2, xmm3, 32 ; 00000020H | |
| shr rax, 32 ; 00000020H | |
| insertps xmm2, xmm4, 48 ; 00000030H | |
; Multiply by the four values at [rsi] and accumulate into xmm5.
; Non-VEX mulps with a memory operand requires 16-byte alignment.
| mulps xmm2, XMMWORD PTR [rsi] | |
; Second group (xmm1 lanes): xmm4 = table[lane 3]
| cdqe | |
| movss xmm4, DWORD PTR [r14+rax*4] | |
| addps xmm5, xmm2 | |
; xmm3 = table[lane 2]
| movsxd rax, r9d | |
| movss xmm3, DWORD PTR [r14+rax*4] | |
; xmm0 = table[lane 1]
| mov rax, r10 | |
| shr rax, 32 ; 00000020H | |
| cdqe | |
| movss xmm0, DWORD PTR [r14+rax*4] | |
; xmm2 = table[lane 0]
| movsxd rax, r10d | |
| movss xmm2, DWORD PTR [r14+rax*4] | |
| insertps xmm2, xmm0, 16 | |
| insertps xmm2, xmm3, 32 ; 00000020H | |
| insertps xmm2, xmm4, 48 ; 00000030H | |
; Multiply by the four values at [rsi+16], accumulate into xmm6, and advance
; rsi by 8 floats.
| mulps xmm2, XMMWORD PTR [rsi+16] | |
| add rsi, 32 ; 00000020H | |
| addps xmm6, xmm2 | |
; Loop while ebp < ebx (signed).
| cmp ebp, ebx | |
| jl $LL7@resizeXmai | |
; r12 was used as a temporary above; reload it from its spill slot.
| mov r12, QWORD PTR coefs$1$[rsp] | |
| $LN6@resizeXmai: | |
; rax = 0, the wrap value for the cmovne below (set before the cmp, so
; clobbering flags here is harmless).
| xor eax, eax | |
; Advance the index cursor past the 8 dwords consumed this iteration.
| add r11, 32 ; 00000020H | |
; roundps imm8 = 8: round to nearest (even), precision exception suppressed.
| roundps xmm0, xmm6, 8 | |
| roundps xmm1, xmm5, 8 | |
| cvtps2dq xmm2, xmm0 | |
| cvtps2dq xmm3, xmm1 | |
; 8 x int32 -> 8 x uint16 -> 8 x uint8, both with unsigned saturation.
; Low four results come from xmm5 (xmm8 indices), high four from xmm6 (xmm9).
| packusdw xmm3, xmm2 | |
| packuswb xmm3, xmm3 | |
; Store the 8 output bytes and advance the output cursor.
| movq QWORD PTR [rdi], xmm3 | |
| add rdi, 8 | |
; rcx = (rcx != r13) ? rcx : 0 -- wrap rcx back to 0 when it reaches r13.
| cmp rcx, r13 | |
| cmovne rax, rcx | |
| mov rcx, rax | |
; Reload the byte offset used by the xmm9 load at the top of the loop.
| mov rax, QWORD PTR tv965[rsp] | |
; r15 = remaining outer iterations; loop until it reaches 0.
| sub r15, 1 | |
| jne $LL4@resizeXmai |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment