Last active
October 6, 2017 00:27
-
-
Save yoffy/f6ec447b085c7cdd42751c2ddb2e8178 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ; clang++-4.0 -I/home/yoffy/src/libiqo/include -I/home/yoffy/src/libiqo/src/../include -Wall -Wextra -Wconversion -Wno-sign-conversion -std=c++98 -fno-exceptions -Wall -Wextra -Wconversion -Wno-sign-conversion -Ofast -march=core2 -msse4.1 -mtune=westmere -S -mllvm --x86-asm-syntax=intel /home/yoffy/src/libiqo/src/IQOAreaResizerImpl_SSE4_1.cpp | |
# ---------------------------------------------------------------------------
# clang listing, outer loop .LBB5_2 (Intel syntax, compiler-generated excerpt).
# Each pass gathers floats through 16 indices, multiply-accumulates them over
# r14d inner iterations, converts the 16 sums to saturated bytes and stores
# them at [r10 + r15].
#
# Register roles as used in this excerpt (all are set up outside it):
#   r15            element offset of the current 16-wide group, +16 per pass
#   r8             base of the 32-bit index table read at r8 + 4*(r15 + 0/4/8/12)
#   rsi            base of the 32-bit floats gathered through those indices
#   r9             pointer into the float multipliers: [rsp-48] + 4*rbx
#                  (presumably the coefficient table - confirm against the C++)
#   rbx            running multiplier offset; wraps to 0 when it equals [rsp-64]
#   r14d           inner-loop trip count
#   xmm8           value added to every index vector per inner iteration
#   xmm11,12,10,9  accumulators for lanes 0-3, 4-7, 8-11, 12-15
# ---------------------------------------------------------------------------
| .LBB5_2: # =>This Loop Header: Depth=1 | |
| # Child Loop BB5_4 Depth 2 | |
# Skip the inner loop entirely when the trip count is <= 0 (signed).
| test r14d, r14d | |
| jle .LBB5_6 | |
| # BB#3: # in Loop: Header=BB5_2 Depth=1 | |
# Load the four index vectors (4 x int32 each) for this group. The compiler
# forms r15+4 / +8 / +12 with OR, which matches addition only if those low
# bits of r15 are clear - NOTE(review): r15's start value is not visible here.
| movdqu xmm5, xmmword ptr [r8 + 4*r15] | |
| mov rbp, r15 | |
| or rbp, 4 | |
| movdqu xmm6, xmmword ptr [r8 + 4*rbp] | |
| mov rbp, r15 | |
| or rbp, 8 | |
| movdqu xmm7, xmmword ptr [r8 + 4*rbp] | |
| mov rbp, r15 | |
| or rbp, 12 | |
| movdqu xmm0, xmmword ptr [r8 + 4*rbp] | |
# r9 = spilled base pointer + 4*rbx; the inner loop reads [r9-48 .. r9+15].
| mov rax, qword ptr [rsp - 48] # 8-byte Reload | |
| lea r9, [rax + 4*rbx] | |
# Clear the four accumulators and copy the trip count into r11d.
| xorps xmm11, xmm11 | |
| mov r11d, r14d | |
| xorps xmm12, xmm12 | |
| xorps xmm10, xmm10 | |
| xorps xmm9, xmm9 | |
| .p2align 4, 0x90 | |
| .LBB5_4: # Parent Loop BB5_2 Depth=1 | |
| # => This Inner Loop Header: Depth=2 | |
# Gather lanes 0-3: split xmm5 into four sign-extended 32-bit indices and
# assemble xmm1 from rsi[idx0..idx3] with movss + three insertps.
| movq rbp, xmm5 | |
| pextrq rdi, xmm5, 1 | |
| movsxd r12, ebp | |
| sar rbp, 32 | |
| movsxd r13, edi | |
| sar rdi, 32 | |
| movss xmm1, dword ptr [rsi + 4*r12] # xmm1 = mem[0],zero,zero,zero | |
| insertps xmm1, dword ptr [rsi + 4*rbp], 16 # xmm1 = xmm1[0],mem[0],xmm1[2,3] | |
| insertps xmm1, dword ptr [rsi + 4*r13], 32 # xmm1 = xmm1[0,1],mem[0],xmm1[3] | |
| insertps xmm1, dword ptr [rsi + 4*rdi], 48 # xmm1 = xmm1[0,1,2],mem[0] | |
# Gather lanes 4-7 from the indices in xmm6 into xmm2.
| movq rdi, xmm6 | |
| pextrq rbp, xmm6, 1 | |
| movsxd rax, edi | |
| sar rdi, 32 | |
| movsxd rdx, ebp | |
| sar rbp, 32 | |
| movss xmm2, dword ptr [rsi + 4*rax] # xmm2 = mem[0],zero,zero,zero | |
| insertps xmm2, dword ptr [rsi + 4*rdi], 16 # xmm2 = xmm2[0],mem[0],xmm2[2,3] | |
| insertps xmm2, dword ptr [rsi + 4*rdx], 32 # xmm2 = xmm2[0,1],mem[0],xmm2[3] | |
| insertps xmm2, dword ptr [rsi + 4*rbp], 48 # xmm2 = xmm2[0,1,2],mem[0] | |
# Gather lanes 8-11 from the indices in xmm7 into xmm3.
| movq rax, xmm7 | |
| pextrq rdx, xmm7, 1 | |
| movsxd rdi, eax | |
| sar rax, 32 | |
| movsxd rbp, edx | |
| sar rdx, 32 | |
| movss xmm3, dword ptr [rsi + 4*rdi] # xmm3 = mem[0],zero,zero,zero | |
| insertps xmm3, dword ptr [rsi + 4*rax], 16 # xmm3 = xmm3[0],mem[0],xmm3[2,3] | |
| insertps xmm3, dword ptr [rsi + 4*rbp], 32 # xmm3 = xmm3[0,1],mem[0],xmm3[3] | |
| insertps xmm3, dword ptr [rsi + 4*rdx], 48 # xmm3 = xmm3[0,1,2],mem[0] | |
# Gather lanes 12-15 from the indices in xmm0 into xmm4.
| movq rax, xmm0 | |
| pextrq rdx, xmm0, 1 | |
| movsxd rdi, eax | |
| sar rax, 32 | |
| movsxd rbp, edx | |
| sar rdx, 32 | |
| movss xmm4, dword ptr [rsi + 4*rdi] # xmm4 = mem[0],zero,zero,zero | |
| insertps xmm4, dword ptr [rsi + 4*rax], 16 # xmm4 = xmm4[0],mem[0],xmm4[2,3] | |
| insertps xmm4, dword ptr [rsi + 4*rbp], 32 # xmm4 = xmm4[0,1],mem[0],xmm4[3] | |
| insertps xmm4, dword ptr [rsi + 4*rdx], 48 # xmm4 = xmm4[0,1,2],mem[0] | |
# Multiply the 16 gathered floats by the 16 floats at r9-48 .. r9+15.
# mulps with a memory operand requires that address to be 16-byte aligned.
| mulps xmm1, xmmword ptr [r9 - 48] | |
| mulps xmm2, xmmword ptr [r9 - 32] | |
| mulps xmm3, xmmword ptr [r9 - 16] | |
| mulps xmm4, xmmword ptr [r9] | |
# Accumulate the products.
| addps xmm11, xmm1 | |
| addps xmm12, xmm2 | |
| addps xmm10, xmm3 | |
| addps xmm9, xmm4 | |
# Step all 16 indices by xmm8 and move on to the next 16 multipliers.
| paddd xmm5, xmm8 | |
| paddd xmm6, xmm8 | |
| paddd xmm7, xmm8 | |
| paddd xmm0, xmm8 | |
| add r9, 64 | |
| dec r11d | |
| jne .LBB5_4 | |
| # BB#5: # in Loop: Header=BB5_2 Depth=1 | |
# Inner loop ran: advance rbx by the spilled stride at [rsp-40]. The skip
# path below leaves rbx unchanged.
| add rbx, qword ptr [rsp - 40] # 8-byte Folded Reload | |
| jmp .LBB5_7 | |
| .p2align 4, 0x90 | |
| .LBB5_6: # in Loop: Header=BB5_2 Depth=1 | |
# Trip count <= 0: the sums are all zero.
| xorps xmm9, xmm9 | |
| xorps xmm10, xmm10 | |
| xorps xmm12, xmm12 | |
| xorps xmm11, xmm11 | |
| .LBB5_7: # in Loop: Header=BB5_2 Depth=1 | |
# Convert the 16 float sums to bytes: roundps imm 8 rounds to nearest with the
# precision exception suppressed, cvtps2dq converts to int32, packusdw and
# packuswb narrow with unsigned saturation (values clamp to 0..255).
# xmm0 (the fourth index vector) is dead here and reused as scratch.
| roundps xmm0, xmm11, 8 | |
| roundps xmm1, xmm12, 8 | |
| cvtps2dq xmm0, xmm0 | |
| cvtps2dq xmm1, xmm1 | |
| packusdw xmm0, xmm1 | |
| packuswb xmm0, xmm0 | |
| roundps xmm1, xmm10, 8 | |
| roundps xmm2, xmm9, 8 | |
| cvtps2dq xmm1, xmm1 | |
| cvtps2dq xmm2, xmm2 | |
| packusdw xmm1, xmm2 | |
| packuswb xmm1, xmm1 | |
# Join bytes 0-7 and 8-15 and store all 16 with an unaligned store.
| punpcklqdq xmm0, xmm1 # xmm0 = xmm0[0],xmm1[0] | |
| movdqu xmmword ptr [r10 + r15], xmm0 | |
# Wrap rbx to 0 when it reaches the limit at [rsp-64]. The zero is loaded with
# mov rather than xor because mov leaves the flags from cmp intact for cmove.
| cmp rbx, qword ptr [rsp - 64] # 8-byte Folded Reload | |
| mov ebp, 0 | |
| cmove rbx, rbp | |
# Next group of 16; continue while r15 < [rsp-24] (signed compare).
| add r15, 16 | |
| cmp r15, qword ptr [rsp - 24] # 8-byte Folded Reload | |
| jl .LBB5_2 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.11.25503\bin\HostX86\x64\CL.exe" /c /IC:\Users\yoffy\src\libiqo\include /IC:\Users\yoffy\src\libiqo\src\..\include /Zi /nologo /W3 /WX- /Ox /Ob2 /D WIN32 /D _WINDOWS /D NDEBUG /D "CMAKE_INTDIR=\"Release\"" /D _MBCS /Gm- /EHsc /MT /GS- /fp:fast /Zc:wchar_t /Zc:forScope /Zc:inline /GR /Gd /TP /FAu C:\Users\yoffy\src\libiqo\src\IQOAreaResizerImpl_SSE4_1.cpp | |
; ---------------------------------------------------------------------------
; MSVC listing, outer loop $LL4 (compiler-generated excerpt).
; Each pass gathers floats through 16 indices, multiply-accumulates them over
; r8d inner iterations, converts the 16 sums to saturated bytes and stores
; them at [r13].
;
; Register roles as used in this excerpt (all are set up outside it):
;   rbp      points into the 32-bit index table; reads [rbp-32 .. rbp+31],
;            then advances by 64 bytes per pass
;   r12      base of the 32-bit floats gathered through those indices
;   rsi      float multiplier base (reloaded from the coefs$1$ stack slot)
;   r14      running float offset into that table; reset to 0 when it reaches
;            the value kept in tv3026
;   r8d      inner-loop trip count (reloaded from the numCoefsX$1$ slot)
;   r13      output pointer between passes (mirrored in tv3180); reused as the
;            multiplier pointer inside the inner loop
;   xmm5     initial accumulator value - presumably zero, TODO confirm
;   xmm6-9   accumulators for lanes 0-3, 4-7, 8-11, 12-15
;   xmm11-14 the four index vectors
; ---------------------------------------------------------------------------
| $LL4@resizeXmai: | |
; Initialise the four accumulators from xmm5.
| movaps xmm6, xmm5 | |
| movaps xmm7, xmm5 | |
| movaps xmm8, xmm5 | |
| movaps xmm9, xmm5 | |
; Load the 16 indices for this group.
| movdqu xmm11, XMMWORD PTR [rbp-32] | |
| movdqu xmm12, XMMWORD PTR [rbp-16] | |
| movdqu xmm13, XMMWORD PTR [rbp] | |
| movdqu xmm14, XMMWORD PTR [rbp+16] | |
; Skip the inner loop when the trip count is <= 0 (signed).
| test r8d, r8d | |
| jle $LN6@resizeXmai | |
; xmm10 = per-iteration index step; the constant's name encodes four 32-bit 1s.
; r13   = rsi + 4*(r14 + 8), so [r13-32 .. r13+31] covers floats r14 .. r14+15.
; r14  += 16 * r8d (its value after the inner loop); r15 = trip counter.
| movdqa xmm10, XMMWORD PTR __xmm@00000001000000010000000100000001 | |
| lea r13, QWORD PTR [r14+8] | |
| mov eax, r8d | |
| lea r13, QWORD PTR [rsi+r13*4] | |
| shl rax, 4 | |
| add r14, rax | |
| mov r15d, r8d | |
| npad 1 | |
| $LL7@resizeXmai: | |
; Move all eight 64-bit halves of the index vectors into general registers.
; This overwrites r8 and rsi, which are reloaded from the stack after the loop.
| pextrq rdx, xmm11, 1 | |
| movq r8, xmm11 | |
| movq r10, xmm12 | |
| pextrq r9, xmm12, 1 | |
| pextrq r11, xmm13, 1 | |
| movq rbx, xmm13 | |
; Lanes 0-3 (indices from xmm11): fetch lanes 3 and 2 into xmm4 / xmm0, build
; lanes 0 and 1 in xmm2, then merge with insertps. Interleaved with this, the
; compiler steps xmm12 by xmm10 (its halves are already in r10 / r9) and
; extracts the halves of xmm14 into rsi / rdi.
| mov rax, rdx | |
| shr rax, 32 ; 00000020H | |
| movq rsi, xmm14 | |
| movsxd rcx, eax | |
| movdqa xmm1, xmm10 | |
| movsxd rax, edx | |
| paddd xmm1, xmm12 | |
| pextrq rdi, xmm14, 1 | |
| movdqa xmm12, xmm1 | |
| movss xmm4, DWORD PTR [r12+rcx*4] | |
| movss xmm0, DWORD PTR [r12+rax*4] | |
| mov rax, r8 | |
| shr rax, 32 ; 00000020H | |
| movsxd rcx, eax | |
| movsxd rax, r8d | |
| movss xmm2, DWORD PTR [r12+rax*4] | |
| mov rax, r9 | |
| insertps xmm2, DWORD PTR [r12+rcx*4], 16 | |
| insertps xmm2, xmm0, 32 ; 00000020H | |
| insertps xmm2, xmm4, 48 ; 00000030H; Line 375 | |
; xmm6 += gathered lanes 0-3 * multipliers 0-3. The index split for the next
; group (lanes 4-7, from r9 / r10) is scheduled around the multiply.
| mulps xmm2, XMMWORD PTR [r13-32] | |
| shr rax, 32 ; 00000020H | |
| movsxd rcx, eax | |
| movsxd rax, r9d | |
| addps xmm6, xmm2 | |
| movss xmm4, DWORD PTR [r12+rcx*4] | |
| movss xmm0, DWORD PTR [r12+rax*4] | |
| mov rax, r10 | |
| shr rax, 32 ; 00000020H | |
| movsxd rcx, eax | |
| movsxd rax, r10d | |
| movss xmm2, DWORD PTR [r12+rax*4] | |
| mov rax, r11 | |
| insertps xmm2, DWORD PTR [r12+rcx*4], 16 | |
| insertps xmm2, xmm0, 32 ; 00000020H | |
| insertps xmm2, xmm4, 48 ; 00000030H | |
; xmm7 += gathered lanes 4-7 * multipliers 4-7; then lanes 8-11 from r11 / rbx.
| mulps xmm2, XMMWORD PTR [r13-16] | |
| shr rax, 32 ; 00000020H | |
| movsxd rcx, eax | |
| movsxd rax, r11d | |
| addps xmm7, xmm2 | |
| movss xmm4, DWORD PTR [r12+rcx*4] | |
| movss xmm0, DWORD PTR [r12+rax*4] | |
| mov rax, rbx | |
| shr rax, 32 ; 00000020H | |
| movsxd rcx, eax | |
| movsxd rax, ebx | |
| movss xmm2, DWORD PTR [r12+rax*4] | |
| mov rax, rdi | |
| insertps xmm2, DWORD PTR [r12+rcx*4], 16 | |
| insertps xmm2, xmm0, 32 ; 00000020H | |
| insertps xmm2, xmm4, 48 ; 00000030H | |
; xmm8 += gathered lanes 8-11 * multipliers 8-11; then lanes 12-15 from rdi / rsi.
| mulps xmm2, XMMWORD PTR [r13] | |
| shr rax, 32 ; 00000020H | |
| movsxd rcx, eax | |
| movsxd rax, edi | |
| addps xmm8, xmm2 | |
| movss xmm4, DWORD PTR [r12+rcx*4] | |
| movss xmm0, DWORD PTR [r12+rax*4] | |
| mov rax, rsi | |
| shr rax, 32 ; 00000020H | |
| movsxd rcx, eax | |
| movsxd rax, esi | |
| movss xmm2, DWORD PTR [r12+rax*4] | |
| insertps xmm2, DWORD PTR [r12+rcx*4], 16 | |
| insertps xmm2, xmm0, 32 ; 00000020H | |
; Step the remaining index vectors (xmm11, xmm13, xmm14) by xmm10 while
; finishing xmm9 += gathered lanes 12-15 * multipliers 12-15. xmm0 is free for
; the index arithmetic once it has been inserted into xmm2 above.
| movdqa xmm0, xmm10 | |
| paddd xmm0, xmm11 | |
| movdqa xmm11, xmm0 | |
| movdqa xmm0, xmm10 | |
| insertps xmm2, xmm4, 48 ; 00000030H | |
| mulps xmm2, XMMWORD PTR [r13+16] | |
| paddd xmm0, xmm13 | |
| movdqa xmm13, xmm0 | |
| addps xmm9, xmm2 | |
| movdqa xmm1, xmm10 | |
; Advance to the next 16 multipliers and count down the trip counter.
| add r13, 64 ; 00000040H | |
| paddd xmm1, xmm14 | |
| movdqa xmm14, xmm1 | |
| sub r15, 1 | |
| jne $LL7@resizeXmai | |
; Restore the registers the inner loop used as scratch (r13, rsi, r8d) and
; load the comparison value for r14 into rcx.
| mov r13, QWORD PTR tv3180[rsp] | |
| mov rsi, QWORD PTR coefs$1$[rsp] | |
| mov r8d, DWORD PTR numCoefsX$1$[rsp] | |
| mov rcx, QWORD PTR tv3026[rsp] | |
| $LN6@resizeXmai: | |
; rax = 0 is the value r14 takes if the offset has reached rcx (see cmovne).
; rbp moves on to the next 16 indices.
| xor eax, eax | |
| add rbp, 64 ; 00000040H | |
; Convert the 16 float sums to bytes: roundps imm 8 rounds to nearest with the
; precision exception suppressed, cvtps2dq converts to int32, packusdw and
; packuswb narrow with unsigned saturation (values clamp to 0..255).
; xmm4 receives bytes 0-7 (from xmm6, xmm7), xmm3 bytes 8-15 (from xmm8, xmm9).
| roundps xmm0, xmm7, 8 | |
| cvtps2dq xmm2, xmm0 | |
| roundps xmm1, xmm6, 8 | |
| cvtps2dq xmm4, xmm1 | |
| roundps xmm0, xmm9, 8 | |
| packusdw xmm4, xmm2 | |
| roundps xmm1, xmm8, 8 | |
| cvtps2dq xmm2, xmm0 | |
| cvtps2dq xmm3, xmm1 | |
| packuswb xmm4, xmm4 | |
| packusdw xmm3, xmm2 | |
| packuswb xmm3, xmm3 | |
; Join both halves, store 16 bytes unaligned and bump the output pointer.
| punpcklqdq xmm4, xmm3 | |
| movdqu XMMWORD PTR [r13], xmm4 | |
| add r13, 16 | |
; r14 = (r14 != rcx) ? r14 : 0. The store of r13 sits between cmp and cmovne;
; mov does not modify flags, so the comparison result is still valid.
| cmp r14, rcx | |
| mov QWORD PTR tv3180[rsp], r13 | |
| cmovne rax, r14 | |
; Decrement the outer counter kept in tv3017. The following mov leaves the
; flags from sub untouched, so jne loops until the counter reaches zero.
| sub QWORD PTR tv3017[rsp], 1 | |
| mov r14, rax | |
| jne $LL4@resizeXmai |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment