Created
August 15, 2015 07:12
-
-
Save goldshtn/2c12eb8fb16b1698f5b3 to your computer and use it in GitHub Desktop.
Dot product of shorts and floats: C# vs. C++
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; C#, RyuJIT, release, x64
; Inner loop of the dot product: one element per iteration,
;   sum += (float)a[i] * b[i]
; Register roles, as used by the instructions below:
;   eax = i, rcx = a, rdx = b, r8d = a.Length, xmm0 = sum, r9/xmm1 = scratch
; NOTE(review): the +10h displacement is presumably the offset of element 0
; past the managed array header -- confirm against the runtime's array layout.
00007ffe`fc6808ec 4c63c8 movsxd r9,eax ; loop: r9 = (long)i, sign-extended for use as an index
00007ffe`fc6808ef 4e0fbf4c4910 movsx r9,word ptr [rcx+r9*2+10h] ; r9 = a[i] (16-bit load, sign-extended; overwrites the index)
00007ffe`fc6808f5 f3410f2ac9 cvtsi2ss xmm1,r9d ; xmm1 = (float)a[i]
00007ffe`fc6808fa 4c63c8 movsxd r9,eax ; r9 = (long)i again, because r9 was just reused for a[i]
00007ffe`fc6808fd f3420f594c8a10 mulss xmm1,dword ptr [rdx+r9*4+10h] ; xmm1 *= b[i]
00007ffe`fc680904 f30f58c1 addss xmm0,xmm1 ; sum += xmm1
00007ffe`fc680908 ffc0 inc eax ; ++i
00007ffe`fc68090a 443bc0 cmp r8d,eax ; compare a.Length with i (signed)
00007ffe`fc68090d 7fdd jg 00007ffe`fc6808ec ; if a.Length > i, goto loop
00007ffe`fc68090f eb2c jmp 00007ffe`fc68093d ; otherwise goto done (target lies outside this listing)
; VC++ 2015, release, x64
; Inner loop of the dot product, unrolled to four elements per iteration.
; Notation used in the comments below:
;   pa = rbx, cursor into the short array a (advances 8 bytes = 4 shorts per pass)
;   pb = r11, cursor into the float array b (advances 16 bytes = 4 floats per pass)
;   xmm2 = sum, r8 = passes remaining, eax/xmm0/xmm1 = scratch
; Every memory operand is a value load (e.g. pa[-2]), not an address computation.
; The arithmetic is scalar (mulss/addss); cvtdq2ps is the packed int->float
; conversion, but only the low lane written by movd is consumed afterwards.
; The four additions are applied to sum in element order, each depending on
; the previous one.
; NOTE(review): r8 is presumably initialised to the element count / 4 before
; the loop -- the setup code is not part of this listing.
00007fff`4f501090 0fbf43fc movsx eax,word ptr [rbx-4] ; loop: eax = pa[-2] (16-bit load, sign-extended)
00007fff`4f501094 660f6ec8 movd xmm1,eax ; xmm1 = eax (low lane)
00007fff`4f501098 0fbf43fe movsx eax,word ptr [rbx-2] ; eax = pa[-1]
00007fff`4f50109c 0f5bc9 cvtdq2ps xmm1,xmm1 ; xmm1 = (float)pa[-2]
00007fff`4f50109f 660f6ec0 movd xmm0,eax ; xmm0 = eax (low lane)
00007fff`4f5010a3 f3410f594bf8 mulss xmm1,dword ptr [r11-8] ; xmm1 *= pb[-2]
00007fff`4f5010a9 0fbf03 movsx eax,word ptr [rbx] ; eax = pa[0]
00007fff`4f5010ac 0f5bc0 cvtdq2ps xmm0,xmm0 ; xmm0 = (float)pa[-1]
00007fff`4f5010af f30f58ca addss xmm1,xmm2 ; xmm1 += sum
00007fff`4f5010b3 f3410f5943fc mulss xmm0,dword ptr [r11-4] ; xmm0 *= pb[-1]
00007fff`4f5010b9 0f28d1 movaps xmm2,xmm1 ; sum = xmm1 (first product folded in)
00007fff`4f5010bc 660f6ec8 movd xmm1,eax ; xmm1 = eax (low lane) = pa[0]
00007fff`4f5010c0 0fbf4302 movsx eax,word ptr [rbx+2] ; eax = pa[1]
00007fff`4f5010c4 f30f58d0 addss xmm2,xmm0 ; sum += xmm0 (second product)
00007fff`4f5010c8 0f5bc9 cvtdq2ps xmm1,xmm1 ; xmm1 = (float)pa[0]
00007fff`4f5010cb 4883c308 add rbx,8 ; pa += 4 shorts (8 bytes); all four a values are already loaded
00007fff`4f5010cf 660f6ec0 movd xmm0,eax ; xmm0 = eax (low lane) = old pa[1]
00007fff`4f5010d3 f3410f590b mulss xmm1,dword ptr [r11] ; xmm1 *= pb[0]
00007fff`4f5010d8 0f5bc0 cvtdq2ps xmm0,xmm0 ; xmm0 = (float)old pa[1]
00007fff`4f5010db f30f58d1 addss xmm2,xmm1 ; sum += xmm1 (third product)
00007fff`4f5010df f3410f594304 mulss xmm0,dword ptr [r11+4] ; xmm0 *= pb[1]
00007fff`4f5010e5 4983c310 add r11,10h ; pb += 4 floats (16 bytes)
00007fff`4f5010e9 f30f58d0 addss xmm2,xmm0 ; sum += xmm0 (fourth product)
00007fff`4f5010ed 4983e801 sub r8,1 ; --r8 (one fewer pass remaining)
00007fff`4f5010f1 759d jne 00007fff`4f501090 ; if r8 != 0, goto loop
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment