Last active
March 23, 2018 13:28
-
-
Save goldshtn/a1cf8a1d3007bf731224 to your computer and use it in GitHub Desktop.
Vectorized dot product of float arrays in C# and C++
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Disassembly of the two inner dot-product loops, shown for comparison.
; Both accumulate the running scalar result in the low lane of xmm0 (addss xmm0,...).
; C++ core loop, key intrinsic is _mm_dp_ps | |
; Each iteration: two unaligned 16-byte loads, one dpps, one scalar add, then
; a pointer bump (rcx += 10h) and a down-counting trip counter in r8.
; NOTE(review): rdx is presumably the byte distance between the two arrays, so
; [rdx+rcx] and [rcx] address the same element index in each - confirm against the prologue.
00007fff`548b10d5 0f100c0a movups xmm1,xmmword ptr [rdx+rcx] ; LOOP | |
00007fff`548b10d9 0f1011 movups xmm2,xmmword ptr [rcx] | |
00007fff`548b10dc 4883c110 add rcx,10h | |
; imm8 0F1h: high nibble F multiplies all four lanes, low nibble 1 writes the sum to lane 0 only.
00007fff`548b10e0 660f3a40d1f1 dpps xmm2,xmm1,0F1h | |
00007fff`548b10e6 f30f58c2 addss xmm0,xmm2 | |
00007fff`548b10ea 4983e801 sub r8,1 | |
00007fff`548b10ee 75e5 jne 00007fff`548b10d5 ; LOOP | |
; C# core loop, key intrinsic is Vector.Dot(Vector<float>, Vector<float>); | |
; runs 70% slower than C++ version | |
; Compared with the C++ loop above, every iteration here additionally performs two
; compare-and-branch range checks and replaces the single dpps with a
; mulps followed by a two-step shuffle/add horizontal reduction.
; r10d = rax+3 is the index of the last element the 4-wide load will touch; it is
; checked (unsigned, jae) against r8d and then r9d before each load.
; NOTE(review): r8d / r9d are presumably the lengths of the two arrays, and the +10h
; displacement presumably skips the managed array header - confirm.
00007ffe`fc663686 448d5003 lea r10d,[rax+3] ; LOOP | |
00007ffe`fc66368a 453bd0 cmp r10d,r8d | |
00007ffe`fc66368d 7337 jae RANGE_FAIL | |
00007ffe`fc66368f 0f104c8110 movups xmm1,xmmword ptr [rcx+rax*4+10h] | |
00007ffe`fc663694 453bd1 cmp r10d,r9d | |
00007ffe`fc663697 732d jae RANGE_FAIL | |
00007ffe`fc663699 0f10548210 movups xmm2,xmmword ptr [rdx+rax*4+10h] | |
; Lane-wise products of the two vectors.
00007ffe`fc66369e 0f59ca mulps xmm1,xmm2 | |
; Reduction step 1: shufps 0B1h swaps adjacent lanes (1,0,3,2); the add leaves pair sums in every lane.
00007ffe`fc6636a1 0f28d9 movaps xmm3,xmm1 | |
00007ffe`fc6636a4 0fc6dbb1 shufps xmm3,xmm3,0B1h | |
00007ffe`fc6636a8 0f58cb addps xmm1,xmm3 | |
; Reduction step 2: shufps 1Bh reverses the lanes (3,2,1,0); the add leaves the full 4-lane sum in every lane.
00007ffe`fc6636ab 0f28d9 movaps xmm3,xmm1 | |
00007ffe`fc6636ae 0fc6db1b shufps xmm3,xmm3,1Bh | |
00007ffe`fc6636b2 0f58cb addps xmm1,xmm3 | |
; Fold this block's dot product into the running total.
00007ffe`fc6636b5 f30f58c1 addss xmm0,xmm1 | |
; Advance the element index by 4 and loop while r8d > eax (signed compare).
00007ffe`fc6636b9 83c004 add eax,4 | |
00007ffe`fc6636bc 443bc0 cmp r8d,eax | |
00007ffe`fc6636bf 7fc5 jg 00007ffe`fc663686 ; LOOP |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// @brief Dot product of two float arrays using the SSE4.1 `_mm_dp_ps` intrinsic.
///
/// Full groups of four elements are processed with unaligned 128-bit loads and
/// a single `dpps` per group; any remaining 1-3 elements are handled by a
/// scalar tail loop so that no memory past `a[length - 1]` / `b[length - 1]`
/// is ever read.
///
/// @param a      First input array; must hold at least `length` floats.
/// @param b      Second input array; must hold at least `length` floats.
/// @param length Number of elements to multiply-accumulate. Values <= 0 yield 0.
/// @return Sum of `a[i] * b[i]` for `i` in `[0, length)`.
extern "C" __declspec(dllexport) float vectorized_native_same_type_float(float* a, float* b, int length)
{
    float aux = 0;

    // Largest multiple of 4 that does not exceed length. Computed up front so
    // the SIMD loop never issues a 4-wide load that straddles the array end.
    const int vectorized_end = length > 0 ? length - (length % 4) : 0;

    int i = 0;
    for (; i < vectorized_end; i += 4)
    {
        const __m128 va = _mm_loadu_ps(a + i);
        const __m128 vb = _mm_loadu_ps(b + i);
        // 0xF1: multiply all four lanes (high nibble F), store the sum in lane 0 (low nibble 1).
        const __m128 dp = _mm_dp_ps(va, vb, 0xF1);
        // _mm_cvtss_f32 extracts lane 0 portably (m128_f32 is an MSVC-only member).
        aux += _mm_cvtss_f32(dp);
    }

    // Scalar tail for the 0-3 elements that do not fill a whole vector.
    for (; i < length; ++i)
    {
        aux += a[i] * b[i];
    }
    return aux;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Dot product of two float arrays using <c>System.Numerics.Vector&lt;float&gt;</c>.
/// Full blocks of <c>Vector&lt;float&gt;.Count</c> elements go through
/// <c>Vector.Dot</c>; the leftover elements (fewer than one block) are handled
/// by a scalar loop, so arrays whose length is not a multiple of the vector
/// width no longer cause the <c>Vector&lt;float&gt;</c> constructor to throw.
/// </summary>
/// <param name="a">First input array; its length determines how many elements are processed.</param>
/// <param name="b">Second input array; must contain at least <c>a.Length</c> elements.</param>
/// <returns>Sum of <c>a[i] * b[i]</c> over every index of <paramref name="a"/>.</returns>
static float VectorizedSameTypeFloat(float[] a, float[] b)
{
    float aux = 0;
    int vecSize = Vector<float>.Count;

    // Last index (exclusive) at which a whole vector still fits inside a.
    int vectorizedEnd = a.Length - (a.Length % vecSize);

    int i = 0;
    for (; i < vectorizedEnd; i += vecSize)
    {
        Vector<float> va = new Vector<float>(a, i);
        Vector<float> vb = new Vector<float>(b, i);
        aux += Vector.Dot(va, vb);
    }

    // Scalar tail for the elements that do not fill a whole vector.
    for (; i < a.Length; i++)
    {
        aux += a[i] * b[i];
    }
    return aux;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment