Skip to content

Instantly share code, notes, and snippets.

@nagadomi
Created June 10, 2016 10:43
Show Gist options
  • Save nagadomi/a75bc3674c347cbcac4086925736e694 to your computer and use it in GitHub Desktop.
Save nagadomi/a75bc3674c347cbcac4086925736e694 to your computer and use it in GitHub Desktop.
/*
 * dot_sse - dot product of two float arrays, vectorized with SSE.
 *
 * input, weight: arrays holding at least n floats each. No particular
 *                alignment is required: unaligned loads are used, so
 *                16-byte-aligned buffers (e.g. declared with
 *                __attribute__((aligned(16)))) keep working as before.
 * n:             number of elements to process. n <= 0 yields 0.0f and
 *                touches neither array.
 *
 * Returns sum(input[i] * weight[i]) for i in [0, n).
 *
 * The bulk of the work is done four elements at a time into four
 * independent lane accumulators, which are added together at the end;
 * the remaining (n % 4) elements are handled by a scalar loop. Because
 * of this grouping the rounding may differ slightly from a strictly
 * sequential left-to-right summation.
 */
float dot_sse(float *input, float *weight, int n)
{
    float lanes[4];
    __m128 acc = _mm_setzero_ps();
    float sum;
    int vec_end;
    int i;

    /* Guard first: with a negative n the rounded-down bound below would
     * also be negative and the tail loop would index before the arrays. */
    if (n <= 0) {
        return 0.0f;
    }

    /* Largest multiple of 4 that does not exceed n. */
    vec_end = n & ~3;

    for (i = 0; i < vec_end; i += 4) {
        __m128 a = _mm_loadu_ps(&input[i]);
        __m128 b = _mm_loadu_ps(&weight[i]);
        acc = _mm_add_ps(acc, _mm_mul_ps(a, b));
    }

    /* Horizontal reduction of the four lane accumulators. */
    _mm_storeu_ps(lanes, acc);
    sum = lanes[0] + lanes[1] + lanes[2] + lanes[3];

    /* Scalar tail for the last n % 4 elements. */
    for (i = vec_end; i < n; ++i) {
        sum += input[i] * weight[i];
    }
    return sum;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment