Skip to content

Instantly share code, notes, and snippets.

@horitaku1124
Last active September 22, 2019 02:12
Show Gist options
  • Save horitaku1124/4332ec84e14985b32aa5f15921ad9caa to your computer and use it in GitHub Desktop.
Save horitaku1124/4332ec84e14985b32aa5f15921ad9caa to your computer and use it in GitHub Desktop.
gcc -O2 -mavx512f vector.c -fopt-info-vec-all -S -o vector.s
gcc -O2 -mavx512f vector.c -fopt-info-vec-all -c  -o vector
gcc -O0 -mavx512f test3.c -fopt-info-vec-all -o test3 vector
#include<stdio.h>
#include<x86intrin.h>
#include "vector.h"
/*
 * Demo driver: builds two identical 16-element float arrays, passes them to
 * sumProduct() and prints the returned value with "%f".
 */
int main(void)
{
    float lhs[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
                   1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
    float rhs[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
                   1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};

    /* Element count derived from the array itself (16 for the data above). */
    const int count = (int)(sizeof lhs / sizeof lhs[0]);
    const float dot = sumProduct(lhs, rhs, count);

    printf("%f\n", dot);
    return 0;
}
#include "vector.h"
#include <immintrin.h>
/* Number of float lanes held by one 512-bit register. */
enum { SUM_PRODUCT_LANES = 16 };

/* Scalar dot product over the index range [begin, end) of both arrays. */
static float sum_product_scalar(const float *vec1, const float *vec2,
                                int begin, int end)
{
    float sum = 0.0f;
    for (int i = begin; i < end; i++) {
        sum += vec1[i] * vec2[i];
    }
    return sum;
}

/*
 * AVX-512 dot product over the first `blocks_end` elements.
 * `blocks_end` must be a non-negative multiple of SUM_PRODUCT_LANES, so every
 * 16-float load stays inside the caller's arrays.
 */
__attribute__((target("avx512f")))
static float sum_product_avx512(const float *vec1, const float *vec2,
                                int blocks_end)
{
    __m512 acc = _mm512_setzero_ps();
    for (int i = 0; i < blocks_end; i += SUM_PRODUCT_LANES) {
        /* Unaligned loads accept any address; no pointer cast is required. */
        const __m512 a512 = _mm512_loadu_ps(&vec1[i]);
        const __m512 b512 = _mm512_loadu_ps(&vec2[i]);
        acc = _mm512_fmadd_ps(a512, b512, acc);
    }

    /* Spill the 16 partial sums and add them in lane order. */
    float lanes[SUM_PRODUCT_LANES];
    _mm512_storeu_ps(lanes, acc);

    float sum = 0;
    for (int i = 0; i < SUM_PRODUCT_LANES; i++) {
        sum += lanes[i];
    }
    return sum;
}

/*
 * Returns the dot product of the first `num` elements of vec1 and vec2.
 *
 * vec1, vec2: arrays holding at least `num` floats each; they are only read.
 * num:        number of elements to use; zero or a negative value yields 0.
 *
 * Full groups of 16 elements go through the AVX-512 path; the remaining
 * num % 16 elements are added with scalar code, so nothing past index
 * num - 1 is ever read. When the running CPU does not report AVX-512F the
 * whole range is summed with scalar code instead of executing AVX-512
 * instructions.
 * NOTE(review): if this translation unit itself is built with -mavx512f, the
 * compiler may still use newer encodings in the scalar helper; the fallback
 * is meant for builds that do not enable AVX-512 globally.
 */
float sumProduct(float* vec1, float* vec2, int num)
{
    if (num <= 0) {
        return 0.0f;
    }
    if (!__builtin_cpu_supports("avx512f")) {
        return sum_product_scalar(vec1, vec2, 0, num);
    }

    /* Largest multiple of 16 that does not exceed num. */
    const int blocks_end = num - num % SUM_PRODUCT_LANES;

    return sum_product_avx512(vec1, vec2, blocks_end)
         + sum_product_scalar(vec1, vec2, blocks_end, num);
}
/*
 * Dot product of two float arrays: the definition accompanying this header
 * accumulates vec1[i] * vec2[i] and returns the total as a float.
 *
 * vec1, vec2: input arrays.
 * num:        element count to process.
 *
 * NOTE(review): that definition reads the inputs with 512-bit (16-float)
 * AVX-512 loads; confirm with the implementation how a num that is not a
 * multiple of 16 is treated before passing such a length.
 */
float sumProduct(float* vec1, float* vec2, int num);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment