Last active
October 12, 2017 07:00
-
-
Save Anime4000/7ec8a06f848427d0115f3d566957aba6 to your computer and use it in GitHub Desktop.
Do floating point math by using AVX feature in x86 CPU
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
    Copyright (c) CureComp Technology Trading & Services

    Generic vs AVX
    --------------
    Shows how fast and how accurate the CPU is when the same
    floating-point math is done with generic code and with AVX:
    3 GHz Generic vs 1.8 GHz AVX
*/
#include <immintrin.h> | |
#include <stdio.h> | |
#include <time.h> | |
/*
 * Runs the same 8-element multiply-and-add twice -- once through the scalar
 * overload and once through the __m256 overload -- then prints both result
 * sets together with the clock() time measured around each call.
 *
 * Returns 0.
 */
int main()
{
    /* Both overloads are defined after main, so they are declared here.
       Two functions sharing one name is overloading, so this file has to be
       built as C++. */
    void multiply_and_add(const float* a, const float* b, const float* c, float* d);
    __m256 multiply_and_add(__m256 a, __m256 b, __m256 c);

    float a[8] = { 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8 };
    float b[8] = { 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7 };
    float c[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    float d[8];

    /* Scalar path. */
    clock_t start = clock();
    multiply_and_add(a, b, c, d);
    clock_t end = clock();

    /* Vector path. The operands are loaded from the same arrays the scalar
       path uses, so lane i holds element i and the constants exist only once. */
    __m256 vec_a = _mm256_loadu_ps(a);
    __m256 vec_b = _mm256_loadu_ps(b);
    __m256 vec_c = _mm256_loadu_ps(c);

    clock_t vec_start = clock();
    __m256 result = multiply_and_add(vec_a, vec_b, vec_c);
    clock_t vec_end = clock();

    /* Copy the lanes out with a store instead of reading the vector through
       a float pointer cast. */
    float f[8];
    _mm256_storeu_ps(f, result);

    /* Each timed region covers a single call, so the reported value can be
       smaller than one clock() tick and print as zero. */
    double cpu_time = ((double)(end - start)) / CLOCKS_PER_SEC;
    double vec_cpu_time = ((double)(vec_end - vec_start)) / CLOCKS_PER_SEC;

    printf("Compare Non-AVX CPU (Core i3 1st Gen) vs AVX CPU (Core i3 2nd Gen)\n");

    printf("\nResults (No AVX):\n");
    for (int i = 0; i < 8; i++) {
        printf("%f ", d[i]);
    }
    printf("\nElapsed (No AVX): %f s\n", cpu_time);

    printf("\nResults (AVX):\n");
    for (int i = 0; i < 8; i++) {
        printf("%f ", f[i]);
    }
    printf("\nElapsed (AVX): %f s\n", vec_cpu_time);

    printf("\n\n");
    return 0;
}
/*
 * Scalar multiply-and-add over exactly 8 floats: d[i] = a[i] * b[i] + c[i].
 *
 * a, b, c  inputs; the first 8 elements of each are read.
 * d        output; the first 8 elements are written.
 *
 * The product is stored into d first and c is added to it afterwards, as two
 * separate steps.
 */
void multiply_and_add(const float* a, const float* b, const float* c, float* d) {
    const float* const stop = a + 8;

    while (a != stop) {
        *d = *a++ * *b++;   /* step 1: product goes into the output slot */
        *d += *c++;         /* step 2: accumulate the addend on top of it */
        ++d;
    }
}
/*
 * Vector overload: passes the three 256-bit operands to _mm256_fmadd_ps and
 * returns its result (a * b + c per lane, per the intrinsic's documentation).
 */
__m256 multiply_and_add(__m256 a, __m256 b, __m256 c) {
    const __m256 fused = _mm256_fmadd_ps(a, b, c);
    return fused;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment