Last active
May 16, 2017 05:23
-
-
Save morris821028/1ffe3c81a0e8c1d207da2d82678c984e to your computer and use it in GitHub Desktop.
[SSE Acc] Pi - Monte Carlo method
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
all: pi | |
pi: pi.c | |
gcc-4.8 -Wall -std=c99 -O2 pi.c -o pi-4.8 | |
gcc-5 -std=c99 -O2 pi.c -o pi-5 | |
gcc-6 -std=c99 -O2 -march=native -msse -mavx pi.c -o pi-6 | |
gcc-4.8 -Wall -std=c99 -O2 -msse -msse2 -mavx pi-sse.c -o pi-sse | |
clang-3.8 -O2 pi.c -o pi-clang | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdint.h> | |
#include <x86intrin.h> | |
int main() { | |
const int r = 20000; | |
const int rr = r * r; | |
int c = 0; double a; | |
__m128i sse_ret = _mm_set_epi32(0, 0, 0, 0); | |
for (int x = 0; x < r; x++) { | |
int tr = rr-x*x; | |
__m128i sse_iy = _mm_set_epi32(0, 1, 2, 3); | |
__m128i sse_ic = _mm_set_epi32(4, 4, 4, 4); | |
__m128i sse_i16= _mm_set_epi32(16, 16, 16, 16); | |
__m128i sse_sy = _mm_set_epi32(0, 1, 4, 9); | |
__m128i sse_i1 = _mm_set_epi32(1, 1, 1, 1); | |
__m128i sse_bb = _mm_set_epi32(tr, tr, tr, tr); | |
for (int it = 0; it < r/4; it++) { | |
__m128i res = _mm_and_si128(_mm_cmplt_epi32(sse_sy, sse_bb), sse_i1); | |
sse_ret = _mm_add_epi32(sse_ret, res); | |
sse_sy = _mm_add_epi32(sse_sy, _mm_slli_epi32(sse_iy, 3)); | |
sse_sy = _mm_add_epi32(sse_sy, sse_i16); | |
sse_iy = _mm_add_epi32(sse_iy, sse_ic); | |
} | |
/* | |
for (int y = 0; y < r; y++) { | |
if (y*y < tr) | |
c++; | |
} | |
*/ | |
} | |
{ | |
static int32_t tmp[4] __attribute__ ((aligned (16))); | |
_mm_store_si128((__m128i*) &tmp[0], sse_ret); | |
c = tmp[0] + tmp[1] + tmp[2] + tmp[3]; | |
} | |
a = c * 4. / rr; | |
printf("%.8f\n", a); | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
int main() { | |
const int r = 20000; | |
const int rr = r * r; | |
int c = 0; double a; | |
for (int x = 0; x < r; x++) { | |
int tr = rr-x*x; | |
for (int y = 0; y < r; y++) { | |
if (y*y < tr) | |
c++; | |
} | |
} | |
a = c * 4. / rr; | |
printf("%.8f\n", a); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment