Skip to content

Instantly share code, notes, and snippets.

@morris821028
Last active May 16, 2017 05:23
Show Gist options
  • Save morris821028/1ffe3c81a0e8c1d207da2d82678c984e to your computer and use it in GitHub Desktop.
Save morris821028/1ffe3c81a0e8c1d207da2d82678c984e to your computer and use it in GitHub Desktop.
[SSE Acc] Pi - Monte Carlo method
all: pi
pi: pi.c
gcc-4.8 -Wall -std=c99 -O2 pi.c -o pi-4.8
gcc-5 -std=c99 -O2 pi.c -o pi-5
gcc-6 -std=c99 -O2 -march=native -msse -mavx pi.c -o pi-6
gcc-4.8 -Wall -std=c99 -O2 -msse -msse2 -mavx pi-sse.c -o pi-sse
clang-3.8 -O2 pi.c -o pi-clang
#include <stdio.h>
#include <stdint.h>
#include <x86intrin.h>
int main() {
const int r = 20000;
const int rr = r * r;
int c = 0; double a;
__m128i sse_ret = _mm_set_epi32(0, 0, 0, 0);
for (int x = 0; x < r; x++) {
int tr = rr-x*x;
__m128i sse_iy = _mm_set_epi32(0, 1, 2, 3);
__m128i sse_ic = _mm_set_epi32(4, 4, 4, 4);
__m128i sse_i16= _mm_set_epi32(16, 16, 16, 16);
__m128i sse_sy = _mm_set_epi32(0, 1, 4, 9);
__m128i sse_i1 = _mm_set_epi32(1, 1, 1, 1);
__m128i sse_bb = _mm_set_epi32(tr, tr, tr, tr);
for (int it = 0; it < r/4; it++) {
__m128i res = _mm_and_si128(_mm_cmplt_epi32(sse_sy, sse_bb), sse_i1);
sse_ret = _mm_add_epi32(sse_ret, res);
sse_sy = _mm_add_epi32(sse_sy, _mm_slli_epi32(sse_iy, 3));
sse_sy = _mm_add_epi32(sse_sy, sse_i16);
sse_iy = _mm_add_epi32(sse_iy, sse_ic);
}
/*
for (int y = 0; y < r; y++) {
if (y*y < tr)
c++;
}
*/
}
{
static int32_t tmp[4] __attribute__ ((aligned (16)));
_mm_store_si128((__m128i*) &tmp[0], sse_ret);
c = tmp[0] + tmp[1] + tmp[2] + tmp[3];
}
a = c * 4. / rr;
printf("%.8f\n", a);
return 0;
}
#include <stdio.h>
int main() {
const int r = 20000;
const int rr = r * r;
int c = 0; double a;
for (int x = 0; x < r; x++) {
int tr = rr-x*x;
for (int y = 0; y < r; y++) {
if (y*y < tr)
c++;
}
}
a = c * 4. / rr;
printf("%.8f\n", a);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment