Created
November 8, 2014 06:09
-
-
Save PkmX/b4c4e488e7a6f7717a7b to your computer and use it in GitHub Desktop.
Parallel Programming Homework 1 - Monte Carlo π estimation with pthread
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CC := gcc | |
CFLAGS := -std=gnu99 -Wall -pedantic -pthread -Ofast -march=native -funroll-loops -ftree-vectorize | |
BIN := pi | |
SRCS := pi.c | |
ZIP := HW1_0356162.zip | |
.PHONY: all clean zip | |
all: $(BIN) | |
$(BIN): $(SRCS) | |
$(CC) $(CFLAGS) $^ -o $@ | |
clean: | |
-rm $(BIN) | |
zip: $(ZIP) | |
$(ZIP): $(SRCS) Makefile | |
zip $@ $^ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <assert.h> | |
#include <stdint.h> | |
#include <stdlib.h> | |
#include <stdio.h> | |
#include <sys/sysinfo.h> | |
#include <pthread.h> | |
typedef int32_t intmindiv; | |
float intmindiv_generate(intmindiv* const rng) { | |
*rng *= 16807; | |
return (float) (*rng) / (float) 0x80000000; | |
} | |
uint64_t do_toss_impl(const uint64_t tosses) { | |
unsigned seed = time(NULL); | |
intmindiv rng = rand_r(&seed); | |
uint64_t in_circle = 0; | |
for (uint64_t i = 0; i < tosses; ++i) { | |
const float x = intmindiv_generate(&rng); | |
const float y = intmindiv_generate(&rng); | |
const float distance = x * x + y * y; | |
in_circle += (distance < 1.0f) ? 1 : 0; | |
} | |
return in_circle; | |
} | |
#ifdef __AVX__ | |
typedef int32_t v4si __attribute__ ((vector_size(16))); | |
typedef int32_t v8si __attribute__ ((vector_size(32))); | |
typedef float v8sf __attribute__ ((vector_size(32))); | |
typedef v8si intmindiv8; | |
v8sf intmindiv8_generate(intmindiv8* const rng) { | |
*rng *= 16807; | |
return __builtin_ia32_cvtdq2ps256(*rng) / (float) 0x80000000; | |
} | |
int v8si_hsum(const v8si v) { | |
const v4si u = __builtin_ia32_vextractf128_si256(v, 0) + __builtin_ia32_vextractf128_si256(v, 1); | |
const v4si w = __builtin_ia32_phaddd128(u, u); | |
return __builtin_ia32_phaddd128(w, w)[0]; | |
} | |
uint64_t do_toss_impl_avx(const uint64_t tosses) { | |
unsigned seed = time(NULL); | |
intmindiv8 vrng = { rand_r(&seed), rand_r(&seed), rand_r(&seed), rand_r(&seed), rand_r(&seed), rand_r(&seed), rand_r(&seed), rand_r(&seed) }; | |
v8si in_circles = {0, 0, 0, 0, 0, 0, 0, 0}; | |
for (uint64_t i = 0; i < tosses / 8; ++i) { | |
const v8sf vx = intmindiv8_generate(&vrng); | |
const v8sf vy = intmindiv8_generate(&vrng); | |
const v8sf vdistance = vx * vx + vy * vy; | |
const v8sf vradius = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; | |
in_circles -= (v8si) __builtin_ia32_cmpps256(vdistance, vradius, 1); | |
} | |
return v8si_hsum(in_circles) + do_toss_impl(tosses % 8); | |
} | |
#endif // __AVX__ | |
typedef struct toss_args_t { | |
uint64_t tosses; | |
uint64_t in_circle; | |
} toss_args; | |
void* do_toss(toss_args* const args) { | |
#ifdef __AVX__ | |
args->in_circle = do_toss_impl_avx(args->tosses); | |
#else | |
args->in_circle = do_toss_impl(args->tosses); | |
#endif | |
return NULL; | |
} | |
int main(const int argc, const char * const * const argv) { | |
assert(argc >= 2); | |
const int nprocs = argc >= 3 ? atoi(argv[2]) : get_nprocs_conf(); | |
const uint64_t tosses = atoll(argv[1]); | |
pthread_t threads[nprocs]; | |
toss_args args[nprocs]; | |
for (int n = 0; n < nprocs; ++n) { | |
args[n].tosses = tosses / nprocs + (n == 0 ? tosses % nprocs : 0); | |
args[n].in_circle = 0; | |
pthread_create(&threads[n], NULL, (void* (*)(void*)) do_toss, &args[n]); | |
} | |
uint64_t in_circle = 0; | |
for (int n = 0; n < nprocs; ++n) { | |
pthread_join(threads[n], NULL); | |
in_circle += args[n].in_circle; | |
} | |
const double pi = 4.0 * ((double) in_circle / (double) tosses); | |
printf("%.20g\n", pi); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment