Last active
August 29, 2015 14:12
-
-
Save jeromerobert/e4fffc1eb2a9168f6205 to your computer and use it in GitHub Desktop.
OpenBLAS bug #478
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// build with gcc -Ofast -g -lpthread -lblas -lrt openblas-bug478.c or
// gcc -Ofast -g -lpthread -lopenblas -lrt openblas-bug478.c
// run with OPENBLAS_NUM_THREADS=1
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <pthread.h>
#include <cblas.h>
// Number of dgemv calls issued for a 1x1 matrix; main() scales it down by the
// number of matrix elements. Lower it if the test is too slow on your computer.
#define NLOOP 20e9
// Parameters handed to one dgemv_bench() invocation.
typedef struct {
    int matrix_size; // order of the square matrix (rows == columns)
    int n_loop;      // number of dgemv calls to issue
    int threaded;    // non-zero: the worker ends with pthread_exit() instead of returning
} BenchParam;
/*
 * Benchmark worker: issues param->n_loop calls of the Fortran BLAS routine
 * dgemv_ ("N", i.e. no transpose) computing y = v*A*x + v*y on a dense
 * matrix_size x matrix_size matrix.
 *
 * param->matrix_size  order of the square matrix A
 * param->n_loop       number of dgemv_ calls
 * param->threaded     non-zero when the function must finish through
 *                     pthread_exit() rather than a plain return
 *
 * Always yields NULL. Terminates the process if the work buffers cannot be
 * allocated. All buffers are released before the function finishes.
 *
 * NOTE(review): no prototype for dgemv_ is visible in this file; confirm that
 * one of the included headers declares it, otherwise the call relies on an
 * implicit declaration.
 */
void * dgemv_bench(BenchParam * param)
{
    int size = param->matrix_size;
    int n_loop = param->n_loop;
    int threaded = param->threaded;
    double v = 1.01;
    int one = 1;
    double *A = NULL;
    double *x = NULL;
    double *y = NULL;

    if (size > 0) {
        size_t n_vec = (size_t)size;
        size_t n_mat = n_vec * n_vec; // computed in size_t so it cannot overflow int

        A = calloc(n_mat, sizeof *A);
        x = calloc(n_vec, sizeof *x);
        y = calloc(n_vec, sizeof *y);
        if (A == NULL || x == NULL || y == NULL) {
            fprintf(stderr, "ERROR; cannot allocate buffers for matrix size %d\n", size);
            free(A);
            free(x);
            free(y);
            exit(EXIT_FAILURE);
        }

        for (size_t k = 0; k < n_vec; k++) {
            x[k] = (double)k;
            y[k] = (double)k;
        }
        for (size_t k = 0; k < n_mat; k++)
            A[k] = (double)k;

        // x is the input vector and y the accumulated output: BLAS does not
        // allow the same array to be passed for both.
        for (int i = 0; i < n_loop; i++)
            dgemv_("N", &size, &size, &v, A, &size, x, &one, &v, y, &one);
    }

    free(A);
    free(x);
    free(y);

    if (threaded)
        pthread_exit(NULL);
    return NULL;
}
double thread_bench(int nloop, int nb_threads, int matrix_size, double reftime) | |
{ | |
BenchParam param; | |
pthread_t threads[nb_threads]; | |
int t, rc; | |
struct timespec tick, tock; | |
param.matrix_size = matrix_size; | |
clock_gettime(CLOCK_MONOTONIC, &tick); | |
param.threaded = 1; | |
for(t=0; t<nb_threads; t++){ | |
param.n_loop = nloop / nb_threads; | |
rc = pthread_create(&threads[t], NULL, dgemv_bench, ¶m); | |
if (rc){ | |
printf("ERROR; return code from pthread_create() is %d\n", rc); | |
exit(-1); | |
} | |
} | |
for(t=0; t<nb_threads; t++){ | |
pthread_join(threads[t], NULL); | |
} | |
clock_gettime(CLOCK_MONOTONIC, &tock); | |
double dt = (tock.tv_sec - tick.tv_sec) + (tock.tv_nsec - tick.tv_nsec) / 1e9; | |
if(reftime > 0) { | |
printf("Nb threads %d, matrix size %d, time %g, speedup %g\n", nb_threads, matrix_size, dt, reftime/dt); | |
return reftime; | |
} | |
else { | |
printf("Nb threads %d, matrix size %d, time %g\n", nb_threads, matrix_size, dt); | |
return dt; | |
} | |
} | |
int main(int argc, char * argv[]) { | |
int i, j; | |
struct timespec tick, tock; | |
int nb_threads[5] = {1, 2, 4, 6, 12}; | |
int matrix_sizes[5] = {20, 40, 60, 80, 200}; | |
for(j = 0; j < 5; j++) | |
{ | |
double ms = matrix_sizes[j]; | |
double reftime = -1; | |
for(i = 0; i < 5; i++) | |
reftime = thread_bench((int)(NLOOP/(ms*ms)), nb_threads[i], ms, reftime); | |
puts(""); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Nb threads 1, matrix size 20, time 11.1883 | |
Nb threads 2, matrix size 20, time 16.9531, speedup 0.659958 | |
Nb threads 4, matrix size 20, time 14.639, speedup 0.764281 | |
Nb threads 6, matrix size 20, time 15.0962, speedup 0.741134 | |
Nb threads 12, matrix size 20, time 13.8431, speedup 0.808224 | |
Nb threads 1, matrix size 40, time 6.76891 | |
Nb threads 2, matrix size 40, time 5.63293, speedup 1.20167 | |
Nb threads 4, matrix size 40, time 4.39875, speedup 1.53883 | |
Nb threads 6, matrix size 40, time 4.02665, speedup 1.68103 | |
Nb threads 12, matrix size 40, time 3.49245, speedup 1.93815 | |
Nb threads 1, matrix size 60, time 6.84569 | |
Nb threads 2, matrix size 60, time 3.9338, speedup 1.74022 | |
Nb threads 4, matrix size 60, time 2.27475, speedup 3.00943 | |
Nb threads 6, matrix size 60, time 1.8754, speedup 3.65024 | |
Nb threads 12, matrix size 60, time 1.81147, speedup 3.77907 | |
Nb threads 1, matrix size 80, time 6.14711 | |
Nb threads 2, matrix size 80, time 3.59512, speedup 1.70985 | |
Nb threads 4, matrix size 80, time 2.03429, speedup 3.02175 | |
Nb threads 6, matrix size 80, time 1.32202, speedup 4.6498 | |
Nb threads 12, matrix size 80, time 1.15665, speedup 5.31459 | |
Nb threads 1, matrix size 200, time 6.1784 | |
Nb threads 2, matrix size 200, time 3.37149, speedup 1.83254 | |
Nb threads 4, matrix size 200, time 1.68204, speedup 3.67315 | |
Nb threads 6, matrix size 200, time 1.1164, speedup 5.73275 | |
Nb threads 12, matrix size 200, time 0.636767, speedup 9.70276 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Nb threads 1, matrix size 20, time 21.3955 | |
Nb threads 2, matrix size 20, time 10.7553, speedup 1.9893 | |
Nb threads 4, matrix size 20, time 5.3625, speedup 3.98983 | |
Nb threads 6, matrix size 20, time 3.6023, speedup 5.93941 | |
Nb threads 12, matrix size 20, time 1.89335, speedup 11.3003 | |
Nb threads 1, matrix size 40, time 16.7607 | |
Nb threads 2, matrix size 40, time 8.41217, speedup 1.99243 | |
Nb threads 4, matrix size 40, time 4.19766, speedup 3.99286 | |
Nb threads 6, matrix size 40, time 2.79663, speedup 5.99317 | |
Nb threads 12, matrix size 40, time 1.47789, speedup 11.341 | |
Nb threads 1, matrix size 60, time 15.4368 | |
Nb threads 2, matrix size 60, time 7.7506, speedup 1.99169 | |
Nb threads 4, matrix size 60, time 3.8857, speedup 3.97272 | |
Nb threads 6, matrix size 60, time 4.18131, speedup 3.69185 | |
Nb threads 12, matrix size 60, time 1.32583, speedup 11.6431 | |
Nb threads 1, matrix size 80, time 16.6979 | |
Nb threads 2, matrix size 80, time 8.42563, speedup 1.98179 | |
Nb threads 4, matrix size 80, time 4.20207, speedup 3.97372 | |
Nb threads 6, matrix size 80, time 2.80974, speedup 5.94286 | |
Nb threads 12, matrix size 80, time 1.43335, speedup 11.6495 | |
Nb threads 1, matrix size 200, time 15.2966 | |
Nb threads 2, matrix size 200, time 7.6919, speedup 1.98866 | |
Nb threads 4, matrix size 200, time 3.85008, speedup 3.97305 | |
Nb threads 6, matrix size 200, time 2.70593, speedup 5.65297 | |
Nb threads 12, matrix size 200, time 1.32625, speedup 11.4599 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment