culurciello · January 3, 2016 01:29 · jeffhammond · Jan 13, 2014
diff --git a/mactest.c b/mactest.c
 /*
 Test of C code speed
 compile with: gcc -Ofast -fopenmp -mavx mactest.c
 or gfortran -O3 -fopenmp mactest.c 
 */
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <fcntl.h>
 #include <sys/time.h>
 #include <math.h>
 #include <omp.h>
 
 /*
  * Times an element-wise multiply kernel over nthreads rows of dsize floats,
  * repeated nbOfAverages times, then prints the total operation count, the
  * elapsed wall-clock time in seconds and the resulting rate in G OP/s.
  *
  * Returns 0 on success, EXIT_FAILURE if the work buffers cannot be allocated.
  */
 int main(void)
 {
   // nb of operations:
   const int dsize = 1024*64;      // floats per row
   const int nthreads = 8;         // rows; one row per parallel-loop iteration
   const int vcrz = 8;             // manual unroll factor; must match the 8 statements below
   const int nbOfAverages = 1024;  // passes over each row
   // NOTE(review): the kernel below performs one multiply per element, not a
   // multiply-accumulate, yet 2 operations are counted per element -- confirm.
   const int opsMAC = 2;           // operations per MAC

   // Heap buffers: two 2 MiB arrays are too large to place safely on the stack.
   const size_t total = (size_t) nthreads * dsize;
   float *a = malloc(total * sizeof *a);
   float *b = malloc(total * sizeof *b);
   if (a == NULL || b == NULL) {
     fprintf(stderr, "mactest: out of memory\n");
     free(a);
     free(b);
     return EXIT_FAILURE;
   }

   // Give every element a defined value before it is read; 1.0f keeps the
   // repeated products finite. Done outside the timed region.
   for (size_t n = 0; n < total; n++) {
     a[n] = 1.0f;
     b[n] = 1.0f;
   }

   struct timeval start, end;
   gettimeofday(&start, NULL);

 #pragma omp parallel for
   for (int k = 0; k < nthreads; k++) {
     float *ra = a + (size_t) k * dsize;
     const float *rb = b + (size_t) k * dsize;
     for (int i = 0; i < nbOfAverages; i++) {
       // Walk the whole row in groups of vcrz so the work done matches the
       // dsize elements per pass that the ops total below assumes.
       for (int j = 0; j + vcrz <= dsize; j += vcrz) {
         ra[j]   = ra[j]   * rb[j];
         ra[j+1] = ra[j+1] * rb[j+1];
         ra[j+2] = ra[j+2] * rb[j+2];
         ra[j+3] = ra[j+3] * rb[j+3];
         ra[j+4] = ra[j+4] * rb[j+4];
         ra[j+5] = ra[j+5] * rb[j+5];
         ra[j+6] = ra[j+6] * rb[j+6];
         ra[j+7] = ra[j+7] * rb[j+7];
       }
     }
   }

   gettimeofday(&end, NULL);
   // Elapsed wall-clock time in seconds.
   double t = ((double) (end.tv_sec - start.tv_sec))
     + ((double) (end.tv_usec - start.tv_usec)) / 1e6;

   // Read the results back into a volatile so the timed stores are consumed
   // and cannot be treated as dead by the optimizer.
   float checksum = 0.0f;
   for (size_t n = 0; n < total; n++) {
     checksum += a[n];
   }
   volatile float sink = checksum;
   (void) sink;

   // report performance:
   // Widen before multiplying: the product is 2^30, leaving no headroom in int.
   long tops = (long) nthreads * opsMAC * nbOfAverages * dsize; // total ops
   printf("\nTotal ops = %ld, # of treads = %d", tops, nthreads);
   printf("\nTime in s: %lf:", t);
   printf("\nTest performance [G OP/s] %lf:", tops/t/1e9);
   printf("\n");

   free(a);
   free(b);
   return 0;
 }
	/*
	Test of C code speed
	compile with: gcc -Ofast -fopenmp -mavx mactest.c
	or gfortran -O3 -fopenmp mactest.c
	*/

	#include <stdio.h>
	#include <stdlib.h>
	#include <fcntl.h>
	#include <sys/time.h>
	#include <math.h>
	#include <omp.h>

	/*
	 * Times an element-wise multiply kernel over nthreads rows of dsize floats,
	 * repeated nbOfAverages times, then prints the total operation count, the
	 * elapsed wall-clock time in seconds and the resulting rate in G OP/s.
	 *
	 * Returns 0 on success, EXIT_FAILURE if the work buffers cannot be allocated.
	 */
	int main(void)
	{
	  // nb of operations:
	  const int dsize = 1024*64;      // floats per row
	  const int nthreads = 8;         // rows; one row per parallel-loop iteration
	  const int vcrz = 8;             // manual unroll factor; must match the 8 statements below
	  const int nbOfAverages = 1024;  // passes over each row
	  // NOTE(review): the kernel below performs one multiply per element, not a
	  // multiply-accumulate, yet 2 operations are counted per element -- confirm.
	  const int opsMAC = 2;           // operations per MAC

	  // Heap buffers: two 2 MiB arrays are too large to place safely on the stack.
	  const size_t total = (size_t) nthreads * dsize;
	  float *a = malloc(total * sizeof *a);
	  float *b = malloc(total * sizeof *b);
	  if (a == NULL || b == NULL) {
	    fprintf(stderr, "mactest: out of memory\n");
	    free(a);
	    free(b);
	    return EXIT_FAILURE;
	  }

	  // Give every element a defined value before it is read; 1.0f keeps the
	  // repeated products finite. Done outside the timed region.
	  for (size_t n = 0; n < total; n++) {
	    a[n] = 1.0f;
	    b[n] = 1.0f;
	  }

	  struct timeval start, end;
	  gettimeofday(&start, NULL);

	#pragma omp parallel for
	  for (int k = 0; k < nthreads; k++) {
	    float *ra = a + (size_t) k * dsize;
	    const float *rb = b + (size_t) k * dsize;
	    for (int i = 0; i < nbOfAverages; i++) {
	      // Walk the whole row in groups of vcrz so the work done matches the
	      // dsize elements per pass that the ops total below assumes.
	      for (int j = 0; j + vcrz <= dsize; j += vcrz) {
	        ra[j]   = ra[j]   * rb[j];
	        ra[j+1] = ra[j+1] * rb[j+1];
	        ra[j+2] = ra[j+2] * rb[j+2];
	        ra[j+3] = ra[j+3] * rb[j+3];
	        ra[j+4] = ra[j+4] * rb[j+4];
	        ra[j+5] = ra[j+5] * rb[j+5];
	        ra[j+6] = ra[j+6] * rb[j+6];
	        ra[j+7] = ra[j+7] * rb[j+7];
	      }
	    }
	  }

	  gettimeofday(&end, NULL);
	  // Elapsed wall-clock time in seconds.
	  double t = ((double) (end.tv_sec - start.tv_sec))
	    + ((double) (end.tv_usec - start.tv_usec)) / 1e6;

	  // Read the results back into a volatile so the timed stores are consumed
	  // and cannot be treated as dead by the optimizer.
	  float checksum = 0.0f;
	  for (size_t n = 0; n < total; n++) {
	    checksum += a[n];
	  }
	  volatile float sink = checksum;
	  (void) sink;

	  // report performance:
	  // Widen before multiplying: the product is 2^30, leaving no headroom in int.
	  long tops = (long) nthreads * opsMAC * nbOfAverages * dsize; // total ops
	  printf("\nTotal ops = %ld, # of treads = %d", tops, nthreads);
	  printf("\nTime in s: %lf:", t);
	  printf("\nTest performance [G OP/s] %lf:", tops/t/1e9);
	  printf("\n");

	  free(a);
	  free(b);
	  return 0;
	}
No results found