Last active
February 12, 2019 15:33
-
-
Save TNick/8849997 to your computer and use it in GitHub Desktop.
Some routines to test blas and some code arround it
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Author: Nicu Tofan | |
* License: BSD | |
* | |
* See below for getRealTime() license. | |
*/ | |
#include <cblas.h> | |
#include <stdlib.h> | |
#include <stdio.h> | |
#define test_m 2 | |
#define test_k 3 | |
#define test_n 4 | |
#define test_m_extra 5 | |
#define test_k_extra 7 | |
#define test_n_extra 8 | |
#define test_row_major 1 | |
#define test_col_major 0 | |
double getRealTime(); | |
//! prints a matrix in either row major or column major order | |
static void printMatrix (double *data, int rows, int cols, int row_major) | |
{ | |
int i,j; | |
// column header | |
printf (" "); | |
for(j=0; j<cols;j++) { | |
printf (" Col %2d ", j+1); | |
} | |
printf ("\n"); | |
for(i=0; i<rows;i++) { | |
printf ("Row %2d: ", i+1); | |
for(j=0; j<cols;j++) { | |
if (row_major) { | |
printf ("%7.4f ", data[i*cols+j]); | |
} else { | |
printf ("%7.4f ", data[j*rows+i]); | |
} | |
} | |
printf ("\n"); | |
} | |
printf ("\n--------------------------------------------------------\n\n"); | |
} | |
//! test printMatrix() to make sure it works | |
static void testPrintMatrix() | |
{ | |
printf ("==========================================================\n"); | |
printf ("Test print function\n"); | |
printf ("==========================================================\n\n"); | |
printf ("Matrix in row major order\n\n"); | |
double A[2*3] = { | |
1, 2, 3, | |
4, 5, 6 | |
}; | |
printMatrix (A, 2, 3, test_row_major); | |
printf ("Matrix in column major order\n\n"); | |
double B[2*3] = { | |
1, 4, | |
2, 5, | |
3, 6 | |
}; | |
printMatrix (B, 2, 3, test_col_major); | |
} | |
//! testing the extra-space | |
static void testMulColMajorExtraSpace() | |
{ | |
printf ("==========================================================\n"); | |
printf ("Extra space in matrix not used in ops\n"); | |
printf ("==========================================================\n\n"); | |
/* MATRIX A[2x3] | |
Col 1 Col 2 Col 3 | |
Row 1: 1.0000 2.0000 3.0000 | |
Row 2: 4.0000 5.0000 6.0000 | |
*/ | |
printf ("A\n\n"); | |
double A[] = { | |
1, 4, 9, 9, 9, // 9's will not affect the computation | |
2, 5, 9, 9, 9, | |
3, 6, 9, 9, 9 | |
}; | |
printMatrix (A, test_m_extra, test_k, test_col_major); | |
/* MATRIX B[3x4] | |
Col 1 Col 2 Col 3 Col 4 | |
Row 1: 1.0000 2.0000 3.0000 4.0000 | |
Row 2: 0.1000 0.2000 0.3000 0.4000 | |
Row 3: 0.0100 0.0200 0.0300 0.0400 | |
*/ | |
printf ("B\n\n"); | |
double B[] = { | |
1.0, 0.1, 0.01, 9.0, 9.0, 9.0, 9.0, // 9's will not affect the computation | |
2.0, 0.2, 0.02, 9.0, 9.0, 9.0, 9.0, | |
3.0, 0.3, 0.03, 9.0, 9.0, 9.0, 9.0, | |
4.0, 0.4, 0.04, 9.0, 9.0, 9.0, 9.0, | |
}; | |
printMatrix (B, test_k_extra, test_n, test_col_major); | |
/* MATRIX C[2x4] | |
Col 1 Col 2 Col 3 Col 4 | |
Row 1: 1.2300 2.4600 3.6900 4.9200 | |
Row 2: 4.5600 9.1200 13.6800 18.2400 | |
*/ | |
double C[test_n_extra*test_n] = {0}; | |
printf ("C\n\n"); | |
// C = alpha*A*B+beta*C | |
cblas_dgemm( | |
CblasColMajor, | |
CblasNoTrans, // TRANSA: | |
CblasNoTrans, // TRANSB: | |
test_m, // A matrix: m rows, k columns | |
test_n, // B matrix: k rows, n columns | |
test_k, // C matrix: m rows, n columns | |
1.0, // alpha = 1.0 | |
A, // A source matrix | |
test_m_extra, // true lenght of a column in A | |
// if TRANSA = 'N' then >= m | |
// if TRANSA = 'T' then >= k | |
B, // B source matrix | |
test_k_extra, // true lenght of a column in B | |
// if TRANSB = 'N' then >= k | |
// if TRANSB = 'T' then >= n | |
0.0, // beta = 0.0 | |
C, // C destination matrix | |
test_n_extra); // true lenght of a row in C | |
// must be >= m | |
printMatrix (C, test_n_extra, test_n, test_col_major); | |
} | |
//! blas expects column major order | |
static void testMulColMajor() | |
{ | |
printf ("==========================================================\n"); | |
printf ("Test BLAS multiply - column major order\n"); | |
printf ("==========================================================\n\n"); | |
/* MATRIX A[2x3] | |
Col 1 Col 2 Col 3 | |
Row 1: 1.0000 2.0000 3.0000 | |
Row 2: 4.0000 5.0000 6.0000 | |
*/ | |
printf ("A\n\n"); | |
double A[test_m*test_k] = { | |
1, 4, | |
2, 5, | |
3, 6 | |
}; | |
printMatrix (A, test_m, test_k, test_col_major); | |
/* MATRIX B[3x4] | |
Col 1 Col 2 Col 3 Col 4 | |
Row 1: 1.0000 2.0000 3.0000 4.0000 | |
Row 2: 0.1000 0.2000 0.3000 0.4000 | |
Row 3: 0.0100 0.0200 0.0300 0.0400 | |
*/ | |
printf ("B\n\n"); | |
double B[test_k*test_n] = { | |
1.0, 0.1, 0.01, | |
2.0, 0.2, 0.02, | |
3.0, 0.3, 0.03, | |
4.0, 0.4, 0.04 | |
}; | |
printMatrix (B, test_k, test_n, test_col_major); | |
/* MATRIX C[2x4] | |
Col 1 Col 2 Col 3 Col 4 | |
Row 1: 1.2300 2.4600 3.6900 4.9200 | |
Row 2: 4.5600 9.1200 13.6800 18.2400 | |
*/ | |
double C[test_m*test_n] = {0}; | |
printf ("C\n\n"); | |
// http://www.math.utah.edu/software/lapack/lapack-blas/dgemm.html | |
/* | |
cblas_dgemm( | |
'N', // TRANSA: N - do not transpose A, T - transpose A | |
'N', // TRANSB: N - do not transpose A, T - transpose A | |
test_m, // A matrix: m rows, k columns | |
test_n, // B matrix: k rows, n columns | |
test_k, // C matrix: m rows, n columns | |
1.0, // alpha = 1.0 | |
A, // A source matrix | |
test_m, // true lenght of a column in A | |
// if TRANSA = 'N' then >= m | |
// if TRANSA = 'T' then >= k | |
B, // B source matrix | |
test_k, // true lenght of a column in B | |
// if TRANSB = 'N' then >= k | |
// if TRANSB = 'T' then >= n | |
0.0, // beta = 0.0 | |
C, // C destination matrix | |
test_m); // true lenght of a row in C | |
// must be >= m | |
*/ | |
// C = alpha*A*B+beta*C | |
cblas_dgemm( | |
CblasColMajor, | |
CblasNoTrans, // TRANSA: | |
CblasNoTrans, // TRANSB: | |
test_m, // A matrix: m rows, k columns | |
test_n, // B matrix: k rows, n columns | |
test_k, // C matrix: m rows, n columns | |
1.0, // alpha = 1.0 | |
A, // A source matrix | |
test_m, // true lenght of a column in A | |
// if TRANSA = 'N' then >= m | |
// if TRANSA = 'T' then >= k | |
B, // B source matrix | |
test_k, // true lenght of a column in B | |
// if TRANSB = 'N' then >= k | |
// if TRANSB = 'T' then >= n | |
0.0, // beta = 0.0 | |
C, // C destination matrix | |
test_m); // true lenght of a row in C | |
// must be >= m | |
printMatrix (C, test_m, test_n, test_col_major); | |
} | |
//! attempt the trickery | |
static void testMulRowMajor_trickery() | |
{ | |
printf ("==========================================================\n"); | |
printf ("Test BLAS multiply - row major order with trickery\n"); | |
printf ("==========================================================\n\n"); | |
/* MATRIX A[2x3] | |
Col 1 Col 2 Col 3 | |
Row 1: 1.0000 2.0000 3.0000 | |
Row 2: 4.0000 5.0000 6.0000 | |
*/ | |
printf ("A\n\n"); | |
double A[test_m*test_k] = { | |
1, 2, 3, | |
4, 5, 6 | |
}; | |
printMatrix (A, test_m, test_k, test_row_major); | |
/* MATRIX B[3x4] | |
Col 1 Col 2 Col 3 Col 4 | |
Row 1: 1.0000 2.0000 3.0000 4.0000 | |
Row 2: 0.1000 0.2000 0.3000 0.4000 | |
Row 3: 0.0100 0.0200 0.0300 0.0400 | |
*/ | |
printf ("B\n\n"); | |
double B[test_k*test_n] = { | |
1.00, 2.00, 3.00, 4.00, | |
0.10, 0.20, 0.30, 0.40, | |
0.01, 0.02, 0.03, 0.04 | |
}; | |
printMatrix (B, test_k, test_n, test_row_major); | |
/* MATRIX C[2x4] | |
Col 1 Col 2 Col 3 Col 4 | |
Row 1: 1.2300 2.4600 3.6900 4.9200 | |
Row 2: 4.5600 9.1200 13.6800 18.2400 | |
*/ | |
double C[test_m*test_n] = {0}; | |
printf ("C\n\n"); | |
// C = alpha*A*B+beta*C | |
cblas_dgemm( | |
CblasColMajor, | |
CblasNoTrans, // TRANSA: | |
CblasNoTrans, // TRANSB: | |
test_n, // B matrix: k rows, n columns | |
test_m, // A matrix: m rows, k columns | |
test_k, // C matrix: m rows, n columns | |
1.0, // alpha = 1.0 | |
B, // B source matrix | |
test_n, // true lenght of a column in B | |
// if TRANSB = 'N' then >= k | |
// if TRANSB = 'T' then >= n | |
A, // A source matrix | |
test_k, // true lenght of a column in A | |
// if TRANSA = 'N' then >= m | |
// if TRANSA = 'T' then >= k | |
0.0, // beta = 0.0 | |
C, // C destination matrix | |
test_n); // true lenght of a row in C | |
// must be >= m | |
printMatrix (C, test_m, test_n, test_row_major); | |
} | |
//! build in version | |
static void testMulRowMajor() | |
{ | |
printf ("==========================================================\n"); | |
printf ("Test BLAS multiply - row major order without trickery\n"); | |
printf ("==========================================================\n\n"); | |
/* MATRIX A[2x3] | |
Col 1 Col 2 Col 3 | |
Row 1: 1.0000 2.0000 3.0000 | |
Row 2: 4.0000 5.0000 6.0000 | |
*/ | |
printf ("A\n\n"); | |
double A[test_m*test_k] = { | |
1, 2, 3, | |
4, 5, 6 | |
}; | |
printMatrix (A, test_m, test_k, test_row_major); | |
/* MATRIX B[3x4] | |
Col 1 Col 2 Col 3 Col 4 | |
Row 1: 1.0000 2.0000 3.0000 4.0000 | |
Row 2: 0.1000 0.2000 0.3000 0.4000 | |
Row 3: 0.0100 0.0200 0.0300 0.0400 | |
*/ | |
printf ("B\n\n"); | |
double B[test_k*test_n] = { | |
1.00, 2.00, 3.00, 4.00, | |
0.10, 0.20, 0.30, 0.40, | |
0.01, 0.02, 0.03, 0.04 | |
}; | |
printMatrix (B, test_k, test_n, test_row_major); | |
/* MATRIX C[2x4] | |
Col 1 Col 2 Col 3 Col 4 | |
Row 1: 1.2300 2.4600 3.6900 4.9200 | |
Row 2: 4.5600 9.1200 13.6800 18.2400 | |
*/ | |
double C[test_m*test_n] = {0}; | |
printf ("C\n\n"); | |
// C = alpha*A*B+beta*C | |
cblas_dgemm( | |
CblasRowMajor, | |
CblasNoTrans, // TRANSA: | |
CblasNoTrans, // TRANSB: | |
test_m, // A matrix: m rows, k columns | |
test_n, // B matrix: k rows, n columns | |
test_k, // C matrix: m rows, n columns | |
1.0, // alpha = 1.0 | |
A, // A source matrix | |
test_k, // true lenght of a column in A | |
// if TRANSA = 'N' then >= m | |
// if TRANSA = 'T' then >= k | |
B, // B source matrix | |
test_n, // true lenght of a column in B | |
// if TRANSB = 'N' then >= k | |
// if TRANSB = 'T' then >= n | |
0.0, // beta = 0.0 | |
C, // C destination matrix | |
test_n); // true lenght of a row in C | |
// must be >= m | |
printMatrix (C, test_m, test_n, test_col_major); | |
} | |
#undef test_m | |
#undef test_n | |
#undef test_k | |
#define test_m 500 | |
#define test_k 600 | |
#define test_n 700 | |
#define test_spins 5 | |
// --------------------------------------------------------------------- | |
#define IMPLEMENTATION_ROW_MAJOR \ | |
cblas_dgemm( \ | |
CblasRowMajor, CblasNoTrans, CblasNoTrans, test_m, test_n, \ | |
test_k, 1.0, A, test_k, B, test_n, 0.0, C, test_n); \ | |
startTime = getRealTime (); \ | |
for(i=0; i<test_spins;i++) { \ | |
cblas_dgemm( \ | |
CblasRowMajor, CblasNoTrans, CblasNoTrans, test_m, test_n, \ | |
test_k, 1.0, A, test_k, B, test_n, 0.0, C, test_n); \ | |
} \ | |
endTime = getRealTime (); \ | |
printf ("- Build-in implementation - row major (%d) = %lf\n", test_spins, (endTime - startTime) ); | |
// --------------------------------------------------------------------- | |
#define IMPLEMENTATION_TRICKERY \ | |
cblas_dgemm( \ | |
CblasColMajor, CblasNoTrans, CblasNoTrans, test_n, test_m, \ | |
test_k, 1.0, B, test_n, A, test_k, 0.0, C, test_n); \ | |
startTime = getRealTime (); \ | |
for(i=0; i<test_spins;i++) { \ | |
cblas_dgemm( \ | |
CblasColMajor, CblasNoTrans, CblasNoTrans, test_n, test_m, \ | |
test_k, 1.0, B, test_n, A, test_k, 0.0, C, test_n); \ | |
} \ | |
endTime = getRealTime (); \ | |
printf ("- Trickery implementation (%d)= %lf\n", test_spins, (endTime - startTime) ); | |
// --------------------------------------------------------------------- | |
#define IMPLEMENTATION_COL_MAJOR \ | |
cblas_dgemm( \ | |
CblasColMajor, CblasNoTrans, CblasNoTrans, test_m, test_n, \ | |
test_k, 1.0, A, test_m, B, test_k, 0.0, C, test_m); \ | |
startTime = getRealTime (); \ | |
for(i=0; i<test_spins;i++) { \ | |
cblas_dgemm( \ | |
CblasColMajor, CblasNoTrans, CblasNoTrans, test_m, test_n, \ | |
test_k, 1.0, A, test_m, B, test_k, 0.0, C, test_m); \ | |
} \ | |
endTime = getRealTime (); \ | |
printf ("- Build-in implementation - column major (%d) = %lf\n", test_spins, (endTime - startTime) ); | |
// --------------------------------------------------------------------- | |
//! test my undestanding of the parameters | |
void compareBuildInWithTrickery() | |
{ | |
printf ("==========================================================\n"); | |
printf ("Compare these two methods\n"); | |
printf ("==========================================================\n\n"); | |
// srand((unsigned)time(NULL)); | |
double startTime, endTime; | |
int i; | |
double *A = (double*)malloc(test_m*test_k*sizeof(double)); | |
int a_tot = test_m*test_k; | |
double *iter = A; | |
for (i = 0; i<a_tot; i++) { | |
*iter = ((double)rand()/(double)RAND_MAX);; | |
iter++; | |
} | |
double *B = (double*)malloc(test_k*test_n*sizeof(double)); | |
int b_tot = test_k*test_n; | |
iter = B; | |
for (i = 0; i<b_tot; i++) { | |
*iter = ((double)rand()/(double)RAND_MAX);; | |
iter++; | |
} | |
double *C = (double*)malloc(test_m*test_n*sizeof(double)); | |
iter = C; | |
int c_tot = test_m*test_n; | |
for (i = 0; i<c_tot; i++) { | |
*iter = 0.0; | |
iter++; | |
} | |
IMPLEMENTATION_ROW_MAJOR; | |
IMPLEMENTATION_TRICKERY; | |
IMPLEMENTATION_COL_MAJOR; | |
IMPLEMENTATION_TRICKERY; | |
IMPLEMENTATION_ROW_MAJOR; | |
IMPLEMENTATION_COL_MAJOR; | |
IMPLEMENTATION_ROW_MAJOR; | |
IMPLEMENTATION_COL_MAJOR; | |
IMPLEMENTATION_TRICKERY; | |
IMPLEMENTATION_COL_MAJOR; | |
IMPLEMENTATION_ROW_MAJOR; | |
IMPLEMENTATION_TRICKERY; | |
IMPLEMENTATION_COL_MAJOR; | |
IMPLEMENTATION_TRICKERY; | |
IMPLEMENTATION_ROW_MAJOR; | |
IMPLEMENTATION_TRICKERY; | |
IMPLEMENTATION_COL_MAJOR; | |
IMPLEMENTATION_ROW_MAJOR; | |
} | |
int main(int argc, char **argv) | |
{ | |
testPrintMatrix(); | |
testMulColMajorExtraSpace(); | |
testMulColMajor(); | |
testMulRowMajor_trickery(); | |
testMulRowMajor(); | |
compareBuildInWithTrickery(); | |
return 0; | |
} | |
/* | |
* Author: David Robert Nadeau | |
* Site: http://NadeauSoftware.com/ | |
* License: Creative Commons Attribution 3.0 Unported License | |
* http://creativecommons.org/licenses/by/3.0/deed.en_US | |
* | |
* http://nadeausoftware.com/articles/2012/04/c_c_tip_how_measure_elapsed_real_time_benchmarking | |
*/ | |
#if defined(_WIN32) | |
#include <Windows.h> | |
#elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__)) | |
#include <unistd.h> /* POSIX flags */ | |
#include <time.h> /* clock_gettime(), time() */ | |
#include <sys/time.h> /* gethrtime(), gettimeofday() */ | |
#if defined(__MACH__) && defined(__APPLE__) | |
#include <mach/mach.h> | |
#include <mach/mach_time.h> | |
#endif | |
#else | |
#error "Unable to define getRealTime( ) for an unknown OS." | |
#endif | |
/** | |
* Returns the real time, in seconds, or -1.0 if an error occurred. | |
* | |
* Time is measured since an arbitrary and OS-dependent start time. | |
* The returned real time is only useful for computing an elapsed time | |
* between two calls to this function. | |
*/ | |
double getRealTime( ) | |
{ | |
#if defined(_WIN32) | |
FILETIME tm; | |
ULONGLONG t; | |
#if defined(NTDDI_WIN8) && NTDDI_VERSION >= NTDDI_WIN8 | |
/* Windows 8, Windows Server 2012 and later. ---------------- */ | |
GetSystemTimePreciseAsFileTime( &tm ); | |
#else | |
/* Windows 2000 and later. ---------------------------------- */ | |
GetSystemTimeAsFileTime( &tm ); | |
#endif | |
t = ((ULONGLONG)tm.dwHighDateTime << 32) | (ULONGLONG)tm.dwLowDateTime; | |
return (double)t / 10000000.0; | |
#elif (defined(__hpux) || defined(hpux)) || ((defined(__sun__) || defined(__sun) || defined(sun)) && (defined(__SVR4) || defined(__svr4__))) | |
/* HP-UX, Solaris. ------------------------------------------ */ | |
return (double)gethrtime( ) / 1000000000.0; | |
#elif defined(__MACH__) && defined(__APPLE__) | |
/* OSX. ----------------------------------------------------- */ | |
static double timeConvert = 0.0; | |
if ( timeConvert == 0.0 ) | |
{ | |
mach_timebase_info_data_t timeBase; | |
(void)mach_timebase_info( &timeBase ); | |
timeConvert = (double)timeBase.numer / | |
(double)timeBase.denom / | |
1000000000.0; | |
} | |
return (double)mach_absolute_time( ) * timeConvert; | |
#elif defined(_POSIX_VERSION) | |
/* POSIX. --------------------------------------------------- */ | |
#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) | |
{ | |
struct timespec ts; | |
#if defined(CLOCK_MONOTONIC_PRECISE) | |
/* BSD. --------------------------------------------- */ | |
const clockid_t id = CLOCK_MONOTONIC_PRECISE; | |
#elif defined(CLOCK_MONOTONIC_RAW) | |
/* Linux. ------------------------------------------- */ | |
const clockid_t id = CLOCK_MONOTONIC_RAW; | |
#elif defined(CLOCK_HIGHRES) | |
/* Solaris. ----------------------------------------- */ | |
const clockid_t id = CLOCK_HIGHRES; | |
#elif defined(CLOCK_MONOTONIC) | |
/* AIX, BSD, Linux, POSIX, Solaris. ----------------- */ | |
const clockid_t id = CLOCK_MONOTONIC; | |
#elif defined(CLOCK_REALTIME) | |
/* AIX, BSD, HP-UX, Linux, POSIX. ------------------- */ | |
const clockid_t id = CLOCK_REALTIME; | |
#else | |
const clockid_t id = (clockid_t)-1; /* Unknown. */ | |
#endif /* CLOCK_* */ | |
if ( id != (clockid_t)-1 && clock_gettime( id, &ts ) != -1 ) | |
return (double)ts.tv_sec + | |
(double)ts.tv_nsec / 1000000000.0; | |
/* Fall thru. */ | |
} | |
#endif /* _POSIX_TIMERS */ | |
/* AIX, BSD, Cygwin, HP-UX, Linux, OSX, POSIX, Solaris. ----- */ | |
struct timeval tm; | |
gettimeofday( &tm, NULL ); | |
return (double)tm.tv_sec + (double)tm.tv_usec / 1000000.0; | |
#else | |
return -1.0; /* Failed. */ | |
#endif | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment