Skip to content

Instantly share code, notes, and snippets.

@TNick
Last active February 12, 2019 15:33
Show Gist options
  • Save TNick/8849997 to your computer and use it in GitHub Desktop.
Save TNick/8849997 to your computer and use it in GitHub Desktop.
Some routines to test blas and some code arround it
/*
* Author: Nicu Tofan
* License: BSD
*
* See below for getRealTime() license.
*/
#include <cblas.h>
#include <stdlib.h>
#include <stdio.h>
#define test_m 2
#define test_k 3
#define test_n 4
#define test_m_extra 5
#define test_k_extra 7
#define test_n_extra 8
#define test_row_major 1
#define test_col_major 0
double getRealTime();
//! prints a matrix in either row major or column major order
static void printMatrix (double *data, int rows, int cols, int row_major)
{
int i,j;
// column header
printf (" ");
for(j=0; j<cols;j++) {
printf (" Col %2d ", j+1);
}
printf ("\n");
for(i=0; i<rows;i++) {
printf ("Row %2d: ", i+1);
for(j=0; j<cols;j++) {
if (row_major) {
printf ("%7.4f ", data[i*cols+j]);
} else {
printf ("%7.4f ", data[j*rows+i]);
}
}
printf ("\n");
}
printf ("\n--------------------------------------------------------\n\n");
}
//! test printMatrix() to make sure it works
static void testPrintMatrix()
{
printf ("==========================================================\n");
printf ("Test print function\n");
printf ("==========================================================\n\n");
printf ("Matrix in row major order\n\n");
double A[2*3] = {
1, 2, 3,
4, 5, 6
};
printMatrix (A, 2, 3, test_row_major);
printf ("Matrix in column major order\n\n");
double B[2*3] = {
1, 4,
2, 5,
3, 6
};
printMatrix (B, 2, 3, test_col_major);
}
//! testing the extra-space
static void testMulColMajorExtraSpace()
{
printf ("==========================================================\n");
printf ("Extra space in matrix not used in ops\n");
printf ("==========================================================\n\n");
/* MATRIX A[2x3]
Col 1 Col 2 Col 3
Row 1: 1.0000 2.0000 3.0000
Row 2: 4.0000 5.0000 6.0000
*/
printf ("A\n\n");
double A[] = {
1, 4, 9, 9, 9, // 9's will not affect the computation
2, 5, 9, 9, 9,
3, 6, 9, 9, 9
};
printMatrix (A, test_m_extra, test_k, test_col_major);
/* MATRIX B[3x4]
Col 1 Col 2 Col 3 Col 4
Row 1: 1.0000 2.0000 3.0000 4.0000
Row 2: 0.1000 0.2000 0.3000 0.4000
Row 3: 0.0100 0.0200 0.0300 0.0400
*/
printf ("B\n\n");
double B[] = {
1.0, 0.1, 0.01, 9.0, 9.0, 9.0, 9.0, // 9's will not affect the computation
2.0, 0.2, 0.02, 9.0, 9.0, 9.0, 9.0,
3.0, 0.3, 0.03, 9.0, 9.0, 9.0, 9.0,
4.0, 0.4, 0.04, 9.0, 9.0, 9.0, 9.0,
};
printMatrix (B, test_k_extra, test_n, test_col_major);
/* MATRIX C[2x4]
Col 1 Col 2 Col 3 Col 4
Row 1: 1.2300 2.4600 3.6900 4.9200
Row 2: 4.5600 9.1200 13.6800 18.2400
*/
double C[test_n_extra*test_n] = {0};
printf ("C\n\n");
// C = alpha*A*B+beta*C
cblas_dgemm(
CblasColMajor,
CblasNoTrans, // TRANSA:
CblasNoTrans, // TRANSB:
test_m, // A matrix: m rows, k columns
test_n, // B matrix: k rows, n columns
test_k, // C matrix: m rows, n columns
1.0, // alpha = 1.0
A, // A source matrix
test_m_extra, // true lenght of a column in A
// if TRANSA = 'N' then >= m
// if TRANSA = 'T' then >= k
B, // B source matrix
test_k_extra, // true lenght of a column in B
// if TRANSB = 'N' then >= k
// if TRANSB = 'T' then >= n
0.0, // beta = 0.0
C, // C destination matrix
test_n_extra); // true lenght of a row in C
// must be >= m
printMatrix (C, test_n_extra, test_n, test_col_major);
}
//! blas expects column major order
static void testMulColMajor()
{
printf ("==========================================================\n");
printf ("Test BLAS multiply - column major order\n");
printf ("==========================================================\n\n");
/* MATRIX A[2x3]
Col 1 Col 2 Col 3
Row 1: 1.0000 2.0000 3.0000
Row 2: 4.0000 5.0000 6.0000
*/
printf ("A\n\n");
double A[test_m*test_k] = {
1, 4,
2, 5,
3, 6
};
printMatrix (A, test_m, test_k, test_col_major);
/* MATRIX B[3x4]
Col 1 Col 2 Col 3 Col 4
Row 1: 1.0000 2.0000 3.0000 4.0000
Row 2: 0.1000 0.2000 0.3000 0.4000
Row 3: 0.0100 0.0200 0.0300 0.0400
*/
printf ("B\n\n");
double B[test_k*test_n] = {
1.0, 0.1, 0.01,
2.0, 0.2, 0.02,
3.0, 0.3, 0.03,
4.0, 0.4, 0.04
};
printMatrix (B, test_k, test_n, test_col_major);
/* MATRIX C[2x4]
Col 1 Col 2 Col 3 Col 4
Row 1: 1.2300 2.4600 3.6900 4.9200
Row 2: 4.5600 9.1200 13.6800 18.2400
*/
double C[test_m*test_n] = {0};
printf ("C\n\n");
// http://www.math.utah.edu/software/lapack/lapack-blas/dgemm.html
/*
cblas_dgemm(
'N', // TRANSA: N - do not transpose A, T - transpose A
'N', // TRANSB: N - do not transpose A, T - transpose A
test_m, // A matrix: m rows, k columns
test_n, // B matrix: k rows, n columns
test_k, // C matrix: m rows, n columns
1.0, // alpha = 1.0
A, // A source matrix
test_m, // true lenght of a column in A
// if TRANSA = 'N' then >= m
// if TRANSA = 'T' then >= k
B, // B source matrix
test_k, // true lenght of a column in B
// if TRANSB = 'N' then >= k
// if TRANSB = 'T' then >= n
0.0, // beta = 0.0
C, // C destination matrix
test_m); // true lenght of a row in C
// must be >= m
*/
// C = alpha*A*B+beta*C
cblas_dgemm(
CblasColMajor,
CblasNoTrans, // TRANSA:
CblasNoTrans, // TRANSB:
test_m, // A matrix: m rows, k columns
test_n, // B matrix: k rows, n columns
test_k, // C matrix: m rows, n columns
1.0, // alpha = 1.0
A, // A source matrix
test_m, // true lenght of a column in A
// if TRANSA = 'N' then >= m
// if TRANSA = 'T' then >= k
B, // B source matrix
test_k, // true lenght of a column in B
// if TRANSB = 'N' then >= k
// if TRANSB = 'T' then >= n
0.0, // beta = 0.0
C, // C destination matrix
test_m); // true lenght of a row in C
// must be >= m
printMatrix (C, test_m, test_n, test_col_major);
}
//! attempt the trickery
static void testMulRowMajor_trickery()
{
printf ("==========================================================\n");
printf ("Test BLAS multiply - row major order with trickery\n");
printf ("==========================================================\n\n");
/* MATRIX A[2x3]
Col 1 Col 2 Col 3
Row 1: 1.0000 2.0000 3.0000
Row 2: 4.0000 5.0000 6.0000
*/
printf ("A\n\n");
double A[test_m*test_k] = {
1, 2, 3,
4, 5, 6
};
printMatrix (A, test_m, test_k, test_row_major);
/* MATRIX B[3x4]
Col 1 Col 2 Col 3 Col 4
Row 1: 1.0000 2.0000 3.0000 4.0000
Row 2: 0.1000 0.2000 0.3000 0.4000
Row 3: 0.0100 0.0200 0.0300 0.0400
*/
printf ("B\n\n");
double B[test_k*test_n] = {
1.00, 2.00, 3.00, 4.00,
0.10, 0.20, 0.30, 0.40,
0.01, 0.02, 0.03, 0.04
};
printMatrix (B, test_k, test_n, test_row_major);
/* MATRIX C[2x4]
Col 1 Col 2 Col 3 Col 4
Row 1: 1.2300 2.4600 3.6900 4.9200
Row 2: 4.5600 9.1200 13.6800 18.2400
*/
double C[test_m*test_n] = {0};
printf ("C\n\n");
// C = alpha*A*B+beta*C
cblas_dgemm(
CblasColMajor,
CblasNoTrans, // TRANSA:
CblasNoTrans, // TRANSB:
test_n, // B matrix: k rows, n columns
test_m, // A matrix: m rows, k columns
test_k, // C matrix: m rows, n columns
1.0, // alpha = 1.0
B, // B source matrix
test_n, // true lenght of a column in B
// if TRANSB = 'N' then >= k
// if TRANSB = 'T' then >= n
A, // A source matrix
test_k, // true lenght of a column in A
// if TRANSA = 'N' then >= m
// if TRANSA = 'T' then >= k
0.0, // beta = 0.0
C, // C destination matrix
test_n); // true lenght of a row in C
// must be >= m
printMatrix (C, test_m, test_n, test_row_major);
}
//! build in version
static void testMulRowMajor()
{
printf ("==========================================================\n");
printf ("Test BLAS multiply - row major order without trickery\n");
printf ("==========================================================\n\n");
/* MATRIX A[2x3]
Col 1 Col 2 Col 3
Row 1: 1.0000 2.0000 3.0000
Row 2: 4.0000 5.0000 6.0000
*/
printf ("A\n\n");
double A[test_m*test_k] = {
1, 2, 3,
4, 5, 6
};
printMatrix (A, test_m, test_k, test_row_major);
/* MATRIX B[3x4]
Col 1 Col 2 Col 3 Col 4
Row 1: 1.0000 2.0000 3.0000 4.0000
Row 2: 0.1000 0.2000 0.3000 0.4000
Row 3: 0.0100 0.0200 0.0300 0.0400
*/
printf ("B\n\n");
double B[test_k*test_n] = {
1.00, 2.00, 3.00, 4.00,
0.10, 0.20, 0.30, 0.40,
0.01, 0.02, 0.03, 0.04
};
printMatrix (B, test_k, test_n, test_row_major);
/* MATRIX C[2x4]
Col 1 Col 2 Col 3 Col 4
Row 1: 1.2300 2.4600 3.6900 4.9200
Row 2: 4.5600 9.1200 13.6800 18.2400
*/
double C[test_m*test_n] = {0};
printf ("C\n\n");
// C = alpha*A*B+beta*C
cblas_dgemm(
CblasRowMajor,
CblasNoTrans, // TRANSA:
CblasNoTrans, // TRANSB:
test_m, // A matrix: m rows, k columns
test_n, // B matrix: k rows, n columns
test_k, // C matrix: m rows, n columns
1.0, // alpha = 1.0
A, // A source matrix
test_k, // true lenght of a column in A
// if TRANSA = 'N' then >= m
// if TRANSA = 'T' then >= k
B, // B source matrix
test_n, // true lenght of a column in B
// if TRANSB = 'N' then >= k
// if TRANSB = 'T' then >= n
0.0, // beta = 0.0
C, // C destination matrix
test_n); // true lenght of a row in C
// must be >= m
printMatrix (C, test_m, test_n, test_col_major);
}
#undef test_m
#undef test_n
#undef test_k
#define test_m 500
#define test_k 600
#define test_n 700
#define test_spins 5
// ---------------------------------------------------------------------
#define IMPLEMENTATION_ROW_MAJOR \
cblas_dgemm( \
CblasRowMajor, CblasNoTrans, CblasNoTrans, test_m, test_n, \
test_k, 1.0, A, test_k, B, test_n, 0.0, C, test_n); \
startTime = getRealTime (); \
for(i=0; i<test_spins;i++) { \
cblas_dgemm( \
CblasRowMajor, CblasNoTrans, CblasNoTrans, test_m, test_n, \
test_k, 1.0, A, test_k, B, test_n, 0.0, C, test_n); \
} \
endTime = getRealTime (); \
printf ("- Build-in implementation - row major (%d) = %lf\n", test_spins, (endTime - startTime) );
// ---------------------------------------------------------------------
#define IMPLEMENTATION_TRICKERY \
cblas_dgemm( \
CblasColMajor, CblasNoTrans, CblasNoTrans, test_n, test_m, \
test_k, 1.0, B, test_n, A, test_k, 0.0, C, test_n); \
startTime = getRealTime (); \
for(i=0; i<test_spins;i++) { \
cblas_dgemm( \
CblasColMajor, CblasNoTrans, CblasNoTrans, test_n, test_m, \
test_k, 1.0, B, test_n, A, test_k, 0.0, C, test_n); \
} \
endTime = getRealTime (); \
printf ("- Trickery implementation (%d)= %lf\n", test_spins, (endTime - startTime) );
// ---------------------------------------------------------------------
#define IMPLEMENTATION_COL_MAJOR \
cblas_dgemm( \
CblasColMajor, CblasNoTrans, CblasNoTrans, test_m, test_n, \
test_k, 1.0, A, test_m, B, test_k, 0.0, C, test_m); \
startTime = getRealTime (); \
for(i=0; i<test_spins;i++) { \
cblas_dgemm( \
CblasColMajor, CblasNoTrans, CblasNoTrans, test_m, test_n, \
test_k, 1.0, A, test_m, B, test_k, 0.0, C, test_m); \
} \
endTime = getRealTime (); \
printf ("- Build-in implementation - column major (%d) = %lf\n", test_spins, (endTime - startTime) );
// ---------------------------------------------------------------------
//! test my undestanding of the parameters
void compareBuildInWithTrickery()
{
printf ("==========================================================\n");
printf ("Compare these two methods\n");
printf ("==========================================================\n\n");
// srand((unsigned)time(NULL));
double startTime, endTime;
int i;
double *A = (double*)malloc(test_m*test_k*sizeof(double));
int a_tot = test_m*test_k;
double *iter = A;
for (i = 0; i<a_tot; i++) {
*iter = ((double)rand()/(double)RAND_MAX);;
iter++;
}
double *B = (double*)malloc(test_k*test_n*sizeof(double));
int b_tot = test_k*test_n;
iter = B;
for (i = 0; i<b_tot; i++) {
*iter = ((double)rand()/(double)RAND_MAX);;
iter++;
}
double *C = (double*)malloc(test_m*test_n*sizeof(double));
iter = C;
int c_tot = test_m*test_n;
for (i = 0; i<c_tot; i++) {
*iter = 0.0;
iter++;
}
IMPLEMENTATION_ROW_MAJOR;
IMPLEMENTATION_TRICKERY;
IMPLEMENTATION_COL_MAJOR;
IMPLEMENTATION_TRICKERY;
IMPLEMENTATION_ROW_MAJOR;
IMPLEMENTATION_COL_MAJOR;
IMPLEMENTATION_ROW_MAJOR;
IMPLEMENTATION_COL_MAJOR;
IMPLEMENTATION_TRICKERY;
IMPLEMENTATION_COL_MAJOR;
IMPLEMENTATION_ROW_MAJOR;
IMPLEMENTATION_TRICKERY;
IMPLEMENTATION_COL_MAJOR;
IMPLEMENTATION_TRICKERY;
IMPLEMENTATION_ROW_MAJOR;
IMPLEMENTATION_TRICKERY;
IMPLEMENTATION_COL_MAJOR;
IMPLEMENTATION_ROW_MAJOR;
}
int main(int argc, char **argv)
{
testPrintMatrix();
testMulColMajorExtraSpace();
testMulColMajor();
testMulRowMajor_trickery();
testMulRowMajor();
compareBuildInWithTrickery();
return 0;
}
/*
* Author: David Robert Nadeau
* Site: http://NadeauSoftware.com/
* License: Creative Commons Attribution 3.0 Unported License
* http://creativecommons.org/licenses/by/3.0/deed.en_US
*
* http://nadeausoftware.com/articles/2012/04/c_c_tip_how_measure_elapsed_real_time_benchmarking
*/
#if defined(_WIN32)
#include <Windows.h>
#elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__))
#include <unistd.h> /* POSIX flags */
#include <time.h> /* clock_gettime(), time() */
#include <sys/time.h> /* gethrtime(), gettimeofday() */
#if defined(__MACH__) && defined(__APPLE__)
#include <mach/mach.h>
#include <mach/mach_time.h>
#endif
#else
#error "Unable to define getRealTime( ) for an unknown OS."
#endif
/**
* Returns the real time, in seconds, or -1.0 if an error occurred.
*
* Time is measured since an arbitrary and OS-dependent start time.
* The returned real time is only useful for computing an elapsed time
* between two calls to this function.
*/
double getRealTime( )
{
#if defined(_WIN32)
FILETIME tm;
ULONGLONG t;
#if defined(NTDDI_WIN8) && NTDDI_VERSION >= NTDDI_WIN8
/* Windows 8, Windows Server 2012 and later. ---------------- */
GetSystemTimePreciseAsFileTime( &tm );
#else
/* Windows 2000 and later. ---------------------------------- */
GetSystemTimeAsFileTime( &tm );
#endif
t = ((ULONGLONG)tm.dwHighDateTime << 32) | (ULONGLONG)tm.dwLowDateTime;
return (double)t / 10000000.0;
#elif (defined(__hpux) || defined(hpux)) || ((defined(__sun__) || defined(__sun) || defined(sun)) && (defined(__SVR4) || defined(__svr4__)))
/* HP-UX, Solaris. ------------------------------------------ */
return (double)gethrtime( ) / 1000000000.0;
#elif defined(__MACH__) && defined(__APPLE__)
/* OSX. ----------------------------------------------------- */
static double timeConvert = 0.0;
if ( timeConvert == 0.0 )
{
mach_timebase_info_data_t timeBase;
(void)mach_timebase_info( &timeBase );
timeConvert = (double)timeBase.numer /
(double)timeBase.denom /
1000000000.0;
}
return (double)mach_absolute_time( ) * timeConvert;
#elif defined(_POSIX_VERSION)
/* POSIX. --------------------------------------------------- */
#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0)
{
struct timespec ts;
#if defined(CLOCK_MONOTONIC_PRECISE)
/* BSD. --------------------------------------------- */
const clockid_t id = CLOCK_MONOTONIC_PRECISE;
#elif defined(CLOCK_MONOTONIC_RAW)
/* Linux. ------------------------------------------- */
const clockid_t id = CLOCK_MONOTONIC_RAW;
#elif defined(CLOCK_HIGHRES)
/* Solaris. ----------------------------------------- */
const clockid_t id = CLOCK_HIGHRES;
#elif defined(CLOCK_MONOTONIC)
/* AIX, BSD, Linux, POSIX, Solaris. ----------------- */
const clockid_t id = CLOCK_MONOTONIC;
#elif defined(CLOCK_REALTIME)
/* AIX, BSD, HP-UX, Linux, POSIX. ------------------- */
const clockid_t id = CLOCK_REALTIME;
#else
const clockid_t id = (clockid_t)-1; /* Unknown. */
#endif /* CLOCK_* */
if ( id != (clockid_t)-1 && clock_gettime( id, &ts ) != -1 )
return (double)ts.tv_sec +
(double)ts.tv_nsec / 1000000000.0;
/* Fall thru. */
}
#endif /* _POSIX_TIMERS */
/* AIX, BSD, Cygwin, HP-UX, Linux, OSX, POSIX, Solaris. ----- */
struct timeval tm;
gettimeofday( &tm, NULL );
return (double)tm.tv_sec + (double)tm.tv_usec / 1000000.0;
#else
return -1.0; /* Failed. */
#endif
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment