Last active
April 13, 2016 22:08
-
-
Save diegopso/8381fe15a9b6ee22bd1410191f1d2387 to your computer and use it in GitHub Desktop.
Laboratório 6 Tópico em Linguagem de Programação - UNICAMP 2016-1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*************************************************************************** | |
* Copyright (C) 2014 by Edson Borin and Raul Baldin * | |
* [email protected] * | |
* * | |
* This program is free software; you can redistribute it and/or modify * | |
* it under the terms of the GNU General Public License as published by * | |
* the Free Software Foundation; either version 2 of the License, or * | |
* (at your option) any later version. * | |
* * | |
* This program is distributed in the hope that it will be useful, * | |
* but WITHOUT ANY WARRANTY; without even the implied warranty of * | |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * | |
* GNU General Public License for more details. * | |
* * | |
* You should have received a copy of the GNU General Public License * | |
* along with this program; if not, write to the * | |
* Free Software Foundation, Inc., * | |
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * | |
***************************************************************************/ | |
#include <float.h> // FLT_MAX | |
#include <stdio.h> // printf | |
/* Matrix dimensions. */
#ifndef MATRIX_M | |
#define MATRIX_M 5 | |
#endif | |
#ifndef MATRIX_K | |
#define MATRIX_K 5 | |
#endif | |
#ifndef MATRIX_N | |
#define MATRIX_N 5 | |
#endif | |
/* Array data type. */ | |
#ifndef DATATYPE | |
#define DATATYPE double | |
#endif | |
/* Number of times each kernel will be executed. */ | |
#define RPT 10 | |
/* Useful macros! */ | |
#define MIN(x,y) ((x)<(y)?(x):(y)) | |
#define MAX(x,y) ((x)>(y)?(x):(y)) | |
#define XSTR(s) STR(s) | |
#define STR(s) #s | |
/*------------------------------------------------*/ | |
/* Code to remove data from the processor caches. */ | |
#define KB (1024)
#define MB (1024 * KB)
#define GB (1024 * MB)
#define LARGEST_CACHE_SZ (16 * MB)
/*
 * Scratch buffer of LARGEST_CACHE_SZ bytes (presumably chosen to be at least
 * as large as the biggest cache of the target machine -- confirm for yours).
 * It is volatile so that the compiler must perform every load and store in
 * clean_cache(); a plain static buffer that is only read in order to be
 * written back can legally be optimized away together with the loop.
 */
static volatile unsigned char dummy_buffer[LARGEST_CACHE_SZ];

/*
 * Reads and rewrites every byte of dummy_buffer. Called before each timed
 * run so that the run does not start with its operands already cached.
 */
void clean_cache(void)
{
  size_t i;

  for (i = 0; i < LARGEST_CACHE_SZ; i++)
    dummy_buffer[i] = (unsigned char) (dummy_buffer[i] + 1);
}
/*------------------------------------------------*/ | |
/* Code to read the wall clock time. */ | |
#include <sys/time.h> | |
/*
 * Returns the current wall-clock time in seconds, as reported by
 * gettimeofday() (microsecond granularity). Returns 0.0 if the clock
 * cannot be read.
 */
double mysecond(void)
{
  struct timeval tp;

  /* The timezone argument is obsolete: POSIX leaves the behaviour
     unspecified when it is not a null pointer, so always pass NULL. */
  if (gettimeofday(&tp, NULL) != 0)
    return 0.0;

  return (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6;
}
/*------------------------------------------------*/ | |
/* Numeric kernels and data. */
/* Matrices. */ | |
DATATYPE ma[MATRIX_M*MATRIX_K]; | |
DATATYPE mb[MATRIX_K*MATRIX_N]; | |
DATATYPE mc[MATRIX_M*MATRIX_N]; | |
DATATYPE mbt[MATRIX_K*MATRIX_N]; | |
/* Kernel name. */ | |
const char* kernel_name = "mat_mult_naive"; | |
/*
 * Blocked (tiled) matrix multiply-accumulate: C += A * B.
 *
 *   m, n, k : C is m x n, A is m x k; each element of C sums k products.
 *   A       : m x k, row-major; element (i,p) is A[i*k + p].
 *   B       : right-hand operand stored TRANSPOSED, i.e. n x k row-major;
 *             the value multiplied with A(i,p) for column j is B[j*k + p].
 *   C       : m x n, row-major; element (i,j) is C[i*n + j]. Results are
 *             ADDED to C, so the caller must clear it beforehand to obtain
 *             a plain product.
 *
 * The iteration space is walked in BLK x BLK x BLK tiles. Tiles on the
 * border are clipped, so the dimensions need not be multiples of BLK.
 * If any dimension is zero or negative the call does nothing.
 *
 * Compile with -DMAT_MULT_TRACE to print every multiply-add. The trace is
 * off by default because output from the innermost loop would dominate any
 * timing taken around this function.
 */
void mat_mult_naive (int m, int n, int k, double *A, double *B, double *C)
{
  enum { BLK = 2 }; /* Tile edge length. */

  /* Indices below are unsigned; without this guard a negative dimension
     would be converted to a huge loop bound and index out of range. */
  if (m <= 0 || n <= 0 || k <= 0)
    return;

  const size_t rows = (size_t) m;
  const size_t cols = (size_t) n;
  const size_t depth = (size_t) k;

  for (size_t jk = 0; jk < cols; jk += BLK) {
    const size_t j_end = (cols - jk < BLK) ? cols : jk + BLK;

    for (size_t ik = 0; ik < rows; ik += BLK) {
      const size_t i_end = (rows - ik < BLK) ? rows : ik + BLK;

      for (size_t pk = 0; pk < depth; pk += BLK) {
        const size_t p_end = (depth - pk < BLK) ? depth : pk + BLK;

        for (size_t i = ik; i < i_end; i++) {
          for (size_t j = jk; j < j_end; j++) {
            /* Partial dot product over the current slice of the k axis. */
            double t = 0;

            for (size_t p = pk; p < p_end; p++) {
              t += A[i*depth + p] * B[j*depth + p];
#ifdef MAT_MULT_TRACE
              printf("C[%zu][%zu] += A[%zu][%zu] * B[%zu][%zu]\n", i, j, i, p, j, p);
#endif
            }
            C[i*cols + j] += t;
          }
        }
      }
    }
  }
}
/*
 * Writes the transpose of B into TB.
 *
 *   B  : m x n, row-major (element (row,col) at B[row*n + col]).
 *   TB : n x m, row-major (element (col,row) at TB[col*m + row]).
 *
 * Nothing is written when m or n is not positive.
 */
void transpor(int m, int n, double *B, double *TB)
{
  int row = 0;

  while (row < m) {
    int col = 0;

    while (col < n) {
      TB[col*m + row] = B[row*n + col];
      ++col;
    }
    ++row;
  }
}
void init_matrices() | |
{ | |
int i,j; | |
for (i=0; i<MATRIX_N; i++) | |
for (j=0; j<MATRIX_M;j++) | |
mc[i*MATRIX_M+j] = 0; | |
for (i=0; i<MATRIX_N; i++) | |
for (j=0; j<MATRIX_K;j++) | |
mb[i*MATRIX_K+j] = i*j; | |
for (i=0; i<MATRIX_M; i++) | |
for (j=0; j<MATRIX_K;j++) | |
ma[i*MATRIX_K+j] = i*j; | |
} | |
void kernel() | |
{ | |
mat_mult_naive (MATRIX_M, MATRIX_N, MATRIX_K, ma, mbt, mc); | |
} | |
/* Amount of bytes accessed: (2 (read A, read B) * M*N*K + 1 (write C) * M*N ) * element size (in bytes) */ | |
double bytes = (2*(MATRIX_M * MATRIX_N * MATRIX_K) + 1*(MATRIX_M * MATRIX_N)) * sizeof(DATATYPE); | |
double fops = (MATRIX_M * MATRIX_N * MATRIX_K) * 2 /* 1 mult + 1 sum */; | |
/* -----------------------------*/ | |
int main() | |
{ | |
unsigned long long k; | |
double times[RPT]; | |
double mintime = FLT_MAX; | |
double avgtime = 0; | |
double maxtime = 0; | |
double rate, avgrate; | |
double flrate, flavgrate; | |
double t; | |
printf("Kernel name : %s\n",kernel_name); | |
printf("Matrix datatype : %s\n", XSTR(DATATYPE)); | |
printf("# of runs : %d\n", RPT); | |
printf("Matrices size : C(%i x %i) = A(%i x %i) x B(%i x %i)\n", | |
MATRIX_M, MATRIX_N, MATRIX_M, MATRIX_K, MATRIX_K, MATRIX_N); | |
init_matrices(); | |
transpor(MATRIX_N, MATRIX_K, mb, mbt); | |
/* Main loop. */ | |
for (k=0; k<RPT; k++) | |
{ | |
clean_cache(); | |
t = mysecond(); | |
/* Kernel */ | |
kernel(); | |
times[k] = mysecond() - t; | |
//printf(" -> %6.2f s\n", times[k]); | |
} | |
/* Final report */ | |
for (k=1; k<RPT; k++) | |
/* Discard first iteration (k=1). */ | |
{ | |
avgtime = avgtime + times[k]; | |
mintime = MIN(mintime, times[k]); | |
maxtime = MAX(maxtime, times[k]); | |
} | |
avgtime = avgtime / (RPT-1); | |
rate = (bytes / mintime) / GB; | |
avgrate = (bytes / avgtime) / GB; | |
flrate = (fops / mintime) / MB; | |
flavgrate = (fops / avgtime) / MB; | |
printf("Best Rate GB/s : %6.2f\n",rate); | |
printf("Avg Rate GB/s : %6.2f\n",avgrate); | |
printf("Best MFLOPS : %6.2f\n",flrate); | |
printf("Avg MFLOPS : %6.2f\n",flavgrate); | |
printf("Avg time : %6.2f\n",avgtime); | |
printf("Min time : %6.2f\n",mintime); | |
printf("Max time : %6.2f\n",maxtime); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment