Last active
December 19, 2024 11:26
-
-
Save danielml3/308d50570c00229f728dbe2d1313c046 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h>

/* Edge length, in elements, of the square tiles the matrices are walked in. */
#define BLOCKSIZE 8
/* Number of inner-product terms consumed per pass of the strided k loop. */
#define UNROLL 8

/* Put a short description of your program here. */
const char *dgemm_desc = "dgemm using cache blocking and unrolling";

/* Exclusive end of the tile that begins at `start`, clipped to the matrix edge n. */
static int tile_end(int start, int n)
{
    return (start + BLOCKSIZE < n) ? start + BLOCKSIZE : n;
}

/*
 * Multiply-accumulate a single tile:
 *
 *     C[i][j] += A[i][k] * B[k][j]
 *
 * for i in [si, si+BLOCKSIZE), j in [sj, sj+BLOCKSIZE) and
 * k in [sk, sk+BLOCKSIZE), with every range clipped to n so that partial
 * tiles on the matrix edge never index outside the arrays.
 *
 *   n          row length of the n-by-n matrices
 *   si, sj, sk first row, column and inner index of the tile (expected >= 0)
 *   A, B       input matrices, indexed as X[row * n + col]
 *   C          output matrix, same indexing, updated in place
 *
 * Products are added in ascending k order.
 */
void do_block (int n, int si, int sj, int sk,
               float *A, float *B, float *C)
{
    const int i_end = tile_end(si, n);
    const int j_end = tile_end(sj, n);
    const int k_end = tile_end(sk, n);

    for (int i = si; i < i_end; i++) {
        /* Index arithmetic is done in size_t so i * n cannot overflow int. */
        const size_t row = (size_t)i * (size_t)n;

        for (int j = sj; j < j_end; j++) {
            float cij = C[row + (size_t)j];
            int k = sk;

            /* Strided part: UNROLL terms per pass while a full group fits. */
            for (; k + UNROLL <= k_end; k += UNROLL) {
                for (int x = 0; x < UNROLL; x++) {
                    const size_t kk = (size_t)(k + x);
                    cij += A[row + kk] * B[kk * (size_t)n + (size_t)j];
                }
            }

            /* Tail: terms left over when the clipped k range is not a
             * multiple of UNROLL. */
            for (; k < k_end; k++) {
                const size_t kk = (size_t)k;
                cij += A[row + kk] * B[kk * (size_t)n + (size_t)j];
            }

            C[row + (size_t)j] = cij;
        }
    }
}

/*
 * Compute C += A * B for n-by-n matrices stored as X[row * n + col],
 * visiting the operands one BLOCKSIZE-wide tile at a time.
 *
 * Any n >= 1 is handled, including sizes smaller than BLOCKSIZE or not a
 * multiple of it; n <= 0 leaves C untouched. Despite the "dgemm" name the
 * elements are single-precision floats.
 */
void square_dgemm (int n, float *A, float *B, float *C)
{
    for (int si = 0; si < n; si += BLOCKSIZE) {
        for (int sj = 0; sj < n; sj += BLOCKSIZE) {
            /* k tiles are applied in ascending order for each C tile. */
            for (int sk = 0; sk < n; sk += BLOCKSIZE) {
                do_block(n, si, sj, sk, A, B, C);
            }
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment