Skip to content

Instantly share code, notes, and snippets.

@danielml3
Last active December 19, 2024 11:26
Show Gist options
  • Save danielml3/308d50570c00229f728dbe2d1313c046 to your computer and use it in GitHub Desktop.
Save danielml3/308d50570c00229f728dbe2d1313c046 to your computer and use it in GitHub Desktop.
#include <stdio.h>
#define BLOCKSIZE 8
#define UNROLL 8
const char* dgemm_desc = "dgemm using cache blocking and unrolling"; // Here you can give a short description of your program

/*
 * Accumulate one tile of a matrix product into C.
 *
 * A, B and C are n-by-n matrices stored row-major in flat arrays
 * (element [r][c] lives at index r*n + c). For every (i, j) with
 * i in [si, si+BLOCKSIZE) and j in [sj, sj+BLOCKSIZE) this adds
 *     sum over k in [sk, sk+BLOCKSIZE) of A[i][k] * B[k][j]
 * to C[i][j].
 *
 * All three ranges are clamped to n, so a tile that hangs over the edge of
 * the matrix is processed only where it overlaps it, and a tile that starts
 * at or beyond n is a no-op. si, sj and sk are expected to be non-negative.
 */
void do_block (int n, int si, int sj, int sk,
float *A, float *B, float *C)
{
    /* Clamp each tile extent so no index ever reaches n. */
    const int i_end = (si + BLOCKSIZE < n) ? si + BLOCKSIZE : n;
    const int j_end = (sj + BLOCKSIZE < n) ? sj + BLOCKSIZE : n;
    const int k_end = (sk + BLOCKSIZE < n) ? sk + BLOCKSIZE : n;

    for (int i = si; i < i_end; i++) {
        for (int j = sj; j < j_end; j++) {
            float cij = C[i*n + j];
            int k = sk;

            /* Fixed-trip-count groups of UNROLL terms: the constant inner
               bound is what lets the compiler unroll this loop. */
            for (; k + UNROLL <= k_end; k += UNROLL)
                for (int x = 0; x < UNROLL; x++)
                    cij += A[i*n + (k + x)] * B[(k + x)*n + j]; /* cij += A[i][k]*B[k][j] */

            /* Leftover terms when the (clamped) k-range is not a multiple of
               UNROLL; without this the unrolled loop would run past k_end. */
            for (; k < k_end; k++)
                cij += A[i*n + k] * B[k*n + j];

            C[i*n + j] = cij; /* C[i][j] = cij */
        }
    }
}
/*
 * Compute C := C + A * B for n-by-n matrices stored row-major in flat arrays
 * (element [r][c] at index r*n + c).
 *
 * The part of the iteration space that is covered by whole BLOCKSIZE tiles is
 * delegated to do_block(); whatever is left over when n is not a multiple of
 * BLOCKSIZE (the "fringe") is handled here with a plain scalar loop, so
 * do_block() is never asked to touch an index >= n. Any n is accepted;
 * n <= 0 is a no-op.
 *
 * For every C[i][j] the k-terms are still accumulated in ascending k order.
 */
void square_dgemm (int n, float* A, float* B, float* C)
{
    if (n <= 0)
        return;

    /* Largest multiple of BLOCKSIZE not exceeding n: indices below this are
       covered by whole tiles, indices from here up to n are the fringe. */
    const int tiled = n - n % BLOCKSIZE;

    /* Whole tiles only. sk is innermost so each C tile receives its
       k-contributions in ascending order. */
    for (int si = 0; si < tiled; si += BLOCKSIZE)
        for (int sj = 0; sj < tiled; sj += BLOCKSIZE)
            for (int sk = 0; sk < tiled; sk += BLOCKSIZE)
                do_block(n, si, sj, sk, A, B, C);

    if (tiled == n)
        return; /* n is a multiple of BLOCKSIZE: nothing left over */

    /* Fringe: elements inside the tiled region only lack the k-terms from
       `tiled` onward; elements in the last partial row/column band have not
       been touched at all and need the full k-range. */
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            const int k_start = (i < tiled && j < tiled) ? tiled : 0;
            float cij = C[i*n + j];
            for (int k = k_start; k < n; k++)
                cij += A[i*n + k] * B[k*n + j];
            C[i*n + j] = cij;
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment