Skip to content

Instantly share code, notes, and snippets.

@Barakat
Created December 15, 2016 10:39
Show Gist options
  • Select an option

  • Save Barakat/73a55f26f28fc75492b853f4db628a64 to your computer and use it in GitHub Desktop.

Select an option

Save Barakat/73a55f26f28fc75492b853f4db628a64 to your computer and use it in GitHub Desktop.
Cache optimized serial matrix multiplication
#include <stdlib.h>
#include <time.h>
#include <stdio.h>
#include <assert.h>
#include <stdbool.h>
typedef struct {
size_t rows;
size_t columns;
float data[1];
} Matrix;
#define GET_2D_FLAT_ADDRESS(data, columns, i, j) ((data) + (i) * (columns) + (j))
static inline float
matrix_get_element(const Matrix *matrix, size_t i, size_t j)
{
return *GET_2D_FLAT_ADDRESS(matrix->data, matrix->columns, i, j);
}
static inline float
matrix_set_element(Matrix *matrix, size_t i, size_t j, float value)
{
return *GET_2D_FLAT_ADDRESS(matrix->data, matrix->columns, i, j) = value;
}
#undef GET_2D_FLAT_ADDRESS
Matrix *
matrix_alloc(size_t rows, size_t columns)
{
size_t size = sizeof(Matrix) + (rows * columns - 1) * sizeof(float);
Matrix *self = malloc(size);
self->rows = rows;
self->columns = columns;
return self;
}
Matrix *
matrix_new(size_t rows, size_t columns, const float *data)
{
Matrix *self = matrix_alloc(rows, columns);
for (size_t i = 0 ; i < rows * columns ; ++i)
{
self->data[i] = data[i];
}
return self;
}
Matrix *
matrix_new_random(size_t rows, size_t columns)
{
Matrix *self = matrix_alloc(rows, columns);
srand(time(NULL));
for (size_t i = 0 ; i < rows * columns ; ++i)
{
self->data[i] = (float)rand() / (float)RAND_MAX;
}
return self;
}
void
matrix_dump(const Matrix *self, FILE *fp)
{
for (size_t i = 0 ; i < self->rows ; ++i)
{
fprintf(fp, "{");
for (size_t j = 0 ; self->columns > 1 && j < self->columns - 1 ; ++j)
{
fprintf(fp, "%f, ", matrix_get_element(self, i, j));
}
fprintf(fp, "%f}\n", matrix_get_element(self, i, self->columns - 1));
}
fprintf(fp, "\n");
}
void
matrix_destroy(Matrix *self)
{
free(self);
}
Matrix *
matrix_transponse(const Matrix *self)
{
Matrix *transponsed = matrix_alloc(self->columns, self->rows);
for (size_t i = 0; i < self->rows ; ++i)
{
for (size_t j = 0; j < self->columns ; ++j)
{
matrix_set_element(transponsed, j, i, matrix_get_element(self, i, j));
}
}
return transponsed;
}
Matrix *
matrix_multiply_cache_optimized(const Matrix *self, const Matrix *other)
{
assert(self->columns == other->rows);
Matrix *other_transponsed = matrix_transponse(other);
Matrix *result = matrix_alloc(self->rows, other->columns);
for (size_t i = 0 ; i < result->rows ; ++i)
{
for (size_t j = 0 ; j < result->columns ; ++j)
{
// BEGIN_PARALLEL (i, j)
float value = 0.0f;
for (size_t k = 0 ; k < other_transponsed->columns ; ++k)
{
value += matrix_get_element(self, i, k) * matrix_get_element(other_transponsed, j, k);
}
matrix_set_element(result, i, j, value);
// END_PARALLEL
}
}
matrix_destroy(other_transponsed);
return result;
}
Matrix *
matrix_multiply(const Matrix *self, const Matrix *other)
{
assert(self->columns == other->rows);
Matrix *result = matrix_alloc(self->rows, other->columns);
for (size_t i = 0 ; i < result->rows ; ++i)
{
for (size_t j = 0 ; j < result->columns ; ++j)
{
// BEGIN_PARALLEL (i, j)
float value = 0.0f;
for (size_t k = 0 ; k < other->rows ; ++k)
{
value += matrix_get_element(self, i, k) * matrix_get_element(other, k, j);
}
matrix_set_element(result, i, j, value);
// END_PARALLEL
}
}
return result;
}
#include <unistd.h>
int main()
{
// {1.000000, 2.000000}
// {3.000000, 4.000000}
//
// {5.000000, 6.000000, 7.000000, 8.000000}
// {9.000000, 10.000000, 11.000000, 12.000000}
//
// {23.000000, 26.000000, 29.000000, 32.000000}
// {51.000000, 58.000000, 65.000000, 72.000000}
// static const float data1[2*2] = {
// 1, 2,
// 3, 4
// };
//
// static const float data2[2*4] = {
// 5, 6, 7, 8,
// 9, 10, 11, 12
// };
//
// Matrix* matrix1 = matrix_new(2, 2, data1);
// Matrix* matrix2 = matrix_new(2, 4, data2);
Matrix* matrix1 = matrix_new_random(2000, 2000);
Matrix* matrix2 = matrix_new_random(2000, 2000);
Matrix* result = matrix_multiply_cache_optimized(matrix1, matrix2);
// matrix_dump(matrix1, stdout);
// matrix_dump(matrix2, stdout);
// matrix_dump(result, stdout);
matrix_destroy(matrix1);
matrix_destroy(matrix2);
matrix_destroy(result);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment