Created
December 15, 2016 10:39
-
-
Save Barakat/73a55f26f28fc75492b853f4db628a64 to your computer and use it in GitHub Desktop.
Cache optimized serial matrix multiplication
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <stdlib.h> | |
| #include <time.h> | |
| #include <stdio.h> | |
| #include <assert.h> | |
| #include <stdbool.h> | |
| typedef struct { | |
| size_t rows; | |
| size_t columns; | |
| float data[1]; | |
| } Matrix; | |
| #define GET_2D_FLAT_ADDRESS(data, columns, i, j) ((data) + (i) * (columns) + (j)) | |
| static inline float | |
| matrix_get_element(const Matrix *matrix, size_t i, size_t j) | |
| { | |
| return *GET_2D_FLAT_ADDRESS(matrix->data, matrix->columns, i, j); | |
| } | |
| static inline float | |
| matrix_set_element(Matrix *matrix, size_t i, size_t j, float value) | |
| { | |
| return *GET_2D_FLAT_ADDRESS(matrix->data, matrix->columns, i, j) = value; | |
| } | |
| #undef GET_2D_FLAT_ADDRESS | |
| Matrix * | |
| matrix_alloc(size_t rows, size_t columns) | |
| { | |
| size_t size = sizeof(Matrix) + (rows * columns - 1) * sizeof(float); | |
| Matrix *self = malloc(size); | |
| self->rows = rows; | |
| self->columns = columns; | |
| return self; | |
| } | |
| Matrix * | |
| matrix_new(size_t rows, size_t columns, const float *data) | |
| { | |
| Matrix *self = matrix_alloc(rows, columns); | |
| for (size_t i = 0 ; i < rows * columns ; ++i) | |
| { | |
| self->data[i] = data[i]; | |
| } | |
| return self; | |
| } | |
| Matrix * | |
| matrix_new_random(size_t rows, size_t columns) | |
| { | |
| Matrix *self = matrix_alloc(rows, columns); | |
| srand(time(NULL)); | |
| for (size_t i = 0 ; i < rows * columns ; ++i) | |
| { | |
| self->data[i] = (float)rand() / (float)RAND_MAX; | |
| } | |
| return self; | |
| } | |
| void | |
| matrix_dump(const Matrix *self, FILE *fp) | |
| { | |
| for (size_t i = 0 ; i < self->rows ; ++i) | |
| { | |
| fprintf(fp, "{"); | |
| for (size_t j = 0 ; self->columns > 1 && j < self->columns - 1 ; ++j) | |
| { | |
| fprintf(fp, "%f, ", matrix_get_element(self, i, j)); | |
| } | |
| fprintf(fp, "%f}\n", matrix_get_element(self, i, self->columns - 1)); | |
| } | |
| fprintf(fp, "\n"); | |
| } | |
| void | |
| matrix_destroy(Matrix *self) | |
| { | |
| free(self); | |
| } | |
| Matrix * | |
| matrix_transponse(const Matrix *self) | |
| { | |
| Matrix *transponsed = matrix_alloc(self->columns, self->rows); | |
| for (size_t i = 0; i < self->rows ; ++i) | |
| { | |
| for (size_t j = 0; j < self->columns ; ++j) | |
| { | |
| matrix_set_element(transponsed, j, i, matrix_get_element(self, i, j)); | |
| } | |
| } | |
| return transponsed; | |
| } | |
| Matrix * | |
| matrix_multiply_cache_optimized(const Matrix *self, const Matrix *other) | |
| { | |
| assert(self->columns == other->rows); | |
| Matrix *other_transponsed = matrix_transponse(other); | |
| Matrix *result = matrix_alloc(self->rows, other->columns); | |
| for (size_t i = 0 ; i < result->rows ; ++i) | |
| { | |
| for (size_t j = 0 ; j < result->columns ; ++j) | |
| { | |
| // BEGIN_PARALLEL (i, j) | |
| float value = 0.0f; | |
| for (size_t k = 0 ; k < other_transponsed->columns ; ++k) | |
| { | |
| value += matrix_get_element(self, i, k) * matrix_get_element(other_transponsed, j, k); | |
| } | |
| matrix_set_element(result, i, j, value); | |
| // END_PARALLEL | |
| } | |
| } | |
| matrix_destroy(other_transponsed); | |
| return result; | |
| } | |
| Matrix * | |
| matrix_multiply(const Matrix *self, const Matrix *other) | |
| { | |
| assert(self->columns == other->rows); | |
| Matrix *result = matrix_alloc(self->rows, other->columns); | |
| for (size_t i = 0 ; i < result->rows ; ++i) | |
| { | |
| for (size_t j = 0 ; j < result->columns ; ++j) | |
| { | |
| // BEGIN_PARALLEL (i, j) | |
| float value = 0.0f; | |
| for (size_t k = 0 ; k < other->rows ; ++k) | |
| { | |
| value += matrix_get_element(self, i, k) * matrix_get_element(other, k, j); | |
| } | |
| matrix_set_element(result, i, j, value); | |
| // END_PARALLEL | |
| } | |
| } | |
| return result; | |
| } | |
| #include <unistd.h> | |
| int main() | |
| { | |
| // {1.000000, 2.000000} | |
| // {3.000000, 4.000000} | |
| // | |
| // {5.000000, 6.000000, 7.000000, 8.000000} | |
| // {9.000000, 10.000000, 11.000000, 12.000000} | |
| // | |
| // {23.000000, 26.000000, 29.000000, 32.000000} | |
| // {51.000000, 58.000000, 65.000000, 72.000000} | |
| // static const float data1[2*2] = { | |
| // 1, 2, | |
| // 3, 4 | |
| // }; | |
| // | |
| // static const float data2[2*4] = { | |
| // 5, 6, 7, 8, | |
| // 9, 10, 11, 12 | |
| // }; | |
| // | |
| // Matrix* matrix1 = matrix_new(2, 2, data1); | |
| // Matrix* matrix2 = matrix_new(2, 4, data2); | |
| Matrix* matrix1 = matrix_new_random(2000, 2000); | |
| Matrix* matrix2 = matrix_new_random(2000, 2000); | |
| Matrix* result = matrix_multiply_cache_optimized(matrix1, matrix2); | |
| // matrix_dump(matrix1, stdout); | |
| // matrix_dump(matrix2, stdout); | |
| // matrix_dump(result, stdout); | |
| matrix_destroy(matrix1); | |
| matrix_destroy(matrix2); | |
| matrix_destroy(result); | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment