pi@raspberrypi:~/cache_test $ ./a.out
exec time normal: 42.615200sec
allsum = 134217728
exec time block: 25.836692sec
allsum = 134217728
Last active
August 16, 2018 16:23
-
-
Save tnishinaga/dc66d45d773f1b8d0e2bda335fe0e44b to your computer and use it in GitHub Desktop.
Raspberry Pi zero上で行列積を計算するコード。
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h> | |
#include <stdlib.h> | |
#include <stdio.h> | |
#include <time.h> | |
#define MAT_A_ROW 512 | |
#define MAT_A_COL 512 | |
#define MAT_B_ROW 512 | |
#define MAT_B_COL 512 | |
/**
 * Allocate a row-major row x col matrix of uint32_t with every element set to 1.
 *
 * @param row  number of rows
 * @param col  number of columns
 * @return heap-allocated matrix owned by the caller (release with free()),
 *         or NULL when the element count is INT32_MAX or more, when the byte
 *         size does not fit in size_t, or when the allocation fails.
 */
uint32_t *matrix_create(uint32_t row, uint32_t col)
{
    /* Count elements in 64 bits so the product itself cannot wrap. */
    const uint64_t count = (uint64_t)row * (uint64_t)col;

    if (count >= (uint64_t)INT32_MAX) {
        return NULL;
    }
    /* Where size_t is 32 bits wide, count * sizeof(uint32_t) can still wrap. */
    if (count > (uint64_t)(SIZE_MAX / sizeof(uint32_t))) {
        return NULL;
    }

    const size_t n = (size_t)count;
    uint32_t *matrix = malloc(n * sizeof *matrix);
    if (matrix == NULL) {
        return NULL;
    }

    for (size_t i = 0; i < n; i++) {
        matrix[i] = 1;
    }
    return matrix;
}
/*
 * Flat offset of element (row, col) in a row-major matrix whose rows each
 * hold col_length elements. The arithmetic is done in uint32_t.
 */
static inline uint32_t mat_idx(uint32_t row, uint32_t col, uint32_t col_length)
{
    const uint32_t row_start = col_length * row;

    return col + row_start;
}
/**
 * Multiply two row-major matrices with a plain triple loop: C = A x B.
 *
 * Elements are combined in unsigned 32-bit arithmetic, so products and sums
 * wrap modulo 2^32 instead of triggering signed-overflow undefined behavior.
 *
 * @param matA      left operand, matA_row x matA_col elements
 * @param matA_row  number of rows in A
 * @param matA_col  number of columns in A
 * @param matB      right operand, matB_row x matB_col elements
 * @param matB_row  number of rows in B; must equal matA_col
 * @param matB_col  number of columns in B
 * @return newly allocated matA_row x matB_col result owned by the caller
 *         (release with free()), or NULL when an input pointer is NULL, the
 *         shapes are incompatible, the result size does not fit in size_t,
 *         or the allocation fails.
 */
uint32_t *matrix_dot_normal(
    int32_t *matA, uint32_t matA_row, uint32_t matA_col,
    int32_t *matB, uint32_t matB_row, uint32_t matB_col
)
{
    if (matA == NULL || matB == NULL) {
        return NULL;
    }
    if (matA_col != matB_row) {
        /* Inner dimensions must agree. */
        return NULL;
    }

    const size_t rows = matA_row;
    const size_t cols = matB_col;
    const size_t inner = matA_col;

    /* Reject shapes whose byte size would overflow size_t. */
    if (cols != 0 && rows > (SIZE_MAX / sizeof(uint32_t)) / cols) {
        return NULL;
    }

    /* calloc hands back zeroed storage, so no separate clearing pass. */
    uint32_t *matC = calloc(rows * cols, sizeof *matC);
    if (matC == NULL) {
        return NULL;
    }

    for (size_t i = 0; i < rows; i++) {
        const int32_t *a_row = matA + i * inner;
        uint32_t *c_row = matC + i * cols;

        for (size_t j = 0; j < cols; j++) {
            uint32_t acc = 0;

            for (size_t k = 0; k < inner; k++) {
                /* Convert before multiplying: unsigned arithmetic wraps safely. */
                acc += (uint32_t)a_row[k] * (uint32_t)matB[k * cols + j];
            }
            c_row[j] = acc;
        }
    }
    return matC;
}
/**
 * Print a row-major matrix to stdout: one text line per matrix row, each
 * element followed by a single space.
 *
 * @param mat  matrix of row x col elements; nothing is printed when NULL
 * @param row  number of rows
 * @param col  number of columns
 */
void matrix_print(uint32_t *mat, uint32_t row, uint32_t col)
{
    if (mat == NULL) {
        return;
    }

    for (uint32_t i = 0; i < row; i++) {
        const uint32_t *line = mat + (size_t)i * col;

        for (uint32_t j = 0; j < col; j++) {
            /* The argument type must match the conversion: widen explicitly for %lu. */
            printf("%lu ", (unsigned long)line[j]);
        }
        printf("\n");
    }
}
/*
 * Add up every element of a row x col row-major matrix.
 * The running total is kept in 64 bits; each element is widened before
 * it is added.
 */
uint64_t matrix_allsum(uint32_t *mat, uint32_t row, uint32_t col)
{
    uint64_t total = 0;
    uint32_t r = 0;

    while (r < row) {
        uint32_t c = 0;

        while (c < col) {
            const uint32_t value = mat[mat_idx(r, c, col)];

            total += (uint64_t)value;
            c++;
        }
        r++;
    }
    return total;
}
/**
 * Multiply two row-major matrices using 16x16x16 loop blocking: C = A x B.
 *
 * The iteration space is walked block by block; each block's bounds are
 * clamped to the matrix dimensions, so sizes that are not a multiple of the
 * block size are handled without touching memory outside the matrices.
 * Elements are combined in unsigned 32-bit arithmetic (wraps modulo 2^32).
 *
 * @param matA      left operand, matA_row x matA_col elements
 * @param matA_row  number of rows in A
 * @param matA_col  number of columns in A
 * @param matB      right operand, matB_row x matB_col elements
 * @param matB_row  number of rows in B; must equal matA_col
 * @param matB_col  number of columns in B
 * @return newly allocated matA_row x matB_col result owned by the caller
 *         (release with free()), or NULL when an input pointer is NULL, the
 *         shapes are incompatible, the result size does not fit in size_t,
 *         or the allocation fails.
 */
uint32_t *matrix_dot_block(
    int32_t *matA, uint32_t matA_row, uint32_t matA_col,
    int32_t *matB, uint32_t matB_row, uint32_t matB_col
)
{
    if (matA == NULL || matB == NULL) {
        return NULL;
    }
    if (matA_col != matB_row) {
        /* Inner dimensions must agree. */
        return NULL;
    }

    const size_t rows = matA_row;
    const size_t cols = matB_col;
    const size_t inner = matA_col;
    const size_t bsize = 16;

    /* Reject shapes whose byte size would overflow size_t. */
    if (cols != 0 && rows > (SIZE_MAX / sizeof(uint32_t)) / cols) {
        return NULL;
    }

    /* calloc hands back zeroed storage, so no separate clearing pass. */
    uint32_t *matC = calloc(rows * cols, sizeof *matC);
    if (matC == NULL) {
        return NULL;
    }
    if (rows == 0 || cols == 0 || inner == 0) {
        /* Nothing to accumulate; the (possibly empty) result stays all zero. */
        return matC;
    }

    /* Step across the outer blocks. */
    for (size_t ib = 0; ib < rows; ib += bsize) {
        const size_t i_end = (rows - ib < bsize) ? rows : ib + bsize;

        for (size_t jb = 0; jb < cols; jb += bsize) {
            const size_t j_end = (cols - jb < bsize) ? cols : jb + bsize;

            for (size_t kb = 0; kb < inner; kb += bsize) {
                const size_t k_end = (inner - kb < bsize) ? inner : kb + bsize;

                /* Compute the partial products inside the current block. */
                for (size_t i = ib; i < i_end; i++) {
                    const int32_t *a_row = matA + i * inner;
                    uint32_t *c_row = matC + i * cols;

                    for (size_t j = jb; j < j_end; j++) {
                        uint32_t acc = 0;

                        for (size_t k = kb; k < k_end; k++) {
                            /* Convert before multiplying: unsigned arithmetic wraps safely. */
                            acc += (uint32_t)a_row[k] * (uint32_t)matB[k * cols + j];
                        }
                        c_row[j] += acc;
                    }
                }
            }
        }
    }
    return matC;
}
int main(void) | |
{ | |
uint32_t *matA = matrix_create(MAT_A_ROW, MAT_A_COL); | |
uint32_t *matB = matrix_create(MAT_B_ROW, MAT_B_COL); | |
if (matA == NULL || matB == NULL) { | |
printf("matA/B is NULL\n"); | |
return -1; | |
} | |
clock_t normal_start = clock(); | |
uint32_t *matC_normal = matrix_dot_normal(matA, MAT_A_ROW, MAT_A_COL, matB, MAT_B_ROW, MAT_B_COL); | |
clock_t normal_end = clock(); | |
if (matC_normal == NULL) { | |
printf("matC is NULL\n"); | |
return -1; | |
} | |
printf("exec time normal: %fsec\n", (double)(normal_end - normal_start) / CLOCKS_PER_SEC); | |
uint64_t sum = matrix_allsum(matC_normal, MAT_A_ROW, MAT_B_COL); | |
printf("allsum = %lld\n", sum); | |
// clear cache | |
clock_t block_start = clock(); | |
uint32_t *matC_block = matrix_dot_block(matA, MAT_A_ROW, MAT_A_COL, matB, MAT_B_ROW, MAT_B_COL); | |
clock_t block_end = clock(); | |
if (matC_block == NULL) { | |
printf("matC is NULL\n"); | |
return -1; | |
} | |
printf("exec time block: %fsec\n", (double)(block_end - block_start) / CLOCKS_PER_SEC); | |
uint64_t sum_block = matrix_allsum(matC_block, MAT_A_ROW, MAT_B_COL); | |
printf("allsum = %lld\n", sum_block); | |
free(matA); | |
free(matB); | |
free(matC_normal); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment