Last active
March 22, 2017 12:07
-
-
Save cristipufu/50a0a864eecb8d3010cbdd54a1929cb0 to your computer and use it in GitHub Desktop.
Matrix multiply optimizations
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <stdio.h> | |
#include <sys/time.h> | |
double **new_matrix(int n) | |
{ | |
double **m; | |
double *buf; | |
buf = (double *) calloc(n * n, sizeof(double)); | |
int i; | |
m = (double **) calloc(n, sizeof(double *)); | |
for (i = 0; i < n; i++) | |
m[i] = buf + i * n; | |
return m; | |
} | |
void fill_matrix(double **a, double **b, double **c, int N) | |
{ | |
int i, j; | |
for (i = 0; i < N; i++) { | |
for (j = 0; j < N; j++) { | |
a[i][j] = rand() % 100; | |
b[i][j] = rand() % 100; | |
c[i][j] = 0; | |
} | |
} | |
} | |
void reset_result(double **c, int N) | |
{ | |
int i, j; | |
for (i = 0; i < N; i++) | |
for (j = 0; j < N; j++) | |
c[i][j]=0; | |
} | |
void print_time(char *msg, struct timeval start, struct timeval finish) | |
{ | |
double t = (finish.tv_sec - start.tv_sec) + (double)(finish.tv_usec - start.tv_usec) / 1000000.0; | |
printf("[Time = %lf] - %s\n", msg, t); | |
} | |
int main(int argc, char** argv){ | |
struct timeval start, finish; | |
double t; | |
int i, j, k, N = 1000; | |
double **a = new_matrix(N); | |
double **b = new_matrix(N); | |
double **c = new_matrix(N); | |
fill_matrix(a, b, c, N); | |
/* Basic */ | |
gettimeofday(&start, 0); | |
for (i = 0; i < N; i++) | |
{ | |
for (j = 0; j < N; j++) | |
{ | |
for (k = 0; k < N; k++) | |
{ | |
c[i][j] += a[i][k] * b[k][j]; | |
} | |
} | |
} | |
gettimeofday(&finish, 0); | |
print_time("Basic", start, finish); | |
/* Optimization 1 - Constants */ | |
reset_result(c, N); | |
gettimeofday(&start, 0); | |
for (i = 0; i < N; i++) | |
{ | |
for (j = 0; j < N; j++) | |
{ | |
register double suma = 0.0; | |
for (k = 0; k < N; k++) | |
{ | |
suma += a[i][k] * b[k][j]; | |
} | |
c[i][j] = suma; | |
} | |
} | |
gettimeofday(&finish, 0); | |
print_time("Optimization 1 - Constants", start, finish); | |
/* Optimization 2 - Vector access */ | |
reset_result(c, N); | |
gettimeofday(&start, 0); | |
for(i = 0; i < N; i++) | |
{ | |
double *orig_pa = &a[i][0]; | |
for(j = 0; j < N; j++) | |
{ | |
double *pa = orig_pa; | |
double *pb = &b[0][j]; | |
register double suma = 0; | |
for(k = 0; k < N; k++) | |
{ | |
suma += *pa * *pb; | |
pa++; | |
pb += N; | |
} | |
c[i][j] = suma; | |
} | |
} | |
gettimeofday(&finish, 0); | |
print_time("Optimization 2 - Vector access", start, finish); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <stdio.h> | |
#include <sys/time.h> | |
#include <stdbool.h> | |
double **new_matrix(int n) | |
{ | |
double **m; | |
double *buf; | |
buf = (double *) calloc(n * n, sizeof(double)); | |
int i; | |
m = (double **) calloc(n, sizeof(double *)); | |
for (i = 0; i < n; i++) | |
m[i] = buf + i * n; | |
return m; | |
} | |
void fill_matrix(double **a, double **b, double **c1, double **c2, int N) | |
{ | |
int i, j; | |
for (i = 0; i < N; i++) { | |
for (j = 0; j < N; j++) { | |
a[i][j] = rand() % 100; | |
b[i][j] = rand() % 100; | |
c1[i][j] = 0; | |
c2[i][j] = 0; | |
} | |
} | |
} | |
bool compare_matrix(double **c1, double **c2, int n) | |
{ | |
int i, j; | |
bool ok = true; | |
for (i = 0; i < n; i++) | |
{ | |
if (!ok) | |
break; | |
for (j = 0; j < n; j++) | |
{ | |
if (c1[i][j] != c2[i][j]) | |
{ | |
ok = false; | |
break; | |
} | |
} | |
} | |
return ok; | |
} | |
void print_time(char *msg, struct timeval start, struct timeval finish) | |
{ | |
double t = (finish.tv_sec - start.tv_sec) + (double)(finish.tv_usec - start.tv_usec) / 1000000.0; | |
printf("[Time = %lf] - %s\n", msg, t); | |
} | |
int main(int argc, char** argv) { | |
struct timeval start, finish; | |
double t; | |
int i, j, k, N = 1000; | |
int q = 4; | |
double **a = new_matrix(N); | |
double **b = new_matrix(N); | |
double **c1 = new_matrix(N); | |
double **c2 = new_matrix(N); | |
fill_matrix(a, b, c1, c2, N); | |
/* Basic */ | |
gettimeofday(&start, 0); | |
for (i = 0; i < N; i++) | |
{ | |
for (j = 0; j < N; j++) | |
{ | |
for (k = 0; k < N; k++) | |
{ | |
c1[i][j] += a[i][k] * b[k][j]; | |
} | |
} | |
} | |
gettimeofday(&finish, 0); | |
print_time("Basic", start, finish); | |
/* Cache optimization */ | |
gettimeofday(&start, 0); | |
int block = N / q; | |
int j0, k0; | |
for(j0 = 0; j0 < N; j0 += block) | |
{ | |
for(k0 = 0; k0 < N; k0 += block) | |
{ | |
for(i = 0; i < N; i++) | |
{ | |
for(j = j0; j < ((j0 + block) > N ? N : (j0 + block)); j++) | |
{ | |
register double suma = 0.0; | |
for(k = k0; k < ((k0 + block) > N ? N : (k0 + block)); k++) | |
{ | |
suma += a[i][k] * b[k][j]; | |
} | |
c2[i][j] += suma; | |
} | |
} | |
} | |
} | |
gettimeofday(&finish, 0); | |
print_time("Cache optimization", start, finish); | |
/* Check result c1 equals c2 */ | |
bool ok = compare_matrix(c1, c2, N); | |
if (ok) | |
printf("Result OK: C1 == C2 with b: %d\n", q); | |
else | |
printf("Result NOT ok: C1 != C2 with b: %d\n", q); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <stdio.h> | |
#include <sys/time.h> | |
double **new_matrix(int n) | |
{ | |
double **m; | |
double *buf; | |
buf = (double *) calloc(n * n, sizeof(double)); | |
int i; | |
m = (double **) calloc(n, sizeof(double *)); | |
for (i = 0; i < n; i++) | |
m[i] = buf + i * n; | |
return m; | |
} | |
void fill_matrix(double **a, double **b, double **c, int N) | |
{ | |
int i, j; | |
for (i = 0; i < N; i++) { | |
for (j = 0; j < N; j++) { | |
a[i][j] = rand() % 100; | |
b[i][j] = rand() % 100; | |
c[i][j] = 0; | |
} | |
} | |
} | |
void reset_result(double **c, int N) | |
{ | |
int i, j; | |
for (i = 0; i < N; i++) | |
for (j = 0; j < N; j++) | |
c[i][j]=0; | |
} | |
void print_time(char *msg, struct timeval start, struct timeval finish) | |
{ | |
double t = (finish.tv_sec - start.tv_sec) + (double)(finish.tv_usec - start.tv_usec) / 1000000.0; | |
printf("[Time = %lf] - %s\n", msg, t); | |
} | |
int main(int argc, char** argv){ | |
struct timeval start, finish; | |
double t; | |
int i, j, k, N = 1000; | |
double **a = new_matrix(N); | |
double **b = new_matrix(N); | |
double **c = new_matrix(N); | |
fill_matrix(a, b, c, N); | |
/* Loop order i - j - k */ | |
gettimeofday(&start, 0); | |
for (i = 0; i < N; i++) | |
{ | |
for (j = 0; j < N; j++) | |
{ | |
for (k = 0; k < N; k++) | |
{ | |
c[i][j] += a[i][k] * b[k][j]; | |
} | |
} | |
} | |
gettimeofday(&finish, 0); | |
print_time("[i - j - k]", start, finish); | |
/* Loop order i - k - j */ | |
reset_result(c, N); | |
gettimeofday(&start, 0); | |
for (i = 0; i < N; i++) | |
{ | |
for (j = 0; j < N; j++) | |
{ | |
for (k = 0; k < N; k++) | |
{ | |
c[i][k] += a[i][j] * b[k][j]; | |
} | |
} | |
} | |
gettimeofday(&finish, 0); | |
print_time("[i - k - j]", start, finish); | |
/* Loop order k - j - i */ | |
reset_result(c, N); | |
gettimeofday(&start, 0); | |
for (i = 0; i < N; i++) | |
{ | |
for (j = 0; j < N; j++) | |
{ | |
for (k = 0; k < N; k++) | |
{ | |
c[k][j] += a[i][k] * b[i][j]; | |
} | |
} | |
} | |
gettimeofday(&finish, 0); | |
print_time("[k - j - i]", start, finish); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment