Created
April 17, 2016 14:26
-
-
Save morris821028/f1570c84817dc2edf8a9a93bbc82b086 to your computer and use it in GitHub Desktop.
Judge Girl - Fast Matrix Multiplication (OpenMP) duff's device solution
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include "matrix.h" | |
// #define DEBUG | |
#define UINT unsigned long | |
#define MAXN 2048 | |
void rand_gen(UINT c, int N, UINT A[][MAXN]) { | |
UINT x = 2, n = N*N; | |
for (int i = 0; i < N; i++) { | |
for (int j = 0; j < N; j++) { | |
x = (x * x + c + i + j)%n; | |
A[i][j] = x; | |
} | |
} | |
} | |
void print_matrix(int N, UINT A[][MAXN]) { | |
for (int i = 0; i < N; i++) { | |
fprintf(stderr, "["); | |
for (int j = 0; j < N; j++) | |
fprintf(stderr, " %u", A[i][j]); | |
fprintf(stderr, " ]\n"); | |
} | |
} | |
UINT hash(UINT x) { | |
return (x * 2654435761LU); | |
} | |
UINT signature(int N, UINT A[][MAXN]) { | |
UINT h = 0; | |
for (int i = 0; i < N; i++) { | |
for (int j = 0; j < N; j++) | |
h = hash(h + A[i][j]); | |
} | |
return h; | |
} | |
UINT A[MAXN][MAXN], B[MAXN][MAXN], C[MAXN][MAXN]; | |
int main() { | |
int N, S1, S2; | |
while (scanf("%d %d %d", &N, &S1, &S2) == 3) { | |
rand_gen(S1, N, A); | |
rand_gen(S2, N, B); | |
multiply(N, A, B, C); | |
#ifdef DEBUG | |
print_matrix(N, A); | |
print_matrix(N, B); | |
print_matrix(N, C); | |
#endif | |
printf("%u\n", signature(N, C)); | |
} | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "matrix.h" | |
#define LOOP_UNROLL 8 | |
void transpose(int N, UINT A[][2048]) { | |
UINT t; | |
for (int i = 0; i < N; i++) | |
for (int j = i+1; j < N; j++) | |
t = A[i][j], A[i][j] = A[j][i], A[j][i] = t; | |
} | |
void multiply(int N, UINT A[][2048], UINT B[][2048], UINT C[][2048]) { | |
transpose(N, B); | |
#pragma omp parallel for | |
for (int i = N-1; i >= 0; i--) { | |
for (int j = N-1; j >= 0; j--) { | |
register UINT sum = 0; | |
UINT *a = &A[i][0], *b = &B[j][0]; | |
int k = N; | |
switch (k % LOOP_UNROLL) { | |
case 0: do { sum += *a * *b, a++, b++; | |
case 7: sum += *a * *b, a++, b++; | |
case 6: sum += *a * *b, a++, b++; | |
case 5: sum += *a * *b, a++, b++; | |
case 4: sum += *a * *b, a++, b++; | |
case 3: sum += *a * *b, a++, b++; | |
case 2: sum += *a * *b, a++, b++; | |
case 1: sum += *a * *b, a++, b++; | |
} while ((k -= LOOP_UNROLL) > 0); | |
} | |
C[i][j] = sum; | |
} | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define UINT unsigned long | |
void multiply(int N, UINT A[][2048], UINT B[][2048], UINT C[][2048]); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment