Skip to content

Instantly share code, notes, and snippets.

@morris821028
Created April 17, 2016 14:26
Show Gist options
  • Save morris821028/f1570c84817dc2edf8a9a93bbc82b086 to your computer and use it in GitHub Desktop.
Save morris821028/f1570c84817dc2edf8a9a93bbc82b086 to your computer and use it in GitHub Desktop.
Judge Girl - Fast Matrix Multiplication (OpenMP) duff's device solution
#include <stdio.h>
#include "matrix.h"
// #define DEBUG
#define UINT unsigned long
#define MAXN 2048
void rand_gen(UINT c, int N, UINT A[][MAXN]) {
UINT x = 2, n = N*N;
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
x = (x * x + c + i + j)%n;
A[i][j] = x;
}
}
}
void print_matrix(int N, UINT A[][MAXN]) {
for (int i = 0; i < N; i++) {
fprintf(stderr, "[");
for (int j = 0; j < N; j++)
fprintf(stderr, " %u", A[i][j]);
fprintf(stderr, " ]\n");
}
}
UINT hash(UINT x) {
return (x * 2654435761LU);
}
UINT signature(int N, UINT A[][MAXN]) {
UINT h = 0;
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++)
h = hash(h + A[i][j]);
}
return h;
}
UINT A[MAXN][MAXN], B[MAXN][MAXN], C[MAXN][MAXN];
int main() {
int N, S1, S2;
while (scanf("%d %d %d", &N, &S1, &S2) == 3) {
rand_gen(S1, N, A);
rand_gen(S2, N, B);
multiply(N, A, B, C);
#ifdef DEBUG
print_matrix(N, A);
print_matrix(N, B);
print_matrix(N, C);
#endif
printf("%u\n", signature(N, C));
}
return 0;
}
#include "matrix.h"
#define LOOP_UNROLL 8
void transpose(int N, UINT A[][2048]) {
UINT t;
for (int i = 0; i < N; i++)
for (int j = i+1; j < N; j++)
t = A[i][j], A[i][j] = A[j][i], A[j][i] = t;
}
void multiply(int N, UINT A[][2048], UINT B[][2048], UINT C[][2048]) {
transpose(N, B);
#pragma omp parallel for
for (int i = N-1; i >= 0; i--) {
for (int j = N-1; j >= 0; j--) {
register UINT sum = 0;
UINT *a = &A[i][0], *b = &B[j][0];
int k = N;
switch (k % LOOP_UNROLL) {
case 0: do { sum += *a * *b, a++, b++;
case 7: sum += *a * *b, a++, b++;
case 6: sum += *a * *b, a++, b++;
case 5: sum += *a * *b, a++, b++;
case 4: sum += *a * *b, a++, b++;
case 3: sum += *a * *b, a++, b++;
case 2: sum += *a * *b, a++, b++;
case 1: sum += *a * *b, a++, b++;
} while ((k -= LOOP_UNROLL) > 0);
}
C[i][j] = sum;
}
}
}
#define UINT unsigned long
void multiply(int N, UINT A[][2048], UINT B[][2048], UINT C[][2048]);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment