First let's make some assumptions:
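- Each basic arithmetic operation (+, *, -, /) takes 1 unit of time.
- The network is a hypercube with store-and-forward (SF) routing, with message startup time $t_s$ and per-word transfer time $t_w$.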
Code:
| if (0 == myid) { | |
| for (row = 0; row < N; row++) { | |
| proc = row % nprocs; | |
| if (proc != 0) { | |
| MPI_Send(&A[row], N, ..., proc); | |
| MPI_Send(&B[row], N, ..., proc); | |
| } | |
| } | |
| } else { | |
| // Recv | |
| } | |
| for (norm = 0; norm < N - 1; norm++) { | |
| MPI_Bcast(&A[norm], ..., norm % nprocs); | |
| for (row = norm + 1, ... ) { | |
| if (myid == row % nprocs) { | |
| for (col = norm; col < N; col++) { | |
| } | |
| } | |
| } | |
| } |