Last active
December 13, 2015 20:09
-
-
Save perdacherMartin/4967972 to your computer and use it in GitHub Desktop.
MPI-C, jacobi iteration, row-wise decomposition with MPI_Scatterv
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| Perdacher Martin | |
| jacobi iteration, with row-wise decomposition | |
| * to distribute the matrix, MPI_Scatterv is used (row-wise decomposition) | |
| */ | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <math.h> | |
| #include "mpi.h" | |
| #define MASTER 0 | |
| #define REPETITIONS 10 | |
| #define MPI_SENDTYPE MPI_DOUBLE | |
| #define MAX_ITER 5000 | |
| typedef double T_datatype; | |
| T_datatype** InitializeMatrix(long const N); | |
| T_datatype** InitializeLocalMatrix(int nprocs, long const N); | |
| void DistributeMatrix(T_datatype **matrix, T_datatype **local_matrix, int const myrank, int const nprocs, long const n, long *chunksize); | |
| void IterateJacobi(T_datatype **v_local,int const myrank,int const nprocs,long const n,long const chunksize, double *diffnorm, int *iterations); | |
| void GatherMatrixAtMaster(T_datatype **matrix, T_datatype **v_local, int const myrank, int const nprocs,long const n, long const chunksize); | |
| int main(int argc, char* argv[]) { | |
| int myrank, nprocs,iterations; | |
| long chunksize,n; | |
| double time, slowest,times[REPETITIONS],diffnorm; | |
| T_datatype **v=NULL; | |
| T_datatype **v_local=NULL; | |
| MPI_Status recv_status; | |
| MPI_Request request; | |
| MPI_Init(&argc, &argv); | |
| MPI_Comm_rank(MPI_COMM_WORLD, &myrank); | |
| MPI_Comm_size(MPI_COMM_WORLD, &nprocs); | |
| if ( argc != 2 ){ | |
| if ( myrank == MASTER ){ | |
| printf("Usage: mpiexec -n nodecount jacobi n\n"); | |
| printf("with n is the size of the matrix. (nodecount < n)\n\n"); | |
| } | |
| MPI_Abort( MPI_COMM_WORLD, 1 ); | |
| return 1; | |
| } | |
| n = atol(argv[1]); | |
| if ( myrank == MASTER ){ | |
| v = InitializeMatrix(n); | |
| } | |
| v_local = InitializeLocalMatrix(nprocs,n); | |
| for ( int i = 0 ; i < REPETITIONS ; ++i ){ | |
| MPI_Barrier(MPI_COMM_WORLD); | |
| time = MPI_Wtime(); | |
| DistributeMatrix(v,v_local,myrank,nprocs,n,&chunksize); | |
| IterateJacobi(v_local,myrank,nprocs,n,chunksize, &diffnorm,&iterations); | |
| time = MPI_Wtime() - time; | |
| MPI_Barrier(MPI_COMM_WORLD); | |
| MPI_Reduce(&time, &slowest, 1, MPI_DOUBLE, MPI_MAX, MASTER, MPI_COMM_WORLD); | |
| if (myrank == MASTER) { | |
| times[i]=slowest; | |
| } | |
| } | |
| GatherMatrixAtMaster(v,v_local,myrank,nprocs,n,chunksize); | |
| // if ( myrank == 0 ){ | |
| // for ( int i=0 ; i < n ; i++ ){ | |
| // for ( int j=0 ; j < n ; j++ ){ | |
| // printf("%f, ", v[i][j]); | |
| // } | |
| // printf("\n"); | |
| // } | |
| // } | |
| if ( myrank == MASTER ){ | |
| double min = times[0]; | |
| for (int i = 1; i < REPETITIONS ; i++) { | |
| min = ( times[i] < min ) ? times[i] : min; | |
| } | |
| printf("%d;%ld;%f;%f;%d\n", nprocs, n, min, diffnorm,iterations); | |
| } | |
| if ( myrank == MASTER ){ | |
| free(v); | |
| } | |
| free(v_local); | |
| MPI_Finalize(); | |
| } | |
| T_datatype** InitializeMatrix(long const N){ | |
| T_datatype **array = (T_datatype**) malloc( sizeof(T_datatype *) * N); | |
| T_datatype *array_data = (T_datatype *)malloc(N * N *sizeof(T_datatype)); | |
| int i,j; | |
| if ( array == NULL || array_data == NULL ){ | |
| printf("Error allocating memory!\n\n"); | |
| exit(EXIT_FAILURE); | |
| } | |
| for ( i=0l ; i < N ; ++i ){ | |
| array[i] = &(array_data[i*N]); | |
| } | |
| // initialize all points with reasonable starting values for the potential | |
| for (i = 1; i < N; i++) | |
| for (j = 1; j < N; j++) | |
| array[i][j] = 51.0; | |
| // boundary conditions | |
| for (j = 0; j < N; j++) array[0][j] = 30; /// top | |
| for (j = 0; j < N; j++) array[N-1][j] = 70; /// bottom | |
| for (i = 0; i < N; i++) array[i][0] = 0; /// left | |
| for (i = 0; i < N; i++) array[i][N-1] = 100; /// right | |
| return array; | |
| } | |
| T_datatype** InitializeLocalMatrix(int nprocs, long const n){ | |
| long chunksize = ( n - ((long)n/nprocs ) * (nprocs - 1 ) ); | |
| chunksize += 2; // overlapping area | |
| T_datatype **local_matrix=NULL; | |
| T_datatype *lm_data=NULL; | |
| lm_data = (T_datatype*) malloc(sizeof(T_datatype) * chunksize * n); | |
| local_matrix = (T_datatype**) malloc (sizeof(T_datatype) * chunksize); | |
| for ( int i = 0 ; i < chunksize ; ++i ){ | |
| local_matrix[i] = &(lm_data[i*n]); | |
| } | |
| for ( int i = 0 ; i < chunksize ; ++i ){ | |
| for ( int j = 0 ; j < chunksize ; ++j ){ | |
| local_matrix[i][j] = 0; | |
| } | |
| } | |
| return local_matrix; | |
| } | |
| void DistributeMatrix(T_datatype **matrix, T_datatype **local_matrix, int const myrank, int const nprocs, long const n, long *chunksize){ | |
| int i,j, sendcounts[nprocs], displs[nprocs]; | |
| T_datatype *sendbuffer=NULL; | |
| long temp; | |
| MPI_Datatype MPI_rowtype; | |
| MPI_Type_contiguous(n, MPI_SENDTYPE, &MPI_rowtype); | |
| MPI_Type_commit(&MPI_rowtype); | |
| *chunksize = (long) n / nprocs; | |
| // preparing sendcounts and displs for MPI_Scatterv | |
| for ( i = 0 ; i < nprocs ; ++i ){ | |
| sendcounts[i] = ( i == nprocs - 1 ) ? *chunksize + ( n - ( *chunksize * nprocs ) ) :*chunksize ; | |
| displs[i] = *chunksize * i; | |
| sendcounts[i] += ( i == 0 || i == nprocs-1 ) ? 1 : 2 ; | |
| displs[i] -= (i == 0 ) ? 0 : 1; | |
| } | |
| *chunksize = sendcounts[myrank] ; | |
| sendbuffer = NULL; | |
| if ( myrank == MASTER ) | |
| sendbuffer = &(matrix[0][0]); | |
| if ( nprocs != 1 ){ | |
| MPI_Scatterv( sendbuffer, sendcounts, displs, MPI_rowtype, &(local_matrix[0][0]), *chunksize, MPI_rowtype, MASTER, MPI_COMM_WORLD ); | |
| }else{ | |
| local_matrix = matrix; | |
| } | |
| MPI_Type_free(&MPI_rowtype); | |
| } | |
| void IterateJacobi(T_datatype **v_local,int const myrank,int const nprocs,long const N,long const chunksize, double *diff2norm,int *iterations){ | |
| double diffnorm=0, gdiffnorm=0; | |
| int i,j; | |
| long iter=0; | |
| T_datatype **v_local_tmp=NULL; | |
| T_datatype *p_local_tmp=NULL,*p_local=NULL,*swap=NULL; | |
| MPI_Datatype MPI_rowtype; | |
| MPI_Status recv_status; | |
| MPI_Type_contiguous(N, MPI_SENDTYPE, &MPI_rowtype); | |
| MPI_Type_commit(&MPI_rowtype); | |
| v_local_tmp = InitializeLocalMatrix(nprocs,N); | |
| // // copy boundaries from v_local to v_local_tmp | |
| for ( i=0 ; i < N ; ++i ){ | |
| v_local_tmp[0][i] = v_local[0][i]; | |
| v_local_tmp[chunksize-1][i] = v_local[chunksize-1][i]; | |
| } | |
| for ( i=0 ; i < chunksize; ++i ){ | |
| v_local_tmp[i][0] = v_local[i][0]; | |
| v_local_tmp[i][N-1] = v_local[i][N-1]; | |
| } | |
| p_local = &v_local[0][0]; | |
| p_local_tmp = &v_local_tmp[0][0]; | |
| do | |
| { | |
| diffnorm = 0.0; | |
| // calculate new values | |
| for (i = 1; i < chunksize - 1 ; i++){ | |
| for (j = 1; j < N - 1 ; j++){ | |
| *(p_local_tmp+i*N+j) = 0.25 * ( *(p_local+(i-1)*N+j) + *(p_local+(i+1)*N+j) + *(p_local+(i*N)+j-1) + *(p_local+(i*N)+j+1)); | |
| diffnorm += ( *(p_local_tmp+i*N+j) - *(p_local+i*N+j) ) * | |
| ( *(p_local_tmp+i*N+j) - *(p_local+i*N+j) ); | |
| } | |
| } | |
| swap = p_local; | |
| p_local = p_local_tmp; | |
| p_local_tmp = swap; | |
| // synchronize | |
| // send and recv bottom | |
| if ( myrank != nprocs - 1 ) | |
| MPI_Sendrecv(p_local+(chunksize - 2)*N + 0 , 1, MPI_rowtype, myrank+1, myrank, p_local+(chunksize - 1) * N + 0, 1, MPI_rowtype,myrank+1,myrank,MPI_COMM_WORLD,&recv_status); | |
| // send and recv top | |
| if ( myrank != 0 ) | |
| MPI_Sendrecv(p_local+1*N + 0, 1, MPI_rowtype, myrank-1, myrank-1, p_local+0*N+0, 1, MPI_rowtype, myrank-1, myrank-1, MPI_COMM_WORLD, &recv_status); | |
| MPI_Allreduce( &diffnorm, &gdiffnorm, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD ); | |
| gdiffnorm = sqrt( gdiffnorm ); | |
| iter++; | |
| }while( gdiffnorm > 1.0e-2 && iter < MAX_ITER ); | |
| *diff2norm = gdiffnorm; | |
| *iterations = iter; | |
| // if ( myrank == 0 ){ | |
| // printf("iterations: %d, gdiffnorm:%f, diffnorm:%f\n", iterations, gdiffnorm, diffnorm); | |
| // for ( int i = 0 ; i < chunksize ; i++ ){ | |
| // for ( int j = 0 ; j < N ; j++ ){ | |
| // printf("%f, ", v_local[i][j]); | |
| // } | |
| // printf("\n"); | |
| // } | |
| // } | |
| // free v_local_tmp | |
| free(v_local_tmp); | |
| MPI_Type_free(&MPI_rowtype); | |
| } | |
| void GatherMatrixAtMaster(T_datatype **matrix, T_datatype **v_local, int const myrank, int const nprocs,long const n, long const chunksize){ | |
| int recvcounts[nprocs], displs[nprocs]; | |
| int i,chunk = n/nprocs; | |
| MPI_Datatype MPI_rowtype; | |
| MPI_Status recv_status; | |
| MPI_Type_contiguous(n, MPI_SENDTYPE, &MPI_rowtype); | |
| MPI_Type_commit(&MPI_rowtype); | |
| // gather matrix at MASTER node | |
| for ( i = 0 ; i < nprocs ; ++i ){ | |
| recvcounts[i] = ( i == nprocs - 1 ) ? chunk + ( n - ( chunk * nprocs ) ) : chunk ; | |
| displs[i] = i * chunk ; | |
| } | |
| if ( myrank == MASTER ){ | |
| MPI_Gatherv( &v_local[0][0], chunksize-1, MPI_rowtype, &matrix[0][0], recvcounts, displs, MPI_rowtype, MASTER, MPI_COMM_WORLD ); | |
| }else if ( myrank == nprocs - 1 ){ | |
| MPI_Gatherv( &v_local[1][0], chunksize-1, MPI_rowtype, NULL, recvcounts, displs, MPI_rowtype, MASTER, MPI_COMM_WORLD ); | |
| }else { | |
| MPI_Gatherv( &v_local[1][0], chunksize-2, MPI_rowtype, NULL, recvcounts, displs, MPI_rowtype, MASTER, MPI_COMM_WORLD ); | |
| } | |
| MPI_Type_free(&MPI_rowtype); | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment