Created
January 13, 2013 20:09
-
-
Save perdacherMartin/4525933 to your computer and use it in GitHub Desktop.
Multiplication of an n*n matrix with an n-vector:
* to distribute the vector, MPI_Scatterv is used (each process receives a contiguous slice of the vector)
* to distribute the matrix, MPI_Scatterv is used (column-wise decomposition)
* to collect the results, MPI_Reduce_scatter is used
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
  Perdacher Martin
  Multiplication of an n*n matrix with an n-vector
  * to distribute the vector, MPI_Scatterv is used (each process receives a contiguous slice of the vector)
  * to distribute the matrix, MPI_Scatterv is used (column-wise decomposition)
  * to collect the results, MPI_Reduce_scatter is used
*/
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include "mpi.h" | |
| #define MASTER 0 | |
| #define REPETITIONS 10 | |
| #define MPI_SENDTYPE MPI_DOUBLE | |
| typedef double T_datatype; | |
| T_datatype* InitArray(long size, T_datatype value); | |
| T_datatype* InitVector(long size, T_datatype value); | |
| void DistributeVector(T_datatype *root_vector, T_datatype *local_vector, int myrank, int nprocs, long n, long chunksize); | |
| void DistributeMatrix(T_datatype *root_matrix, T_datatype *local_matrix, int myrank, int nprocs, long n, long chunksize); | |
| void CalculateAndReduceScatter(T_datatype *local_matrix, T_datatype *local_vector, T_datatype *resultvector, int myrank, int nprocs, long n, long chunksize); | |
| int main(int argc, char *argv[]){ | |
| 21int myrank, nprocs,columncount=0; | |
| long n, chunksize, i, j; | |
| double times[REPETITIONS]; | |
| double time, slowest,h,x,endresult; | |
| T_datatype *root_matrix=NULL; | |
| T_datatype *root_vector=NULL; | |
| T_datatype *local_matrix=NULL, *local_vector=NULL; | |
| T_datatype *resultvector=NULL; | |
| MPI_Datatype MPI_SplitM; | |
| MPI_Status status; | |
| MPI_Request request; | |
| MPI_Init(&argc, &argv); | |
| MPI_Comm_rank(MPI_COMM_WORLD, &myrank); | |
| MPI_Comm_size(MPI_COMM_WORLD, &nprocs); | |
| if ( argc != 2 ){ | |
| if ( myrank == MASTER ){ | |
| printf("Usage: mpiexec -n nodecount matVec2aa n\n"); | |
| printf("with n is the size of the matrix. (nodecount < n)\n\n"); | |
| } | |
| return 1; | |
| } | |
| n = atol(argv[1]); | |
| if ( n < nprocs ){ | |
| if ( myrank == MASTER ){ | |
| printf("Usage: mpiexec -n nodecount matVec2aa n\n"); | |
| printf("with n > nodecount)\n\n"); | |
| } | |
| return 1; | |
| } | |
| if ( myrank == MASTER ){ | |
| root_matrix = InitArray(n, 1.0); | |
| root_vector = InitVector(n, 1.0); // vector which is distributed with allgatherv to "vector" | |
| } | |
| resultvector = InitVector(n, 0.0); | |
| columncount = n - ((int)n/nprocs ) * (nprocs - 1 ) ; | |
| chunksize = (myrank == nprocs - 1 ) ? columncount : ( n / nprocs ); | |
| columncount = ( columncount > chunksize ) ? columncount : chunksize; // maximum width for the local_matrix | |
| local_matrix = (T_datatype*) malloc (sizeof(T_datatype) * n * columncount); | |
| local_vector = (T_datatype*) malloc (sizeof(T_datatype) * chunksize ); | |
| for ( i = 0 ; i < REPETITIONS ; ++i ){ | |
| MPI_Barrier(MPI_COMM_WORLD); | |
| time = MPI_Wtime(); | |
| // measuring code | |
| // distribute the vector across all processes | |
| DistributeVector(root_vector, local_vector, myrank, nprocs, n, chunksize); | |
| // | |
| // // distribute the matrix across all processes | |
| DistributeMatrix(root_matrix, local_matrix, myrank, nprocs, n, chunksize); // columndistribution of the matrix | |
| // printf("rank:%d, matrix:%f\n", myrank, local_matrix[0][0]); | |
| // calculate result at MASTER in the resultvector | |
| CalculateAndReduceScatter(local_matrix, local_vector, resultvector, myrank, nprocs, n, chunksize); | |
| time = MPI_Wtime() - time; | |
| MPI_Barrier(MPI_COMM_WORLD); | |
| MPI_Reduce(&time, &slowest, 1, MPI_DOUBLE, MPI_MAX, MASTER, MPI_COMM_WORLD); | |
| if (myrank == MASTER) { | |
| times[i]=slowest; | |
| } | |
| } | |
| if ( myrank == MASTER ){ | |
| double min = times[0]; | |
| for (int i = 1; i < REPETITIONS ; i++) { | |
| min = ( times[i] < min ) ? times[i] : min; | |
| } | |
| printf("%d;%ld;%f\n", nprocs, n, min); | |
| } | |
| // verification | |
| // if ( myrank == 0 ){ | |
| // printf("Matrix:\n"); | |
| // | |
| // for ( i = 0 ; i < n ; ++i ){ | |
| // for ( j = 0 ; j < n ; j++ ){ | |
| // printf("%f, ", root_matrix[i*n+j]); | |
| // } | |
| // printf("\n"); | |
| // } | |
| // printf("\n\n"); | |
| // | |
| // printf("Vector:\n"); | |
| // for ( i = 0 ; i < n ; ++i ){ | |
| // printf("%f\n", root_vector[i]); | |
| // } | |
| // | |
| // printf("\n\n"); | |
| // printf("result:\n"); | |
| // | |
| // for ( i=0 ; i < n ; ++i ){ | |
| // printf("%f\n", resultvector[i]); | |
| // } | |
| // | |
| // } | |
| // | |
| // clean up an free memory | |
| if ( myrank == MASTER ){ | |
| free(root_vector); | |
| free(root_matrix); | |
| } | |
| free(resultvector); | |
| free(local_vector); | |
| free(local_matrix); | |
| // | |
| // // mpi-cleanup | |
| MPI_Finalize(); | |
| } | |
| // initializes a size*size - array | |
| T_datatype* InitArray(long size, T_datatype value){ | |
| long i,j; | |
| T_datatype *array = (T_datatype*) malloc( sizeof(T_datatype) * size * size); | |
| if ( array == NULL ){ | |
| printf("Error allocating memory!\n\n"); | |
| MPI_Finalize(); | |
| exit(EXIT_FAILURE); | |
| } | |
| // initialisation of the array | |
| for ( i=0l ; i < size ; ++i ){ | |
| for ( j=0l ; j < size ; ++j ){ | |
| //array[i][j] = i*size + j; | |
| array[i*size + j] = j; | |
| } | |
| } | |
| // returns a n*n-Array | |
| return array; | |
| } | |
| T_datatype* InitVector(long size, T_datatype value){ | |
| long i; | |
| T_datatype *vector = (T_datatype*) malloc (sizeof(T_datatype) * size ); | |
| if ( vector == NULL ){ | |
| printf("Error allocating memory!\n\n"); | |
| exit(EXIT_FAILURE); | |
| } | |
| for ( i = 0l ; i < size ; ++i ){ | |
| vector[i] = value; | |
| } | |
| return vector; | |
| } | |
| void DistributeMatrix(T_datatype *root_matrix, T_datatype *local_matrix, int myrank, int nprocs, long n, long chunksize){ | |
| // distribute the matrix using scatterv | |
| int i,j, sendcounts[nprocs], displs[nprocs]; | |
| T_datatype *sendbuffer=NULL; | |
| long temp; | |
| MPI_Datatype MPI_coltype, MPI_coltype2; | |
| int chunk, lastchunk; | |
| chunk=n/nprocs; | |
| lastchunk= n - (chunk * ( nprocs - 1) ); | |
| MPI_Type_vector(n, 1, n, MPI_SENDTYPE, &MPI_coltype2); | |
| MPI_Type_create_resized( MPI_coltype2, 0, sizeof(T_datatype), &MPI_coltype); | |
| MPI_Type_commit(&MPI_coltype); | |
| // preparing sendcounts and displs for MPI_Scatterv | |
| for ( i = 0 ; i < nprocs ; ++i ){ | |
| //sendcounts[i] = chunksize ; | |
| if ( i == nprocs - 1 ) | |
| sendcounts[i] = lastchunk; | |
| else | |
| sendcounts[i] = chunk; | |
| displs[i] = i*chunk; | |
| } | |
| sendbuffer = NULL; | |
| if ( myrank == MASTER ) | |
| sendbuffer = &(root_matrix[0]); | |
| MPI_Scatterv( sendbuffer, sendcounts, displs, MPI_coltype, &(local_matrix[0]), chunksize*n, MPI_SENDTYPE, MASTER, MPI_COMM_WORLD ); | |
| MPI_Type_free(&MPI_coltype); | |
| } | |
| // distribute the vector using mpi_scatterv | |
| void DistributeVector(T_datatype *root_vector, T_datatype *local_vector, int myrank, int nprocs, long n, long chunksize){ | |
| int sendcounts[nprocs], displs[nprocs],i; | |
| int chunk, lastchunk; | |
| chunk=n/nprocs; | |
| lastchunk= n - (chunk * ( nprocs - 1) ); | |
| sendcounts[0] = chunk; | |
| for ( i = 0 ; i < nprocs ; i++ ){ | |
| sendcounts[i] = (i == nprocs - 1 ) ? lastchunk : chunk; | |
| displs[i] = i*chunk; | |
| } | |
| if ( myrank == MASTER ){ | |
| MPI_Scatterv(&(root_vector[0]), sendcounts, displs, MPI_SENDTYPE, &local_vector[0], chunksize, MPI_SENDTYPE, MASTER, MPI_COMM_WORLD); | |
| }else{ | |
| MPI_Scatterv(NULL, sendcounts, displs, MPI_SENDTYPE, &local_vector[0], chunksize, MPI_SENDTYPE, MASTER, MPI_COMM_WORLD); | |
| } | |
| } | |
| void CalculateAndReduceScatter(T_datatype *local_matrix, T_datatype *local_vector, T_datatype *resultvector, int myrank, int nprocs, long n, long chunksize){ | |
| T_datatype *im_result; // intermediate result | |
| int recvcounts[nprocs]; | |
| int chunk = n / nprocs; | |
| // calculation for each node | |
| im_result = InitVector(n, 0.0); | |
| for ( int i = 0 ; i < nprocs ; ++i ){ | |
| recvcounts[i]=n; | |
| } | |
| for ( int i = 0 ; i < chunksize ; ++i ){ | |
| for ( int j = 0 ; j < n ; ++j ){ | |
| im_result[j] = im_result[j] + local_matrix[i*n+j] * local_vector[i]; | |
| } | |
| } | |
| MPI_Reduce_scatter(im_result, resultvector, recvcounts, MPI_SENDTYPE, MPI_SUM, MPI_COMM_WORLD); | |
| free(im_result); | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment