Skip to content

Instantly share code, notes, and snippets.

@perdacherMartin
Created January 13, 2013 20:09
Show Gist options
  • Select an option

  • Save perdacherMartin/4525933 to your computer and use it in GitHub Desktop.

Select an option

Save perdacherMartin/4525933 to your computer and use it in GitHub Desktop.
Multiplication of an n*n matrix with an n-vector * to distribute the vector, MPI_Scatterv is used (each process gets a block of the vector's elements) * to distribute the matrix, MPI_Scatterv is used (column-wise decomposition) * to collect the results, MPI_Reduce_scatter is used
/*
Perdacher Martin
Multiplication of an n*n matrix with an n-vector
* to distribute the vector, MPI_Scatterv is used (each process gets a block of the vector's elements)
* to distribute the matrix, MPI_Scatterv is used (column-wise decomposition)
* to collect the results, MPI_Reduce_scatter is used
*/
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#include "mpi.h"
/* Rank that owns the full matrix/vector and acts as root of the collectives. */
#define MASTER 0
/* Number of timed rounds; the minimum over all rounds is reported. */
#define REPETITIONS 10
/* MPI datatype used to transfer T_datatype elements; must match the typedef below. */
#define MPI_SENDTYPE MPI_DOUBLE
/* Element type of the matrix and of all vectors. */
typedef double T_datatype;
/* Allocates a size*size matrix stored as one contiguous row-major array. */
T_datatype* InitArray(long size, T_datatype value);
/* Allocates a vector of `size` elements, each set to `value`. */
T_datatype* InitVector(long size, T_datatype value);
/* Scatters root_vector from MASTER; every rank receives `chunksize` elements in local_vector. */
void DistributeVector(T_datatype *root_vector, T_datatype *local_vector, int myrank, int nprocs, long n, long chunksize);
/* Scatters root_matrix from MASTER by columns; every rank receives `chunksize` columns of n elements in local_matrix. */
void DistributeMatrix(T_datatype *root_matrix, T_datatype *local_matrix, int myrank, int nprocs, long n, long chunksize);
/* Computes this rank's partial product and combines the partial results with MPI_Reduce_scatter into resultvector. */
void CalculateAndReduceScatter(T_datatype *local_matrix, T_datatype *local_vector, T_datatype *resultvector, int myrank, int nprocs, long n, long chunksize);
/*
 * Benchmark driver for the column-wise distributed matrix-vector product.
 *
 * Usage: mpiexec -n nodecount matVec2aa n
 *
 * Rank MASTER builds the n*n matrix and the n-vector. Each of the
 * REPETITIONS timed rounds scatters the vector, scatters the matrix by
 * columns, and then computes and reduces the product. After every round the
 * slowest rank's time is collected on MASTER, which finally prints one line
 * "nprocs;n;min_time" with the minimum over all rounds.
 *
 * Returns 0 on success and 1 when the arguments are invalid. A failed
 * allocation of the local buffers aborts the whole MPI job.
 */
int main(int argc, char *argv[]){
    int myrank, nprocs;
    long n, chunksize, max_columns;
    double times[REPETITIONS];
    double elapsed, slowest;
    char *end = NULL;
    T_datatype *root_matrix = NULL;
    T_datatype *root_vector = NULL;
    T_datatype *local_matrix = NULL;
    T_datatype *local_vector = NULL;
    T_datatype *resultvector = NULL;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    if ( argc != 2 ){
        if ( myrank == MASTER ){
            printf("Usage: mpiexec -n nodecount matVec2aa n\n");
            printf("with n is the size of the matrix. (nodecount <= n)\n\n");
        }
        /* every rank takes this path, so finalizing here is safe */
        MPI_Finalize();
        return 1;
    }

    /* strtol instead of atol so that trailing garbage or an empty argument is rejected */
    n = strtol(argv[1], &end, 10);
    if ( end == argv[1] || *end != '\0' || n < nprocs ){
        if ( myrank == MASTER ){
            printf("Usage: mpiexec -n nodecount matVec2aa n\n");
            printf("with n >= nodecount\n\n");
        }
        MPI_Finalize();
        return 1;
    }

    /*
     * The last rank takes the remainder columns, so its share is the widest.
     * n >= nprocs guarantees that max_columns is at least 1.
     */
    max_columns = n - ( n / nprocs ) * ( nprocs - 1 );

    /* MPI counts are ints: the widest local block (n * max_columns elements) must fit. */
    if ( n > INT_MAX / max_columns ){
        if ( myrank == MASTER ){
            printf("Matrix size %ld is too large: a local block of %ld columns exceeds the MPI count limit.\n\n", n, max_columns);
        }
        MPI_Finalize();
        return 1;
    }

    if ( myrank == MASTER ){
        root_matrix = InitArray(n, 1.0);
        root_vector = InitVector(n, 1.0); // vector that DistributeVector scatters to all ranks
    }
    resultvector = InitVector(n, 0.0);

    chunksize = ( myrank == nprocs - 1 ) ? max_columns : ( n / nprocs );

    /* local_matrix is sized for the widest block on every rank, as before */
    local_matrix = calloc((size_t)n * (size_t)max_columns, sizeof *local_matrix);
    local_vector = calloc((size_t)chunksize, sizeof *local_vector);
    if ( local_matrix == NULL || local_vector == NULL ){
        fprintf(stderr, "Error allocating memory!\n\n");
        /* only this rank failed; abort the job so the other ranks do not hang in a collective */
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return EXIT_FAILURE;
    }

    for ( int rep = 0 ; rep < REPETITIONS ; ++rep ){
        MPI_Barrier(MPI_COMM_WORLD);
        elapsed = MPI_Wtime();

        // measuring code
        // distribute the vector across all processes
        DistributeVector(root_vector, local_vector, myrank, nprocs, n, chunksize);

        // distribute the matrix across all processes (column distribution)
        DistributeMatrix(root_matrix, local_matrix, myrank, nprocs, n, chunksize);

        // multiply the local block and combine the partial results
        CalculateAndReduceScatter(local_matrix, local_vector, resultvector, myrank, nprocs, n, chunksize);

        elapsed = MPI_Wtime() - elapsed;

        MPI_Barrier(MPI_COMM_WORLD);
        /* the round is as slow as its slowest rank */
        MPI_Reduce(&elapsed, &slowest, 1, MPI_DOUBLE, MPI_MAX, MASTER, MPI_COMM_WORLD);
        if ( myrank == MASTER ){
            times[rep] = slowest;
        }
    }

    if ( myrank == MASTER ){
        double min = times[0];
        for ( int rep = 1 ; rep < REPETITIONS ; ++rep ){
            min = ( times[rep] < min ) ? times[rep] : min;
        }
        printf("%d;%ld;%f\n", nprocs, n, min);
    }

    // verification
    // if ( myrank == 0 ){
    //     printf("Matrix:\n");
    //
    //     for ( i = 0 ; i < n ; ++i ){
    //         for ( j = 0 ; j < n ; j++ ){
    //             printf("%f, ", root_matrix[i*n+j]);
    //         }
    //         printf("\n");
    //     }
    //     printf("\n\n");
    //
    //     printf("Vector:\n");
    //     for ( i = 0 ; i < n ; ++i ){
    //         printf("%f\n", root_vector[i]);
    //     }
    //
    //     printf("\n\n");
    //     printf("result:\n");
    //
    //     for ( i=0 ; i < n ; ++i ){
    //         printf("%f\n", resultvector[i]);
    //     }
    //
    // }

    // clean up and free memory (free(NULL) is a no-op on the non-master ranks)
    free(root_vector);
    free(root_matrix);
    free(resultvector);
    free(local_vector);
    free(local_matrix);

    // mpi-cleanup
    MPI_Finalize();
    return 0;
}
/*
 * Allocates a size*size matrix as one contiguous row-major array and fills
 * every entry with its column index (array[i*size + j] = j).
 *
 * size   number of rows and columns; must be positive
 * value  currently not used by the fill pattern; kept so the signature
 *        matches InitVector
 *
 * Returns the newly allocated array; the caller releases it with free().
 * If size is not positive, the byte count would overflow, or the allocation
 * fails, an error is written to stderr and the whole MPI job is aborted.
 */
T_datatype* InitArray(long size, T_datatype value){
    T_datatype *array = NULL;

    (void)value;

    /* allocate only when size*size*sizeof(T_datatype) is representable in size_t */
    if ( size > 0 && (size_t)size <= ((size_t)-1 / sizeof *array) / (size_t)size ){
        array = malloc(sizeof *array * (size_t)size * (size_t)size);
    }

    if ( array == NULL ){
        fprintf(stderr, "Error allocating memory!\n\n");
        /*
         * MPI_Finalize is collective and must not be called by a single
         * failing rank while the others keep communicating; MPI_Abort
         * terminates the whole job instead.
         */
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        exit(EXIT_FAILURE);
    }

    // initialisation of the array
    for ( long i = 0 ; i < size ; ++i ){
        for ( long j = 0 ; j < size ; ++j ){
            //array[i][j] = i*size + j;
            array[i*size + j] = (T_datatype)j;
        }
    }

    // returns a n*n-Array
    return array;
}
/*
 * Allocates a vector of `size` elements and sets each of them to `value`.
 *
 * Returns the newly allocated vector; the caller releases it with free().
 * If the allocation fails, an error message is printed and the process
 * exits with EXIT_FAILURE.
 */
T_datatype* InitVector(long size, T_datatype value){
    T_datatype *vector = (T_datatype*) malloc(size * sizeof(T_datatype));

    if ( !vector ){
        printf("Error allocating memory!\n\n");
        exit(EXIT_FAILURE);
    }

    /* walk the buffer with a pointer instead of an index */
    T_datatype *const stop = vector + size;
    for ( T_datatype *slot = vector ; slot < stop ; ++slot ){
        *slot = value;
    }

    return vector;
}
/*
 * Scatters the row-major n*n matrix on MASTER column-wise over all ranks.
 *
 * root_matrix   full matrix; only read on MASTER
 * local_matrix  receive buffer with room for chunksize*n elements; the
 *               received columns are stored one after another, so element
 *               r of local column c ends up at local_matrix[c*n + r]
 * myrank        rank of the calling process
 * nprocs        number of processes in MPI_COMM_WORLD
 * n             matrix dimension
 * chunksize     number of columns this rank receives
 *
 * Every rank gets n/nprocs columns, except the last rank, which also takes
 * the remainder.
 */
void DistributeMatrix(T_datatype *root_matrix, T_datatype *local_matrix, int myrank, int nprocs, long n, long chunksize){
    int sendcounts[nprocs], displs[nprocs];
    MPI_Datatype column, column_resized;
    const int chunk = (int)(n / nprocs);
    const int lastchunk = (int)(n - (long)chunk * (nprocs - 1));

    /* one matrix column: n single elements, each n elements apart */
    MPI_Type_vector((int)n, 1, (int)n, MPI_SENDTYPE, &column);
    /* shrink the extent to one element so that displacements count columns */
    MPI_Type_create_resized(column, 0, sizeof(T_datatype), &column_resized);
    MPI_Type_commit(&column_resized);
    /* the intermediate type is no longer needed; freeing it avoids leaking a handle per call */
    MPI_Type_free(&column);

    // preparing sendcounts and displs for MPI_Scatterv
    for ( int rank = 0 ; rank < nprocs ; ++rank ){
        sendcounts[rank] = ( rank == nprocs - 1 ) ? lastchunk : chunk;
        displs[rank] = rank * chunk;
    }

    /* the send buffer is only significant on the root */
    MPI_Scatterv( ( myrank == MASTER ) ? root_matrix : NULL, sendcounts, displs, column_resized,
                  local_matrix, (int)(chunksize * n), MPI_SENDTYPE, MASTER, MPI_COMM_WORLD );

    MPI_Type_free(&column_resized);
}
/*
 * Scatters the n-vector on MASTER over all ranks with MPI_Scatterv.
 *
 * root_vector   full vector; only passed to MPI on MASTER
 * local_vector  receive buffer for chunksize elements
 * myrank        rank of the calling process
 * nprocs        number of processes in MPI_COMM_WORLD
 * n             vector length
 * chunksize     number of elements this rank receives
 *
 * Every rank gets n/nprocs elements, except the last rank, which also takes
 * the remainder.
 */
void DistributeVector(T_datatype *root_vector, T_datatype *local_vector, int myrank, int nprocs, long n, long chunksize){
    int counts[nprocs], offsets[nprocs];
    int base, tail, rank;
    T_datatype *source = NULL;

    base = n/nprocs;
    tail = n - (base * ( nprocs - 1) );

    for ( rank = 0 ; rank < nprocs ; rank++ ){
        offsets[rank] = rank*base;
        if ( rank == nprocs - 1 ){
            counts[rank] = tail;
        }else{
            counts[rank] = base;
        }
    }

    /* only the root hands a send buffer to MPI */
    if ( myrank == MASTER ){
        source = root_vector;
    }

    MPI_Scatterv(source, counts, offsets, MPI_SENDTYPE, local_vector, chunksize, MPI_SENDTYPE, MASTER, MPI_COMM_WORLD);
}
/*
 * Multiplies this rank's block of matrix columns with the matching vector
 * elements and sums the partial results of all ranks with MPI_Reduce_scatter.
 *
 * local_matrix  chunksize columns of n elements each, stored column after
 *               column (element r of local column c at local_matrix[c*n + r])
 * local_vector  the chunksize vector elements that belong to those columns
 * resultvector  receive buffer with room for n elements; rank 0 receives the
 *               complete product vector, all other ranks receive nothing
 * myrank        rank of the calling process (not needed here)
 * nprocs        number of processes in MPI_COMM_WORLD
 * n             matrix dimension
 * chunksize     number of local columns
 */
void CalculateAndReduceScatter(T_datatype *local_matrix, T_datatype *local_vector, T_datatype *resultvector, int myrank, int nprocs, long n, long chunksize){
    const int root = 0;   /* rank that receives the reduced vector (MASTER in this file) */
    int recvcounts[nprocs];
    T_datatype *im_result = InitVector(n, 0.0); // intermediate result of this rank

    (void)myrank;

    /*
     * MPI_Reduce_scatter reads sum(recvcounts) elements from the send buffer.
     * im_result holds exactly n elements, so the counts must add up to n;
     * giving every rank a count of n made MPI read n*nprocs elements.
     * All n elements go to the root, which is what it received before.
     */
    for ( int rank = 0 ; rank < nprocs ; ++rank ){
        recvcounts[rank] = ( rank == root ) ? (int)n : 0;
    }

    // calculation for each node: add x[col] * column[col] to the partial result
    for ( long col = 0 ; col < chunksize ; ++col ){
        const T_datatype *column = local_matrix + col * n;
        const T_datatype factor = local_vector[col];
        for ( long row = 0 ; row < n ; ++row ){
            im_result[row] = im_result[row] + column[row] * factor;
        }
    }

    MPI_Reduce_scatter(im_result, resultvector, recvcounts, MPI_SENDTYPE, MPI_SUM, MPI_COMM_WORLD);
    free(im_result);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment