perdacherMartin · January 13, 2013 20:09
diff --git a/matVec2ab.c b/matVec2ab.c
 /*
 Perdacher Martin
 Multiplication of an n*n matrix with an n-vector

 * to distribute the vector, MPI_Scatterv is used (each process gets a contiguous block of vector elements)
 * to distribute the matrix, MPI_Scatterv is used (column-wise decomposition)
 * to collect the results, MPI_Reduce_scatter is used

 */

 #include <stdio.h>
 #include <stdlib.h>
 #include "mpi.h"

 #define MASTER 0
 #define REPETITIONS 10
 #define MPI_SENDTYPE MPI_DOUBLE

 typedef double T_datatype;

 // Allocates a size*size matrix in one contiguous buffer; element (i,j) is set to j ("value" is not used).
 T_datatype* InitArray(long size, T_datatype value);
 // Allocates a vector of "size" elements, each set to "value".
 T_datatype* InitVector(long size, T_datatype value);
 // Scatters root_vector from MASTER with MPI_Scatterv; every rank receives chunksize elements into local_vector.
 void DistributeVector(T_datatype *root_vector, T_datatype *local_vector, int myrank, int nprocs, long n, long chunksize);
 // Scatters the columns of root_matrix from MASTER with MPI_Scatterv; every rank receives chunksize*n elements into local_matrix.
 void DistributeMatrix(T_datatype *root_matrix, T_datatype *local_matrix, int myrank, int nprocs, long n, long chunksize);
 // Multiplies the local columns with the local vector elements and sums the partial results over all ranks with MPI_Reduce_scatter.
 void CalculateAndReduceScatter(T_datatype *local_matrix, T_datatype *local_vector, T_datatype *resultvector, int myrank, int nprocs, long n, long chunksize);

 /*
  * Benchmark driver for the column-wise distributed matrix-vector product.
  *
  * Expects exactly one argument n (the matrix dimension, n >= number of ranks).
  * The master rank builds the n*n matrix and the n-vector. Each repetition
  * scatters the vector and the matrix columns, computes the local partial
  * product and combines the partial results with MPI_Reduce_scatter.
  * Per repetition the time of the slowest rank is collected on the master,
  * which finally prints "nprocs;n;fastest_repetition".
  *
  * Returns 0 on success and 1 on a usage error. An allocation failure
  * aborts the whole MPI job.
  */
 int main(int argc, char *argv[]){
 	int myrank, nprocs;
 	long n, chunksize, lastchunk;
 	double times[REPETITIONS];
 	double elapsed, slowest;
 	T_datatype *root_matrix=NULL;
 	T_datatype *root_vector=NULL;
 	T_datatype *local_matrix=NULL, *local_vector=NULL;
 	T_datatype *resultvector=NULL;

 	MPI_Init(&argc, &argv);
 	MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
 	MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

 	if ( argc != 2 ){
 		if ( myrank == MASTER ){
 			printf("Usage: mpiexec -n nodecount matVec2aa n\n");
 			printf("with n is the size of the matrix. (nodecount < n)\n\n");
 		}
 		// every rank takes this branch, so the collective finalize is safe
 		MPI_Finalize();
 		return 1;
 	}

 	n = atol(argv[1]);

 	if ( n < nprocs ){
 		if ( myrank == MASTER ){
 			printf("Usage: mpiexec -n nodecount matVec2aa n\n");
 			printf("with n > nodecount)\n\n");
 		}
 		MPI_Finalize();
 		return 1;
 	}

 	if ( myrank == MASTER ){
 		root_matrix = InitArray(n, 1.0);
 		root_vector = InitVector(n, 1.0); // source vector, scattered from the master
 	}
 	resultvector = InitVector(n, 0.0);

 	// every rank owns n/nprocs columns; the last rank additionally takes the remainder
 	lastchunk = n - ( n / nprocs ) * ( nprocs - 1 );
 	chunksize = ( myrank == nprocs - 1 ) ? lastchunk : ( n / nprocs );

 	// lastchunk >= n/nprocs, so it is the maximum width of any local matrix
 	local_matrix = malloc (sizeof(T_datatype) * n * lastchunk);
 	local_vector = malloc (sizeof(T_datatype) * chunksize );
 	if ( local_matrix == NULL || local_vector == NULL ){
 		printf("Error allocating memory!\n\n");
 		// only this rank failed: abort the job instead of leaving the others in a collective
 		MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
 		return 1;
 	}

 	for ( int rep = 0 ; rep < REPETITIONS ; ++rep ){

 		MPI_Barrier(MPI_COMM_WORLD);
 		elapsed = MPI_Wtime();

 		// --- measured section ---
 		// distribute the vector across all processes
 		DistributeVector(root_vector, local_vector, myrank, nprocs, n, chunksize);
 		// distribute the matrix columns across all processes
 		DistributeMatrix(root_matrix, local_matrix, myrank, nprocs, n, chunksize);
 		// local partial product, combined over all ranks into resultvector
 		CalculateAndReduceScatter(local_matrix, local_vector, resultvector, myrank, nprocs, n, chunksize);

 		elapsed = MPI_Wtime() - elapsed;
 		MPI_Barrier(MPI_COMM_WORLD);
 		// the repetition is as slow as its slowest rank
 		MPI_Reduce(&elapsed, &slowest, 1, MPI_DOUBLE, MPI_MAX, MASTER, MPI_COMM_WORLD);

 		if ( myrank == MASTER ){
 			times[rep] = slowest;
 		}
 	}

 	if ( myrank == MASTER ){
 		double fastest = times[0];
 		for ( int rep = 1 ; rep < REPETITIONS ; rep++ ){
 			if ( times[rep] < fastest ){
 				fastest = times[rep];
 			}
 		}
 		printf("%d;%ld;%f\n", nprocs, n, fastest);
 	}

 	// clean up; the root buffers are NULL on non-master ranks and free(NULL) is a no-op
 	free(root_vector);
 	free(root_matrix);
 	free(resultvector);
 	free(local_vector);
 	free(local_matrix);

 	MPI_Finalize();
 	return 0;
 }

 /*
  * Allocates a size*size matrix in one contiguous row-major buffer and sets
  * element (row, col) to col.
  *
  * size:  number of rows and columns
  * value: not used; the fill pattern depends on the column index only
  *
  * Returns the buffer; the caller releases it with free(). If the allocation
  * fails the whole MPI job is aborted. MPI_Finalize is collective and must
  * not be called by a single failing rank while the others continue.
  */
 T_datatype* InitArray(long size, T_datatype value){
 	T_datatype *array = malloc( sizeof(T_datatype) * size * size);

 	(void) value;

 	if ( array == NULL ){
 		printf("Error allocating memory!\n\n");
 		MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
 		exit(EXIT_FAILURE);
 	}

 	for ( long row = 0 ; row < size ; ++row ){
 		for ( long col = 0 ; col < size ; ++col ){
 			array[row*size + col] = col;
 		}
 	}

 	return array;
 }

 /*
  * Allocates a vector of "size" elements and sets every element to "value".
  * Prints a message and terminates the process if the allocation fails.
  * The caller releases the returned buffer with free().
  */
 T_datatype*  InitVector(long size, T_datatype value){
 	T_datatype *vector = (T_datatype*) malloc (sizeof(T_datatype) * size );
 	T_datatype *cursor, *end;

 	if ( vector == NULL ){
 		printf("Error allocating memory!\n\n");
 		exit(EXIT_FAILURE);
 	}

 	// walk the buffer front to back
 	end = vector + size;
 	for ( cursor = vector ; cursor < end ; ++cursor ){
 		*cursor = value;
 	}

 	return vector;
 }

 /*
  * Scatters the columns of the master's row-major n*n matrix with MPI_Scatterv.
  *
  * root_matrix:  send buffer, only read on MASTER (may be NULL elsewhere)
  * local_matrix: receive buffer for chunksize*n elements; received column c
  *               occupies local_matrix[c*n .. c*n + n-1]
  * myrank:       rank of the calling process
  * nprocs:       number of processes
  * n:            matrix dimension
  * chunksize:    number of columns this rank receives
  *
  * Ranks 0..nprocs-2 are sent n/nprocs columns each, the last rank is sent
  * the remaining columns.
  */
 void DistributeMatrix(T_datatype *root_matrix, T_datatype *local_matrix, int myrank, int nprocs, long n, long chunksize){
 	int sendcounts[nprocs], displs[nprocs];
 	MPI_Datatype column, column_resized;
 	int chunk = (int)( n / nprocs );
 	int lastchunk = (int)( n - (long)chunk * ( nprocs - 1 ) );
 	T_datatype *sendbuffer = NULL;

 	// one matrix column: n blocks of one element, n elements apart
 	MPI_Type_vector((int)n, 1, (int)n, MPI_SENDTYPE, &column);
 	// shrink the extent to one element so that consecutive columns start one element apart
 	MPI_Type_create_resized( column, 0, sizeof(T_datatype), &column_resized);
 	MPI_Type_commit(&column_resized);
 	// the intermediate type is not needed any more; freeing it does not affect the derived type
 	MPI_Type_free(&column);

 	// counts and displacements are measured in columns
 	for ( int i = 0 ; i < nprocs ; ++i ){
 		sendcounts[i] = ( i == nprocs - 1 ) ? lastchunk : chunk;
 		displs[i] = i*chunk;
 	}

 	if ( myrank == MASTER ){
 		sendbuffer = &(root_matrix[0]);
 	}

 	// a column leaves as one strided element and arrives as n contiguous elements
 	MPI_Scatterv( sendbuffer, sendcounts, displs, column_resized, &(local_matrix[0]), (int)(chunksize*n), MPI_SENDTYPE, MASTER, MPI_COMM_WORLD );

 	MPI_Type_free(&column_resized);
 }

 /*
  * Scatters the master's n-vector with MPI_Scatterv.
  * Ranks 0..nprocs-2 are sent n/nprocs elements each, the last rank is sent
  * the remaining elements. Every rank receives chunksize elements into
  * local_vector. root_vector is only passed on as send buffer on MASTER.
  */
 void  DistributeVector(T_datatype *root_vector, T_datatype *local_vector, int myrank, int nprocs, long n, long chunksize){
 	int counts[nprocs], offsets[nprocs];
 	int base = n/nprocs;
 	int tail = n - (base * ( nprocs - 1) );
 	int rank = 0;
 	T_datatype *source;

 	// per-rank element counts and start offsets
 	while ( rank < nprocs ){
 		offsets[rank] = rank*base;
 		if ( rank == nprocs - 1 )
 			counts[rank] = tail;
 		else
 			counts[rank] = base;
 		++rank;
 	}

 	// only the master provides a send buffer
 	source = ( myrank == MASTER ) ? &(root_vector[0]) : NULL;

 	MPI_Scatterv(source, counts, offsets, MPI_SENDTYPE, &local_vector[0], chunksize, MPI_SENDTYPE, MASTER, MPI_COMM_WORLD);
 }

 /*
  * Computes this rank's contribution to the matrix-vector product and
  * combines the contributions of all ranks with MPI_Reduce_scatter (MPI_SUM).
  *
  * local_matrix: chunksize columns of length n, column c stored at
  *               local_matrix[c*n .. c*n + n-1]
  * local_vector: the chunksize vector elements that belong to these columns
  * resultvector: receives this rank's slice of the summed result:
  *               n/nprocs elements, the last rank gets the remaining elements
  * myrank:       rank of the calling process (not used)
  * nprocs:       number of processes
  * n:            matrix dimension
  * chunksize:    number of local columns
  *
  * The receive counts must add up to n, the length of the send buffer.
  * Using n for every rank would make MPI read n*nprocs elements from it.
  */
 void CalculateAndReduceScatter(T_datatype *local_matrix, T_datatype *local_vector, T_datatype *resultvector, int myrank, int nprocs, long n, long chunksize){
 	T_datatype *im_result; // intermediate result: full-length partial product
 	int recvcounts[nprocs];
 	int chunk = (int)( n / nprocs );
 	int lastchunk = (int)( n - (long)chunk * ( nprocs - 1 ) );

 	(void) myrank;

 	// same block layout as used for scattering the vector
 	for ( int i = 0 ; i < nprocs ; ++i ){
 		recvcounts[i] = ( i == nprocs - 1 ) ? lastchunk : chunk;
 	}

 	// im_result = sum over local columns of (column * matching vector element)
 	im_result = InitVector(n, 0.0);
 	for ( long col = 0 ; col < chunksize ; ++col ){
 		for ( long row = 0 ; row < n ; ++row ){
 			im_result[row] = im_result[row] + local_matrix[col*n+row] * local_vector[col];
 		}
 	}

 	MPI_Reduce_scatter(im_result, resultvector, recvcounts, MPI_SENDTYPE, MPI_SUM, MPI_COMM_WORLD);

 	free(im_result);
 }
	/*
	Perdacher Martin
	Multiplication of an n*n matrix with an n-vector

	* to distribute the vector, MPI_Scatterv is used (each process gets a contiguous block of vector elements)
	* to distribute the matrix, MPI_Scatterv is used (column-wise decomposition)
	* to collect the results, MPI_Reduce_scatter is used

	*/

	#include <stdio.h>
	#include <stdlib.h>
	#include "mpi.h"

	#define MASTER 0
	#define REPETITIONS 10
	#define MPI_SENDTYPE MPI_DOUBLE

	typedef double T_datatype;

	// Allocates a size*size matrix in one contiguous buffer; element (i,j) is set to j ("value" is not used).
	T_datatype* InitArray(long size, T_datatype value);
	// Allocates a vector of "size" elements, each set to "value".
	T_datatype* InitVector(long size, T_datatype value);
	// Scatters root_vector from MASTER with MPI_Scatterv; every rank receives chunksize elements into local_vector.
	void DistributeVector(T_datatype *root_vector, T_datatype *local_vector, int myrank, int nprocs, long n, long chunksize);
	// Scatters the columns of root_matrix from MASTER with MPI_Scatterv; every rank receives chunksize*n elements into local_matrix.
	void DistributeMatrix(T_datatype *root_matrix, T_datatype *local_matrix, int myrank, int nprocs, long n, long chunksize);
	// Multiplies the local columns with the local vector elements and sums the partial results over all ranks with MPI_Reduce_scatter.
	void CalculateAndReduceScatter(T_datatype *local_matrix, T_datatype *local_vector, T_datatype *resultvector, int myrank, int nprocs, long n, long chunksize);

	/*
	 * Benchmark driver for the column-wise distributed matrix-vector product.
	 *
	 * Expects exactly one argument n (the matrix dimension, n >= number of ranks).
	 * The master rank builds the n*n matrix and the n-vector. Each repetition
	 * scatters the vector and the matrix columns, computes the local partial
	 * product and combines the partial results with MPI_Reduce_scatter.
	 * Per repetition the time of the slowest rank is collected on the master,
	 * which finally prints "nprocs;n;fastest_repetition".
	 *
	 * Returns 0 on success and 1 on a usage error. An allocation failure
	 * aborts the whole MPI job.
	 */
	int main(int argc, char *argv[]){
		int myrank, nprocs;
		long n, chunksize, lastchunk;
		double times[REPETITIONS];
		double elapsed, slowest;
		T_datatype *root_matrix=NULL;
		T_datatype *root_vector=NULL;
		T_datatype *local_matrix=NULL, *local_vector=NULL;
		T_datatype *resultvector=NULL;

		MPI_Init(&argc, &argv);
		MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
		MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

		if ( argc != 2 ){
			if ( myrank == MASTER ){
				printf("Usage: mpiexec -n nodecount matVec2aa n\n");
				printf("with n is the size of the matrix. (nodecount < n)\n\n");
			}
			// every rank takes this branch, so the collective finalize is safe
			MPI_Finalize();
			return 1;
		}

		n = atol(argv[1]);

		if ( n < nprocs ){
			if ( myrank == MASTER ){
				printf("Usage: mpiexec -n nodecount matVec2aa n\n");
				printf("with n > nodecount)\n\n");
			}
			MPI_Finalize();
			return 1;
		}

		if ( myrank == MASTER ){
			root_matrix = InitArray(n, 1.0);
			root_vector = InitVector(n, 1.0); // source vector, scattered from the master
		}
		resultvector = InitVector(n, 0.0);

		// every rank owns n/nprocs columns; the last rank additionally takes the remainder
		lastchunk = n - ( n / nprocs ) * ( nprocs - 1 );
		chunksize = ( myrank == nprocs - 1 ) ? lastchunk : ( n / nprocs );

		// lastchunk >= n/nprocs, so it is the maximum width of any local matrix
		local_matrix = malloc (sizeof(T_datatype) * n * lastchunk);
		local_vector = malloc (sizeof(T_datatype) * chunksize );
		if ( local_matrix == NULL || local_vector == NULL ){
			printf("Error allocating memory!\n\n");
			// only this rank failed: abort the job instead of leaving the others in a collective
			MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
			return 1;
		}

		for ( int rep = 0 ; rep < REPETITIONS ; ++rep ){

			MPI_Barrier(MPI_COMM_WORLD);
			elapsed = MPI_Wtime();

			// --- measured section ---
			// distribute the vector across all processes
			DistributeVector(root_vector, local_vector, myrank, nprocs, n, chunksize);
			// distribute the matrix columns across all processes
			DistributeMatrix(root_matrix, local_matrix, myrank, nprocs, n, chunksize);
			// local partial product, combined over all ranks into resultvector
			CalculateAndReduceScatter(local_matrix, local_vector, resultvector, myrank, nprocs, n, chunksize);

			elapsed = MPI_Wtime() - elapsed;
			MPI_Barrier(MPI_COMM_WORLD);
			// the repetition is as slow as its slowest rank
			MPI_Reduce(&elapsed, &slowest, 1, MPI_DOUBLE, MPI_MAX, MASTER, MPI_COMM_WORLD);

			if ( myrank == MASTER ){
				times[rep] = slowest;
			}
		}

		if ( myrank == MASTER ){
			double fastest = times[0];
			for ( int rep = 1 ; rep < REPETITIONS ; rep++ ){
				if ( times[rep] < fastest ){
					fastest = times[rep];
				}
			}
			printf("%d;%ld;%f\n", nprocs, n, fastest);
		}

		// clean up; the root buffers are NULL on non-master ranks and free(NULL) is a no-op
		free(root_vector);
		free(root_matrix);
		free(resultvector);
		free(local_vector);
		free(local_matrix);

		MPI_Finalize();
		return 0;
	}

	/*
	 * Allocates a size*size matrix in one contiguous row-major buffer and sets
	 * element (row, col) to col.
	 *
	 * size:  number of rows and columns
	 * value: not used; the fill pattern depends on the column index only
	 *
	 * Returns the buffer; the caller releases it with free(). If the allocation
	 * fails the whole MPI job is aborted. MPI_Finalize is collective and must
	 * not be called by a single failing rank while the others continue.
	 */
	T_datatype* InitArray(long size, T_datatype value){
		T_datatype *array = malloc( sizeof(T_datatype) * size * size);

		(void) value;

		if ( array == NULL ){
			printf("Error allocating memory!\n\n");
			MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
			exit(EXIT_FAILURE);
		}

		for ( long row = 0 ; row < size ; ++row ){
			for ( long col = 0 ; col < size ; ++col ){
				array[row*size + col] = col;
			}
		}

		return array;
	}

	/*
	 * Allocates a vector of "size" elements and sets every element to "value".
	 * Prints a message and terminates the process if the allocation fails.
	 * The caller releases the returned buffer with free().
	 */
	T_datatype* InitVector(long size, T_datatype value){
		// the buffer must be held through a pointer; a plain T_datatype cannot store malloc's result
		T_datatype *vector = malloc (sizeof(T_datatype) * size );

		if ( vector == NULL ){
			printf("Error allocating memory!\n\n");
			exit(EXIT_FAILURE);
		}
		for ( long i = 0 ; i < size ; ++i ){
			vector[i] = value;
		}

		return vector;
	}

	/*
	 * Scatters the columns of the master's row-major n*n matrix with MPI_Scatterv.
	 *
	 * root_matrix:  send buffer, only read on MASTER (may be NULL elsewhere)
	 * local_matrix: receive buffer for chunksize*n elements; received column c
	 *               occupies local_matrix[c*n .. c*n + n-1]
	 * myrank:       rank of the calling process
	 * nprocs:       number of processes
	 * n:            matrix dimension
	 * chunksize:    number of columns this rank receives
	 *
	 * Both matrix parameters are pointers, because the body indexes them and
	 * hands them to MPI as buffers.
	 * Ranks 0..nprocs-2 are sent n/nprocs columns each, the last rank is sent
	 * the remaining columns.
	 */
	void DistributeMatrix(T_datatype *root_matrix, T_datatype *local_matrix, int myrank, int nprocs, long n, long chunksize){
		int sendcounts[nprocs], displs[nprocs];
		MPI_Datatype column, column_resized;
		int chunk = (int)( n / nprocs );
		int lastchunk = (int)( n - (long)chunk * ( nprocs - 1 ) );
		T_datatype *sendbuffer = NULL;

		// one matrix column: n blocks of one element, n elements apart
		MPI_Type_vector((int)n, 1, (int)n, MPI_SENDTYPE, &column);
		// shrink the extent to one element so that consecutive columns start one element apart
		MPI_Type_create_resized( column, 0, sizeof(T_datatype), &column_resized);
		MPI_Type_commit(&column_resized);
		// the intermediate type is not needed any more; freeing it does not affect the derived type
		MPI_Type_free(&column);

		// counts and displacements are measured in columns
		for ( int i = 0 ; i < nprocs ; ++i ){
			sendcounts[i] = ( i == nprocs - 1 ) ? lastchunk : chunk;
			displs[i] = i*chunk;
		}

		if ( myrank == MASTER ){
			sendbuffer = &(root_matrix[0]);
		}

		// a column leaves as one strided element and arrives as n contiguous elements
		MPI_Scatterv( sendbuffer, sendcounts, displs, column_resized, &(local_matrix[0]), (int)(chunksize*n), MPI_SENDTYPE, MASTER, MPI_COMM_WORLD );

		MPI_Type_free(&column_resized);
	}

	/*
	 * Scatters the master's n-vector with MPI_Scatterv.
	 * Ranks 0..nprocs-2 are sent n/nprocs elements each, the last rank is sent
	 * the remaining elements. Every rank receives chunksize elements into
	 * local_vector. root_vector is only passed on as send buffer on MASTER.
	 * Both vector parameters are pointers, because the body indexes them and
	 * hands them to MPI as buffers.
	 */
	void DistributeVector(T_datatype *root_vector, T_datatype *local_vector, int myrank, int nprocs, long n, long chunksize){
		int sendcounts[nprocs], displs[nprocs];
		int chunk = n/nprocs;
		int lastchunk = n - (chunk * ( nprocs - 1) );
		T_datatype *sendbuffer;

		// per-rank element counts and start offsets
		for ( int i = 0 ; i < nprocs ; i++ ){
			sendcounts[i] = (i == nprocs - 1 ) ? lastchunk : chunk;
			displs[i] = i*chunk;
		}

		// only the master provides a send buffer
		sendbuffer = ( myrank == MASTER ) ? &(root_vector[0]) : NULL;

		MPI_Scatterv(sendbuffer, sendcounts, displs, MPI_SENDTYPE, &local_vector[0], chunksize, MPI_SENDTYPE, MASTER, MPI_COMM_WORLD);
	}

	/*
	 * Computes this rank's contribution to the matrix-vector product and
	 * combines the contributions of all ranks with MPI_Reduce_scatter (MPI_SUM).
	 *
	 * local_matrix: chunksize columns of length n, column c stored at
	 *               local_matrix[c*n .. c*n + n-1]
	 * local_vector: the chunksize vector elements that belong to these columns
	 * resultvector: receives this rank's slice of the summed result:
	 *               n/nprocs elements, the last rank gets the remaining elements
	 * myrank:       rank of the calling process (not used)
	 * nprocs:       number of processes
	 * n:            matrix dimension
	 * chunksize:    number of local columns
	 *
	 * The receive counts must add up to n, the length of the send buffer.
	 * Using n for every rank would make MPI read n*nprocs elements from it.
	 */
	void CalculateAndReduceScatter(T_datatype *local_matrix, T_datatype *local_vector, T_datatype *resultvector, int myrank, int nprocs, long n, long chunksize){
		T_datatype *im_result; // intermediate result: full-length partial product
		int recvcounts[nprocs];
		int chunk = (int)( n / nprocs );
		int lastchunk = (int)( n - (long)chunk * ( nprocs - 1 ) );

		(void) myrank;

		// same block layout as used for scattering the vector
		for ( int i = 0 ; i < nprocs ; ++i ){
			recvcounts[i] = ( i == nprocs - 1 ) ? lastchunk : chunk;
		}

		// im_result = sum over local columns of (column * matching vector element)
		im_result = InitVector(n, 0.0);
		for ( long col = 0 ; col < chunksize ; ++col ){
			for ( long row = 0 ; row < n ; ++row ){
				im_result[row] = im_result[row] + local_matrix[col*n+row] * local_vector[col];
			}
		}

		MPI_Reduce_scatter(im_result, resultvector, recvcounts, MPI_SENDTYPE, MPI_SUM, MPI_COMM_WORLD);

		free(im_result);
	}
No results found