perdacherMartin · January 13, 2013 20:06
diff --git a/matVec2aa.c b/matVec2aa.c
 /*
 Perdacher Martin
 Multiply of n*n-matrix with an n-vector

 * to distribute the vector, MPI_Allgether is used
 * to distribute the matrix, MPI_Scatterv is used (row-wise decomposition)

 */

 #include <stdio.h>
 #include <stdlib.h>
 #include "mpi.h"

 #define MASTER 0
 #define REPETITIONS 10
 #define MPI_SENDTYPE MPI_DOUBLE

 typedef double T_datatype;

 T_datatype** InitArray(long size, T_datatype value);
 T_datatype*  InitVector(long size, T_datatype value);
 void  DistributeVector(T_datatype *vector, T_datatype *new_vector, int myrank, int nprocs, long n);
 void DistributeMatrix(T_datatype **matrix, T_datatype **local_matrix, int myrank, int nprocs, long n, long *chunksize);
 void  CalculateAndGather(T_datatype **local_matrix, T_datatype *vector, T_datatype *resultvector, int myrank, int nprocs, long n, long chunksize);

 int main(int argc, char *argv[]){
 21int myrank, nprocs,rest=0;
 	long n, chunksize, i, j;
 	double times[REPETITIONS];
 	double time, slowest,h,x,endresult;
 	T_datatype **matrix=NULL;
 	T_datatype *vector=NULL;
 	T_datatype **local_matrix=NULL, *lm_data=NULL;
 	T_datatype *resultvector=NULL;
 	T_datatype *mvector=NULL;
 	MPI_Datatype MPI_SplitM;
 	
  	MPI_Status recv_status;
 	MPI_Request request;
 	
  	MPI_Init(&argc, &argv);
  	MPI_Comm_rank(MPI_COMM_WORLD, &myrank);  
  	MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
  
 	if ( argc != 2 ){
 		if ( myrank == MASTER ){
 	    	printf("Usage: mpiexec -n nodecount matVec2aa n\n");
 			printf("with n is the size of the matrix. (nodecount < n)\n\n");			
 		}
 		return 1;
    }
 	
 	n = atol(argv[1]);
 	
 	if ( n < nprocs ){
 		if ( myrank == MASTER ){
 	    	printf("Usage: mpiexec -n nodecount matVec2aa n\n");
 			printf("with n > nodecount)\n\n");
 		}
 		return 1;
 	}
 	
 	if ( myrank == MASTER ){
 		matrix = InitArray(n, 1.0);
 		mvector = InitVector(n, 1.0); // vector which is distributed with allgatherv to "vector"		
 		resultvector = (T_datatype*) malloc (sizeof(T_datatype) * n ); 
 	}
 	rest = ( n - ((int)n/nprocs ) * (nprocs - 1 ) );
 	chunksize = (myrank == nprocs - 1 ) ? rest : ( n / nprocs );
 	lm_data = (T_datatype*) malloc(sizeof(T_datatype) * chunksize * n);
 	local_matrix = (T_datatype**) malloc (sizeof(T_datatype) * chunksize);
 	for ( i = 0 ; i < chunksize ; ++i ){
 		local_matrix[i] = &(lm_data[i*n]);
 	}
 	
 	vector = (T_datatype*) malloc (sizeof(T_datatype) * n ); 
 	
 	for ( i = 0 ; i < REPETITIONS ; ++i ){

 		MPI_Barrier(MPI_COMM_WORLD);
 		time = MPI_Wtime();	

 // 	    measuring code
 		
 		// distribute the vector across all processes
 		DistributeVector(mvector, vector, myrank, nprocs, n);
 		
 		// distribute the matrix across all processes
 		DistributeMatrix(matrix, local_matrix, myrank, nprocs, n, &chunksize); // chunksize is the size of the local_matrix	
 		
 //		printf("rank:%d, matrix:%f\n", myrank, local_matrix[0][0]);
 		// calculate result at MASTER in the resultvector
 		CalculateAndGather(local_matrix, vector, resultvector, myrank, nprocs, n, chunksize);
 		
 		
 		time = MPI_Wtime() - time;
 		MPI_Barrier(MPI_COMM_WORLD); 
 		MPI_Reduce(&time, &slowest, 1, MPI_DOUBLE, MPI_MAX, MASTER, MPI_COMM_WORLD);
 		
 		if (myrank == MASTER) {
 	    	times[i]=slowest;
 	   	}
 	}
 	
 	
 	if ( myrank == MASTER ){
 		double min = times[0];
 	   	for (int i = 1; i < REPETITIONS ; i++) {
 			min = ( times[i] < min ) ? times[i] : min;
 	   	}
 		printf("%d;%ld;%f\n", nprocs, n, min); 
 	}

 // verification	
 //	if ( myrank == 0 ){
 //  	printf("Matrix:\n");
 //  
 // 		for ( i = 0 ; i < n ; ++i ){
 //  			for ( j = 0 ; j < n ; j++ ){
 //  				printf("%f, ", matrix[i][j]);	
 //  			}
 //  			printf("\n");
 //  		}
 //  		printf("\n\n");
 //  	
 //		printf("Vector:\n");
 //		for ( i = 0 ; i < n ; ++i ){
 //			printf("%f\n", vector[i]);
 //		}
 //	
 //  	printf("\n\n");
 //  	printf("result:\n");
 //  	
 //  	for ( i=0 ; i < n ; ++i ){
 //  		printf("%f\n", resultvector[i]);
 //  	}
 //  	
 //	}

 	
 	// clean up an free memory
 	if ( myrank == MASTER ){
 		free(matrix);
 		free(resultvector);
 		free(mvector);
 	}	
 	free(vector);
 	free(local_matrix);
 	free(lm_data);
 //	
 //	// mpi-cleanup
  	MPI_Finalize();	
 	
 }

 // initializes a size*size - array
 T_datatype** InitArray( long size, T_datatype value){
 	long i,j;
 	T_datatype **array = (T_datatype**) malloc( sizeof(T_datatype *) * size);
 	T_datatype *array_data = (T_datatype *)malloc(size * size *sizeof(T_datatype));
 	
 	if ( array == NULL || array_data == NULL ){
 		printf("Error allocating memory!\n\n");
 		exit(EXIT_FAILURE);
 	}
 	for ( i=0l ; i < size ; ++i ){
 		array[i] = &(array_data[i*size]);
 	}
 	
 	
 	// initialisation of the array
 	for ( i=0l ; i < size ; ++i ){
 		for ( j=0l ; j < size ; ++j ){
 			//array[i][j] = i*size + j;	
 			array[i][j] = value;
 		}
 	}
 	
 	// returns a n*n-Array
 	return array; 
 }

 T_datatype*  InitVector(long size, T_datatype value){
 	long i;
 	T_datatype *vector = (T_datatype*) malloc (sizeof(T_datatype) * size );
 	
 	if ( vector == NULL ){
 		printf("Error allocating memory!\n\n");
 		exit(EXIT_FAILURE);
 	}
 	for ( i = 0l ; i < size ; ++i ){
 		vector[i] = value;
 	}

 	return vector;
 }

 // distribute the matrix using scatterv
 void DistributeMatrix(T_datatype **matrix, T_datatype **local_matrix, int myrank, int nprocs, long n, long *chunksize){
 	int i,j, sendcounts[nprocs], displs[nprocs];
 	T_datatype *sendbuffer=NULL;
 	long temp;
 	MPI_Datatype MPI_rowtype; 

 	MPI_Type_contiguous(n, MPI_SENDTYPE, &MPI_rowtype);
 	MPI_Type_commit(&MPI_rowtype);
 	
 	*chunksize = (long) n / nprocs;

 	// preparing sendcounts and displs for MPI_Scatterv
 	for ( i = 0 ; i < nprocs ; ++i ){
 		sendcounts[i] = ( i == nprocs - 1 ) ? *chunksize + ( n - ( *chunksize * nprocs ) ) :*chunksize ;
 		displs[i] = *chunksize * i;		
 	}

 	*chunksize = sendcounts[myrank] ;
 	
 	sendbuffer = NULL;
 	if ( myrank == MASTER )
 		sendbuffer = &(matrix[0][0]);
 	
 	MPI_Scatterv( sendbuffer, sendcounts, displs, MPI_rowtype, &(local_matrix[0][0]), *chunksize, MPI_rowtype, MASTER, MPI_COMM_WORLD );
 	
 	MPI_Type_free(&MPI_rowtype);
 	
 }


 // distribute the vector using mpi_allgatherv
 void DistributeVector(T_datatype *vector, T_datatype *new_vector, int myrank, int nprocs, long n){
 	int recvcounts[nprocs], displs[nprocs],i;
 	
 	recvcounts[0] = n;
 	displs[0] = 0;
 	for ( i = 1 ; i < nprocs ; i++ ){
 		recvcounts[i] = 0;
 		displs[i] = n;
 	}
 		
 	if ( myrank == MASTER ){
 		MPI_Allgatherv(&(vector[0]), n, MPI_SENDTYPE, &new_vector[0], recvcounts, displs, MPI_SENDTYPE, MPI_COMM_WORLD);				
 	}else{
 		MPI_Allgatherv(NULL, 0, MPI_SENDTYPE, &new_vector[0], recvcounts, displs, MPI_SENDTYPE, MPI_COMM_WORLD);		
 	}
 	
 }

 void CalculateAndGather(T_datatype **local_matrix, T_datatype *vector, T_datatype *resultvector, int myrank, int nprocs, long n, long chunksize){
 	T_datatype *im_result=NULL; // intermediate result
 	int recvcounts[nprocs], displs[nprocs];
 	int chunk = n / nprocs;
 	int i,j;
 	
 // calculation for each node
 	im_result = InitVector(chunksize, 0.0);

 	for ( i = 0 ; i < chunksize ; ++i ){
 		for ( j = 0 ; j < n ; ++j ){
 			im_result[i] = im_result[i] + (local_matrix[i][j] * vector[j%n]);
 		}
 	}

 // gather im_result into resultvector on MASTER
 	for ( i = 0 ; i < nprocs ; ++i ){
 		recvcounts[i] = (i == nprocs - 1 ) ? chunk + ( n - ( chunk * nprocs ) ) : chunk;
 		displs[i] = i * chunk ;
 		
 	}
 	
 	
 	MPI_Gatherv( im_result, chunksize, MPI_SENDTYPE, resultvector, recvcounts, displs, MPI_SENDTYPE, MASTER, MPI_COMM_WORLD );
 	
 	free(im_result);
 }
	/*
	Perdacher Martin
	Multiply of n*n-matrix with an n-vector

	* to distribute the vector, MPI_Allgether is used
	* to distribute the matrix, MPI_Scatterv is used (row-wise decomposition)

	*/

	#include <stdio.h>
	#include <stdlib.h>
	#include "mpi.h"

	#define MASTER 0
	#define REPETITIONS 10
	#define MPI_SENDTYPE MPI_DOUBLE

	typedef double T_datatype;

	T_datatype** InitArray(long size, T_datatype value);
	T_datatype* InitVector(long size, T_datatype value);
	void DistributeVector(T_datatype vector, T_datatype new_vector, int myrank, int nprocs, long n);
	void DistributeMatrix(T_datatype matrix, T_datatype local_matrix, int myrank, int nprocs, long n, long *chunksize);
	void CalculateAndGather(T_datatype *local_matrix, T_datatype vector, T_datatype *resultvector, int myrank, int nprocs, long n, long chunksize);

	int main(int argc, char *argv[]){
	21int myrank, nprocs,rest=0;
	long n, chunksize, i, j;
	double times[REPETITIONS];
	double time, slowest,h,x,endresult;
	T_datatype **matrix=NULL;
	T_datatype *vector=NULL;
	T_datatype *local_matrix=NULL, lm_data=NULL;
	T_datatype *resultvector=NULL;
	T_datatype *mvector=NULL;
	MPI_Datatype MPI_SplitM;

	MPI_Status recv_status;
	MPI_Request request;

	MPI_Init(&argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
	MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

	if ( argc != 2 ){
	if ( myrank == MASTER ){
	printf("Usage: mpiexec -n nodecount matVec2aa n\n");
	printf("with n is the size of the matrix. (nodecount < n)\n\n");
	}
	return 1;
	}

	n = atol(argv[1]);

	if ( n < nprocs ){
	if ( myrank == MASTER ){
	printf("Usage: mpiexec -n nodecount matVec2aa n\n");
	printf("with n > nodecount)\n\n");
	}
	return 1;
	}

	if ( myrank == MASTER ){
	matrix = InitArray(n, 1.0);
	mvector = InitVector(n, 1.0); // vector which is distributed with allgatherv to "vector"
	resultvector = (T_datatype) malloc (sizeof(T_datatype) n );
	}
	rest = ( n - ((int)n/nprocs ) * (nprocs - 1 ) );
	chunksize = (myrank == nprocs - 1 ) ? rest : ( n / nprocs );
	lm_data = (T_datatype) malloc(sizeof(T_datatype) chunksize * n);
	local_matrix = (T_datatype*) malloc (sizeof(T_datatype) chunksize);
	for ( i = 0 ; i < chunksize ; ++i ){
	local_matrix[i] = &(lm_data[i*n]);
	}

	vector = (T_datatype) malloc (sizeof(T_datatype) n );

	for ( i = 0 ; i < REPETITIONS ; ++i ){

	MPI_Barrier(MPI_COMM_WORLD);
	time = MPI_Wtime();

	// measuring code

	// distribute the vector across all processes
	DistributeVector(mvector, vector, myrank, nprocs, n);

	// distribute the matrix across all processes
	DistributeMatrix(matrix, local_matrix, myrank, nprocs, n, &chunksize); // chunksize is the size of the local_matrix

	// printf("rank:%d, matrix:%f\n", myrank, local_matrix[0][0]);
	// calculate result at MASTER in the resultvector
	CalculateAndGather(local_matrix, vector, resultvector, myrank, nprocs, n, chunksize);


	time = MPI_Wtime() - time;
	MPI_Barrier(MPI_COMM_WORLD);
	MPI_Reduce(&time, &slowest, 1, MPI_DOUBLE, MPI_MAX, MASTER, MPI_COMM_WORLD);

	if (myrank == MASTER) {
	times[i]=slowest;
	}
	}


	if ( myrank == MASTER ){
	double min = times[0];
	for (int i = 1; i < REPETITIONS ; i++) {
	min = ( times[i] < min ) ? times[i] : min;
	}
	printf("%d;%ld;%f\n", nprocs, n, min);
	}

	// verification
	// if ( myrank == 0 ){
	// printf("Matrix:\n");
	//
	// for ( i = 0 ; i < n ; ++i ){
	// for ( j = 0 ; j < n ; j++ ){
	// printf("%f, ", matrix[i][j]);
	// }
	// printf("\n");
	// }
	// printf("\n\n");
	//
	// printf("Vector:\n");
	// for ( i = 0 ; i < n ; ++i ){
	// printf("%f\n", vector[i]);
	// }
	//
	// printf("\n\n");
	// printf("result:\n");
	//
	// for ( i=0 ; i < n ; ++i ){
	// printf("%f\n", resultvector[i]);
	// }
	//
	// }


	// clean up an free memory
	if ( myrank == MASTER ){
	free(matrix);
	free(resultvector);
	free(mvector);
	}
	free(vector);
	free(local_matrix);
	free(lm_data);
	//
	// // mpi-cleanup
	MPI_Finalize();

	}

	// initializes a size*size - array
	T_datatype** InitArray( long size, T_datatype value){
	long i,j;
	T_datatype array = (T_datatype) malloc( sizeof(T_datatype ) size);
	T_datatype array_data = (T_datatype )malloc(size * size *sizeof(T_datatype));

	if ( array == NULL \|\| array_data == NULL ){
	printf("Error allocating memory!\n\n");
	exit(EXIT_FAILURE);
	}
	for ( i=0l ; i < size ; ++i ){
	array[i] = &(array_data[i*size]);
	}


	// initialisation of the array
	for ( i=0l ; i < size ; ++i ){
	for ( j=0l ; j < size ; ++j ){
	//array[i][j] = i*size + j;
	array[i][j] = value;
	}
	}

	// returns a n*n-Array
	return array;
	}

	T_datatype* InitVector(long size, T_datatype value){
	long i;
	T_datatype vector = (T_datatype) malloc (sizeof(T_datatype) * size );

	if ( vector == NULL ){
	printf("Error allocating memory!\n\n");
	exit(EXIT_FAILURE);
	}
	for ( i = 0l ; i < size ; ++i ){
	vector[i] = value;
	}

	return vector;
	}

	// distribute the matrix using scatterv
	void DistributeMatrix(T_datatype matrix, T_datatype local_matrix, int myrank, int nprocs, long n, long *chunksize){
	int i,j, sendcounts[nprocs], displs[nprocs];
	T_datatype *sendbuffer=NULL;
	long temp;
	MPI_Datatype MPI_rowtype;

	MPI_Type_contiguous(n, MPI_SENDTYPE, &MPI_rowtype);
	MPI_Type_commit(&MPI_rowtype);

	*chunksize = (long) n / nprocs;

	// preparing sendcounts and displs for MPI_Scatterv
	for ( i = 0 ; i < nprocs ; ++i ){
	sendcounts[i] = ( i == nprocs - 1 ) ? chunksize + ( n - ( chunksize * nprocs ) ) :*chunksize ;
	displs[i] = chunksize i;
	}

	*chunksize = sendcounts[myrank] ;

	sendbuffer = NULL;
	if ( myrank == MASTER )
	sendbuffer = &(matrix[0][0]);

	MPI_Scatterv( sendbuffer, sendcounts, displs, MPI_rowtype, &(local_matrix[0][0]), *chunksize, MPI_rowtype, MASTER, MPI_COMM_WORLD );

	MPI_Type_free(&MPI_rowtype);

	}


	// distribute the vector using mpi_allgatherv
	void DistributeVector(T_datatype vector, T_datatype new_vector, int myrank, int nprocs, long n){
	int recvcounts[nprocs], displs[nprocs],i;

	recvcounts[0] = n;
	displs[0] = 0;
	for ( i = 1 ; i < nprocs ; i++ ){
	recvcounts[i] = 0;
	displs[i] = n;
	}

	if ( myrank == MASTER ){
	MPI_Allgatherv(&(vector[0]), n, MPI_SENDTYPE, &new_vector[0], recvcounts, displs, MPI_SENDTYPE, MPI_COMM_WORLD);
	}else{
	MPI_Allgatherv(NULL, 0, MPI_SENDTYPE, &new_vector[0], recvcounts, displs, MPI_SENDTYPE, MPI_COMM_WORLD);
	}

	}

	void CalculateAndGather(T_datatype *local_matrix, T_datatype vector, T_datatype *resultvector, int myrank, int nprocs, long n, long chunksize){
	T_datatype *im_result=NULL; // intermediate result
	int recvcounts[nprocs], displs[nprocs];
	int chunk = n / nprocs;
	int i,j;

	// calculation for each node
	im_result = InitVector(chunksize, 0.0);

	for ( i = 0 ; i < chunksize ; ++i ){
	for ( j = 0 ; j < n ; ++j ){
	im_result[i] = im_result[i] + (local_matrix[i][j] * vector[j%n]);
	}
	}

	// gather im_result into resultvector on MASTER
	for ( i = 0 ; i < nprocs ; ++i ){
	recvcounts[i] = (i == nprocs - 1 ) ? chunk + ( n - ( chunk * nprocs ) ) : chunk;
	displs[i] = i * chunk ;

	}


	MPI_Gatherv( im_result, chunksize, MPI_SENDTYPE, resultvector, recvcounts, displs, MPI_SENDTYPE, MASTER, MPI_COMM_WORLD );

	free(im_result);
	}
No results found