robintw · April 11, 2011 18:57
diff --git a/MPITranspose_Square.c b/MPITranspose_Square.c
 #include<stdio.h>
 #include<stdlib.h>
 #include<array_alloc.h>
 #include<math.h>
 #include<mpi.h>

 #define NAN (0.0/0.0)

 struct global_index
 {
  int x;
  int y;
 };

 struct local_index
 {
 	int procX;
 	int procY;
 	int arrayX;
 	int arrayY;
 };

 struct global_index local_to_global(int procX, int procY, int arrayX, int arrayY, int cols, int rows)
 {
 	struct global_index result;
 	/* printf("Inside GI: pX: %d, pY: %d, aX: %d, aY: %d, cols: %d, rows: %d\n", procX, procY, arrayX, arrayY, cols, rows); */
 	
 	result.x = arrayX + (procX * cols);
 	result.y = arrayY + (procY * rows);
 	
 	return result;
 }

 struct local_index global_to_local(int x, int y, int cols, int rows)
 {
 	struct local_index result;
 	
 	result.arrayX = x % cols;
 	result.procX = (int) x / (int) cols;
 	
 	result.arrayY = y % rows;
 	result.procY = (int) y / (int) rows;
 	
 	return result;
 }

 /* Factorisation function taken from Lab Sheet 9 - written (I assume) by Ian Bush */
 /* (Reformatted to fit with my coding style) */
 void factorise(int n, int *x, int*y)
 {
    /* Factorise n into two numbers which are as close to equal as possible */
    *x = sqrt( n ) + 0.5;
    *y = n / *x;

    /* Loop will definitely terminate when x == 1 */
    while ( *x * *y != n )
    {
 		(*x)--;
 		*y = n / *x;
    }
 }

 int main(int argc, char ** argv)
 {
 	/* ----------- VARIABLE DECLARATIONS ----------- */

 	/* Size of matrix - the matrix will be n x n */
 	int n;

 	/* Number of processes and rank of this process*/
 	int nprocs, rank;
 	
 	/* Size and periodicity of cartesian grid */
 	int dim_size[2];
 	int periods[2];
 	
 	/* Number of processes in x and y directions */
 	int npx, npy;
 	
 	/* Offset for this process in x and y directions */
 	int x_offset, y_offset;
 	
 	/* The standard (normal) number of rows and cols
 	per core - not valid for the last in a row or col */
 	int std_rows_in_core, std_cols_in_core;
 	
 	/* Coordinates in cartesian grid, and num of rows and cols
 	this process has */
 	int coords[2];
 	int rows_in_core;
 	int cols_in_core;
 	
 	/* Array to store the chunk of the matrix that we have */
 	double **array;
 	double **new_array;
 	
 	MPI_Status status;
 	
 	/* Loop variables */
 	int i, j;
 	
 	/* Cartesian communicator */
 	MPI_Comm cart_comm;
 	
 	int coords2[2];
 	int rank2;

 	int global_x1, global_x2;
 	int global_y1, global_y2;

 	struct global_index gi;
 	struct local_index li;
 	
 	MPI_Request send_request;
 	MPI_Request recv_request;
 	
 	/* Stores the number of requests this core will need to keep track of
 	and creates a pointer to eventually hold the array of these requests */
 	int n_requests;
 	MPI_Request *all_requests;
 	
 	int request_counter;
 	
 	int tag;
 	
 	MPI_Datatype send_type;
 	MPI_Datatype recv_type;
 	
 	int sizes[2];
 	int subsizes[2];
 	int starts[2];
 	
 	/* ----------- Deal with command-line arguments ----------- */
 	n = atoi(argv[1]);
 	
 	printf("!!!! N = %d\n", n);
 	
 	/* ----------- MPI Setup ----------- */

 	/* Initialise MPI */
 	MPI_Init(&argc, &argv);

 	/* Get the rank for this process, and the number of processes */
 	MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
 	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 	
 	npx = (int) sqrt(nprocs);
 	npy = npx;

 	/* Create the cartesian communicator */
 	dim_size[0] = npy;
 	dim_size[1] = npx;
 	periods[0] = 1;
 	periods[1] = 1;
 	
 	MPI_Cart_create(MPI_COMM_WORLD, 2, dim_size, periods, 1, &cart_comm);
 	
 	if (rank >= (npx * npx))
 	{
 		MPI_Finalize();
 	}
 	else
 	{
 	
 	printf("Past IF, rank = %d\n", rank);
 	
 	/* Get our co-ordinates within that communicator */
 	MPI_Cart_coords(cart_comm, rank, 2, coords);
 		
 	rows_in_core = ceil(n / (float) npx);
 	cols_in_core = ceil(n / (float) npy);
 	
 	std_rows_in_core = rows_in_core;
 	std_cols_in_core = cols_in_core;
 		
 	if (coords[0] == (npy - 1))
 	{
 		/* We're at the far end of a row */
 		cols_in_core = n - (cols_in_core * (npy - 1));
 	}
 	if (coords[1] == (npx - 1))
 	{
 		/* We're at the bottom of a col */
 		rows_in_core = n - (rows_in_core * (npx - 1));
 	}

 	printf("Rank: %d, X: %d, Y: %d, RpC: %d, CpC: %d\n", rank, coords[0], coords[1], rows_in_core, cols_in_core);
 	
 	/* ----------- INITIALISE MATRIX CHUNKS ----------- */
 	
 	/* Allocate an array to store this chunk of the matrix.
 	If the allocation fails, print an error */
 	array = alloc_2d_double(rows_in_core, cols_in_core);
 	new_array = alloc_2d_double(rows_in_core, cols_in_core);
 	if (array == NULL || new_array == NULL)
 	{
 		printf("Problem with array allocation.\nExiting\n");
 		return 1;
 	}
 	
 	for (i = 0; i < rows_in_core; i++)
 	{
 		for (j = 0; j < cols_in_core; j++)
 		{
 			new_array[i][j] = NAN;
 		}
 	}
 	
 	/* Calculate the offset of the top left-hand corner of our chunk of the matrix from the
 	top left-hand corner of the whole matrix */
 	x_offset = coords[0] * std_cols_in_core;
 	y_offset = coords[1] * std_rows_in_core;
 	
 	for (i = 0; i < rows_in_core; i++)
 	{
 		/*printf("Process (%d, %d): ", coords[0], coords[1]);*/
 		for (j = 0; j < cols_in_core; j++)
 		{
 			array[i][j] = (float) ( (i + y_offset) * n) + ( (j + x_offset) + 1);
 			
 			/*printf("%f ", array[i][j]);*/
 		}
 		/*printf("\n");*/
 	}
 		
 	/* ----------- DO TRANSPOSE ----------- */
 	/* Find the opposite co-ordinates (as we know it's a square) */
 	coords2[0] = coords[1];
 	coords2[1] = coords[0];
 	
 	/* Get the rank for this process */
 	MPI_Cart_rank(cart_comm, coords2, &rank2);
 	
 	
 	
 	/* Send to these new coordinates */
 	
 	tag = (coords[0] + 1) * (coords[1] + 1);
 	
 	
 	/* Create new derived type to receive as */
 	/* MPI_Type_vector(rows_in_core, cols_in_core, cols_in_core, MPI_DOUBLE, &vector_type); */
 	sizes[0] = rows_in_core;
 	sizes[1] = cols_in_core;
 	
 	subsizes[0] = rows_in_core;
 	subsizes[1] = cols_in_core;
 	
 	starts[0] = 0;
 	starts[1] = 0;
 	
 	MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_FORTRAN, MPI_DOUBLE, &send_type);
 	MPI_Type_commit(&send_type);
 	
 	MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_DOUBLE, &recv_type);
 	MPI_Type_commit(&recv_type);
 	
 	
 	/* We're sending in row-major form, so it's just rows_in_core * cols_in_core lots of MPI_DOUBLE */
 	MPI_Send(&array[0][0], 1, send_type, rank2, tag ,cart_comm);
 	
 	/* Receive from these new coordinates */
 	MPI_Recv(&new_array[0][0], 1, recv_type, rank2, tag, cart_comm, &status);
 	
 		
 	/* ----------- CLOSE AND FINALISE ENVIRONMENT ----------- */
 	 
 	for (i = 0; i < rows_in_core; i++)
 	{
 		printf("Result Process (%d, %d): ", coords[0], coords[1]);
 		for (j = 0; j < cols_in_core; j++)
 		{			
 			printf("%f ", new_array[i][j]);
 		}
 		printf("\n");
 	}
 	
 	/* Free the memory we grabbed earlier for the data arrays */
 	free_2d_double(array);
 	free_2d_double(new_array);
 	
 	
 	/* Close down the MPI environment */
 	MPI_Finalize();
 	
 	return EXIT_SUCCESS;
 	}
 }
	#include<stdio.h>
	#include<stdlib.h>
	#include<array_alloc.h>
	#include<math.h>
	#include<mpi.h>

	#define NAN (0.0/0.0)

	struct global_index
	{
	int x;
	int y;
	};

	struct local_index
	{
	int procX;
	int procY;
	int arrayX;
	int arrayY;
	};

	struct global_index local_to_global(int procX, int procY, int arrayX, int arrayY, int cols, int rows)
	{
	struct global_index result;
	/* printf("Inside GI: pX: %d, pY: %d, aX: %d, aY: %d, cols: %d, rows: %d\n", procX, procY, arrayX, arrayY, cols, rows); */

	result.x = arrayX + (procX * cols);
	result.y = arrayY + (procY * rows);

	return result;
	}

	struct local_index global_to_local(int x, int y, int cols, int rows)
	{
	struct local_index result;

	result.arrayX = x % cols;
	result.procX = (int) x / (int) cols;

	result.arrayY = y % rows;
	result.procY = (int) y / (int) rows;

	return result;
	}

	/* Factorisation function taken from Lab Sheet 9 - written (I assume) by Ian Bush */
	/* (Reformatted to fit with my coding style) */
	void factorise(int n, int x, inty)
	{
	/* Factorise n into two numbers which are as close to equal as possible */
	*x = sqrt( n ) + 0.5;
	y = n / x;

	/* Loop will definitely terminate when x == 1 */
	while ( x *y != n )
	{
	(*x)--;
	y = n / x;
	}
	}

	int main(int argc, char ** argv)
	{
	/* ----------- VARIABLE DECLARATIONS ----------- */

	/* Size of matrix - the matrix will be n x n */
	int n;

	/* Number of processes and rank of this process*/
	int nprocs, rank;

	/* Size and periodicity of cartesian grid */
	int dim_size[2];
	int periods[2];

	/* Number of processes in x and y directions */
	int npx, npy;

	/* Offset for this process in x and y directions */
	int x_offset, y_offset;

	/* The standard (normal) number of rows and cols
	per core - not valid for the last in a row or col */
	int std_rows_in_core, std_cols_in_core;

	/* Coordinates in cartesian grid, and num of rows and cols
	this process has */
	int coords[2];
	int rows_in_core;
	int cols_in_core;

	/* Array to store the chunk of the matrix that we have */
	double **array;
	double **new_array;

	MPI_Status status;

	/* Loop variables */
	int i, j;

	/* Cartesian communicator */
	MPI_Comm cart_comm;

	int coords2[2];
	int rank2;

	int global_x1, global_x2;
	int global_y1, global_y2;

	struct global_index gi;
	struct local_index li;

	MPI_Request send_request;
	MPI_Request recv_request;

	/* Stores the number of requests this core will need to keep track of
	and creates a pointer to eventually hold the array of these requests */
	int n_requests;
	MPI_Request *all_requests;

	int request_counter;

	int tag;

	MPI_Datatype send_type;
	MPI_Datatype recv_type;

	int sizes[2];
	int subsizes[2];
	int starts[2];

	/* ----------- Deal with command-line arguments ----------- */
	n = atoi(argv[1]);

	printf("!!!! N = %d\n", n);

	/* ----------- MPI Setup ----------- */

	/* Initialise MPI */
	MPI_Init(&argc, &argv);

	/* Get the rank for this process, and the number of processes */
	MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);

	npx = (int) sqrt(nprocs);
	npy = npx;

	/* Create the cartesian communicator */
	dim_size[0] = npy;
	dim_size[1] = npx;
	periods[0] = 1;
	periods[1] = 1;

	MPI_Cart_create(MPI_COMM_WORLD, 2, dim_size, periods, 1, &cart_comm);

	if (rank >= (npx * npx))
	{
	MPI_Finalize();
	}
	else
	{

	printf("Past IF, rank = %d\n", rank);

	/* Get our co-ordinates within that communicator */
	MPI_Cart_coords(cart_comm, rank, 2, coords);

	rows_in_core = ceil(n / (float) npx);
	cols_in_core = ceil(n / (float) npy);

	std_rows_in_core = rows_in_core;
	std_cols_in_core = cols_in_core;

	if (coords[0] == (npy - 1))
	{
	/* We're at the far end of a row */
	cols_in_core = n - (cols_in_core * (npy - 1));
	}
	if (coords[1] == (npx - 1))
	{
	/* We're at the bottom of a col */
	rows_in_core = n - (rows_in_core * (npx - 1));
	}

	printf("Rank: %d, X: %d, Y: %d, RpC: %d, CpC: %d\n", rank, coords[0], coords[1], rows_in_core, cols_in_core);

	/* ----------- INITIALISE MATRIX CHUNKS ----------- */

	/* Allocate an array to store this chunk of the matrix.
	If the allocation fails, print an error */
	array = alloc_2d_double(rows_in_core, cols_in_core);
	new_array = alloc_2d_double(rows_in_core, cols_in_core);
	if (array == NULL \|\| new_array == NULL)
	{
	printf("Problem with array allocation.\nExiting\n");
	return 1;
	}

	for (i = 0; i < rows_in_core; i++)
	{
	for (j = 0; j < cols_in_core; j++)
	{
	new_array[i][j] = NAN;
	}
	}

	/* Calculate the offset of the top left-hand corner of our chunk of the matrix from the
	top left-hand corner of the whole matrix */
	x_offset = coords[0] * std_cols_in_core;
	y_offset = coords[1] * std_rows_in_core;

	for (i = 0; i < rows_in_core; i++)
	{
	/printf("Process (%d, %d): ", coords[0], coords[1]);/
	for (j = 0; j < cols_in_core; j++)
	{
	array[i][j] = (float) ( (i + y_offset) * n) + ( (j + x_offset) + 1);

	/printf("%f ", array[i][j]);/
	}
	/printf("\n");/
	}

	/* ----------- DO TRANSPOSE ----------- */
	/* Find the opposite co-ordinates (as we know it's a square) */
	coords2[0] = coords[1];
	coords2[1] = coords[0];

	/* Get the rank for this process */
	MPI_Cart_rank(cart_comm, coords2, &rank2);



	/* Send to these new coordinates */

	tag = (coords[0] + 1) * (coords[1] + 1);


	/* Create new derived type to receive as */
	/* MPI_Type_vector(rows_in_core, cols_in_core, cols_in_core, MPI_DOUBLE, &vector_type); */
	sizes[0] = rows_in_core;
	sizes[1] = cols_in_core;

	subsizes[0] = rows_in_core;
	subsizes[1] = cols_in_core;

	starts[0] = 0;
	starts[1] = 0;

	MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_FORTRAN, MPI_DOUBLE, &send_type);
	MPI_Type_commit(&send_type);

	MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_DOUBLE, &recv_type);
	MPI_Type_commit(&recv_type);


	/* We're sending in row-major form, so it's just rows_in_core * cols_in_core lots of MPI_DOUBLE */
	MPI_Send(&array[0][0], 1, send_type, rank2, tag ,cart_comm);

	/* Receive from these new coordinates */
	MPI_Recv(&new_array[0][0], 1, recv_type, rank2, tag, cart_comm, &status);


	/* ----------- CLOSE AND FINALISE ENVIRONMENT ----------- */

	for (i = 0; i < rows_in_core; i++)
	{
	printf("Result Process (%d, %d): ", coords[0], coords[1]);
	for (j = 0; j < cols_in_core; j++)
	{
	printf("%f ", new_array[i][j]);
	}
	printf("\n");
	}

	/* Free the memory we grabbed earlier for the data arrays */
	free_2d_double(array);
	free_2d_double(new_array);


	/* Close down the MPI environment */
	MPI_Finalize();

	return EXIT_SUCCESS;
	}
	}