Skip to content

Instantly share code, notes, and snippets.

@jstroem
Created December 6, 2012 04:18
Show Gist options
  • Save jstroem/4221740 to your computer and use it in GitHub Desktop.
Save jstroem/4221740 to your computer and use it in GitHub Desktop.
CSE260 project. ghost-cells NOT distribution
#!/bin/bash
# This script is intepreted by the Bourne Shell, sh
#
# Documentation for SGE is found in:
# http://docs.oracle.com/cd/E19279-01/820-3257-12/n1ge.html
#
# Tell SGE which shell to run the job script in rather than depending
# on SGE to try and figure it out.
#$ -S /bin/bash
#
# Export all my environment variables to the job
#$ -V
#
# Tun the job in the same directory from which you submitted it
#$ -cwd
#
#
# --- Don't change anything above this line ---
#
# Give a name to the job
#$ -N MPI
#
# Specify a time limit for the job, not more than 30 minutes
#$ -l h_rt=00:10:00
#
# Specify the parallel environment and number of cores,
# If not a multiple of 8, you'll get the whole node anyway
#$ -pe orte 8
#
# Join stdout and stderr so they are reported in job output file
#$ -j y
#
#
# Choose the queue to run the job
#
# Debug queue: only one node may be used at a time for up to 30 minutes
# Interactive or batch jobs, maximum of 1 job per user running at a time
#
# Normal queue: job may use all available compute nodes (256 cores)
# for up to 60 minutes
# Batch jobs, maximum of 2 jobs per user running at a time
# To use more than one node, specify the "normal" queue
#$ -q normal.q
# #$ -q debug.q
#
# Specifies the circumstances under which mail is to be sent to the job owner
# defined by -M option. For example, options "bea" cause mail to be sent at the
# begining, end, and at abort time (if it happens) of the job.
# Option "n" means no mail will be sent.
#$ -m aeb
#
# *** Change to the address you want the notification sent to, and
# *** REMOVE the blank between the # and the $
#$ -M [email protected]
#
echo
echo " *** Current working directory"
pwd
echo
echo " *** Compiler"
# Output which compiler are we using and the environment
mpicc -v
echo
echo " *** Environment"
printenv
echo
echo ">>> Job Starts"
date
mpirun -np $NSLOTS ./mpi -n 5000 -proc_width 4 -proc_height 2 -o mpi.txt -c checksum.txt
date
echo ">>> Job Ends"
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <float.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <mpi.h>
#include <sys/time.h>
#include "common.h"
double size;
//
// tuned constants
//
#define density 0.0010
#define mass 0.01
#define cutoff 0.01
#define overlap 0.011
#define min_r (cutoff/100)
#define dt 0.0005
void setup_ghost_buffers( neighbor_t *neighbors, double size_x, double size_y){
neighbor_t self = neighbors[SELF];
for(int i = 1; i < 9; i++){
if (neighbors[i].proc_x - self.proc_x != 0 && neighbors[i].proc_y - self.proc_y != 0) {
neighbors[i].buffer_size = (int)(min(ceil(size_y),ceil(size_x)) / mass);
} else if (neighbors[i].proc_x - self.proc_x != 0) {
neighbors[i].buffer_size = (int)ceil(size_y / mass);
} else if (neighbors[i].proc_y - self.proc_y != 0) {
neighbors[i].buffer_size = (int)ceil(size_x / mass);
}
neighbors[i].send_buffer = (particle_t*) malloc( sizeof(particle_t) * neighbors[i].buffer_size);
}
}
void copy_to_ghost_buffers( particle_t *particles, neighbor_t *neighbors) {
neighbor_t self = neighbors[SELF];
for(int i = 1; i < 9; i++){
neighbors[i].send_size = 0;
}
for (int i = self.offset; i < self.offset + self.recv_size; i++){
if (particles[i].x <= self.from_x + overlap && particles[i].y <= self.from_y + overlap && neighbors[TOP_LEFT].rank != -1) {//TOP LEFT particle
if (neighbors[TOP_LEFT].buffer_size <= neighbors[TOP_LEFT].send_size){
printf("Rank %d, neighbors: %d, send_buffer isnt big enough. Has: %d, needed: %d\n", self.rank, neighbors[TOP_LEFT].rank, neighbors[TOP_LEFT].buffer_size, neighbors[TOP_LEFT].send_size +1);
} else {
neighbors[TOP_LEFT].send_buffer[neighbors[TOP_LEFT].send_size] = particles[i];
}
neighbors[TOP_LEFT].send_size = neighbors[TOP_LEFT].send_size + 1;
}
if (self.to_x - overlap < particles[i].x && particles[i].y <= self.from_y + overlap && neighbors[TOP_RIGHT].rank != -1) {//TOP RIGHT particle
if (neighbors[TOP_RIGHT].buffer_size <= neighbors[TOP_RIGHT].send_size){
printf("Rank %d, neighbors: %d, send_buffer isnt big enough. Has: %d, needed: %d\n", self.rank, neighbors[TOP_RIGHT].rank, neighbors[TOP_RIGHT].buffer_size, neighbors[TOP_LEFT].send_size +1);
} else {
neighbors[TOP_RIGHT].send_buffer[neighbors[TOP_RIGHT].send_size] = particles[i];
}
neighbors[TOP_RIGHT].send_size = neighbors[TOP_RIGHT].send_size + 1;
}
if (particles[i].x <= self.from_x + overlap && self.to_y - overlap < particles[i].y && neighbors[BOTTOM_LEFT].rank != -1) {// BOTTOM LEFT particle
if (neighbors[BOTTOM_LEFT].buffer_size <= neighbors[BOTTOM_LEFT].send_size){
printf("Rank %d, neighbors: %d, send_buffer isnt big enough. Has: %d, needed: %d\n", self.rank, neighbors[BOTTOM_LEFT].rank, neighbors[BOTTOM_LEFT].buffer_size, neighbors[TOP_LEFT].send_size +1);
} else {
neighbors[BOTTOM_LEFT].send_buffer[neighbors[BOTTOM_LEFT].send_size] = particles[i];
}
neighbors[BOTTOM_LEFT].send_size = neighbors[BOTTOM_LEFT].send_size + 1;
}
if (self.to_x - overlap < particles[i].x && self.to_y - overlap < particles[i].y && neighbors[BOTTOM_RIGHT].rank != -1) {//BOTTOM RIGHT particle
if (neighbors[BOTTOM_RIGHT].buffer_size <= neighbors[BOTTOM_RIGHT].send_size){
printf("Rank %d, neighbors: %d, send_buffer isnt big enough. Has: %d, needed: %d\n", self.rank, neighbors[BOTTOM_RIGHT].rank, neighbors[BOTTOM_RIGHT].buffer_size, neighbors[TOP_LEFT].send_size +1);
} else {
neighbors[BOTTOM_RIGHT].send_buffer[neighbors[BOTTOM_RIGHT].send_size] = particles[i];
}
neighbors[BOTTOM_RIGHT].send_size = neighbors[BOTTOM_RIGHT].send_size + 1;
}
if (self.to_x - overlap < particles[i].x && neighbors[STRAIGHT_RIGHT].rank != -1) { // STRAIGHT RIGHT particle
if (neighbors[STRAIGHT_RIGHT].buffer_size <= neighbors[STRAIGHT_RIGHT].send_size){
printf("Rank %d, neighbors: %d, send_buffer isnt big enough. Has: %d, needed: %d\n", self.rank, neighbors[STRAIGHT_RIGHT].rank, neighbors[STRAIGHT_RIGHT].buffer_size, neighbors[TOP_LEFT].send_size +1);
} else {
neighbors[STRAIGHT_RIGHT].send_buffer[neighbors[STRAIGHT_RIGHT].send_size] = particles[i];
}
neighbors[STRAIGHT_RIGHT].send_size = neighbors[STRAIGHT_RIGHT].send_size + 1;
}
if (particles[i].x <= self.from_x + overlap && neighbors[STRAIGHT_LEFT].rank != -1) { // STRAIGHT LEFT particle
if (neighbors[STRAIGHT_LEFT].buffer_size <= neighbors[STRAIGHT_LEFT].send_size){
printf("Rank %d, neighbors: %d, send_buffer isnt big enough. Has: %d, needed: %d\n", self.rank, neighbors[STRAIGHT_LEFT].rank, neighbors[STRAIGHT_LEFT].buffer_size, neighbors[TOP_LEFT].send_size +1);
} else {
neighbors[STRAIGHT_LEFT].send_buffer[neighbors[STRAIGHT_LEFT].send_size] = particles[i];
}
neighbors[STRAIGHT_LEFT].send_size = neighbors[STRAIGHT_LEFT].send_size + 1;
}
if (self.to_y - overlap < particles[i].y && neighbors[BOTTOM_STRAIGHT].rank != -1) { // BOTTOM STRAIGHT particle
if (neighbors[BOTTOM_STRAIGHT].buffer_size <= neighbors[BOTTOM_STRAIGHT].send_size){
printf("Rank %d, neighbors: %d, send_buffer isnt big enough. Has: %d, needed: %d\n", self.rank, neighbors[BOTTOM_STRAIGHT].rank, neighbors[BOTTOM_STRAIGHT].buffer_size, neighbors[TOP_LEFT].send_size +1);
} else {
neighbors[BOTTOM_STRAIGHT].send_buffer[neighbors[BOTTOM_STRAIGHT].send_size] = particles[i];
}
neighbors[BOTTOM_STRAIGHT].send_size = neighbors[BOTTOM_STRAIGHT].send_size + 1;
}
if (particles[i].y <= self.from_y + overlap && neighbors[TOP_STRAIGHT].rank != -1) { // TOP STRAIGHT particle
if (neighbors[TOP_STRAIGHT].buffer_size <= neighbors[TOP_STRAIGHT].send_size){
printf("Rank %d, neighbors: %d, send_buffer isnt big enough. Has: %d, needed: %d\n", self.rank, neighbors[TOP_STRAIGHT].rank, neighbors[TOP_STRAIGHT].buffer_size, neighbors[TOP_LEFT].send_size +1);
} else {
neighbors[TOP_STRAIGHT].send_buffer[neighbors[TOP_STRAIGHT].send_size] = particles[i];
}
neighbors[TOP_STRAIGHT].send_size = neighbors[TOP_STRAIGHT].send_size + 1;
}
}
}
//
// timer
//
double read_timer( )
{
static bool initialized = false;
static struct timeval start;
struct timeval end;
if( !initialized )
{
gettimeofday( &start, NULL );
initialized = true;
}
gettimeofday( &end, NULL );
return (end.tv_sec - start.tv_sec) + 1.0e-6 * (end.tv_usec - start.tv_usec);
}
//
// keep density constant
//
double set_size( int n )
{
size = sqrt( density * n );
return size;
}
//
// Initialize the particle positions and velocities
//
void init_particles( int n, particle_t *p )
{
srand48( 1 );
int sx = (int)ceil(sqrt((double)n));
int sy = (n+sx-1)/sx;
int *shuffle = (int*)malloc( n * sizeof(int) );
for( int i = 0; i < n; i++ )
shuffle[i] = i;
for( int i = 0; i < n; i++ )
{
//
// make sure particles are not spatially sorted
//
int j = lrand48()%(n-i);
int k = shuffle[j];
shuffle[j] = shuffle[n-i-1];
//
// distribute particles evenly to ensure proper spacing
//
p[i].x = size*(1.+(k%sx))/(1+sx);
p[i].y = (size/2)*(1.+(k/sx))/(1+sy);
p[i].index = i;
//
// assign random velocities within a bound
//
p[i].vx = drand48()*2-1;
p[i].vy = drand48()*2-1;
}
free( shuffle );
}
//
// interact two particles
//
void apply_force( particle_t &particle, particle_t &neighbor )
{
double dx = neighbor.x - particle.x;
double dy = neighbor.y - particle.y;
double r2 = dx * dx + dy * dy;
if( r2 > cutoff*cutoff )
return;
r2 = fmax( r2, min_r*min_r );
double r = sqrt( r2 );
//
// very simple short-range repulsive force
//
double coef = ( 1 - cutoff / r ) / r2 / mass;
particle.ax += coef * dx;
particle.ay += coef * dy;
}
//
// integrate the ODE
//
void move( particle_t &p )
{
//
// slightly simplified Velocity Verlet integration
// conserves energy better than explicit Euler method
//
p.vx += p.ax * dt;
p.vy += p.ay * dt;
p.x += p.vx * dt;
p.y += p.vy * dt;
//
// bounce from walls
//
while( p.x < 0 || p.x > size )
{
p.x = p.x < 0 ? -p.x : 2*size-p.x;
p.vx = -p.vx;
}
while( p.y < 0 || p.y > size )
{
p.y = p.y < 0 ? -p.y : 2*size-p.y;
p.vy = -p.vy;
}
}
//
// I/O routines
//
void save( FILE *f, int n, particle_t *p )
{
static bool first = true;
if( first )
{
fprintf( f, "%d %g\n", n, size );
first = false;
}
for( int i = 0; i < n; i++ )
fprintf( f, "%g %g\n", p[i].x, p[i].y );
}
//
// command line option processing
//
int find_option( int argc, char **argv, const char *option )
{
for( int i = 1; i < argc; i++ )
if( strcmp( argv[i], option ) == 0 )
return i;
return -1;
}
int read_int( int argc, char **argv, const char *option, int default_value )
{
int iplace = find_option( argc, argv, option );
if( iplace >= 0 && iplace < argc-1 )
return atoi( argv[iplace+1] );
return default_value;
}
char *read_string( int argc, char **argv, const char *option, char *default_value )
{
int iplace = find_option( argc, argv, option );
if( iplace >= 0 && iplace < argc-1 )
return argv[iplace+1];
return default_value;
}
#ifndef __CS267_COMMON_H__
#define __CS267_COMMON_H__
inline int min( int a, int b ) { return a < b ? a : b; }
inline int max( int a, int b ) { return a > b ? a : b; }
//
// saving parameters
//
const int NSTEPS = 1000;
const int SAVEFREQ = 10;
const int SELF = 0,
BOTTOM_STRAIGHT = 1,
BOTTOM_LEFT = 2,
STRAIGHT_LEFT = 3,
TOP_LEFT = 4,
TOP_STRAIGHT = 5,
TOP_RIGHT = 6,
STRAIGHT_RIGHT = 7,
BOTTOM_RIGHT = 8;
//
// particle data structure
//
typedef struct
{
double index;
double x;
double y;
double vx;
double vy;
double ax;
double ay;
} particle_t;
typedef struct
{
int offset;
int send_size;
int recv_size;
int proc_x;
int proc_y;
int rank;
double from_x;
double from_y;
double to_x;
double to_y;
particle_t *send_buffer;
int buffer_size;
MPI_Request recv_req;
MPI_Request send_req;
MPI_Status recv_stat;
MPI_Status send_stat;
} neighbor_t;
//
// timing routines
//
double read_timer( );
void copy_to_ghost_buffers( particle_t *particles, neighbor_t *neighbors);
void setup_ghost_buffers( neighbor_t *neighbors, double size_x, double size_y);
//
// simulation routines
//
double set_size( int n );
void init_particles( int n, particle_t *p );
void apply_force( particle_t &particle, particle_t &neighbor );
void move( particle_t &p );
//
// I/O routines
//
FILE *open_save( char *filename, int n );
void save( FILE *f, int n, particle_t *p );
//
// argument processing routines
//
int find_option( int argc, char **argv, const char *option );
int read_int( int argc, char **argv, const char *option, int default_value );
char *read_string( int argc, char **argv, const char *option, char *default_value );
#endif
HOST = $(shell hostname)
BANG = $(shell expr match `hostname` ccom-bang)
BANG-COMPUTE = $(shell expr match `hostname` compute)
LILLIPUT = $(shell expr match `hostname` lilliput)
ifneq ($(BANG), 0)
PUB = /share/class/public/cse260-fa12
include $(PUB)/Arch/arch.gnu.generic
else
ifneq ($(BANG-COMPUTE), 0)
PUB = /share/class/public/cse260-fa12
include $(PUB)/Arch/arch.gnu.generic
else
ifneq ($(LILLIPUT), 0)
PUB = /class/public/cse260-fa12
include $(PUB)/Arch/arch.intel.generic
else
# PUB = /Users/baden/lib
include $(PUB)/Arch/arch.gnu
# include $(PUB)/Arch/arch.gnu-4.5
endif
endif
endif
#
# Add symbol table information for gdb/cachegrind
ifeq ($(debug), 1)
CFLAGS += -g
LDFLAGS += -g
C++FLAGS += -g
endif
# Add symbol table information for gprof
ifeq ($(gprof), 1)
CFLAGS += -g -pg
C++FLAGS += -g -pg
LDFLAGS += -g -pg
endif
# If you want to compile for single precision,
# specify single=1 on the "make" command line
ifeq ($(single), 1)
else
C++FLAGS += -D_DOUBLE
CFLAGS += -D_DOUBLE
endif
# If you want to compile so that you call the plotter for
# flattened 2D arrays (implemented as 1D arrays)
# specify flattened=1 on the "make" command line
ifeq ($(flattened), 1)
C++FLAGS += -DPLOT1D
CFLAGS += -DPLOT1D
endif
# If you want to use restrict pointers, make restrict=1
# This applies to the hand code version
ifeq ($(restrict), 1)
C++FLAGS += -D__RESTRICT
CFLAGS += -D__RESTRICT
ifneq ($(CARVER), 0)
C++FLAGS += -restrict
CFLAGS += -restrict
endif
endif
#DEBUG += -DDEBUG
TARGETS = mpi
app: $(TARGETS)
OBJECTS = common.o mpi.o.o
#ifeq ($(no-mpi),1)
#OBJECTS += Timer.o
#endif
app: $(TARGETS)
mpi: mpi.o common.o
$(C++LINK) $(LDFLAGS) -o $@ mpi.o common.o $(LDLIBS)
clean:
$(RM) *.o $(TARGETS);
$(RM) core.*;
#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <math.h>
#include "common.h"
//
// benchmarking program
//
int main( int argc, char **argv )
{
//
// process command line parameters
//
if( find_option( argc, argv, "-h" ) >= 0 )
{
printf( "Options:\n" );
printf( "-h to see this help\n" );
printf( "-n <int> to set the number of particles\n" );
printf( "-proc_width <int> to set the width of the processor grid\n" );
printf( "-proc_height <int> to set the height of the processor grid\n" );
printf( "-o <filename> to specify the output file name\n" );
printf( "-c <checksum> to specify the output checksum file name\n" );
return 0;
}
int n = read_int( argc, argv, "-n", 1000 );
int proc_width = read_int( argc, argv, "-proc_width", NULL );
int proc_height = read_int( argc, argv, "-proc_height", NULL );
char *savename = read_string( argc, argv, "-o", NULL );
char *checksum = read_string( argc, argv, "-c", NULL );
//
// set up MPI
//
int n_proc, myrank;
MPI_Init( &argc, &argv );
MPI_Comm_size( MPI_COMM_WORLD, &n_proc );
MPI_Comm_rank( MPI_COMM_WORLD, &myrank );
//Ensure that px and py are correct setup.
if ((proc_height * proc_width) != n_proc && !myrank){
printf("\n *** The number of processors in the geometry (%d) is not the same as the number requested (%d)",proc_height*proc_width,n_proc);
exit(-1);
}
int my_x = myrank % proc_width;
int my_y = myrank / proc_width;
//
// Setup the neighbor setup
//
neighbor_t *neighbors = (neighbor_t*) malloc( 9 * sizeof(neighbor_t));
neighbors[ SELF ].proc_x = my_x; neighbors[ SELF ].proc_y = my_y;
neighbors[ BOTTOM_STRAIGHT ].proc_x = my_x; neighbors[ BOTTOM_STRAIGHT ].proc_y = my_y + 1;
neighbors[ BOTTOM_LEFT ].proc_x = my_x - 1; neighbors[ BOTTOM_LEFT ].proc_y = my_y + 1;
neighbors[ STRAIGHT_LEFT ].proc_x = my_x - 1; neighbors[ STRAIGHT_LEFT ].proc_y = my_y;
neighbors[ TOP_LEFT ].proc_x = my_x - 1; neighbors[ TOP_LEFT ].proc_y = my_y - 1;
neighbors[ TOP_STRAIGHT ].proc_x = my_x; neighbors[ TOP_STRAIGHT ].proc_y = my_y - 1;
neighbors[ TOP_RIGHT ].proc_x = my_x + 1; neighbors[ TOP_RIGHT ].proc_y = my_y - 1;
neighbors[ STRAIGHT_RIGHT ].proc_x = my_x + 1; neighbors[ STRAIGHT_RIGHT ].proc_y = my_y;
neighbors[ BOTTOM_RIGHT ].proc_x = my_x + 1; neighbors[ BOTTOM_RIGHT ].proc_y = my_y + 1;
//
// allocate generic resources
//
FILE *fsave = savename && myrank == 0 ? fopen( savename, "w" ) : NULL;
FILE *fchecksum = checksum && myrank == 0 ? fopen( checksum, "w" ) : NULL;
particle_t *particles = (particle_t*) malloc( n * sizeof(particle_t));
particle_t *particles_prev = (particle_t*) malloc( n * sizeof(particle_t));
MPI_Datatype PARTICLE;
MPI_Type_contiguous( 7, MPI_DOUBLE, &PARTICLE );
MPI_Type_commit( &PARTICLE );
//
// initialize and distribute the particles (that's fine to leave it unoptimized)
//
double size = set_size( n );
double size_x = size / proc_width;
double size_y = size / proc_height;
//
// set up the data partitioning across processors
//
setup_ghost_buffers(neighbors, size_x, size_y);
if (!myrank) { //Init all particles at rank 0
init_particles( n, particles_prev );
}
MPI_Bcast(particles_prev, n, PARTICLE, 0, MPI_COMM_WORLD);
for( int i = 0; i < 9; i++ ) {
neighbors[ i ].from_x = size_x * neighbors[ i ].proc_x;
neighbors[ i ].to_x = size_x + neighbors[ i ].from_x;
neighbors[ i ].from_y = size_y * neighbors[ i ].proc_y;
neighbors[ i ].to_y = size_y + neighbors[ i ].from_y;
if (i == 0) { // 0 = SELF
neighbors[ SELF ].rank = neighbors[ SELF ].proc_x + neighbors[ SELF ].proc_y * proc_width;
neighbors[ SELF ].offset = 0;
neighbors[ SELF ].send_size = 0;
neighbors[ SELF ].recv_size = 0;
for(int j = 0; j < n; j++){
if (particles_prev[j].x >= neighbors[SELF].from_x && particles_prev[j].x < neighbors[SELF].to_x &&
particles_prev[j].y >= neighbors[SELF].from_y && particles_prev[j].y < neighbors[SELF].to_y){
particles[neighbors[ SELF ].recv_size] = particles_prev[j];
neighbors[ SELF ].recv_size += 1;
}
}
neighbors[ SELF ].buffer_size = neighbors[SELF].recv_size;
} else if (neighbors[ i ].proc_x > -1 && neighbors[i].proc_x < proc_width && neighbors[ i ].proc_y > -1 && neighbors[i].proc_y < proc_height) {
neighbors[ i ].rank = neighbors[ i ].proc_x + neighbors[ i ].proc_y * proc_width;
neighbors[ i ].offset = neighbors[ i - 1 ].buffer_size + neighbors[ i - 1 ].offset;
} else {
neighbors[ i ].buffer_size = 0;
neighbors[ i ].rank = -1;
neighbors[ i ].offset = neighbors[ i - 1 ].buffer_size + neighbors[ i - 1 ].offset;
neighbors[ i ].send_size = 0;
neighbors[ i ].recv_size = 0;
}
}
for(int i = 1; i < 9; i++){
if (neighbors[i].rank != -1){
MPI_Irecv(&particles[neighbors[i].offset],neighbors[i].buffer_size, PARTICLE, neighbors[i].rank, 0, MPI_COMM_WORLD, &neighbors[i].recv_req );
}
}
//Send info out to each neighbor
copy_to_ghost_buffers( particles, neighbors );
for(int i = 1; i < 9; i++) {
if (neighbors[i].rank != -1) {
MPI_Isend(&neighbors[i].send_buffer[0],neighbors[i].send_size, PARTICLE, neighbors[i].rank, 0, MPI_COMM_WORLD, &neighbors[i].send_req );
}
}
//Ensure that we have recv from everyone
for(int i = 1; i < 9; i++) {
if (neighbors[i].rank != -1) {
MPI_Wait(&neighbors[i].recv_req,&neighbors[i].recv_stat);
MPI_Get_count(&neighbors[i].recv_stat, PARTICLE, &neighbors[i].recv_size);
}
}
//Ensure that you have send to anyone.
for(int i = 1; i < 9; i++) {
if (neighbors[i].rank != -1) {
MPI_Wait(&neighbors[i].send_req,&neighbors[i].send_stat);
}
}
particle_t *tmp = particles; particles = particles_prev; particles_prev = tmp;
//Myrank is main so he is the one gathering the info
int *n_proc_sizes = new int[n_proc];
int *n_proc_offset = new int[n_proc];
int count;
MPI_Gather( &neighbors[SELF].recv_size, 1, MPI_INT, n_proc_sizes, 1, MPI_INT, 0, MPI_COMM_WORLD );
if(!myrank ) {
int real_n = 0;
for(int i = 0; i < n_proc; i++){
real_n += n_proc_sizes[i];
}
printf("Before correctness check: n: %d, real-n: %d\n",n, real_n);
}
//
// simulate a number of time steps
//
double simulation_time = read_timer( );
for( int step = 0; step < NSTEPS; step++ )
{
//
// save current step if necessary (slightly different semantics than in other codes)
//
if( (step%SAVEFREQ) == 0 && savename ) {
//To to collect from every node
MPI_Gather( &neighbors[SELF].recv_size, 1, MPI_INT, n_proc_sizes, 1, MPI_INT, 0, MPI_COMM_WORLD );
//Calculate offsets
if (myrank == 0){
for(int i = 0; i < n_proc; i++){
if (i == 0) {
n_proc_offset[i] = 0;
} else {
n_proc_offset[i] = n_proc_offset[i-1] + n_proc_sizes[i-1];
}
}
}
MPI_Gatherv( &particles_prev[neighbors[SELF].offset], neighbors[SELF].recv_size, PARTICLE, particles, n_proc_sizes, n_proc_offset, PARTICLE, 0, MPI_COMM_WORLD );
if (myrank == 0 && fsave) {
save( fsave, n, particles );
}
}
//
// compute forces in us and the neighbors
//
for( int i = 0; i < 9; i++){
for (int k = neighbors[i].offset; k < neighbors[i].offset + neighbors[i].recv_size; k++){
particles_prev[k].ax = particles_prev[k].ay = 0;
for( int j = 0; j < 9; j++){
for (int l = neighbors[j].offset; l < neighbors[j].offset + neighbors[j].recv_size; l++){
apply_force( particles_prev[k], particles_prev[l] );
}
}
}
}
//
// move particles
//
count = 0;
for(int i = 0; i < 9; i++){
for (int k = neighbors[i].offset; k < neighbors[i].offset + neighbors[i].recv_size; k++){
move(particles_prev[k]);
if (particles_prev[k].x >= neighbors[SELF].from_x && particles_prev[k].x < neighbors[SELF].to_x &&
particles_prev[k].y >= neighbors[SELF].from_y && particles_prev[k].y < neighbors[SELF].to_y){
particles[count + neighbors[SELF].offset] = particles_prev[k];
count = count + 1;
}
}
}
neighbors[SELF].buffer_size = neighbors[SELF].recv_size = count;
for(int i = 1; i < 9; i++) {
neighbors[ i ].offset = neighbors[ i - 1 ].buffer_size + neighbors[ i - 1 ].offset;
if (neighbors[i].rank != -1){
MPI_Irecv(&particles[neighbors[i].offset],neighbors[i].buffer_size, PARTICLE, neighbors[i].rank, 0, MPI_COMM_WORLD, &neighbors[i].recv_req );
}
}
copy_to_ghost_buffers(particles, neighbors);
for( int i = 1; i < 9; i++){
if (neighbors[i].rank != -1){
MPI_Isend(&neighbors[i].send_buffer[0],min(neighbors[i].send_size, neighbors[i].buffer_size), PARTICLE, neighbors[i].rank, 0, MPI_COMM_WORLD, &neighbors[i].send_req );
}
}
for( int i = 1; i < 9; i++) {
if (neighbors[i].rank != -1){
MPI_Wait(&neighbors[i].recv_req,&neighbors[i].recv_stat);
MPI_Get_count(&neighbors[i].recv_stat, PARTICLE, &neighbors[i].recv_size);
}
}
particle_t *tmp = particles; particles = particles_prev; particles_prev = tmp;
for( int i = 1; i < 9; i++) {
if (neighbors[i].rank != -1){
MPI_Wait(&neighbors[i].send_req,&neighbors[i].send_stat);
}
}
}
simulation_time = read_timer( ) - simulation_time;
MPI_Gather( &neighbors[SELF].recv_size, 1, MPI_INT, n_proc_sizes, 1, MPI_INT, 0, MPI_COMM_WORLD );
if(!myrank ) {
int real_n = 0;
for(int i = 0; i < n_proc; i++){
if (i == 0) {
n_proc_offset[i] = 0;
} else {
n_proc_offset[i] = n_proc_offset[i-1] + n_proc_sizes[i-1];
}
real_n += n_proc_sizes[i];
}
printf("After correctness check: n: %d, real-n: %d\n",n, real_n);
printf( "n = %d, n_procs = %d, proc_width = %d, proc_height = %d, simulation time = %g s\n", n, n_proc, proc_width, proc_height, simulation_time );
}
MPI_Gatherv( &particles_prev[neighbors[SELF].offset], neighbors[SELF].recv_size, PARTICLE, particles, n_proc_sizes, n_proc_offset, PARTICLE, 0, MPI_COMM_WORLD );
//Sort every particle to their i value
if (!myrank && fchecksum){
for(int j = 0; j < n; j++){
particles_prev[(int)particles[j].index] = particles[j];
}
save( fchecksum, n, particles_prev );
}
//
// release resources
//
free( neighbors );
free( particles );
free( particles_prev );
if( fsave )
fclose( fsave );
if (fchecksum)
fclose( fchecksum );
MPI_Finalize( );
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment