Created
December 6, 2012 04:18
-
-
Save jstroem/4221740 to your computer and use it in GitHub Desktop.
CSE260 project. ghost-cells NOT distribution
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# This script is intepreted by the Bourne Shell, sh | |
# | |
# Documentation for SGE is found in: | |
# http://docs.oracle.com/cd/E19279-01/820-3257-12/n1ge.html | |
# | |
# Tell SGE which shell to run the job script in rather than depending | |
# on SGE to try and figure it out. | |
#$ -S /bin/bash | |
# | |
# Export all my environment variables to the job | |
#$ -V | |
# | |
# Tun the job in the same directory from which you submitted it | |
#$ -cwd | |
# | |
# | |
# --- Don't change anything above this line --- | |
# | |
# Give a name to the job | |
#$ -N MPI | |
# | |
# Specify a time limit for the job, not more than 30 minutes | |
#$ -l h_rt=00:10:00 | |
# | |
# Specify the parallel environment and number of cores, | |
# If not a multiple of 8, you'll get the whole node anyway | |
#$ -pe orte 8 | |
# | |
# Join stdout and stderr so they are reported in job output file | |
#$ -j y | |
# | |
# | |
# Choose the queue to run the job | |
# | |
# Debug queue: only one node may be used at a time for up to 30 minutes | |
# Interactive or batch jobs, maximum of 1 job per user running at a time | |
# | |
# Normal queue: job may use all available compute nodes (256 cores) | |
# for up to 60 minutes | |
# Batch jobs, maximum of 2 jobs per user running at a time | |
# To use more than one node, specify the "normal" queue | |
#$ -q normal.q | |
# #$ -q debug.q | |
# | |
# Specifies the circumstances under which mail is to be sent to the job owner | |
# defined by -M option. For example, options "bea" cause mail to be sent at the | |
# begining, end, and at abort time (if it happens) of the job. | |
# Option "n" means no mail will be sent. | |
#$ -m aeb | |
# | |
# *** Change to the address you want the notification sent to, and | |
# *** REMOVE the blank between the # and the $ | |
#$ -M [email protected] | |
# | |
echo | |
echo " *** Current working directory" | |
pwd | |
echo | |
echo " *** Compiler" | |
# Output which compiler are we using and the environment | |
mpicc -v | |
echo | |
echo " *** Environment" | |
printenv | |
echo | |
echo ">>> Job Starts" | |
date | |
mpirun -np $NSLOTS ./mpi -n 5000 -proc_width 4 -proc_height 2 -o mpi.txt -c checksum.txt | |
date | |
echo ">>> Job Ends" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <stdio.h> | |
#include <assert.h> | |
#include <float.h> | |
#include <string.h> | |
#include <math.h> | |
#include <time.h> | |
#include <mpi.h> | |
#include <sys/time.h> | |
#include "common.h" | |
double size; | |
// | |
// tuned constants | |
// | |
#define density 0.0010 | |
#define mass 0.01 | |
#define cutoff 0.01 | |
#define overlap 0.011 | |
#define min_r (cutoff/100) | |
#define dt 0.0005 | |
void setup_ghost_buffers( neighbor_t *neighbors, double size_x, double size_y){ | |
neighbor_t self = neighbors[SELF]; | |
for(int i = 1; i < 9; i++){ | |
if (neighbors[i].proc_x - self.proc_x != 0 && neighbors[i].proc_y - self.proc_y != 0) { | |
neighbors[i].buffer_size = (int)(min(ceil(size_y),ceil(size_x)) / mass); | |
} else if (neighbors[i].proc_x - self.proc_x != 0) { | |
neighbors[i].buffer_size = (int)ceil(size_y / mass); | |
} else if (neighbors[i].proc_y - self.proc_y != 0) { | |
neighbors[i].buffer_size = (int)ceil(size_x / mass); | |
} | |
neighbors[i].send_buffer = (particle_t*) malloc( sizeof(particle_t) * neighbors[i].buffer_size); | |
} | |
} | |
void copy_to_ghost_buffers( particle_t *particles, neighbor_t *neighbors) { | |
neighbor_t self = neighbors[SELF]; | |
for(int i = 1; i < 9; i++){ | |
neighbors[i].send_size = 0; | |
} | |
for (int i = self.offset; i < self.offset + self.recv_size; i++){ | |
if (particles[i].x <= self.from_x + overlap && particles[i].y <= self.from_y + overlap && neighbors[TOP_LEFT].rank != -1) {//TOP LEFT particle | |
if (neighbors[TOP_LEFT].buffer_size <= neighbors[TOP_LEFT].send_size){ | |
printf("Rank %d, neighbors: %d, send_buffer isnt big enough. Has: %d, needed: %d\n", self.rank, neighbors[TOP_LEFT].rank, neighbors[TOP_LEFT].buffer_size, neighbors[TOP_LEFT].send_size +1); | |
} else { | |
neighbors[TOP_LEFT].send_buffer[neighbors[TOP_LEFT].send_size] = particles[i]; | |
} | |
neighbors[TOP_LEFT].send_size = neighbors[TOP_LEFT].send_size + 1; | |
} | |
if (self.to_x - overlap < particles[i].x && particles[i].y <= self.from_y + overlap && neighbors[TOP_RIGHT].rank != -1) {//TOP RIGHT particle | |
if (neighbors[TOP_RIGHT].buffer_size <= neighbors[TOP_RIGHT].send_size){ | |
printf("Rank %d, neighbors: %d, send_buffer isnt big enough. Has: %d, needed: %d\n", self.rank, neighbors[TOP_RIGHT].rank, neighbors[TOP_RIGHT].buffer_size, neighbors[TOP_LEFT].send_size +1); | |
} else { | |
neighbors[TOP_RIGHT].send_buffer[neighbors[TOP_RIGHT].send_size] = particles[i]; | |
} | |
neighbors[TOP_RIGHT].send_size = neighbors[TOP_RIGHT].send_size + 1; | |
} | |
if (particles[i].x <= self.from_x + overlap && self.to_y - overlap < particles[i].y && neighbors[BOTTOM_LEFT].rank != -1) {// BOTTOM LEFT particle | |
if (neighbors[BOTTOM_LEFT].buffer_size <= neighbors[BOTTOM_LEFT].send_size){ | |
printf("Rank %d, neighbors: %d, send_buffer isnt big enough. Has: %d, needed: %d\n", self.rank, neighbors[BOTTOM_LEFT].rank, neighbors[BOTTOM_LEFT].buffer_size, neighbors[TOP_LEFT].send_size +1); | |
} else { | |
neighbors[BOTTOM_LEFT].send_buffer[neighbors[BOTTOM_LEFT].send_size] = particles[i]; | |
} | |
neighbors[BOTTOM_LEFT].send_size = neighbors[BOTTOM_LEFT].send_size + 1; | |
} | |
if (self.to_x - overlap < particles[i].x && self.to_y - overlap < particles[i].y && neighbors[BOTTOM_RIGHT].rank != -1) {//BOTTOM RIGHT particle | |
if (neighbors[BOTTOM_RIGHT].buffer_size <= neighbors[BOTTOM_RIGHT].send_size){ | |
printf("Rank %d, neighbors: %d, send_buffer isnt big enough. Has: %d, needed: %d\n", self.rank, neighbors[BOTTOM_RIGHT].rank, neighbors[BOTTOM_RIGHT].buffer_size, neighbors[TOP_LEFT].send_size +1); | |
} else { | |
neighbors[BOTTOM_RIGHT].send_buffer[neighbors[BOTTOM_RIGHT].send_size] = particles[i]; | |
} | |
neighbors[BOTTOM_RIGHT].send_size = neighbors[BOTTOM_RIGHT].send_size + 1; | |
} | |
if (self.to_x - overlap < particles[i].x && neighbors[STRAIGHT_RIGHT].rank != -1) { // STRAIGHT RIGHT particle | |
if (neighbors[STRAIGHT_RIGHT].buffer_size <= neighbors[STRAIGHT_RIGHT].send_size){ | |
printf("Rank %d, neighbors: %d, send_buffer isnt big enough. Has: %d, needed: %d\n", self.rank, neighbors[STRAIGHT_RIGHT].rank, neighbors[STRAIGHT_RIGHT].buffer_size, neighbors[TOP_LEFT].send_size +1); | |
} else { | |
neighbors[STRAIGHT_RIGHT].send_buffer[neighbors[STRAIGHT_RIGHT].send_size] = particles[i]; | |
} | |
neighbors[STRAIGHT_RIGHT].send_size = neighbors[STRAIGHT_RIGHT].send_size + 1; | |
} | |
if (particles[i].x <= self.from_x + overlap && neighbors[STRAIGHT_LEFT].rank != -1) { // STRAIGHT LEFT particle | |
if (neighbors[STRAIGHT_LEFT].buffer_size <= neighbors[STRAIGHT_LEFT].send_size){ | |
printf("Rank %d, neighbors: %d, send_buffer isnt big enough. Has: %d, needed: %d\n", self.rank, neighbors[STRAIGHT_LEFT].rank, neighbors[STRAIGHT_LEFT].buffer_size, neighbors[TOP_LEFT].send_size +1); | |
} else { | |
neighbors[STRAIGHT_LEFT].send_buffer[neighbors[STRAIGHT_LEFT].send_size] = particles[i]; | |
} | |
neighbors[STRAIGHT_LEFT].send_size = neighbors[STRAIGHT_LEFT].send_size + 1; | |
} | |
if (self.to_y - overlap < particles[i].y && neighbors[BOTTOM_STRAIGHT].rank != -1) { // BOTTOM STRAIGHT particle | |
if (neighbors[BOTTOM_STRAIGHT].buffer_size <= neighbors[BOTTOM_STRAIGHT].send_size){ | |
printf("Rank %d, neighbors: %d, send_buffer isnt big enough. Has: %d, needed: %d\n", self.rank, neighbors[BOTTOM_STRAIGHT].rank, neighbors[BOTTOM_STRAIGHT].buffer_size, neighbors[TOP_LEFT].send_size +1); | |
} else { | |
neighbors[BOTTOM_STRAIGHT].send_buffer[neighbors[BOTTOM_STRAIGHT].send_size] = particles[i]; | |
} | |
neighbors[BOTTOM_STRAIGHT].send_size = neighbors[BOTTOM_STRAIGHT].send_size + 1; | |
} | |
if (particles[i].y <= self.from_y + overlap && neighbors[TOP_STRAIGHT].rank != -1) { // TOP STRAIGHT particle | |
if (neighbors[TOP_STRAIGHT].buffer_size <= neighbors[TOP_STRAIGHT].send_size){ | |
printf("Rank %d, neighbors: %d, send_buffer isnt big enough. Has: %d, needed: %d\n", self.rank, neighbors[TOP_STRAIGHT].rank, neighbors[TOP_STRAIGHT].buffer_size, neighbors[TOP_LEFT].send_size +1); | |
} else { | |
neighbors[TOP_STRAIGHT].send_buffer[neighbors[TOP_STRAIGHT].send_size] = particles[i]; | |
} | |
neighbors[TOP_STRAIGHT].send_size = neighbors[TOP_STRAIGHT].send_size + 1; | |
} | |
} | |
} | |
// | |
// timer | |
// | |
double read_timer( ) | |
{ | |
static bool initialized = false; | |
static struct timeval start; | |
struct timeval end; | |
if( !initialized ) | |
{ | |
gettimeofday( &start, NULL ); | |
initialized = true; | |
} | |
gettimeofday( &end, NULL ); | |
return (end.tv_sec - start.tv_sec) + 1.0e-6 * (end.tv_usec - start.tv_usec); | |
} | |
// | |
// keep density constant | |
// | |
double set_size( int n ) | |
{ | |
size = sqrt( density * n ); | |
return size; | |
} | |
// | |
// Initialize the particle positions and velocities | |
// | |
void init_particles( int n, particle_t *p ) | |
{ | |
srand48( 1 ); | |
int sx = (int)ceil(sqrt((double)n)); | |
int sy = (n+sx-1)/sx; | |
int *shuffle = (int*)malloc( n * sizeof(int) ); | |
for( int i = 0; i < n; i++ ) | |
shuffle[i] = i; | |
for( int i = 0; i < n; i++ ) | |
{ | |
// | |
// make sure particles are not spatially sorted | |
// | |
int j = lrand48()%(n-i); | |
int k = shuffle[j]; | |
shuffle[j] = shuffle[n-i-1]; | |
// | |
// distribute particles evenly to ensure proper spacing | |
// | |
p[i].x = size*(1.+(k%sx))/(1+sx); | |
p[i].y = (size/2)*(1.+(k/sx))/(1+sy); | |
p[i].index = i; | |
// | |
// assign random velocities within a bound | |
// | |
p[i].vx = drand48()*2-1; | |
p[i].vy = drand48()*2-1; | |
} | |
free( shuffle ); | |
} | |
// | |
// interact two particles | |
// | |
void apply_force( particle_t &particle, particle_t &neighbor ) | |
{ | |
double dx = neighbor.x - particle.x; | |
double dy = neighbor.y - particle.y; | |
double r2 = dx * dx + dy * dy; | |
if( r2 > cutoff*cutoff ) | |
return; | |
r2 = fmax( r2, min_r*min_r ); | |
double r = sqrt( r2 ); | |
// | |
// very simple short-range repulsive force | |
// | |
double coef = ( 1 - cutoff / r ) / r2 / mass; | |
particle.ax += coef * dx; | |
particle.ay += coef * dy; | |
} | |
// | |
// integrate the ODE | |
// | |
void move( particle_t &p ) | |
{ | |
// | |
// slightly simplified Velocity Verlet integration | |
// conserves energy better than explicit Euler method | |
// | |
p.vx += p.ax * dt; | |
p.vy += p.ay * dt; | |
p.x += p.vx * dt; | |
p.y += p.vy * dt; | |
// | |
// bounce from walls | |
// | |
while( p.x < 0 || p.x > size ) | |
{ | |
p.x = p.x < 0 ? -p.x : 2*size-p.x; | |
p.vx = -p.vx; | |
} | |
while( p.y < 0 || p.y > size ) | |
{ | |
p.y = p.y < 0 ? -p.y : 2*size-p.y; | |
p.vy = -p.vy; | |
} | |
} | |
// | |
// I/O routines | |
// | |
void save( FILE *f, int n, particle_t *p ) | |
{ | |
static bool first = true; | |
if( first ) | |
{ | |
fprintf( f, "%d %g\n", n, size ); | |
first = false; | |
} | |
for( int i = 0; i < n; i++ ) | |
fprintf( f, "%g %g\n", p[i].x, p[i].y ); | |
} | |
// | |
// command line option processing | |
// | |
int find_option( int argc, char **argv, const char *option ) | |
{ | |
for( int i = 1; i < argc; i++ ) | |
if( strcmp( argv[i], option ) == 0 ) | |
return i; | |
return -1; | |
} | |
int read_int( int argc, char **argv, const char *option, int default_value ) | |
{ | |
int iplace = find_option( argc, argv, option ); | |
if( iplace >= 0 && iplace < argc-1 ) | |
return atoi( argv[iplace+1] ); | |
return default_value; | |
} | |
char *read_string( int argc, char **argv, const char *option, char *default_value ) | |
{ | |
int iplace = find_option( argc, argv, option ); | |
if( iplace >= 0 && iplace < argc-1 ) | |
return argv[iplace+1]; | |
return default_value; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef __CS267_COMMON_H__ | |
#define __CS267_COMMON_H__ | |
inline int min( int a, int b ) { return a < b ? a : b; } | |
inline int max( int a, int b ) { return a > b ? a : b; } | |
// | |
// saving parameters | |
// | |
const int NSTEPS = 1000; | |
const int SAVEFREQ = 10; | |
const int SELF = 0, | |
BOTTOM_STRAIGHT = 1, | |
BOTTOM_LEFT = 2, | |
STRAIGHT_LEFT = 3, | |
TOP_LEFT = 4, | |
TOP_STRAIGHT = 5, | |
TOP_RIGHT = 6, | |
STRAIGHT_RIGHT = 7, | |
BOTTOM_RIGHT = 8; | |
// | |
// particle data structure | |
// | |
typedef struct | |
{ | |
double index; | |
double x; | |
double y; | |
double vx; | |
double vy; | |
double ax; | |
double ay; | |
} particle_t; | |
typedef struct | |
{ | |
int offset; | |
int send_size; | |
int recv_size; | |
int proc_x; | |
int proc_y; | |
int rank; | |
double from_x; | |
double from_y; | |
double to_x; | |
double to_y; | |
particle_t *send_buffer; | |
int buffer_size; | |
MPI_Request recv_req; | |
MPI_Request send_req; | |
MPI_Status recv_stat; | |
MPI_Status send_stat; | |
} neighbor_t; | |
// | |
// timing routines | |
// | |
double read_timer( ); | |
void copy_to_ghost_buffers( particle_t *particles, neighbor_t *neighbors); | |
void setup_ghost_buffers( neighbor_t *neighbors, double size_x, double size_y); | |
// | |
// simulation routines | |
// | |
double set_size( int n ); | |
void init_particles( int n, particle_t *p ); | |
void apply_force( particle_t &particle, particle_t &neighbor ); | |
void move( particle_t &p ); | |
// | |
// I/O routines | |
// | |
FILE *open_save( char *filename, int n ); | |
void save( FILE *f, int n, particle_t *p ); | |
// | |
// argument processing routines | |
// | |
int find_option( int argc, char **argv, const char *option ); | |
int read_int( int argc, char **argv, const char *option, int default_value ); | |
char *read_string( int argc, char **argv, const char *option, char *default_value ); | |
#endif |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
HOST = $(shell hostname) | |
BANG = $(shell expr match `hostname` ccom-bang) | |
BANG-COMPUTE = $(shell expr match `hostname` compute) | |
LILLIPUT = $(shell expr match `hostname` lilliput) | |
ifneq ($(BANG), 0) | |
PUB = /share/class/public/cse260-fa12 | |
include $(PUB)/Arch/arch.gnu.generic | |
else | |
ifneq ($(BANG-COMPUTE), 0) | |
PUB = /share/class/public/cse260-fa12 | |
include $(PUB)/Arch/arch.gnu.generic | |
else | |
ifneq ($(LILLIPUT), 0) | |
PUB = /class/public/cse260-fa12 | |
include $(PUB)/Arch/arch.intel.generic | |
else | |
# PUB = /Users/baden/lib | |
include $(PUB)/Arch/arch.gnu | |
# include $(PUB)/Arch/arch.gnu-4.5 | |
endif | |
endif | |
endif | |
# | |
# Add symbol table information for gdb/cachegrind | |
ifeq ($(debug), 1) | |
CFLAGS += -g | |
LDFLAGS += -g | |
C++FLAGS += -g | |
endif | |
# Add symbol table information for gprof | |
ifeq ($(gprof), 1) | |
CFLAGS += -g -pg | |
C++FLAGS += -g -pg | |
LDFLAGS += -g -pg | |
endif | |
# If you want to compile for single precision, | |
# specify single=1 on the "make" command line | |
ifeq ($(single), 1) | |
else | |
C++FLAGS += -D_DOUBLE | |
CFLAGS += -D_DOUBLE | |
endif | |
# If you want to compile so that you call the plotter for | |
# flattened 2D arrays (implemented as 1D arrays) | |
# specify flattened=1 on the "make" command line | |
ifeq ($(flattened), 1) | |
C++FLAGS += -DPLOT1D | |
CFLAGS += -DPLOT1D | |
endif | |
# If you want to use restrict pointers, make restrict=1 | |
# This applies to the hand code version | |
ifeq ($(restrict), 1) | |
C++FLAGS += -D__RESTRICT | |
CFLAGS += -D__RESTRICT | |
ifneq ($(CARVER), 0) | |
C++FLAGS += -restrict | |
CFLAGS += -restrict | |
endif | |
endif | |
#DEBUG += -DDEBUG | |
TARGETS = mpi | |
app: $(TARGETS) | |
OBJECTS = common.o mpi.o.o | |
#ifeq ($(no-mpi),1) | |
#OBJECTS += Timer.o | |
#endif | |
app: $(TARGETS) | |
mpi: mpi.o common.o | |
$(C++LINK) $(LDFLAGS) -o $@ mpi.o common.o $(LDLIBS) | |
clean: | |
$(RM) *.o $(TARGETS); | |
$(RM) core.*; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <mpi.h> | |
#include <stdlib.h> | |
#include <stdio.h> | |
#include <assert.h> | |
#include <math.h> | |
#include "common.h" | |
// | |
// benchmarking program | |
// | |
int main( int argc, char **argv ) | |
{ | |
// | |
// process command line parameters | |
// | |
if( find_option( argc, argv, "-h" ) >= 0 ) | |
{ | |
printf( "Options:\n" ); | |
printf( "-h to see this help\n" ); | |
printf( "-n <int> to set the number of particles\n" ); | |
printf( "-proc_width <int> to set the width of the processor grid\n" ); | |
printf( "-proc_height <int> to set the height of the processor grid\n" ); | |
printf( "-o <filename> to specify the output file name\n" ); | |
printf( "-c <checksum> to specify the output checksum file name\n" ); | |
return 0; | |
} | |
int n = read_int( argc, argv, "-n", 1000 ); | |
int proc_width = read_int( argc, argv, "-proc_width", NULL ); | |
int proc_height = read_int( argc, argv, "-proc_height", NULL ); | |
char *savename = read_string( argc, argv, "-o", NULL ); | |
char *checksum = read_string( argc, argv, "-c", NULL ); | |
// | |
// set up MPI | |
// | |
int n_proc, myrank; | |
MPI_Init( &argc, &argv ); | |
MPI_Comm_size( MPI_COMM_WORLD, &n_proc ); | |
MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); | |
//Ensure that px and py are correct setup. | |
if ((proc_height * proc_width) != n_proc && !myrank){ | |
printf("\n *** The number of processors in the geometry (%d) is not the same as the number requested (%d)",proc_height*proc_width,n_proc); | |
exit(-1); | |
} | |
int my_x = myrank % proc_width; | |
int my_y = myrank / proc_width; | |
// | |
// Setup the neighbor setup | |
// | |
neighbor_t *neighbors = (neighbor_t*) malloc( 9 * sizeof(neighbor_t)); | |
neighbors[ SELF ].proc_x = my_x; neighbors[ SELF ].proc_y = my_y; | |
neighbors[ BOTTOM_STRAIGHT ].proc_x = my_x; neighbors[ BOTTOM_STRAIGHT ].proc_y = my_y + 1; | |
neighbors[ BOTTOM_LEFT ].proc_x = my_x - 1; neighbors[ BOTTOM_LEFT ].proc_y = my_y + 1; | |
neighbors[ STRAIGHT_LEFT ].proc_x = my_x - 1; neighbors[ STRAIGHT_LEFT ].proc_y = my_y; | |
neighbors[ TOP_LEFT ].proc_x = my_x - 1; neighbors[ TOP_LEFT ].proc_y = my_y - 1; | |
neighbors[ TOP_STRAIGHT ].proc_x = my_x; neighbors[ TOP_STRAIGHT ].proc_y = my_y - 1; | |
neighbors[ TOP_RIGHT ].proc_x = my_x + 1; neighbors[ TOP_RIGHT ].proc_y = my_y - 1; | |
neighbors[ STRAIGHT_RIGHT ].proc_x = my_x + 1; neighbors[ STRAIGHT_RIGHT ].proc_y = my_y; | |
neighbors[ BOTTOM_RIGHT ].proc_x = my_x + 1; neighbors[ BOTTOM_RIGHT ].proc_y = my_y + 1; | |
// | |
// allocate generic resources | |
// | |
FILE *fsave = savename && myrank == 0 ? fopen( savename, "w" ) : NULL; | |
FILE *fchecksum = checksum && myrank == 0 ? fopen( checksum, "w" ) : NULL; | |
particle_t *particles = (particle_t*) malloc( n * sizeof(particle_t)); | |
particle_t *particles_prev = (particle_t*) malloc( n * sizeof(particle_t)); | |
MPI_Datatype PARTICLE; | |
MPI_Type_contiguous( 7, MPI_DOUBLE, &PARTICLE ); | |
MPI_Type_commit( &PARTICLE ); | |
// | |
// initialize and distribute the particles (that's fine to leave it unoptimized) | |
// | |
double size = set_size( n ); | |
double size_x = size / proc_width; | |
double size_y = size / proc_height; | |
// | |
// set up the data partitioning across processors | |
// | |
setup_ghost_buffers(neighbors, size_x, size_y); | |
if (!myrank) { //Init all particles at rank 0 | |
init_particles( n, particles_prev ); | |
} | |
MPI_Bcast(particles_prev, n, PARTICLE, 0, MPI_COMM_WORLD); | |
for( int i = 0; i < 9; i++ ) { | |
neighbors[ i ].from_x = size_x * neighbors[ i ].proc_x; | |
neighbors[ i ].to_x = size_x + neighbors[ i ].from_x; | |
neighbors[ i ].from_y = size_y * neighbors[ i ].proc_y; | |
neighbors[ i ].to_y = size_y + neighbors[ i ].from_y; | |
if (i == 0) { // 0 = SELF | |
neighbors[ SELF ].rank = neighbors[ SELF ].proc_x + neighbors[ SELF ].proc_y * proc_width; | |
neighbors[ SELF ].offset = 0; | |
neighbors[ SELF ].send_size = 0; | |
neighbors[ SELF ].recv_size = 0; | |
for(int j = 0; j < n; j++){ | |
if (particles_prev[j].x >= neighbors[SELF].from_x && particles_prev[j].x < neighbors[SELF].to_x && | |
particles_prev[j].y >= neighbors[SELF].from_y && particles_prev[j].y < neighbors[SELF].to_y){ | |
particles[neighbors[ SELF ].recv_size] = particles_prev[j]; | |
neighbors[ SELF ].recv_size += 1; | |
} | |
} | |
neighbors[ SELF ].buffer_size = neighbors[SELF].recv_size; | |
} else if (neighbors[ i ].proc_x > -1 && neighbors[i].proc_x < proc_width && neighbors[ i ].proc_y > -1 && neighbors[i].proc_y < proc_height) { | |
neighbors[ i ].rank = neighbors[ i ].proc_x + neighbors[ i ].proc_y * proc_width; | |
neighbors[ i ].offset = neighbors[ i - 1 ].buffer_size + neighbors[ i - 1 ].offset; | |
} else { | |
neighbors[ i ].buffer_size = 0; | |
neighbors[ i ].rank = -1; | |
neighbors[ i ].offset = neighbors[ i - 1 ].buffer_size + neighbors[ i - 1 ].offset; | |
neighbors[ i ].send_size = 0; | |
neighbors[ i ].recv_size = 0; | |
} | |
} | |
for(int i = 1; i < 9; i++){ | |
if (neighbors[i].rank != -1){ | |
MPI_Irecv(&particles[neighbors[i].offset],neighbors[i].buffer_size, PARTICLE, neighbors[i].rank, 0, MPI_COMM_WORLD, &neighbors[i].recv_req ); | |
} | |
} | |
//Send info out to each neighbor | |
copy_to_ghost_buffers( particles, neighbors ); | |
for(int i = 1; i < 9; i++) { | |
if (neighbors[i].rank != -1) { | |
MPI_Isend(&neighbors[i].send_buffer[0],neighbors[i].send_size, PARTICLE, neighbors[i].rank, 0, MPI_COMM_WORLD, &neighbors[i].send_req ); | |
} | |
} | |
//Ensure that we have recv from everyone | |
for(int i = 1; i < 9; i++) { | |
if (neighbors[i].rank != -1) { | |
MPI_Wait(&neighbors[i].recv_req,&neighbors[i].recv_stat); | |
MPI_Get_count(&neighbors[i].recv_stat, PARTICLE, &neighbors[i].recv_size); | |
} | |
} | |
//Ensure that you have send to anyone. | |
for(int i = 1; i < 9; i++) { | |
if (neighbors[i].rank != -1) { | |
MPI_Wait(&neighbors[i].send_req,&neighbors[i].send_stat); | |
} | |
} | |
particle_t *tmp = particles; particles = particles_prev; particles_prev = tmp; | |
//Myrank is main so he is the one gathering the info | |
int *n_proc_sizes = new int[n_proc]; | |
int *n_proc_offset = new int[n_proc]; | |
int count; | |
MPI_Gather( &neighbors[SELF].recv_size, 1, MPI_INT, n_proc_sizes, 1, MPI_INT, 0, MPI_COMM_WORLD ); | |
if(!myrank ) { | |
int real_n = 0; | |
for(int i = 0; i < n_proc; i++){ | |
real_n += n_proc_sizes[i]; | |
} | |
printf("Before correctness check: n: %d, real-n: %d\n",n, real_n); | |
} | |
// | |
// simulate a number of time steps | |
// | |
double simulation_time = read_timer( ); | |
for( int step = 0; step < NSTEPS; step++ ) | |
{ | |
// | |
// save current step if necessary (slightly different semantics than in other codes) | |
// | |
if( (step%SAVEFREQ) == 0 && savename ) { | |
//To to collect from every node | |
MPI_Gather( &neighbors[SELF].recv_size, 1, MPI_INT, n_proc_sizes, 1, MPI_INT, 0, MPI_COMM_WORLD ); | |
//Calculate offsets | |
if (myrank == 0){ | |
for(int i = 0; i < n_proc; i++){ | |
if (i == 0) { | |
n_proc_offset[i] = 0; | |
} else { | |
n_proc_offset[i] = n_proc_offset[i-1] + n_proc_sizes[i-1]; | |
} | |
} | |
} | |
MPI_Gatherv( &particles_prev[neighbors[SELF].offset], neighbors[SELF].recv_size, PARTICLE, particles, n_proc_sizes, n_proc_offset, PARTICLE, 0, MPI_COMM_WORLD ); | |
if (myrank == 0 && fsave) { | |
save( fsave, n, particles ); | |
} | |
} | |
// | |
// compute forces in us and the neighbors | |
// | |
for( int i = 0; i < 9; i++){ | |
for (int k = neighbors[i].offset; k < neighbors[i].offset + neighbors[i].recv_size; k++){ | |
particles_prev[k].ax = particles_prev[k].ay = 0; | |
for( int j = 0; j < 9; j++){ | |
for (int l = neighbors[j].offset; l < neighbors[j].offset + neighbors[j].recv_size; l++){ | |
apply_force( particles_prev[k], particles_prev[l] ); | |
} | |
} | |
} | |
} | |
// | |
// move particles | |
// | |
count = 0; | |
for(int i = 0; i < 9; i++){ | |
for (int k = neighbors[i].offset; k < neighbors[i].offset + neighbors[i].recv_size; k++){ | |
move(particles_prev[k]); | |
if (particles_prev[k].x >= neighbors[SELF].from_x && particles_prev[k].x < neighbors[SELF].to_x && | |
particles_prev[k].y >= neighbors[SELF].from_y && particles_prev[k].y < neighbors[SELF].to_y){ | |
particles[count + neighbors[SELF].offset] = particles_prev[k]; | |
count = count + 1; | |
} | |
} | |
} | |
neighbors[SELF].buffer_size = neighbors[SELF].recv_size = count; | |
for(int i = 1; i < 9; i++) { | |
neighbors[ i ].offset = neighbors[ i - 1 ].buffer_size + neighbors[ i - 1 ].offset; | |
if (neighbors[i].rank != -1){ | |
MPI_Irecv(&particles[neighbors[i].offset],neighbors[i].buffer_size, PARTICLE, neighbors[i].rank, 0, MPI_COMM_WORLD, &neighbors[i].recv_req ); | |
} | |
} | |
copy_to_ghost_buffers(particles, neighbors); | |
for( int i = 1; i < 9; i++){ | |
if (neighbors[i].rank != -1){ | |
MPI_Isend(&neighbors[i].send_buffer[0],min(neighbors[i].send_size, neighbors[i].buffer_size), PARTICLE, neighbors[i].rank, 0, MPI_COMM_WORLD, &neighbors[i].send_req ); | |
} | |
} | |
for( int i = 1; i < 9; i++) { | |
if (neighbors[i].rank != -1){ | |
MPI_Wait(&neighbors[i].recv_req,&neighbors[i].recv_stat); | |
MPI_Get_count(&neighbors[i].recv_stat, PARTICLE, &neighbors[i].recv_size); | |
} | |
} | |
particle_t *tmp = particles; particles = particles_prev; particles_prev = tmp; | |
for( int i = 1; i < 9; i++) { | |
if (neighbors[i].rank != -1){ | |
MPI_Wait(&neighbors[i].send_req,&neighbors[i].send_stat); | |
} | |
} | |
} | |
simulation_time = read_timer( ) - simulation_time; | |
MPI_Gather( &neighbors[SELF].recv_size, 1, MPI_INT, n_proc_sizes, 1, MPI_INT, 0, MPI_COMM_WORLD ); | |
if(!myrank ) { | |
int real_n = 0; | |
for(int i = 0; i < n_proc; i++){ | |
if (i == 0) { | |
n_proc_offset[i] = 0; | |
} else { | |
n_proc_offset[i] = n_proc_offset[i-1] + n_proc_sizes[i-1]; | |
} | |
real_n += n_proc_sizes[i]; | |
} | |
printf("After correctness check: n: %d, real-n: %d\n",n, real_n); | |
printf( "n = %d, n_procs = %d, proc_width = %d, proc_height = %d, simulation time = %g s\n", n, n_proc, proc_width, proc_height, simulation_time ); | |
} | |
MPI_Gatherv( &particles_prev[neighbors[SELF].offset], neighbors[SELF].recv_size, PARTICLE, particles, n_proc_sizes, n_proc_offset, PARTICLE, 0, MPI_COMM_WORLD ); | |
//Sort every particle to their i value | |
if (!myrank && fchecksum){ | |
for(int j = 0; j < n; j++){ | |
particles_prev[(int)particles[j].index] = particles[j]; | |
} | |
save( fchecksum, n, particles_prev ); | |
} | |
// | |
// release resources | |
// | |
free( neighbors ); | |
free( particles ); | |
free( particles_prev ); | |
if( fsave ) | |
fclose( fsave ); | |
if (fchecksum) | |
fclose( fchecksum ); | |
MPI_Finalize( ); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment