cse260 project ghost cells
Created November 30, 2012 06:48
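An MPI particle simulation for a CSE 260 assignment: the domain is divided across a proc_width x proc_height processor grid, each rank is further split into split_factor x split_factor blocks, and every time step each block exchanges ghost cells with its eight neighbors. The gist contains an SGE job script, the particle and ghost-cell helpers (common.cpp; the C++ source filenames are inferred from the Makefile), the shared header common.h, a short test file, a Makefile, and the MPI driver mpi.cpp.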
#!/bin/bash
# This script is interpreted by the Bourne Shell, sh
#
# Documentation for SGE is found in:
# http://docs.oracle.com/cd/E19279-01/820-3257-12/n1ge.html
#
# Tell SGE which shell to run the job script in rather than depending
# on SGE to try and figure it out.
#$ -S /bin/bash
#
# Export all my environment variables to the job
#$ -V
#
# Run the job in the same directory from which you submitted it
#$ -cwd
#
#
# --- Don't change anything above this line ---
#
# Give a name to the job
#$ -N MPI
#
# Specify a time limit for the job, not more than 30 minutes
#$ -l h_rt=00:10:00
#
# Specify the parallel environment and number of cores.
# If not a multiple of 8, you'll get the whole node anyway
#$ -pe orte 16
#
# Join stdout and stderr so they are reported in the job output file
#$ -j y
#
#
# Choose the queue to run the job
#
# Debug queue: only one node may be used at a time for up to 30 minutes
# Interactive or batch jobs, maximum of 1 job per user running at a time
#
# Normal queue: job may use all available compute nodes (256 cores)
# for up to 60 minutes
# Batch jobs, maximum of 2 jobs per user running at a time
# To use more than one node, specify the "normal" queue
#$ -q normal.q
# #$ -q debug.q
#
# Specifies the circumstances under which mail is to be sent to the job owner
# as defined by the -M option. For example, the options "bea" cause mail to be
# sent at the beginning, end, and abort time (if it happens) of the job.
# Option "n" means no mail will be sent.
#$ -m aeb
#
# *** Change to the address you want the notification sent to, and
# *** REMOVE the blank between the # and the $
#$ -M [email protected]
#
echo
echo " *** Current working directory"
pwd
echo
echo " *** Compiler"
# Output which compiler we are using and the environment
mpicc -v
echo
echo " *** Environment"
printenv
echo
echo ">>> Job Starts"
date
mpirun -np $NSLOTS ./mpi -n 5000 -proc_width 4 -proc_height 4 -split_factor 3 -o mpi.txt
date
echo ">>> Job Ends"
---- common.cpp ----
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <float.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <mpi.h>
#include <sys/time.h>
#include "common.h"

double size;

//
// tuned constants
//
#define density 0.0005
#define mass    0.01
#define cutoff  0.01
#define min_r   (cutoff/100)
#define dt      0.0005
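//
// calc_array_size returns the length of the particle arrays: room for all n
// particles, or conservatively for the ghost regions if those are larger.
// The per-edge bound ceil(max(size_x,size_y)/mass) relies on mass == cutoff
// (both 0.01), i.e. roughly one particle per cutoff-sized cell along an
// edge, times 8 neighbors per block, times split_factor^2 blocks per rank,
// times n_proc. (This reading of the formula is inferred from the constants
// above; it is not stated in the original source.)
//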
int calc_array_size(int n, double size_x, double size_y, int split_factor, int n_proc){
    return max(n, (int)ceil(fmax(size_x,size_y) / mass) * 8 * (split_factor * split_factor * n_proc));
}

void setup_neighbors( block_t block, int my_x, int my_y, int proc_width, int proc_height, int split_factor ){
    neighbor_t *neighbors = block.neighbors;
    neighbors[ BOTTOM_STRAIGHT ].proc_x = my_x;     neighbors[ BOTTOM_STRAIGHT ].proc_y = my_y + 1;
    neighbors[ BOTTOM_LEFT     ].proc_x = my_x - 1; neighbors[ BOTTOM_LEFT     ].proc_y = my_y + 1;
    neighbors[ STRAIGHT_LEFT   ].proc_x = my_x - 1; neighbors[ STRAIGHT_LEFT   ].proc_y = my_y;
    neighbors[ TOP_LEFT        ].proc_x = my_x - 1; neighbors[ TOP_LEFT        ].proc_y = my_y - 1;
    neighbors[ TOP_STRAIGHT    ].proc_x = my_x;     neighbors[ TOP_STRAIGHT    ].proc_y = my_y - 1;
    neighbors[ TOP_RIGHT       ].proc_x = my_x + 1; neighbors[ TOP_RIGHT       ].proc_y = my_y - 1;
    neighbors[ STRAIGHT_RIGHT  ].proc_x = my_x + 1; neighbors[ STRAIGHT_RIGHT  ].proc_y = my_y;
    neighbors[ BOTTOM_RIGHT    ].proc_x = my_x + 1; neighbors[ BOTTOM_RIGHT    ].proc_y = my_y + 1;
    for(int i = 0; i < 8; i++){
        // Wrap the neighbor's processor coordinate around the processor grid;
        // crossing a grid edge moves the neighbor into the adjacent block
        // column/row on the wrapped-to rank.
        if (neighbors[ i ].proc_x < 0) {
            neighbors[ i ].proc_x = neighbors[ i ].proc_x + proc_width;
            neighbors[ i ].block_x = block.block_x - 1;
        } else if (proc_width <= neighbors[ i ].proc_x) {
            neighbors[ i ].proc_x = neighbors[ i ].proc_x % proc_width;
            neighbors[ i ].block_x = block.block_x + 1;
        } else {
            neighbors[ i ].block_x = block.block_x;
        }
        if (neighbors[ i ].proc_y < 0) {
            neighbors[ i ].proc_y = neighbors[ i ].proc_y + proc_height;
            neighbors[ i ].block_y = block.block_y - 1;
        } else if (proc_height <= neighbors[ i ].proc_y) {
            neighbors[ i ].proc_y = neighbors[ i ].proc_y % proc_height;
            neighbors[ i ].block_y = block.block_y + 1;
        } else {
            neighbors[ i ].block_y = block.block_y;
        }
        neighbors[ i ].rank = neighbors[ i ].proc_x + neighbors[ i ].proc_y * proc_width;
        // Neighbors that fall outside the global domain get rank -1: no exchange.
        if (neighbors[ i ].block_y < 0 || split_factor <= neighbors[ i ].block_y || neighbors[ i ].block_x < 0 || split_factor <= neighbors[ i ].block_x){
            neighbors[ i ].rank = -1;
            neighbors[ i ].buffer_size = 0;
            neighbors[ i ].recv_size = 0;
        }
    }
}
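//
// Example (values chosen for illustration): with proc_width = 4, a block on
// rank column my_x = 0 gets STRAIGHT_LEFT proc_x = 3 with block_x one less
// than its own. If that pushes block_x below 0 (or past split_factor - 1 in
// general), the neighbor lies outside the domain and its rank is set to -1,
// so nothing is exchanged across the outer boundary.
//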
void setup_ghost_buffers(int my_x, int my_y, neighbor_t *neighbors, double size_x, double size_y, int start_offset){
    for(int i = 0; i < 8; i++){
        if (neighbors[ i ].rank != -1){
            // Size the send buffer by the length of the shared edge (the
            // shorter edge for a diagonal neighbor) in cutoff-sized cells.
            // Note: this assumes proc_width, proc_height >= 2; with a
            // 1-wide (or 1-high) grid, a wrapped left/right (or up/down)
            // neighbor has proc_x == my_x (proc_y == my_y) and buffer_size
            // would be left uninitialized.
            if (neighbors[i].proc_x - my_x != 0 && neighbors[i].proc_y - my_y != 0) {
                neighbors[i].buffer_size = (int)(ceil(fmin(size_y,size_x) / mass));
            } else if (neighbors[i].proc_x - my_x != 0) {
                neighbors[i].buffer_size = (int)ceil(size_y / mass);
            } else if (neighbors[i].proc_y - my_y != 0) {
                neighbors[i].buffer_size = (int)ceil(size_x / mass);
            }
        } else {
            neighbors[i].buffer_size = 0;
        }
        neighbors[i].offset = (i == 0 ? start_offset : neighbors[ i - 1 ].buffer_size + neighbors[ i - 1 ].offset);
        neighbors[i].send_buffer = (particle_t*) malloc( sizeof(particle_t) * neighbors[i].buffer_size);
    }
}
// Stage every particle within cutoff of a block edge into the send buffers
// of the corresponding neighbors; a corner particle goes to up to three.
void copy_to_ghost_buffers( particle_t *particles, block_t self) {
    for(int i = 0; i < 8; i++){
        self.neighbors[i].send_size = 0;
    }
    for (int i = self.offset; i < self.offset + self.size; i++){
        if (particles[i].x <= self.from_x + cutoff && particles[i].y <= self.from_y + cutoff && self.neighbors[TOP_LEFT].rank != -1) { // TOP LEFT particle
            if (self.neighbors[TOP_LEFT].buffer_size <= self.neighbors[TOP_LEFT].send_size){
                printf("Subrank %d, neighbor %d: send_buffer isn't big enough. Has: %d, needed: %d\n", self.subrank, self.neighbors[TOP_LEFT].rank, self.neighbors[TOP_LEFT].buffer_size, self.neighbors[TOP_LEFT].send_size + 1);
            } else {
                self.neighbors[TOP_LEFT].send_buffer[self.neighbors[TOP_LEFT].send_size] = particles[i];
            }
            self.neighbors[TOP_LEFT].send_size = self.neighbors[TOP_LEFT].send_size + 1;
        }
        if (self.to_x - cutoff < particles[i].x && particles[i].y <= self.from_y + cutoff && self.neighbors[TOP_RIGHT].rank != -1) { // TOP RIGHT particle
            if (self.neighbors[TOP_RIGHT].buffer_size <= self.neighbors[TOP_RIGHT].send_size){
                printf("Subrank %d, neighbor %d: send_buffer isn't big enough. Has: %d, needed: %d\n", self.subrank, self.neighbors[TOP_RIGHT].rank, self.neighbors[TOP_RIGHT].buffer_size, self.neighbors[TOP_RIGHT].send_size + 1);
            } else {
                self.neighbors[TOP_RIGHT].send_buffer[self.neighbors[TOP_RIGHT].send_size] = particles[i];
            }
            self.neighbors[TOP_RIGHT].send_size = self.neighbors[TOP_RIGHT].send_size + 1;
        }
        if (particles[i].x <= self.from_x + cutoff && self.to_y - cutoff < particles[i].y && self.neighbors[BOTTOM_LEFT].rank != -1) { // BOTTOM LEFT particle
            if (self.neighbors[BOTTOM_LEFT].buffer_size <= self.neighbors[BOTTOM_LEFT].send_size){
                printf("Subrank %d, neighbor %d: send_buffer isn't big enough. Has: %d, needed: %d\n", self.subrank, self.neighbors[BOTTOM_LEFT].rank, self.neighbors[BOTTOM_LEFT].buffer_size, self.neighbors[BOTTOM_LEFT].send_size + 1);
            } else {
                self.neighbors[BOTTOM_LEFT].send_buffer[self.neighbors[BOTTOM_LEFT].send_size] = particles[i];
            }
            self.neighbors[BOTTOM_LEFT].send_size = self.neighbors[BOTTOM_LEFT].send_size + 1;
        }
        if (self.to_x - cutoff < particles[i].x && self.to_y - cutoff < particles[i].y && self.neighbors[BOTTOM_RIGHT].rank != -1) { // BOTTOM RIGHT particle
            if (self.neighbors[BOTTOM_RIGHT].buffer_size <= self.neighbors[BOTTOM_RIGHT].send_size){
                printf("Subrank %d, neighbor %d: send_buffer isn't big enough. Has: %d, needed: %d\n", self.subrank, self.neighbors[BOTTOM_RIGHT].rank, self.neighbors[BOTTOM_RIGHT].buffer_size, self.neighbors[BOTTOM_RIGHT].send_size + 1);
            } else {
                self.neighbors[BOTTOM_RIGHT].send_buffer[self.neighbors[BOTTOM_RIGHT].send_size] = particles[i];
            }
            self.neighbors[BOTTOM_RIGHT].send_size = self.neighbors[BOTTOM_RIGHT].send_size + 1;
        }
        if (self.to_x - cutoff < particles[i].x && self.neighbors[STRAIGHT_RIGHT].rank != -1) { // STRAIGHT RIGHT particle
            if (self.neighbors[STRAIGHT_RIGHT].buffer_size <= self.neighbors[STRAIGHT_RIGHT].send_size){
                printf("Subrank %d, neighbor %d: send_buffer isn't big enough. Has: %d, needed: %d\n", self.subrank, self.neighbors[STRAIGHT_RIGHT].rank, self.neighbors[STRAIGHT_RIGHT].buffer_size, self.neighbors[STRAIGHT_RIGHT].send_size + 1);
            } else {
                self.neighbors[STRAIGHT_RIGHT].send_buffer[self.neighbors[STRAIGHT_RIGHT].send_size] = particles[i];
            }
            self.neighbors[STRAIGHT_RIGHT].send_size = self.neighbors[STRAIGHT_RIGHT].send_size + 1;
        }
        if (particles[i].x <= self.from_x + cutoff && self.neighbors[STRAIGHT_LEFT].rank != -1) { // STRAIGHT LEFT particle
            if (self.neighbors[STRAIGHT_LEFT].buffer_size <= self.neighbors[STRAIGHT_LEFT].send_size){
                printf("Subrank %d, neighbor %d: send_buffer isn't big enough. Has: %d, needed: %d\n", self.subrank, self.neighbors[STRAIGHT_LEFT].rank, self.neighbors[STRAIGHT_LEFT].buffer_size, self.neighbors[STRAIGHT_LEFT].send_size + 1);
            } else {
                self.neighbors[STRAIGHT_LEFT].send_buffer[self.neighbors[STRAIGHT_LEFT].send_size] = particles[i];
            }
            self.neighbors[STRAIGHT_LEFT].send_size = self.neighbors[STRAIGHT_LEFT].send_size + 1;
        }
        if (self.to_y - cutoff < particles[i].y && self.neighbors[BOTTOM_STRAIGHT].rank != -1) { // BOTTOM STRAIGHT particle
            if (self.neighbors[BOTTOM_STRAIGHT].buffer_size <= self.neighbors[BOTTOM_STRAIGHT].send_size){
                printf("Subrank %d, neighbor %d: send_buffer isn't big enough. Has: %d, needed: %d\n", self.subrank, self.neighbors[BOTTOM_STRAIGHT].rank, self.neighbors[BOTTOM_STRAIGHT].buffer_size, self.neighbors[BOTTOM_STRAIGHT].send_size + 1);
            } else {
                self.neighbors[BOTTOM_STRAIGHT].send_buffer[self.neighbors[BOTTOM_STRAIGHT].send_size] = particles[i];
            }
            self.neighbors[BOTTOM_STRAIGHT].send_size = self.neighbors[BOTTOM_STRAIGHT].send_size + 1;
        }
        if (particles[i].y <= self.from_y + cutoff && self.neighbors[TOP_STRAIGHT].rank != -1) { // TOP STRAIGHT particle
            if (self.neighbors[TOP_STRAIGHT].buffer_size <= self.neighbors[TOP_STRAIGHT].send_size){
                printf("Subrank %d, neighbor %d: send_buffer isn't big enough. Has: %d, needed: %d\n", self.subrank, self.neighbors[TOP_STRAIGHT].rank, self.neighbors[TOP_STRAIGHT].buffer_size, self.neighbors[TOP_STRAIGHT].send_size + 1);
            } else {
                self.neighbors[TOP_STRAIGHT].send_buffer[self.neighbors[TOP_STRAIGHT].send_size] = particles[i];
            }
            self.neighbors[TOP_STRAIGHT].send_size = self.neighbors[TOP_STRAIGHT].send_size + 1;
        }
    }
}
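//
// The eight branches above differ only in the bounds test and the neighbor
// index. A minimal refactoring sketch (hypothetical; not used elsewhere in
// this file): one helper that stages a particle into a neighbor's buffer.
//
// static void stage_particle( neighbor_t *nb, int subrank, const particle_t &p )
// {
//     if( nb->rank == -1 )
//         return;
//     if( nb->send_size < nb->buffer_size )
//         nb->send_buffer[nb->send_size] = p;
//     else
//         printf( "Subrank %d, neighbor %d: send_buffer isn't big enough. Has: %d, needed: %d\n",
//                 subrank, nb->rank, nb->buffer_size, nb->send_size + 1 );
//     nb->send_size++;
// }
//
// Each branch then reduces to, e.g. for TOP_LEFT:
//     if( particles[i].x <= self.from_x + cutoff && particles[i].y <= self.from_y + cutoff )
//         stage_particle( &self.neighbors[TOP_LEFT], self.subrank, particles[i] );
//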
//
// timer
//
double read_timer( )
{
    static bool initialized = false;
    static struct timeval start;
    struct timeval end;
    if( !initialized )
    {
        gettimeofday( &start, NULL );
        initialized = true;
    }
    gettimeofday( &end, NULL );
    return (end.tv_sec - start.tv_sec) + 1.0e-6 * (end.tv_usec - start.tv_usec);
}

//
// keep density constant
//
double set_size( int n )
{
    size = sqrt( density * n );
    return size;
}

//
// Initialize the particle positions and velocities
//
void init_particles( block_t self, particle_t *p, double size_x, double size_y )
{
    srand48( time( NULL ) );

    int sx = (int)ceil(sqrt((double)self.size));
    int sy = (self.size+sx-1)/sx;

    int *shuffle = (int*)malloc( self.size * sizeof(int) );
    for( int i = 0; i < self.size; i++ )
        shuffle[i] = i;

    for( int i = 0; i < self.size; i++ )
    {
        //
        // make sure particles are not spatially sorted
        //
        int j = lrand48()%(self.size-i);
        int k = shuffle[j];
        shuffle[j] = shuffle[self.size-i-1];

        //
        // distribute particles evenly to ensure proper spacing
        //
        p[i+self.offset].x = self.from_x + size_x*(1.+(k%sx))/(1+sx);
        p[i+self.offset].y = self.from_y + size_y*(1.+(k/sx))/(1+sy);

        //
        // assign random velocities within a bound
        //
        p[i+self.offset].vx = drand48()*2-1;
        p[i+self.offset].vy = drand48()*2-1;
    }
    free( shuffle );
}

//
// interact two particles
//
void apply_force( particle_t &particle, particle_t &neighbor )
{
    double dx = neighbor.x - particle.x;
    double dy = neighbor.y - particle.y;
    double r2 = dx * dx + dy * dy;
    if( r2 > cutoff*cutoff )
        return;
    r2 = fmax( r2, min_r*min_r );
    double r = sqrt( r2 );

    //
    // very simple short-range repulsive force
    //
    double coef = ( 1 - cutoff / r ) / r2 / mass;
    particle.ax += coef * dx;
    particle.ay += coef * dy;
}

//
// integrate the ODE
//
void move( particle_t &p )
{
    //
    // slightly simplified Velocity Verlet integration
    // conserves energy better than explicit Euler method
    //
    p.vx += p.ax * dt;
    p.vy += p.ay * dt;
    p.x += p.vx * dt;
    p.y += p.vy * dt;

    //
    // bounce from walls
    //
    while( p.x < 0 || p.x > size )
    {
        p.x = p.x < 0 ? -p.x : 2*size-p.x;
        p.vx = -p.vx;
    }
    while( p.y < 0 || p.y > size )
    {
        p.y = p.y < 0 ? -p.y : 2*size-p.y;
        p.vy = -p.vy;
    }
}

//
// I/O routines
//
void save( FILE *f, int n, particle_t *p )
{
    static bool first = true;
    if( first )
    {
        fprintf( f, "%d %g\n", n, size );
        first = false;
    }
    for( int i = 0; i < n; i++ )
        fprintf( f, "%g %g\n", p[i].x, p[i].y );
}

//
// command line option processing
//
int find_option( int argc, char **argv, const char *option )
{
    for( int i = 1; i < argc; i++ )
        if( strcmp( argv[i], option ) == 0 )
            return i;
    return -1;
}

int read_int( int argc, char **argv, const char *option, int default_value )
{
    int iplace = find_option( argc, argv, option );
    if( iplace >= 0 && iplace < argc-1 )
        return atoi( argv[iplace+1] );
    return default_value;
}

char *read_string( int argc, char **argv, const char *option, char *default_value )
{
    int iplace = find_option( argc, argv, option );
    if( iplace >= 0 && iplace < argc-1 )
        return argv[iplace+1];
    return default_value;
}
---- common.h ----
#ifndef __CS267_COMMON_H__
#define __CS267_COMMON_H__

#include <mpi.h>   // for MPI_Request / MPI_Status used below

inline int min( int a, int b ) { return a < b ? a : b; }
inline int max( int a, int b ) { return a > b ? a : b; }

//
// saving parameters
//
const int NSTEPS = 1000;
const int SAVEFREQ = 10;
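//
// Neighbor indices form a 3x3 stencil around the local block
// (x grows right, y grows down):
//
//   TOP_LEFT(3)        TOP_STRAIGHT(4)       TOP_RIGHT(5)
//   STRAIGHT_LEFT(2)       [ self ]          STRAIGHT_RIGHT(6)
//   BOTTOM_LEFT(1)     BOTTOM_STRAIGHT(0)    BOTTOM_RIGHT(7)
//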
const int BOTTOM_STRAIGHT = 0,
          BOTTOM_LEFT     = 1,
          STRAIGHT_LEFT   = 2,
          TOP_LEFT        = 3,
          TOP_STRAIGHT    = 4,
          TOP_RIGHT       = 5,
          STRAIGHT_RIGHT  = 6,
          BOTTOM_RIGHT    = 7;

//
// particle data structure
//
typedef struct
{
    double x;
    double y;
    double vx;
    double vy;
    double ax;
    double ay;
} particle_t;

typedef struct
{
    int offset;
    int send_size;
    int recv_size;
    int proc_x;
    int proc_y;
    int block_x;
    int block_y;
    int rank;
    particle_t *send_buffer;
    int buffer_size;
    MPI_Request recv_req;
    MPI_Request send_req;
    MPI_Status recv_stat;
    MPI_Status send_stat;
} neighbor_t;

typedef struct
{
    double from_x;
    double from_y;
    double to_x;
    double to_y;
    neighbor_t *neighbors;
    int offset;
    int size;
    int block_x;
    int subrank;
    int block_y;
} block_t;

//
// timing routines
//
double read_timer( );

int calc_array_size(int n, double size_x, double size_y, int split_factor, int n_proc);
void copy_to_ghost_buffers( particle_t *particles, block_t self);
void setup_ghost_buffers( int my_x, int my_y, neighbor_t *neighbors, double size_x, double size_y, int start_offset);
void setup_neighbors( block_t block, int my_x, int my_y, int proc_width, int proc_height, int split_factor );

//
// simulation routines
//
double set_size( int n );
void init_particles( block_t self, particle_t *p, double size_x, double size_y );
void apply_force( particle_t &particle, particle_t &neighbor );
void move( particle_t &p );

//
// I/O routines
//
FILE *open_save( char *filename, int n );
void save( FILE *f, int n, particle_t *p );

//
// argument processing routines
//
int find_option( int argc, char **argv, const char *option );
int read_int( int argc, char **argv, const char *option, int default_value );
char *read_string( int argc, char **argv, const char *option, char *default_value );

#endif
---- test file ----
test
---- Makefile ----
HOST = $(shell hostname)
BANG = $(shell expr match `hostname` ccom-bang)
BANG-COMPUTE = $(shell expr match `hostname` compute)
LILLIPUT = $(shell expr match `hostname` lilliput)

ifneq ($(BANG), 0)
PUB = /share/class/public/cse260-fa12
include $(PUB)/Arch/arch.gnu.generic
else
ifneq ($(BANG-COMPUTE), 0)
PUB = /share/class/public/cse260-fa12
include $(PUB)/Arch/arch.gnu.generic
else
ifneq ($(LILLIPUT), 0)
PUB = /class/public/cse260-fa12
include $(PUB)/Arch/arch.intel.generic
else
# PUB = /Users/baden/lib
include $(PUB)/Arch/arch.gnu
# include $(PUB)/Arch/arch.gnu-4.5
endif
endif
endif

#
# Add symbol table information for gdb/cachegrind
ifeq ($(debug), 1)
CFLAGS += -g
LDFLAGS += -g
C++FLAGS += -g
endif

# Add symbol table information for gprof
ifeq ($(gprof), 1)
CFLAGS += -g -pg
C++FLAGS += -g -pg
LDFLAGS += -g -pg
endif

# If you want to compile for single precision,
# specify single=1 on the "make" command line
ifeq ($(single), 1)
else
C++FLAGS += -D_DOUBLE
CFLAGS += -D_DOUBLE
endif

# If you want to compile so that you call the plotter for
# flattened 2D arrays (implemented as 1D arrays),
# specify flattened=1 on the "make" command line
ifeq ($(flattened), 1)
C++FLAGS += -DPLOT1D
CFLAGS += -DPLOT1D
endif

# If you want to use restrict pointers, make restrict=1
# This applies to the hand-coded version
ifeq ($(restrict), 1)
C++FLAGS += -D__RESTRICT
CFLAGS += -D__RESTRICT
ifneq ($(CARVER), 0)
C++FLAGS += -restrict
CFLAGS += -restrict
endif
endif

#DEBUG += -DDEBUG

TARGETS = mpi
OBJECTS = common.o mpi.o

#ifeq ($(no-mpi),1)
#OBJECTS += Timer.o
#endif

app: $(TARGETS)

mpi: mpi.o common.o
	$(C++LINK) $(LDFLAGS) -o $@ mpi.o common.o $(LDLIBS)

clean:
	$(RM) *.o $(TARGETS);
	$(RM) core.*;
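Typical invocations, assuming the class arch include files resolve on the
build host (paths above): "make" builds the mpi binary via the app target,
"make debug=1" adds -g for gdb/cachegrind, and "make clean" removes the
objects and the binary.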
---- mpi.cpp ----
#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include "common.h"

//
// benchmarking program
//
int main( int argc, char **argv )
{
    //
    // process command line parameters
    //
    if( find_option( argc, argv, "-h" ) >= 0 )
    {
        printf( "Options:\n" );
        printf( "-h to see this help\n" );
        printf( "-n <int> to set the number of particles\n" );
        printf( "-proc_width <int> to set the width of the processor grid\n" );
        printf( "-proc_height <int> to set the height of the processor grid\n" );
        printf( "-split_factor <int> to set the split factor of the processor grid\n" );
        printf( "-o <filename> to specify the output file name\n" );
        return 0;
    }

    int n = read_int( argc, argv, "-n", 1000 );
    int proc_width = read_int( argc, argv, "-proc_width", 0 );
    int proc_height = read_int( argc, argv, "-proc_height", 0 );
    int split_factor = read_int( argc, argv, "-split_factor", 0 );
    char *savename = read_string( argc, argv, "-o", NULL );

    //
    // set up MPI
    //
    int n_proc, myrank;
    MPI_Init( &argc, &argv );
    MPI_Comm_size( MPI_COMM_WORLD, &n_proc );
    MPI_Comm_rank( MPI_COMM_WORLD, &myrank );

    // Ensure the processor geometry matches the number of ranks requested.
    // Every rank checks, so none is left hanging; rank 0 reports the error.
    if ((proc_height * proc_width) != n_proc){
        if (!myrank)
            printf("\n *** The number of processors in the geometry (%d) is not the same as the number requested (%d)\n", proc_height*proc_width, n_proc);
        MPI_Abort(MPI_COMM_WORLD, -1);
    }

    //
    // initialize sizes and ranks.
    //
    double size = set_size( n );
    double size_x = (size / proc_width) / split_factor;
    double size_y = (size / proc_height) / split_factor;
    int my_x = myrank % proc_width;
    int my_y = myrank / proc_width;
    block_t *blocks = (block_t*) malloc(split_factor * split_factor * sizeof(block_t));
    int block_no = (n_proc * split_factor * split_factor);

    //
    // allocate generic resources
    //
    FILE *fsave = savename && myrank == 0 ? fopen( savename, "w" ) : NULL;
    int array_size = calc_array_size(n, size_x, size_y, split_factor, n_proc);
    particle_t *particles = (particle_t*) malloc( array_size * sizeof(particle_t));
    particle_t *particles_prev = (particle_t*) malloc( array_size * sizeof(particle_t));
    MPI_Datatype PARTICLE;
    MPI_Type_contiguous( 6, MPI_DOUBLE, &PARTICLE );
    MPI_Type_commit( &PARTICLE );
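    // PARTICLE mirrors particle_t in common.h: six contiguous doubles
    // (x, y, vx, vy, ax, ay), so whole particles travel in one message.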
    //
    // set up the blocks on this rank and their neighbor tables
    //
    int ij;
    for (int i = 0; i < split_factor; i++){
        for (int j = 0; j < split_factor; j++){
            ij = i*split_factor + j;
            blocks[ij].block_x = j;
            blocks[ij].block_y = i;
            blocks[ij].from_x = (my_x + j * proc_width) * size_x;
            blocks[ij].from_y = (my_y + i * proc_height) * size_y;
            blocks[ij].to_x = blocks[ij].from_x + size_x;
            blocks[ij].to_y = blocks[ij].from_y + size_y;
            blocks[ij].subrank = ((j*n_proc) + (i*split_factor*n_proc) + myrank);
            blocks[ij].size = (blocks[ij].subrank < n % block_no ? n / block_no + 1 : n / block_no);
            blocks[ij].offset = (ij == 0 ? 0 : blocks[ij - 1].size + blocks[ij - 1].offset);
            blocks[ij].neighbors = (neighbor_t*) malloc( 8 * sizeof(neighbor_t) );
            setup_neighbors( blocks[ij], my_x, my_y, proc_width, proc_height, split_factor );
        }
    }
    int last_block_index = split_factor*split_factor - 1;
    int start_offset = blocks[last_block_index].offset + blocks[last_block_index].size;
    for (int i = 0; i < split_factor*split_factor; i++){
        setup_ghost_buffers(my_x, my_y, blocks[i].neighbors, size_x, size_y, start_offset);
        start_offset = blocks[i].neighbors[7].offset + blocks[i].neighbors[7].buffer_size;
    }
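    // Layout of the particles array from here on: the rank's own blocks
    // first, each block's particles stored contiguously at its offset,
    // followed by one ghost region per neighbor of each block at the
    // offsets just computed. Ghosts are received directly into those regions.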
    // Set up receives: one per live neighbor, tagged with the receiving
    // block's index so messages land in the right ghost region.
    for (int i = 0; i < split_factor * split_factor; i++){
        for(int j = 0; j < 8; j++) {
            if (blocks[i].neighbors[j].rank != -1){
                MPI_Irecv(&particles[blocks[i].neighbors[j].offset],
                          blocks[i].neighbors[j].buffer_size,
                          PARTICLE,
                          blocks[i].neighbors[j].rank,
                          blocks[i].block_x + blocks[i].block_y*split_factor,
                          MPI_COMM_WORLD,
                          &blocks[i].neighbors[j].recv_req );
            }
        }
    }
    // Copy boundary particles to the ghost buffers and send them.
    for (int i = 0; i < split_factor * split_factor; i++){
        init_particles( blocks[i], particles, size_x, size_y );
        copy_to_ghost_buffers( particles, blocks[i] );
        for(int j = 0; j < 8; j++){
            if (blocks[i].neighbors[j].rank != -1){
                MPI_Isend(&blocks[i].neighbors[j].send_buffer[0],
                          blocks[i].neighbors[j].send_size,
                          PARTICLE,
                          blocks[i].neighbors[j].rank,
                          blocks[i].neighbors[j].block_x + blocks[i].neighbors[j].block_y*split_factor,
                          MPI_COMM_WORLD,
                          &blocks[i].neighbors[j].send_req );
            }
        }
    }
    // Wait for the receives and record how many particles actually arrived.
    for (int i = 0; i < split_factor * split_factor; i++){
        for(int j = 0; j < 8; j++) {
            if (blocks[i].neighbors[j].rank != -1) {
                MPI_Wait(&blocks[i].neighbors[j].recv_req, &blocks[i].neighbors[j].recv_stat);
                MPI_Get_count(&blocks[i].neighbors[j].recv_stat, PARTICLE, &blocks[i].neighbors[j].recv_size);
            }
        }
    }
    // Ensure all sends have completed before the send buffers are reused.
    for (int i = 0; i < split_factor * split_factor; i++){
        for(int j = 0; j < 8; j++) {
            if (blocks[i].neighbors[j].rank != -1) {
                MPI_Wait(&blocks[i].neighbors[j].send_req, &blocks[i].neighbors[j].send_stat);
            }
        }
    }
    particle_t *tmp = particles; particles = particles_prev; particles_prev = tmp;

    // Rank 0 gathers all particles whenever output is saved.
    int *n_proc_sizes = new int[n_proc];
    int *n_proc_offset = new int[n_proc];
    int count;

    //
    // simulate a number of time steps
    //
    double simulation_time = read_timer( );
    for( int step = 0; step < NSTEPS; step++ )
    {
        //
        // save current step if necessary (slightly different semantics than in other codes)
        //
        if( (step%SAVEFREQ) == 0 && savename ) {
            // Number of locally owned particles to report to rank 0
            int size_of_array = blocks[last_block_index].size + blocks[last_block_index].offset;
            MPI_Gather( &size_of_array, 1, MPI_INT, n_proc_sizes, 1, MPI_INT, 0, MPI_COMM_WORLD );
            // Calculate the displacements for the gather
            if (myrank == 0){
                for(int i = 0; i < n_proc; i++){
                    if (i == 0) {
                        n_proc_offset[i] = 0;
                    } else {
                        n_proc_offset[i] = n_proc_offset[i-1] + n_proc_sizes[i-1];
                    }
                }
            }
            MPI_Gatherv( &particles_prev[blocks[0].offset], size_of_array, PARTICLE, particles, n_proc_sizes, n_proc_offset, PARTICLE, 0, MPI_COMM_WORLD );
            if (myrank == 0 && fsave) {
                save( fsave, n, particles );
            }
        }
        //
        // compute forces within each block and against the ghost particles
        // received from its neighbors
        //
        for( int i = 0; i < split_factor * split_factor; i++){
            for(int k = blocks[i].offset; k < blocks[i].offset + blocks[i].size; k++){
                particles_prev[k].ax = particles_prev[k].ay = 0;
                for(int l = blocks[i].offset; l < blocks[i].offset + blocks[i].size; l++ ) {
                    apply_force( particles_prev[k], particles_prev[l] );
                }
                for(int j = 0; j < 8; j++){
                    for(int l = blocks[i].neighbors[j].offset; l < blocks[i].neighbors[j].offset + blocks[i].neighbors[j].recv_size; l++ ) {
                        apply_force( particles_prev[k], particles_prev[l] );
                    }
                }
            }
            // Ghost particles get forces and are integrated too, so a
            // particle that crosses into this block can be adopted below.
            for(int j = 0; j < 8; j++){
                for(int k = blocks[i].neighbors[j].offset; k < blocks[i].neighbors[j].offset + blocks[i].neighbors[j].recv_size; k++ ) {
                    particles_prev[k].ax = particles_prev[k].ay = 0;
                    for(int l = blocks[i].offset; l < blocks[i].offset + blocks[i].size; l++ ) {
                        apply_force( particles_prev[k], particles_prev[l] );
                    }
                    for(int z = 0; z < 8; z++){
                        for(int l = blocks[i].neighbors[z].offset; l < blocks[i].neighbors[z].offset + blocks[i].neighbors[z].recv_size; l++ ) {
                            apply_force( particles_prev[k], particles_prev[l] );
                        }
                    }
                }
            }
        }
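        // Note: the force loops are all-pairs within a block and its ghosts,
        // O(size^2) per block; the k == l self-pairing is harmless because
        // apply_force then sees dx = dy = 0 and adds no acceleration.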
        //
        // move particles; keep only those still inside the block, and adopt
        // ghost particles that have moved into it
        //
        int new_offset = blocks[0].offset;
        for( int i = 0; i < split_factor * split_factor; i++){
            count = 0;
            int removed = 0;   // debug counters: particles that left / entered
            int newin = 0;
            for(int k = blocks[i].offset; k < blocks[i].offset + blocks[i].size; k++ ){
                move(particles_prev[k]);
                if (particles_prev[k].x >= blocks[i].from_x && particles_prev[k].x < blocks[i].to_x &&
                    particles_prev[k].y >= blocks[i].from_y && particles_prev[k].y < blocks[i].to_y){
                    particles[count+new_offset] = particles_prev[k];
                    count = count + 1;
                } else {
                    removed = removed + 1;
                }
            }
            for(int j = 0; j < 8; j++){
                for(int k = blocks[i].neighbors[j].offset; k < blocks[i].neighbors[j].offset + blocks[i].neighbors[j].recv_size; k++ ) {
                    move(particles_prev[k]);
                    if (particles_prev[k].x >= blocks[i].from_x && particles_prev[k].x < blocks[i].to_x &&
                        particles_prev[k].y >= blocks[i].from_y && particles_prev[k].y < blocks[i].to_y){
                        particles[count + new_offset] = particles_prev[k];
                        count = count + 1;
                        newin = newin + 1;
                    }
                }
                blocks[i].neighbors[j].recv_size = 0;
            }
            blocks[i].size = count;
            blocks[i].offset = new_offset;
            new_offset = blocks[i].offset + blocks[i].size;
        }
        // Recompute the ghost-region offsets, since the block sizes changed.
        int start_offset = blocks[last_block_index].offset + blocks[last_block_index].size;
        for (int i = 0; i < split_factor*split_factor; i++){
            for(int j = 0; j < 8; j++) {
                blocks[i].neighbors[j].offset = start_offset;
                start_offset = start_offset + blocks[i].neighbors[j].buffer_size;
            }
        }
        // Set up receives.
        for (int i = 0; i < split_factor * split_factor; i++){
            for(int j = 0; j < 8; j++) {
                if (blocks[i].neighbors[j].rank != -1){
                    MPI_Irecv(&particles[blocks[i].neighbors[j].offset],
                              blocks[i].neighbors[j].buffer_size,
                              PARTICLE,
                              blocks[i].neighbors[j].rank,
                              blocks[i].block_x + blocks[i].block_y*split_factor,
                              MPI_COMM_WORLD,
                              &blocks[i].neighbors[j].recv_req );
                }
            }
        }
        // Copy boundary particles to the ghost buffers and send them.
        for (int i = 0; i < split_factor * split_factor; i++){
            copy_to_ghost_buffers( particles, blocks[i] );
            for(int j = 0; j < 8; j++){
                if (blocks[i].neighbors[j].rank != -1){
                    MPI_Isend(&blocks[i].neighbors[j].send_buffer[0],
                              blocks[i].neighbors[j].send_size,
                              PARTICLE,
                              blocks[i].neighbors[j].rank,
                              blocks[i].neighbors[j].block_x + blocks[i].neighbors[j].block_y*split_factor,
                              MPI_COMM_WORLD,
                              &blocks[i].neighbors[j].send_req );
                }
            }
        }
        // Wait for the receives and record how many particles arrived.
        for (int i = 0; i < split_factor * split_factor; i++){
            for(int j = 0; j < 8; j++) {
                if (blocks[i].neighbors[j].rank != -1) {
                    MPI_Wait(&blocks[i].neighbors[j].recv_req, &blocks[i].neighbors[j].recv_stat);
                    MPI_Get_count(&blocks[i].neighbors[j].recv_stat, PARTICLE, &blocks[i].neighbors[j].recv_size);
                }
            }
        }
        // Swapping here is safe: the in-flight sends read from the separate
        // send buffers, not from the particle arrays.
        particle_t *tmp = particles; particles = particles_prev; particles_prev = tmp;
        // Ensure all sends have completed before the next iteration reuses
        // the send buffers.
        for (int i = 0; i < split_factor * split_factor; i++){
            for(int j = 0; j < 8; j++) {
                if (blocks[i].neighbors[j].rank != -1) {
                    MPI_Wait(&blocks[i].neighbors[j].send_req, &blocks[i].neighbors[j].send_stat);
                }
            }
        }
    }
    simulation_time = read_timer( ) - simulation_time;
    if( !myrank )
        printf( "n = %d, n_procs = %d, simulation time = %g s\n", n, n_proc, simulation_time );

    //
    // release resources
    //
    for(int i = 0; i < split_factor*split_factor; i++){
        for(int j = 0; j < 8; j++)
            free( blocks[i].neighbors[j].send_buffer );
        free( blocks[i].neighbors );
    }
    free( blocks );
    free( particles );
    free( particles_prev );
    delete [] n_proc_sizes;
    delete [] n_proc_offset;
    if( fsave )
        fclose( fsave );
    MPI_Finalize( );
    return 0;
}