Created
December 7, 2012 03:49
-
-
Save jstroem/4230625 to your computer and use it in GitHub Desktop.
cse260 project standard NOT distribution
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# This script is intepreted by the Bourne Shell, sh | |
# | |
# Documentation for SGE is found in: | |
# http://docs.oracle.com/cd/E19279-01/820-3257-12/n1ge.html | |
# | |
# Tell SGE which shell to run the job script in rather than depending | |
# on SGE to try and figure it out. | |
#$ -S /bin/bash | |
# | |
# Export all my environment variables to the job | |
#$ -V | |
# | |
# Tun the job in the same directory from which you submitted it | |
#$ -cwd | |
# | |
# | |
# --- Don't change anything above this line --- | |
# | |
# Give a name to the job | |
#$ -N MPI | |
# | |
# Specify a time limit for the job, not more than 30 minutes | |
#$ -l h_rt=00:02:00 | |
# | |
# Specify the parallel environment and number of cores, | |
# If not a multiple of 8, you'll get the whole node anyway | |
#$ -pe orte 8 | |
# | |
# Join stdout and stderr so they are reported in job output file | |
#$ -j y | |
# | |
# | |
# Choose the queue to run the job | |
# | |
# Debug queue: only one node may be used at a time for up to 30 minutes | |
# Interactive or batch jobs, maximum of 1 job per user running at a time | |
# | |
# Normal queue: job may use all available compute nodes (256 cores) | |
# for up to 60 minutes | |
# Batch jobs, maximum of 2 jobs per user running at a time | |
# To use more than one node, specify the "normal" queue | |
#$ -q normal.q | |
# #$ -q debug.q | |
# | |
# Specifies the circumstances under which mail is to be sent to the job owner | |
# defined by -M option. For example, options "bea" cause mail to be sent at the | |
# begining, end, and at abort time (if it happens) of the job. | |
# Option "n" means no mail will be sent. | |
#$ -m aeb | |
# | |
# *** Change to the address you want the notification sent to, and | |
# *** REMOVE the blank between the # and the $ | |
#$ -M [email protected] | |
# | |
echo | |
echo " *** Current working directory" | |
pwd | |
echo | |
echo " *** Compiler" | |
# Output which compiler are we using and the environment | |
mpicc -v | |
echo | |
echo " *** Environment" | |
printenv | |
echo | |
echo ">>> Job Starts" | |
date | |
mpirun -np $NSLOTS ./mpi -n 500 -proc_width 4 -proc_height 2 -o mpi.txt -c checksum.txt | |
date | |
echo ">>> Job Ends" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <stdio.h> | |
#include <assert.h> | |
#include <float.h> | |
#include <string.h> | |
#include <math.h> | |
#include <time.h> | |
#include <sys/time.h> | |
#include "common.h" | |
double size; | |
// | |
// tuned constants | |
// | |
#define density 0.0010 | |
#define mass 0.01 | |
#define cutoff 0.01 | |
#define min_r (cutoff/100) | |
#define dt 0.0005 | |
// | |
// timer | |
// | |
double read_timer( ) | |
{ | |
static bool initialized = false; | |
static struct timeval start; | |
struct timeval end; | |
if( !initialized ) | |
{ | |
gettimeofday( &start, NULL ); | |
initialized = true; | |
} | |
gettimeofday( &end, NULL ); | |
return (end.tv_sec - start.tv_sec) + 1.0e-6 * (end.tv_usec - start.tv_usec); | |
} | |
// | |
// keep density constant | |
// | |
void set_size( int n ) | |
{ | |
size = sqrt( density * n ); | |
} | |
// | |
// Initialize the particle positions and velocities | |
// | |
void init_particles( int n, particle_t *p ) | |
{ | |
srand48( 1 ); | |
int sx = (int)ceil(sqrt((double)n)); | |
int sy = (n+sx-1)/sx; | |
int *shuffle = (int*)malloc( n * sizeof(int) ); | |
for( int i = 0; i < n; i++ ) | |
shuffle[i] = i; | |
for( int i = 0; i < n; i++ ) | |
{ | |
// | |
// make sure particles are not spatially sorted | |
// | |
int j = lrand48()%(n-i); | |
int k = shuffle[j]; | |
shuffle[j] = shuffle[n-i-1]; | |
// | |
// distribute particles evenly to ensure proper spacing | |
// | |
p[i].x = size*(1.+(k%sx))/(1+sx); | |
p[i].y = (size/2)*(1.+(k/sx))/(1+sy); | |
p[i].index = i; | |
// | |
// assign random velocities within a bound | |
// | |
p[i].vx = drand48()*2-1; | |
p[i].vy = drand48()*2-1; | |
} | |
free( shuffle ); | |
} | |
// | |
// interact two particles | |
// | |
void apply_force( particle_t &particle, particle_t &neighbor ) | |
{ | |
double dx = neighbor.x - particle.x; | |
double dy = neighbor.y - particle.y; | |
double r2 = dx * dx + dy * dy; | |
if( r2 > cutoff*cutoff ) | |
return; | |
r2 = fmax( r2, min_r*min_r ); | |
double r = sqrt( r2 ); | |
// | |
// very simple short-range repulsive force | |
// | |
double coef = ( 1 - cutoff / r ) / r2 / mass; | |
particle.ax += coef * dx; | |
particle.ay += coef * dy; | |
} | |
// | |
// integrate the ODE | |
// | |
void move( particle_t &p ) | |
{ | |
// | |
// slightly simplified Velocity Verlet integration | |
// conserves energy better than explicit Euler method | |
// | |
p.vx += p.ax * dt; | |
p.vy += p.ay * dt; | |
p.x += p.vx * dt; | |
p.y += p.vy * dt; | |
// | |
// bounce from walls | |
// | |
while( p.x < 0 || p.x > size ) | |
{ | |
p.x = p.x < 0 ? -p.x : 2*size-p.x; | |
p.vx = -p.vx; | |
} | |
while( p.y < 0 || p.y > size ) | |
{ | |
p.y = p.y < 0 ? -p.y : 2*size-p.y; | |
p.vy = -p.vy; | |
} | |
} | |
// | |
// I/O routines | |
// | |
void save( FILE *f, int n, particle_t *p ) | |
{ | |
static bool first = true; | |
if( first ) | |
{ | |
fprintf( f, "%d %g\n", n, size ); | |
first = false; | |
} | |
for( int i = 0; i < n; i++ ) | |
fprintf( f, "%g %g\n", p[i].x, p[i].y ); | |
} | |
// | |
// command line option processing | |
// | |
int find_option( int argc, char **argv, const char *option ) | |
{ | |
for( int i = 1; i < argc; i++ ) | |
if( strcmp( argv[i], option ) == 0 ) | |
return i; | |
return -1; | |
} | |
int read_int( int argc, char **argv, const char *option, int default_value ) | |
{ | |
int iplace = find_option( argc, argv, option ); | |
if( iplace >= 0 && iplace < argc-1 ) | |
return atoi( argv[iplace+1] ); | |
return default_value; | |
} | |
char *read_string( int argc, char **argv, const char *option, char *default_value ) | |
{ | |
int iplace = find_option( argc, argv, option ); | |
if( iplace >= 0 && iplace < argc-1 ) | |
return argv[iplace+1]; | |
return default_value; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef __CS267_COMMON_H__ | |
#define __CS267_COMMON_H__ | |
inline int min( int a, int b ) { return a < b ? a : b; } | |
inline int max( int a, int b ) { return a > b ? a : b; } | |
// | |
// saving parameters | |
// | |
const int NSTEPS = 1000; | |
const int SAVEFREQ = 10; | |
// | |
// particle data structure | |
// | |
typedef struct | |
{ | |
double index; | |
double x; | |
double y; | |
double vx; | |
double vy; | |
double ax; | |
double ay; | |
} particle_t; | |
// | |
// timing routines | |
// | |
double read_timer( ); | |
// | |
// simulation routines | |
// | |
void set_size( int n ); | |
void init_particles( int n, particle_t *p ); | |
void apply_force( particle_t &particle, particle_t &neighbor ); | |
void move( particle_t &p ); | |
// | |
// I/O routines | |
// | |
FILE *open_save( char *filename, int n ); | |
void save( FILE *f, int n, particle_t *p ); | |
// | |
// argument processing routines | |
// | |
int find_option( int argc, char **argv, const char *option ); | |
int read_int( int argc, char **argv, const char *option, int default_value ); | |
char *read_string( int argc, char **argv, const char *option, char *default_value ); | |
#endif |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
HOST = $(shell hostname) | |
BANG = $(shell expr match `hostname` ccom-bang) | |
BANG-COMPUTE = $(shell expr match `hostname` compute) | |
LILLIPUT = $(shell expr match `hostname` lilliput) | |
ifneq ($(BANG), 0) | |
PUB = /share/class/public/cse260-fa12 | |
include $(PUB)/Arch/arch.gnu.generic | |
else | |
ifneq ($(BANG-COMPUTE), 0) | |
PUB = /share/class/public/cse260-fa12 | |
include $(PUB)/Arch/arch.gnu.generic | |
else | |
ifneq ($(LILLIPUT), 0) | |
PUB = /class/public/cse260-fa12 | |
include $(PUB)/Arch/arch.intel.generic | |
else | |
# PUB = /Users/baden/lib | |
include $(PUB)/Arch/arch.gnu | |
# include $(PUB)/Arch/arch.gnu-4.5 | |
endif | |
endif | |
endif | |
# | |
# Add symbol table information for gdb/cachegrind | |
ifeq ($(debug), 1) | |
CFLAGS += -g | |
LDFLAGS += -g | |
C++FLAGS += -g | |
endif | |
# Add symbol table information for gprof | |
ifeq ($(gprof), 1) | |
CFLAGS += -g -pg | |
C++FLAGS += -g -pg | |
LDFLAGS += -g -pg | |
endif | |
# If you want to compile for single precision, | |
# specify single=1 on the "make" command line | |
ifeq ($(single), 1) | |
else | |
C++FLAGS += -D_DOUBLE | |
CFLAGS += -D_DOUBLE | |
endif | |
# If you want to compile so that you call the plotter for | |
# flattened 2D arrays (implemented as 1D arrays) | |
# specify flattened=1 on the "make" command line | |
ifeq ($(flattened), 1) | |
C++FLAGS += -DPLOT1D | |
CFLAGS += -DPLOT1D | |
endif | |
# If you want to use restrict pointers, make restrict=1 | |
# This applies to the hand code version | |
ifeq ($(restrict), 1) | |
C++FLAGS += -D__RESTRICT | |
CFLAGS += -D__RESTRICT | |
ifneq ($(CARVER), 0) | |
C++FLAGS += -restrict | |
CFLAGS += -restrict | |
endif | |
endif | |
#DEBUG += -DDEBUG | |
TARGETS = mpi | |
app: $(TARGETS) | |
OBJECTS = common.o mpi.o.o | |
#ifeq ($(no-mpi),1) | |
#OBJECTS += Timer.o | |
#endif | |
app: $(TARGETS) | |
mpi: mpi.o common.o | |
$(C++LINK) $(LDFLAGS) -o $@ mpi.o common.o $(LDLIBS) | |
clean: | |
$(RM) *.o $(TARGETS); | |
$(RM) core; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <mpi.h> | |
#include <stdlib.h> | |
#include <stdio.h> | |
#include <assert.h> | |
#include "common.h" | |
// | |
// benchmarking program | |
// | |
int main( int argc, char **argv ) | |
{ | |
// | |
// process command line parameters | |
// | |
if( find_option( argc, argv, "-h" ) >= 0 ) | |
{ | |
printf( "Options:\n" ); | |
printf( "-h to see this help\n" ); | |
printf( "-n <int> to set the number of particles\n" ); | |
printf( "-o <filename> to specify the output file name\n" ); | |
printf( "-c <checksum> to specify the output checksum file name\n" ); | |
return 0; | |
} | |
int n = read_int( argc, argv, "-n", 1000 ); | |
char *savename = read_string( argc, argv, "-o", NULL ); | |
char *checksum = read_string( argc, argv, "-c", NULL ); | |
// | |
// set up MPI | |
// | |
int n_proc, rank; | |
MPI_Init( &argc, &argv ); | |
MPI_Comm_size( MPI_COMM_WORLD, &n_proc ); | |
MPI_Comm_rank( MPI_COMM_WORLD, &rank ); | |
// | |
// allocate generic resources | |
// | |
FILE *fsave = savename && rank == 0 ? fopen( savename, "w" ) : NULL; | |
FILE *fchecksum = checksum && rank == 0 ? fopen( checksum, "w" ) : NULL; | |
particle_t *particles = (particle_t*) malloc( n * sizeof(particle_t) ); | |
MPI_Datatype PARTICLE; | |
MPI_Type_contiguous( 7, MPI_DOUBLE, &PARTICLE ); | |
MPI_Type_commit( &PARTICLE ); | |
// | |
// set up the data partitioning across processors | |
// | |
int particle_per_proc = (n + n_proc - 1) / n_proc; | |
int *partition_offsets = (int*) malloc( (n_proc+1) * sizeof(int) ); | |
for( int i = 0; i < n_proc+1; i++ ) | |
partition_offsets[i] = min( i * particle_per_proc, n ); | |
int *partition_sizes = (int*) malloc( n_proc * sizeof(int) ); | |
for( int i = 0; i < n_proc; i++ ) | |
partition_sizes[i] = partition_offsets[i+1] - partition_offsets[i]; | |
// | |
// allocate storage for local partition | |
// | |
int nlocal = partition_sizes[rank]; | |
particle_t *local = (particle_t*) malloc( nlocal * sizeof(particle_t) ); | |
// | |
// initialize and distribute the particles (that's fine to leave it unoptimized) | |
// | |
set_size( n ); | |
if( rank == 0 ) | |
init_particles( n, particles ); | |
MPI_Scatterv( particles, partition_sizes, partition_offsets, PARTICLE, local, nlocal, PARTICLE, 0, MPI_COMM_WORLD ); | |
// | |
// simulate a number of time steps | |
// | |
double simulation_time = read_timer( ); | |
for( int step = 0; step < NSTEPS; step++ ) | |
{ | |
// | |
// collect all global data locally (not good idea to do) | |
// | |
MPI_Allgatherv( local, nlocal, PARTICLE, particles, partition_sizes, partition_offsets, PARTICLE, MPI_COMM_WORLD ); | |
// | |
// save current step if necessary (slightly different semantics than in other codes) | |
// | |
if( fsave && (step%SAVEFREQ) == 0 ) | |
save( fsave, n, particles ); | |
// | |
// compute all forces | |
// | |
for( int i = 0; i < nlocal; i++ ) | |
{ | |
local[i].ax = local[i].ay = 0; | |
for (int j = 0; j < n; j++ ) | |
apply_force( local[i], particles[j] ); | |
} | |
// | |
// move particles | |
// | |
for( int i = 0; i < nlocal; i++ ) | |
move( local[i] ); | |
} | |
simulation_time = read_timer( ) - simulation_time; | |
if( rank == 0 ) | |
printf( "n = %d, n_procs = %d, simulation time = %g s\n", n, n_proc, simulation_time ); | |
MPI_Allgatherv( local, nlocal, PARTICLE, particles, partition_sizes, partition_offsets, PARTICLE, MPI_COMM_WORLD ); | |
if (!rank && fchecksum){ | |
particle_t *particles_prev = (particle_t*) malloc( n * sizeof(particle_t) ); | |
for(int j = 0; j < n; j++){ | |
particles_prev[(int)particles[j].index] = particles[j]; | |
} | |
save( fchecksum, n, particles_prev ); | |
} | |
// | |
// release resources | |
// | |
free( partition_offsets ); | |
free( partition_sizes ); | |
free( local ); | |
free( particles ); | |
if( fsave ) | |
fclose( fsave ); | |
if (fchecksum) | |
fclose( fchecksum ); | |
MPI_Finalize( ); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment