Skip to content

Instantly share code, notes, and snippets.

@jstroem
Created December 3, 2012 05:15
Show Gist options
  • Save jstroem/4192869 to your computer and use it in GitHub Desktop.
Save jstroem/4192869 to your computer and use it in GitHub Desktop.
cse260 project standard
#!/bin/bash
# This script is intepreted by the Bourne Shell, sh
#
# Documentation for SGE is found in:
# http://docs.oracle.com/cd/E19279-01/820-3257-12/n1ge.html
#
# Tell SGE which shell to run the job script in rather than depending
# on SGE to try and figure it out.
#$ -S /bin/bash
#
# Export all my environment variables to the job
#$ -V
#
# Tun the job in the same directory from which you submitted it
#$ -cwd
#
#
# --- Don't change anything above this line ---
#
# Give a name to the job
#$ -N MPI
#
# Specify a time limit for the job, not more than 30 minutes
#$ -l h_rt=00:02:00
#
# Specify the parallel environment and number of cores,
# If not a multiple of 8, you'll get the whole node anyway
#$ -pe orte 8
#
# Join stdout and stderr so they are reported in job output file
#$ -j y
#
#
# Choose the queue to run the job
#
# Debug queue: only one node may be used at a time for up to 30 minutes
# Interactive or batch jobs, maximum of 1 job per user running at a time
#
# Normal queue: job may use all available compute nodes (256 cores)
# for up to 60 minutes
# Batch jobs, maximum of 2 jobs per user running at a time
# To use more than one node, specify the "normal" queue
#$ -q normal.q
# #$ -q debug.q
#
# Specifies the circumstances under which mail is to be sent to the job owner
# defined by -M option. For example, options "bea" cause mail to be sent at the
# begining, end, and at abort time (if it happens) of the job.
# Option "n" means no mail will be sent.
#$ -m aeb
#
# *** Change to the address you want the notification sent to, and
# *** REMOVE the blank between the # and the $
#$ -M [email protected]
#
echo
echo " *** Current working directory"
pwd
echo
echo " *** Compiler"
# Output which compiler are we using and the environment
mpicc -v
echo
echo " *** Environment"
printenv
echo
echo ">>> Job Starts"
date
mpirun -np $NSLOTS ./mpi -n 500 -proc_width 4 -proc_height 2 -o mpi.txt -c checksum.txt
date
echo ">>> Job Ends"
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <float.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <sys/time.h>
#include "common.h"
double size;
//
// tuned constants
//
#define density 0.0005
#define mass 0.01
#define cutoff 0.01
#define min_r (cutoff/100)
#define dt 0.0005
//
// timer
//
double read_timer( )
{
static bool initialized = false;
static struct timeval start;
struct timeval end;
if( !initialized )
{
gettimeofday( &start, NULL );
initialized = true;
}
gettimeofday( &end, NULL );
return (end.tv_sec - start.tv_sec) + 1.0e-6 * (end.tv_usec - start.tv_usec);
}
//
// keep density constant
//
void set_size( int n )
{
size = sqrt( density * n );
}
//
// Initialize the particle positions and velocities
//
void init_particles( int n, particle_t *p )
{
srand48( 1 );
int sx = (int)ceil(sqrt((double)n));
int sy = (n+sx-1)/sx;
int *shuffle = (int*)malloc( n * sizeof(int) );
for( int i = 0; i < n; i++ )
shuffle[i] = i;
for( int i = 0; i < n; i++ )
{
//
// make sure particles are not spatially sorted
//
int j = lrand48()%(n-i);
int k = shuffle[j];
shuffle[j] = shuffle[n-i-1];
//
// distribute particles evenly to ensure proper spacing
//
p[i].x = size*(1.+(k%sx))/(1+sx);
p[i].y = size*(1.+(k/sx))/(1+sy);
p[i].index = i;
//
// assign random velocities within a bound
//
p[i].vx = drand48()*2-1;
p[i].vy = drand48()*2-1;
}
free( shuffle );
}
//
// interact two particles
//
void apply_force( particle_t &particle, particle_t &neighbor )
{
double dx = neighbor.x - particle.x;
double dy = neighbor.y - particle.y;
double r2 = dx * dx + dy * dy;
if( r2 > cutoff*cutoff )
return;
r2 = fmax( r2, min_r*min_r );
double r = sqrt( r2 );
//
// very simple short-range repulsive force
//
double coef = ( 1 - cutoff / r ) / r2 / mass;
particle.ax += coef * dx;
particle.ay += coef * dy;
}
//
// integrate the ODE
//
void move( particle_t &p )
{
//
// slightly simplified Velocity Verlet integration
// conserves energy better than explicit Euler method
//
p.vx += p.ax * dt;
p.vy += p.ay * dt;
p.x += p.vx * dt;
p.y += p.vy * dt;
//
// bounce from walls
//
while( p.x < 0 || p.x > size )
{
p.x = p.x < 0 ? -p.x : 2*size-p.x;
p.vx = -p.vx;
}
while( p.y < 0 || p.y > size )
{
p.y = p.y < 0 ? -p.y : 2*size-p.y;
p.vy = -p.vy;
}
}
//
// I/O routines
//
void save( FILE *f, int n, particle_t *p )
{
static bool first = true;
if( first )
{
fprintf( f, "%d %g\n", n, size );
first = false;
}
for( int i = 0; i < n; i++ )
fprintf( f, "%g %g\n", p[i].x, p[i].y );
}
//
// command line option processing
//
int find_option( int argc, char **argv, const char *option )
{
for( int i = 1; i < argc; i++ )
if( strcmp( argv[i], option ) == 0 )
return i;
return -1;
}
int read_int( int argc, char **argv, const char *option, int default_value )
{
int iplace = find_option( argc, argv, option );
if( iplace >= 0 && iplace < argc-1 )
return atoi( argv[iplace+1] );
return default_value;
}
char *read_string( int argc, char **argv, const char *option, char *default_value )
{
int iplace = find_option( argc, argv, option );
if( iplace >= 0 && iplace < argc-1 )
return argv[iplace+1];
return default_value;
}
#ifndef __CS267_COMMON_H__
#define __CS267_COMMON_H__
inline int min( int a, int b ) { return a < b ? a : b; }
inline int max( int a, int b ) { return a > b ? a : b; }
//
// saving parameters
//
const int NSTEPS = 1000;
const int SAVEFREQ = 10;
//
// particle data structure
//
typedef struct
{
double index;
double x;
double y;
double vx;
double vy;
double ax;
double ay;
} particle_t;
//
// timing routines
//
double read_timer( );
//
// simulation routines
//
void set_size( int n );
void init_particles( int n, particle_t *p );
void apply_force( particle_t &particle, particle_t &neighbor );
void move( particle_t &p );
//
// I/O routines
//
FILE *open_save( char *filename, int n );
void save( FILE *f, int n, particle_t *p );
//
// argument processing routines
//
int find_option( int argc, char **argv, const char *option );
int read_int( int argc, char **argv, const char *option, int default_value );
char *read_string( int argc, char **argv, const char *option, char *default_value );
#endif
HOST = $(shell hostname)
BANG = $(shell expr match `hostname` ccom-bang)
BANG-COMPUTE = $(shell expr match `hostname` compute)
LILLIPUT = $(shell expr match `hostname` lilliput)
ifneq ($(BANG), 0)
PUB = /share/class/public/cse260-fa12
include $(PUB)/Arch/arch.gnu.generic
else
ifneq ($(BANG-COMPUTE), 0)
PUB = /share/class/public/cse260-fa12
include $(PUB)/Arch/arch.gnu.generic
else
ifneq ($(LILLIPUT), 0)
PUB = /class/public/cse260-fa12
include $(PUB)/Arch/arch.intel.generic
else
# PUB = /Users/baden/lib
include $(PUB)/Arch/arch.gnu
# include $(PUB)/Arch/arch.gnu-4.5
endif
endif
endif
#
# Add symbol table information for gdb/cachegrind
ifeq ($(debug), 1)
CFLAGS += -g
LDFLAGS += -g
C++FLAGS += -g
endif
# Add symbol table information for gprof
ifeq ($(gprof), 1)
CFLAGS += -g -pg
C++FLAGS += -g -pg
LDFLAGS += -g -pg
endif
# If you want to compile for single precision,
# specify single=1 on the "make" command line
ifeq ($(single), 1)
else
C++FLAGS += -D_DOUBLE
CFLAGS += -D_DOUBLE
endif
# If you want to compile so that you call the plotter for
# flattened 2D arrays (implemented as 1D arrays)
# specify flattened=1 on the "make" command line
ifeq ($(flattened), 1)
C++FLAGS += -DPLOT1D
CFLAGS += -DPLOT1D
endif
# If you want to use restrict pointers, make restrict=1
# This applies to the hand code version
ifeq ($(restrict), 1)
C++FLAGS += -D__RESTRICT
CFLAGS += -D__RESTRICT
ifneq ($(CARVER), 0)
C++FLAGS += -restrict
CFLAGS += -restrict
endif
endif
#DEBUG += -DDEBUG
TARGETS = mpi
app: $(TARGETS)
OBJECTS = common.o mpi.o.o
#ifeq ($(no-mpi),1)
#OBJECTS += Timer.o
#endif
app: $(TARGETS)
mpi: mpi.o common.o
$(C++LINK) $(LDFLAGS) -o $@ mpi.o common.o $(LDLIBS)
clean:
$(RM) *.o $(TARGETS);
$(RM) core;
#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include "common.h"
//
// benchmarking program
//
int main( int argc, char **argv )
{
//
// process command line parameters
//
if( find_option( argc, argv, "-h" ) >= 0 )
{
printf( "Options:\n" );
printf( "-h to see this help\n" );
printf( "-n <int> to set the number of particles\n" );
printf( "-o <filename> to specify the output file name\n" );
printf( "-c <checksum> to specify the output checksum file name\n" );
return 0;
}
int n = read_int( argc, argv, "-n", 1000 );
char *savename = read_string( argc, argv, "-o", NULL );
char *checksum = read_string( argc, argv, "-c", NULL );
//
// set up MPI
//
int n_proc, rank;
MPI_Init( &argc, &argv );
MPI_Comm_size( MPI_COMM_WORLD, &n_proc );
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
//
// allocate generic resources
//
FILE *fsave = savename && rank == 0 ? fopen( savename, "w" ) : NULL;
FILE *fchecksum = checksum && rank == 0 ? fopen( checksum, "w" ) : NULL;
particle_t *particles = (particle_t*) malloc( n * sizeof(particle_t) );
MPI_Datatype PARTICLE;
MPI_Type_contiguous( 7, MPI_DOUBLE, &PARTICLE );
MPI_Type_commit( &PARTICLE );
//
// set up the data partitioning across processors
//
int particle_per_proc = (n + n_proc - 1) / n_proc;
int *partition_offsets = (int*) malloc( (n_proc+1) * sizeof(int) );
for( int i = 0; i < n_proc+1; i++ )
partition_offsets[i] = min( i * particle_per_proc, n );
int *partition_sizes = (int*) malloc( n_proc * sizeof(int) );
for( int i = 0; i < n_proc; i++ )
partition_sizes[i] = partition_offsets[i+1] - partition_offsets[i];
//
// allocate storage for local partition
//
int nlocal = partition_sizes[rank];
particle_t *local = (particle_t*) malloc( nlocal * sizeof(particle_t) );
//
// initialize and distribute the particles (that's fine to leave it unoptimized)
//
set_size( n );
if( rank == 0 )
init_particles( n, particles );
MPI_Scatterv( particles, partition_sizes, partition_offsets, PARTICLE, local, nlocal, PARTICLE, 0, MPI_COMM_WORLD );
//
// simulate a number of time steps
//
double simulation_time = read_timer( );
for( int step = 0; step < NSTEPS; step++ )
{
//
// collect all global data locally (not good idea to do)
//
MPI_Allgatherv( local, nlocal, PARTICLE, particles, partition_sizes, partition_offsets, PARTICLE, MPI_COMM_WORLD );
//
// save current step if necessary (slightly different semantics than in other codes)
//
if( fsave && (step%SAVEFREQ) == 0 )
save( fsave, n, particles );
//
// compute all forces
//
for( int i = 0; i < nlocal; i++ )
{
local[i].ax = local[i].ay = 0;
for (int j = 0; j < n; j++ )
apply_force( local[i], particles[j] );
}
//
// move particles
//
for( int i = 0; i < nlocal; i++ )
move( local[i] );
}
simulation_time = read_timer( ) - simulation_time;
if( rank == 0 )
printf( "n = %d, n_procs = %d, simulation time = %g s\n", n, n_proc, simulation_time );
MPI_Allgatherv( local, nlocal, PARTICLE, particles, partition_sizes, partition_offsets, PARTICLE, MPI_COMM_WORLD );
if (!rank && fchecksum){
particle_t *particles_prev = (particle_t*) malloc( n * sizeof(particle_t) );
for(int j = 0; j < n; j++){
particles_prev[(int)particles[j].index] = particles[j];
}
save( fchecksum, n, particles_prev );
}
//
// release resources
//
free( partition_offsets );
free( partition_sizes );
free( local );
free( particles );
if( fsave )
fclose( fsave );
if (fchecksum)
fclose( fchecksum );
MPI_Finalize( );
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment