Skip to content

Instantly share code, notes, and snippets.

@smutch
Created October 26, 2013 08:42
Show Gist options
  • Save smutch/7166903 to your computer and use it in GitHub Desktop.
Save smutch/7166903 to your computer and use it in GitHub Desktop.
c: exploring 3d parallel ffts with FFTW
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <stdbool.h>
#include <mpi.h>
#include <complex.h>
#include <fftw3.h>
#include <fftw3-mpi.h>
/*
* Some code exploring how to do 3d parallel ffts with FFTW3. In particular
* focussing on indexing, padding and reconstructing distributed arrays.
*/
static inline int index(int i, int j, int k, int dim, int type)
{
int ind;
// type =0 -> real
// type =1 -> padded
switch(type)
{
case 1:
ind = k + (2*(dim/2 +1)) * (j + dim * (i));
break;
case 0:
ind = k + dim * (j + dim *(i));
break;
}
return ind;
}
int main(int argc, char *argv[])
{
int i_rank, n_rank;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &i_rank);
MPI_Comm_size(MPI_COMM_WORLD, &n_rank);
int dim = 128;
int n_cell = (int)pow(dim, 3);
ptrdiff_t alloc_local, local_nx, local_x_start;
int n_cell_fft;
fftwf_plan plan;
fftwf_mpi_init();
// init the real grid
float *grid_real = (float *)malloc(sizeof(float) * n_cell);
for(int ii=0; ii<n_cell; ii++)
grid_real[ii] = 0;
grid_real[index(4,4,4,dim,0)] = 10.;
grid_real[index(3,1,6,dim,0)] = 50.;
// get local data size and allocate
alloc_local = fftwf_mpi_local_size_3d(dim, dim, dim, MPI_COMM_WORLD,
&local_nx, &local_x_start);
fftwf_complex *grid = fftwf_alloc_complex(alloc_local);
// copy the real grid in
for(ptrdiff_t ii=0; ii<local_nx; ii++)
for(int jj=0; jj<dim; jj++)
for(int kk=0; kk<dim; kk++)
((float *)grid)[index((int)ii,jj,kk,dim,1)] = grid_real[index((int)(ii+local_x_start),jj,kk,dim,0)];
plan = fftwf_mpi_plan_dft_r2c_3d(dim, dim, dim, (float *)grid, grid,
MPI_COMM_WORLD, FFTW_ESTIMATE);
fftwf_execute(plan);
fftwf_destroy_plan(plan);
plan = fftwf_mpi_plan_dft_c2r_3d(dim, dim, dim, grid, (float *)grid,
MPI_COMM_WORLD, FFTW_ESTIMATE);
fftwf_execute(plan);
fftwf_destroy_plan(plan);
// renormalise
n_cell_fft = (int)(alloc_local * 2);
for(int ii=0; ii<n_cell_fft; ii++)
((float *)grid)[ii] /= (float)n_cell;
// make a new float grid with no padding and put the fft grid in it
float *result_grid = (float *)calloc(n_cell, sizeof(float));
for(ptrdiff_t ii=0; ii<local_nx; ii++)
for(int jj=0; jj<dim; jj++)
for(int kk=0; kk<dim; kk++)
result_grid[index(ii+local_x_start,jj,kk,dim,0)] = ((float *)grid)[index(ii,jj,kk,dim,1)];
// get the local offsets and counts from each rank
int *global_counts = (int *)malloc(sizeof(int) * n_rank);
int *global_offsets = (int *)calloc(n_rank, sizeof(int));
MPI_Allgather((int *)&local_nx, 1, MPI_INT, global_counts, 1, MPI_INT, MPI_COMM_WORLD);
for(int ii=0; ii<n_rank; ii++)
global_counts[ii] *= dim*dim;
for(int ii=1; ii<n_rank; ii++)
global_offsets[ii] = global_offsets[ii-1]+global_counts[ii-1];
fprintf(stderr, "RANK %d: global_counts = [%d, %d]\n", i_rank, global_counts[0], global_counts[1]);
fprintf(stderr, "RANK %d: global_offsets = [%d, %d]\n", i_rank, global_offsets[0], global_offsets[1]);
fprintf(stderr, "RANK %d: local_x_start = %d (ind=%d)\n", i_rank, (int)local_x_start, index(local_x_start, 0, 0, dim, 0));
// assemble the result grid on all ranks
MPI_Allgatherv(&(result_grid[global_offsets[i_rank]]), global_counts[i_rank],
MPI_FLOAT, result_grid, global_counts, global_offsets, MPI_FLOAT,
MPI_COMM_WORLD);
// Compare the real and fourier (+ inverse) transformed grids
printf("RANK %d: grid_real \t(4,4,4)=%.1f;\t\t(3,1,6)=%.1f\n", i_rank, grid_real[index(4,4,4,dim,0)], grid_real[index(3,1,6,dim,0)]);
printf("RANK %d: result_grid\t(4,4,4)=%.1f;\t\t(3,1,6)=%.1f\n", i_rank, result_grid[index(4,4,4,dim,0)], result_grid[index(3,1,6,dim,0)]);
free(global_offsets);
free(global_counts);
free(result_grid);
fftwf_free(grid);
free(grid_real);
fftwf_mpi_cleanup();
MPI_Finalize();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment