Created
November 7, 2017 18:18
-
-
Save robodhruv/97387ced89b6f86205e3dd348aa5d406 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# include <cstdlib> | |
# include <iostream> | |
# include <iomanip> | |
# include <cmath> | |
# include <ctime> | |
# include <omp.h> | |
using namespace std; | |
int main ( void ); | |
void ccopy ( int n, double x[], double y[] ); | |
void cfft2 ( int n, double x[], double y[], double w[], double sgn ); | |
void cffti ( int n, double w[] ); | |
double ggl ( double *ds ); | |
void step ( int n, int mj, double a[], double b[], double c[], double d[], | |
double w[], double sgn ); | |
void timestamp ( ); | |
int main ( void ) { | |
double error, flops, fnml, mflops, sgn, wtime, z0, z1, *w, *x, *y, *z; | |
int first, i, icase, it, ln2, n; | |
int ln2_max = 25; | |
int nits = 10000; | |
static double seed; | |
timestamp ( ); | |
cout << "\n"; | |
cout << "FFT_OPENMP\n"; | |
cout << "\n"; | |
cout << " Demonstrate an implementation of the Fast Fourier Transform\n"; | |
cout << " of a complex data vector, using OpenMP for parallel execution.\n"; | |
cout << "\n"; | |
cout << " Number of processors available = " << omp_get_num_procs ( ) << "\n"; | |
cout << " Number of threads = " << omp_get_max_threads ( ) << "\n"; | |
cout << "\n"; | |
cout << " Accuracy check:\n"; | |
cout << "\n"; | |
cout << " FFT ( FFT ( X(1:N) ) ) == N * X(1:N)\n"; | |
cout << "\n"; | |
cout << " N NITS Error Time Time/Call MFLOPS\n"; | |
cout << "\n"; | |
seed = 331.0; | |
n = 1; | |
for ( ln2 = 1; ln2 <= ln2_max; ln2++ ) | |
{ | |
n = 2 * n; | |
w = new double[ n]; | |
x = new double[2*n]; | |
y = new double[2*n]; | |
z = new double[2*n]; | |
first = 1; | |
for ( icase = 0; icase < 2; icase++ ) | |
{ | |
if ( first ) | |
{ | |
for ( i = 0; i < 2 * n; i = i + 2 ) | |
{ | |
z0 = ggl ( &seed ); | |
z1 = ggl ( &seed ); | |
x[i] = z0; | |
z[i] = z0; | |
x[i+1] = z1; | |
z[i+1] = z1; | |
} | |
} | |
else | |
{ | |
# pragma omp parallel shared ( n, x, z ) private ( i, z0, z1 ) | |
# pragma omp for nowait | |
for ( i = 0; i < 2 * n; i = i + 2 ) | |
{ | |
z0 = 0.0; | |
z1 = 0.0; | |
x[i] = z0; | |
z[i] = z0; | |
x[i+1] = z1; | |
z[i+1] = z1; | |
} | |
} | |
cffti ( n, w ); | |
if ( first ) | |
{ | |
sgn = + 1.0; | |
cfft2 ( n, x, y, w, sgn ); | |
sgn = - 1.0; | |
cfft2 ( n, y, x, w, sgn ); | |
fnm1 = 1.0 / ( double ) n; | |
error = 0.0; | |
for ( i = 0; i < 2 * n; i = i + 2 ) | |
{ | |
error = error | |
+ pow ( z[i] - fnm1 * x[i], 2 ) | |
+ pow ( z[i+1] - fnm1 * x[i+1], 2 ); | |
} | |
error = sqrt ( fnm1 * error ); | |
cout << " " << setw(12) << n | |
<< " " << setw(8) << nits | |
<< " " << setw(12) << error; | |
first = 0; | |
} | |
else | |
{ | |
wtime = omp_get_wtime ( ); | |
for ( it = 0; it < nits; it++ ) | |
{ | |
sgn = + 1.0; | |
cfft2 ( n, x, y, w, sgn ); | |
sgn = - 1.0; | |
cfft2 ( n, y, x, w, sgn ); | |
} | |
wtime = omp_get_wtime ( ) - wtime; | |
flops = ( double ) 2 * ( double ) nits | |
* ( ( double ) 5 * ( double ) n * ( double ) ln2 ); | |
mflops = flops / 1.0E+06 / wtime; | |
cout << " " << setw(12) << ctime | |
<< " " << setw(12) << wtime / ( double ) ( 2 * nits ) | |
<< " " << setw(12) << mflops << "\n"; | |
} | |
} | |
if ( ( ln2 % 4 ) == 0 ) | |
{ | |
nits = nits / 10; | |
} | |
if ( nits < 1 ) | |
{ | |
nits = 1; | |
} | |
delete [] w; | |
delete [] x; | |
delete [] y; | |
delete [] z; | |
} | |
cout << "\n"; | |
cout << "FFT_OPENMP:\n"; | |
cout << " Normal end of execution.\n"; | |
cout << "\n"; | |
timestamp ( ); | |
return 0; | |
} | |
void ccopy ( int n, double x[], double y[] ) { | |
int i; | |
for ( i = 0; i < n; i++ ) | |
{ | |
y[i*2+0] = x[i*2+0]; | |
y[i*2+1] = x[i*2+1]; | |
} | |
return; | |
} | |
void cfft2 ( int n, double x[], double y[], double w[], double sgn ) { | |
int j; | |
int m; | |
int mj; | |
int tgle; | |
m = ( int ) ( log ( ( double ) n ) / log ( 1.99 ) ); | |
mj = 1; | |
tgle = 1; | |
step ( n, mj, &x[0*2+0], &x[(n/2)*2+0], &y[0*2+0], &y[mj*2+0], w, sgn ); | |
if ( n == 2 ) | |
{ | |
return; | |
} | |
for ( j = 0; j < m - 2; j++ ) | |
{ | |
mj = mj * 2; | |
if ( tgle ) | |
{ | |
step ( n, mj, &y[0*2+0], &y[(n/2)*2+0], &x[0*2+0], &x[mj*2+0], w, sgn ); | |
tgle = 0; | |
} | |
else | |
{ | |
step ( n, mj, &x[0*2+0], &x[(n/2)*2+0], &y[0*2+0], &y[mj*2+0], w, sgn ); | |
tgle = 1; | |
} | |
} | |
if ( tgle ) | |
{ | |
ccopy ( n, y, x ); | |
} | |
mj = n / 2; | |
step ( n, mj, &x[0*2+0], &x[(n/2)*2+0], &y[0*2+0], &y[mj*2+0], w, sgn ); | |
return; | |
} | |
void cffti ( int n, double w[] ) { | |
double arg; | |
double aw; | |
int i; | |
int n2; | |
const double pi = 3.141592653589793; | |
n2 = n / 2; | |
aw = 2.0 * pi / ( ( double ) n ); | |
# pragma omp parallel shared ( aw, n, w ) private ( arg, i ) | |
# pragma omp for nowait | |
for ( i = 0; i < n2; i++ ) | |
{ | |
arg = aw * ( ( double ) i ); | |
w[i*2+0] = cos ( arg ); | |
w[i*2+1] = sin ( arg ); | |
} | |
return; | |
} | |
double ggl ( double *seed ) { | |
double d2 = 0.2147483647e10; | |
double t; | |
double value; | |
t = ( double ) *seed; | |
t = fmod ( 16807.0 * t, d2 ); | |
*seed = ( double ) t; | |
value = ( double ) ( ( t - 1.0 ) / ( d2 - 1.0 ) ); | |
return value; | |
} | |
void step ( int n, int mj, double a[], double b[], double c[], | |
double d[], double w[], double sgn ) { | |
double ambr; | |
double ambu; | |
int j; | |
int ja; | |
int jb; | |
int jc; | |
int jd; | |
int jw; | |
int k; | |
int lj; | |
int mj2; | |
double wjw[2]; | |
mj2 = 2 * mj; | |
lj = n / mj2; | |
# pragma omp parallel \ | |
shared ( a, b, c, d, lj, mj, mj2, sgn, w ) private ( ambr, ambu, j, ja, jb, jc, jd, jw, k, wjw ) | |
# pragma omp for nowait | |
for ( j = 0; j < lj; j++ ) | |
{ | |
jw = j * mj; | |
ja = jw; | |
jb = ja; | |
jc = j * mj2; | |
jd = jc; | |
wjw[0] = w[jw*2+0]; | |
wjw[1] = w[jw*2+1]; | |
if ( sgn < 0.0 ) | |
{ | |
wjw[1] = - wjw[1]; | |
} | |
for ( k = 0; k < mj; k++ ) | |
{ | |
c[(jc+k)*2+0] = a[(ja+k)*2+0] + b[(jb+k)*2+0]; | |
c[(jc+k)*2+1] = a[(ja+k)*2+1] + b[(jb+k)*2+1]; | |
ambr = a[(ja+k)*2+0] - b[(jb+k)*2+0]; | |
ambu = a[(ja+k)*2+1] - b[(jb+k)*2+1]; | |
d[(jd+k)*2+0] = wjw[0] * ambr - wjw[1] * ambu; | |
d[(jd+k)*2+1] = wjw[1] * ambr + wjw[0] * ambu; | |
} | |
} | |
return; | |
} | |
void timestamp () { | |
# define TIME_SIZE 40 | |
static char time_buffer[TIME_SIZE]; | |
const struct tm *tm; | |
size_t len; | |
time_t now; | |
now = time ( NULL ); | |
tm = localtime ( &now ); | |
len = strftime ( time_buffer, TIME_SIZE, "%d %B %Y %I:%M:%S %p", tm ); | |
cout << time_buffer << "\n"; | |
return; | |
# undef TIME_SIZE | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment