Created
April 29, 2016 06:41
-
-
Save sylvchev/756959f972bfc53e914f1a4f85ad16d1 to your computer and use it in GitHub Desktop.
Simple CUDA file for benchmarking a cuFFT plan with different signal sizes, inspired by https://github.com/drufat/cuda-examples
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Simple FFT benchmark with CUDA, building on https://github.com/drufat/cuda-examples
// Compile with: nvcc -DSIGNAL_SIZE=2048 -m64 -gencode arch=compute_30,code=sm_30 -O2 -use_fast_math -I commoninclude -l cufft cudafft.cu -o cudaff
#include <stdlib.h> | |
#include <stdio.h> | |
#include <string.h> | |
#include <math.h> | |
#include <sys/time.h> | |
#include <cuda_runtime.h> | |
#include "helper_functions.h" | |
#include "helper_cuda.h" | |
#include <cufft.h> | |
// Forward declaration of the benchmark driver defined further down.
void runTest(int argc, char** argv);

// Number of complex samples in the benchmarked signal. Defaults to 512 and
// can be overridden at compile time with -DSIGNAL_SIZE=<n>.
#if !defined(SIGNAL_SIZE)
#define SIGNAL_SIZE 512
#endif
// Program entry point: hands the command line to runTest() and exits
// with status 0 once the benchmark has finished.
int main(int argc, char** argv)
{
    runTest(argc, argv);
    return 0;
}
//! Abort the program with a diagnostic if a cuFFT call did not succeed.
//! @param status  result code returned by the cuFFT call
//! @param what    name of the call, used only in the error message
static void checkCufft(cufftResult status, const char* what)
{
    if (status != CUFFT_SUCCESS) {
        fprintf(stderr, "cuFFT error %d in %s\n", (int)status, what);
        exit(EXIT_FAILURE);
    }
}

//! Run and time a simple test for CUDA
//!
//! Fills a host buffer of SIGNAL_SIZE complex samples (random real part,
//! zero imaginary part), copies it to the device, and executes an in-place
//! forward complex-to-complex cuFFT transform `iterations` times after one
//! untimed warm-up run. The elapsed time is measured twice (SDK stopwatch and
//! gettimeofday) and both results are printed to stdout.
//!
//! @param argc  unused
//! @param argv  unused
//!
//! Any failing allocation, CUDA runtime call or cuFFT call terminates the
//! process with an error message.
void runTest(int argc, char** argv)
{
    (void)argc;
    (void)argv;

    const int iterations = 1000;
    // Size of the signal buffer in bytes; size_t avoids int overflow for
    // large SIGNAL_SIZE values.
    const size_t mem_size = sizeof(float2) * SIGNAL_SIZE;

    StopWatchInterface *hTimer = NULL;
    sdkCreateTimer(&hTimer);
    struct timeval start, stop;
    double interval = 0.;

    // Allocate host memory for the signal
    float2* h_signal = (float2*)malloc(mem_size);
    if (h_signal == NULL) {
        fprintf(stderr, "Failed to allocate %lu bytes of host memory\n",
                (unsigned long)mem_size);
        exit(EXIT_FAILURE);
    }

    // Initialize the memory for the signal
    for (unsigned int i = 0; i < SIGNAL_SIZE; ++i) {
        h_signal[i].x = rand() / (float)RAND_MAX;
        h_signal[i].y = 0;
    }

    // Allocate device memory for signal
    float2* d_signal = NULL;
    checkCudaErrors(cudaMalloc((void**)&d_signal, mem_size));

    // Copy host memory to device
    checkCudaErrors(cudaMemcpy(d_signal, h_signal, mem_size,
                               cudaMemcpyHostToDevice));

    // CUFFT plan. The transform length is given in elements, not bytes:
    // passing the byte count would make cuFFT access memory past the end of
    // d_signal.
    cufftHandle plan;
    checkCufft(cufftPlan1d(&plan, SIGNAL_SIZE, CUFFT_C2C, 1), "cufftPlan1d");

    for (int i = -1; i < iterations; i++) {
        // i == -1 -- warmup iteration, executed before the timers start
        if (i == 0) {
            // Make sure the warm-up has finished before timing begins
            checkCudaErrors(cudaDeviceSynchronize());
            sdkResetTimer(&hTimer);
            sdkStartTimer(&hTimer);
            gettimeofday(&start, NULL);
        }
        checkCufft(cufftExecC2C(plan, (cufftComplex *)d_signal,
                                (cufftComplex *)d_signal, CUFFT_FORWARD),
                   "cufftExecC2C");
    }

    // Wait for all queued transforms to complete before stopping the clocks
    checkCudaErrors(cudaDeviceSynchronize());
    sdkStopTimer(&hTimer);
    gettimeofday(&stop, NULL);

    // The stopwatch value is scaled by 0.001 and reported in seconds
    double gpuTime = 0.001 * sdkGetTimerValue(&hTimer);
    printf("Time elapsed for %d iterations : %.5f s (%g ms per iteration)\n", iterations, gpuTime, 1000.0*gpuTime/(double)iterations);

    // Wall-clock interval in milliseconds
    interval = (stop.tv_sec - start.tv_sec)*1000.0;
    interval += (stop.tv_usec - start.tv_usec)/1000.0;
    printf("Time elapsed for %d iterations: %g ms (%g ms per iterations)\n", iterations, interval, interval/(double)iterations);

    // Destroy CUFFT context
    checkCufft(cufftDestroy(plan), "cufftDestroy");

    // cleanup memory and the stopwatch
    free(h_signal);
    checkCudaErrors(cudaFree(d_signal));
    sdkDeleteTimer(&hTimer);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment