Last active
June 26, 2021 15:21
-
-
Save reginaldojunior/3c4320a5490f3b7e4364dcde2b4daf0f to your computer and use it in GitHub Desktop.
acoustic-wave.cu
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%%cu | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <cuda_runtime.h> | |
#include <time.h> | |
#include <sys/time.h> | |
#include <assert.h> | |
#define DT 0.0070710676f // delta t | |
#define DX 15.0f // delta x | |
#define DY 15.0f // delta y | |
#define V 1500.0f // wave velocity v = 1500 m/s | |
#define HALF_LENGTH 1 // radius of the stencil | |
#define NUM_THREADS_BLOCK_X 32 | |
#define NUM_THREADS_BLOCK_Y 32 | |
#define ROWS 10000 | |
#define COLS 10000 | |
#define TIME 1000 | |
__constant__ float dxSquared = DX * DX; | |
__constant__ float dySquared = DY * DY; | |
__constant__ float dtSquared = DT * DT; | |
inline cudaError_t checkCuda(cudaError_t result) | |
{ | |
if (result != cudaSuccess) { | |
fprintf(stderr, "CUDA Runtime Error: %s\n", cudaGetErrorString(result)); | |
// assert(result == cudaSuccess); | |
} | |
return result; | |
} | |
/* | |
* save the matrix on a file.txt | |
*/ | |
void save_grid(int rows, int cols, float *matrix){ | |
char file_name[64]; | |
sprintf(file_name, "wavefield/wavefield.txt"); | |
// save the result | |
FILE *file; | |
file = fopen(file_name, "w"); | |
for(int i = 0; i < rows; i++) { | |
int offset = i * cols; | |
for(int j = 0; j < cols; j++) { | |
fprintf(file, "%f ", matrix[offset + j]); | |
} | |
fprintf(file, "\n"); | |
} | |
fclose(file); | |
} | |
void inicializaMatriz(float *prev_base, float *next_base, float *vel_base) | |
{ | |
// initialize matrix | |
for(int i = 0; i < ROWS; i++){ | |
int offset = i * COLS; | |
for(int j = 0; j < COLS; j++){ | |
prev_base[offset + j] = 0.0f; | |
next_base[offset + j] = 0.0f; | |
vel_base[offset + j] = V * V; | |
} | |
} | |
} | |
void inicializaFonteInicialDaOnda(float *prev_base, float *wavelet) | |
{ | |
// add a source to initial wavefield as an initial condition | |
for(int s = 11; s >= 0; s--){ | |
for(int i = ROWS / 2 - s; i < ROWS / 2 + s; i++){ | |
int offset = i * COLS; | |
for(int j = COLS / 2 - s; j < COLS / 2 + s; j++) | |
prev_base[offset + j] = wavelet[s]; | |
} | |
} | |
} | |
__global__ void waveGPU(float *prev_base, float *next_base, float *vel_base) { | |
int indexCol = blockIdx.x * blockDim.x + threadIdx.x + HALF_LENGTH; | |
int indexLinha = blockIdx.y * blockDim.y + threadIdx.y + HALF_LENGTH; | |
int current = indexLinha * COLS + indexCol; | |
if (indexLinha < ROWS - HALF_LENGTH && indexCol < COLS - HALF_LENGTH) { | |
float value = (prev_base[current + 1] - 2.0 * prev_base[current] + prev_base[current - 1]) / dxSquared; | |
value += (prev_base[current + COLS] - 2.0 * prev_base[current] + prev_base[current - COLS]) / dySquared; | |
value *= dtSquared * vel_base[current]; | |
next_base[current] = 2.0 * prev_base[current] - next_base[current] + value; | |
} | |
} | |
__host__ void processamentoGPU(unsigned n) { | |
// calc the number of iterations (timesteps) | |
int iterations = (int) ( ( TIME / 1000.0 ) / DT ); | |
int numeroBytes = n * COLS * sizeof(float); | |
printf("Grid Sizes: %d x %d \n", n, COLS); | |
printf("Iterations: %d \n", iterations); | |
// Aloca espaço na CPU para o resultado | |
float* prev_base = (float*) malloc( numeroBytes ); | |
float* next_base = (float*) malloc( numeroBytes ); | |
float* vel_base = (float*) malloc( numeroBytes ); | |
float *gpu_prev_base; | |
float *gpu_next_base; | |
float *gpu_vel_base; | |
cudaMalloc( (void**) &gpu_prev_base, numeroBytes); | |
cudaMalloc( (void**) &gpu_next_base, numeroBytes); | |
cudaMalloc( (void**) &gpu_vel_base, numeroBytes); | |
// ************* BEGIN INITIALIZATION ************* | |
printf("Initializing ... \n"); | |
// define source wavelet | |
float wavelet[12] = {0.016387336, -0.041464937, -0.067372555, 0.386110067, | |
0.812723635, 0.416998396, 0.076488599, -0.059434419, | |
0.023680172, 0.005611435, 0.001823209, -0.000720549}; | |
inicializaMatriz(prev_base, next_base, vel_base); | |
inicializaFonteInicialDaOnda(prev_base, wavelet); | |
cudaMemcpy(gpu_next_base, next_base, numeroBytes, cudaMemcpyHostToDevice); | |
cudaMemcpy(gpu_prev_base, prev_base, numeroBytes, cudaMemcpyHostToDevice); | |
cudaMemcpy(gpu_vel_base, vel_base, numeroBytes, cudaMemcpyHostToDevice); | |
// ************** END INITIALIZATION ************** | |
printf("Computing wavefield ... \n"); | |
dim3 bloco = dim3(NUM_THREADS_BLOCK_X, NUM_THREADS_BLOCK_Y); | |
dim3 grid = dim3(ceil ((float)ROWS * sizeof(float)/ (float) NUM_THREADS_BLOCK_X), ceil ((float)ROWS * sizeof(float)/ (float) NUM_THREADS_BLOCK_Y)); | |
printf("Numero de Blocos: %d\n", bloco); | |
printf("Numero de Blocos por Grid: %d\n", grid); | |
// Eventos para obter o tempo de execução | |
cudaEvent_t start, stop; | |
float gpu_time = 0.0; | |
checkCuda( cudaEventCreate(&start) ); | |
checkCuda( cudaEventCreate(&stop) ); | |
checkCuda( cudaEventRecord(start, 0) ); | |
// wavefield modeling | |
float *gpu_swap; | |
// ------------Lançamento do kernel------------ | |
for(int k = 0; k < iterations; k++){ | |
waveGPU<<< grid, bloco >>>(gpu_prev_base, gpu_next_base, gpu_vel_base); | |
cudaDeviceSynchronize(); | |
// swap arrays for next iteration | |
gpu_swap = gpu_next_base; | |
gpu_next_base = gpu_prev_base; | |
gpu_prev_base = gpu_swap; | |
} | |
// Obtém o erro de lançamento de kernel | |
cudaError_t error = cudaGetLastError(); | |
checkCuda( error ); | |
// Eventos que marcam o tempo de final de execução do kernel | |
checkCuda( cudaEventRecord(stop, 0) ); | |
checkCuda( cudaEventSynchronize(stop) ); | |
checkCuda( cudaEventElapsedTime(&gpu_time, start, stop) ); | |
// ------------Transferir o resultado para a CPU------------ | |
cudaMemcpy(next_base, gpu_next_base, numeroBytes, cudaMemcpyDeviceToHost); | |
cudaMemcpy(prev_base, gpu_prev_base, numeroBytes, cudaMemcpyDeviceToHost); | |
cudaMemcpy(vel_base, gpu_vel_base, numeroBytes, cudaMemcpyDeviceToHost); | |
save_grid(ROWS, COLS, next_base); | |
cudaFree(gpu_next_base); | |
cudaFree(gpu_prev_base); | |
cudaFree(gpu_vel_base); | |
printf("Tempo de Execução na GPU: %.4f s ", gpu_time / 1000); | |
} | |
int main(void) { | |
cudaSetDevice(0); | |
cudaDeviceProp prop; | |
cudaGetDeviceProperties(&prop,0); | |
int n = ROWS; | |
processamentoGPU(n); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment