Created
December 4, 2018 15:51
-
-
Save fede-vaccaro/d26cc37b3752a88d514f1ad330b74f19 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "cuda_runtime.h" | |
#include "device_launch_parameters.h" | |
#include "cuda_runtime_api.h" | |
// Compile-time configuration used by the convolution kernel.
#ifndef USEFULDEFINES
#define USEFULDEFINES
// Side length of the square convolution mask.
#define Mask_width 3
// Half of the mask width (integer division). Parenthesized so the macro
// expands safely inside larger expressions such as `2 * Mask_radius`.
#define Mask_radius (Mask_width / 2)
// Side length of the square output tile handled by one thread block.
#define TILE_WIDTH 16
// Side length of the shared-memory tile: the output tile plus the mask halo.
// NOTE: this macro rewrites every identifier spelled exactly `w` that follows it.
#define w (TILE_WIDTH + Mask_width - 1)
// Clamps x to the range [0, 1]. float literals keep float arguments in single
// precision instead of silently promoting the arithmetic to double.
#define clamp(x) (min(max((x), 0.0f), 1.0f))
#endif // USEFULDEFINES
// Tiled 2D convolution of an interleaved multi-channel image using shared memory.
//
// Launch layout: the thread block must be (w, w) threads, i.e. a
// TILE_WIDTH x TILE_WIDTH output tile surrounded by a halo of Mask_width / 2
// pixels. Every thread loads one pixel of the padded tile; only the inner
// TILE_WIDTH x TILE_WIDTH threads compute and store a result. Block
// (blockIdx.x, blockIdx.y) produces the output tile whose top-left pixel is
// (blockIdx.x * TILE_WIDTH, blockIdx.y * TILE_WIDTH), so the grid needs at
// least ceil(width / TILE_WIDTH) x ceil(height / TILE_WIDTH) blocks to cover
// the image.
//
// Parameters:
//   I         input image, indexed as (row * width + col) * channels + k
//   M         Mask_width x Mask_width filter in row-major order; it is applied
//             without flipping (M[i][j] weights the pixel at offset
//             (i - radius, j - radius) from the output pixel)
//   P         output image, same layout as I; every stored value is clamped to
//             [0, 1]. P must not be the same buffer as I, because neighbouring
//             blocks read halo pixels that this block writes.
//   channels  number of interleaved channels per pixel
//   width     image width in pixels
//   height    image height in pixels
//
// Pixels outside the image are treated as zero.
__global__ void convolution(float *I, const float *__restrict__ M, float *P,
                            int channels, int width, int height) {
    __shared__ float localPattern[w][w];

    const int radius = Mask_width / 2;
    const int tx = threadIdx.x;
    const int ty = threadIdx.y;

    // Image coordinates of the pixel this thread loads. For the inner
    // (non-halo) threads this is also the pixel they produce: the accumulation
    // below is centred on localPattern[ty][tx].
    const int col = blockIdx.x * TILE_WIDTH + tx - radius;
    const int row = blockIdx.y * TILE_WIDTH + ty - radius;
    const bool inImage = col >= 0 && col < width && row >= 0 && row < height;

    // Flat offset of channel 0 of this pixel, computed in size_t so that large
    // images do not overflow int. Only dereferenced when inImage is true.
    const size_t pixel =
        inImage ? (static_cast<size_t>(row) * width + col) * channels : 0;

    // Halo threads only feed shared memory; they never write to P.
    const bool isWriter = tx >= radius && tx < TILE_WIDTH + radius &&
                          ty >= radius && ty < TILE_WIDTH + radius;

    for (int k = 0; k < channels; k++) {
        // Load the padded tile for channel k, zero-filling outside the image.
        localPattern[ty][tx] = inImage ? I[pixel + k] : 0.0f;
        __syncthreads();

        if (isWriter && inImage) {
            float value = 0.0f;
            for (int i = 0; i < Mask_width; ++i) {
                for (int j = 0; j < Mask_width; ++j) {
                    value += localPattern[ty + i - radius][tx + j - radius] *
                             M[i * Mask_width + j];
                }
            }
            P[pixel + k] = fminf(fmaxf(value, 0.0f), 1.0f);
        }

        // All reads of this channel's tile must finish before any thread
        // overwrites it with the next channel.
        __syncthreads();
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment