Example usage of cuDNN convolution forward functions.
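The program below builds a 1x1x5x5 NCHW input tensor filled with 0, 1, 2, ..., a 1x1x2x2 filter filled with ones, and a convolution descriptor with padding 1, stride 1 and dilation 1. It then queries the output shape, picks a forward algorithm, allocates the workspace, runs cudnnConvolutionForward, and prints the input, filter and output tensors. Assuming a working CUDA toolkit and cuDNN installation, it should build with something along the lines of nvcc example.cu -o example -lcudnn, with the file name adjusted to wherever you save it.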
#include <iomanip>
#include <iostream>
#include <cstdlib>
#include <vector>

#include <cuda.h>
#include <cudnn.h>
// Abort the program if a CUDA runtime call fails.
#define CUDA_CALL(f) { \
  cudaError_t err = (f); \
  if (err != cudaSuccess) { \
    std::cout << "CUDA error occurred: " << err << std::endl; \
    std::exit(1); \
  } \
}

// Abort the program if a cuDNN call fails.
#define CUDNN_CALL(f) { \
  cudnnStatus_t err = (f); \
  if (err != CUDNN_STATUS_SUCCESS) { \
    std::cout << "cuDNN error occurred: " << err << std::endl; \
    std::exit(1); \
  } \
}
// Set each element of px (one element per thread) to the constant k.
__global__ void dev_const(float *px, float k) {
  int tid = threadIdx.x + blockIdx.x * blockDim.x;
  px[tid] = k;
}

// Set each element of px to its own linear index: 0, 1, 2, ...
__global__ void dev_iota(float *px) {
  int tid = threadIdx.x + blockIdx.x * blockDim.x;
  px[tid] = tid;
}
// Copy an n x c x h x w tensor from device memory and print it channel by channel.
void print(const float *data, int n, int c, int h, int w) {
  std::vector<float> buffer(1 << 20);
  CUDA_CALL(cudaMemcpy(
        buffer.data(), data,
        n * c * h * w * sizeof(float),
        cudaMemcpyDeviceToHost));
  int a = 0;
  for (int i = 0; i < n; ++i) {
    for (int j = 0; j < c; ++j) {
      std::cout << "n=" << i << ", c=" << j << ":" << std::endl;
      for (int k = 0; k < h; ++k) {
        for (int l = 0; l < w; ++l) {
          std::cout << std::setw(4) << std::right << buffer[a];
          ++a;
        }
        std::cout << std::endl;
      }
    }
  }
  std::cout << std::endl;
}
int main() {
  cudnnHandle_t cudnn;
  CUDNN_CALL(cudnnCreate(&cudnn));

  // input
  const int in_n = 1;
  const int in_c = 1;
  const int in_h = 5;
  const int in_w = 5;
  std::cout << "in_n: " << in_n << std::endl;
  std::cout << "in_c: " << in_c << std::endl;
  std::cout << "in_h: " << in_h << std::endl;
  std::cout << "in_w: " << in_w << std::endl;
  std::cout << std::endl;

  cudnnTensorDescriptor_t in_desc;
  CUDNN_CALL(cudnnCreateTensorDescriptor(&in_desc));
  CUDNN_CALL(cudnnSetTensor4dDescriptor(
        in_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
        in_n, in_c, in_h, in_w));

  float *in_data;
  CUDA_CALL(cudaMalloc(
        &in_data, in_n * in_c * in_h * in_w * sizeof(float)));

  // filter
  const int filt_k = 1;
  const int filt_c = 1;
  const int filt_h = 2;
  const int filt_w = 2;
  std::cout << "filt_k: " << filt_k << std::endl;
  std::cout << "filt_c: " << filt_c << std::endl;
  std::cout << "filt_h: " << filt_h << std::endl;
  std::cout << "filt_w: " << filt_w << std::endl;
  std::cout << std::endl;

  cudnnFilterDescriptor_t filt_desc;
  CUDNN_CALL(cudnnCreateFilterDescriptor(&filt_desc));
  CUDNN_CALL(cudnnSetFilter4dDescriptor(
        filt_desc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
        filt_k, filt_c, filt_h, filt_w));

  float *filt_data;
  CUDA_CALL(cudaMalloc(
        &filt_data, filt_k * filt_c * filt_h * filt_w * sizeof(float)));

  // convolution
  const int pad_h = 1;
  const int pad_w = 1;
  const int str_h = 1;
  const int str_w = 1;
  const int dil_h = 1;
  const int dil_w = 1;
  std::cout << "pad_h: " << pad_h << std::endl;
  std::cout << "pad_w: " << pad_w << std::endl;
  std::cout << "str_h: " << str_h << std::endl;
  std::cout << "str_w: " << str_w << std::endl;
  std::cout << "dil_h: " << dil_h << std::endl;
  std::cout << "dil_w: " << dil_w << std::endl;
  std::cout << std::endl;

  cudnnConvolutionDescriptor_t conv_desc;
  CUDNN_CALL(cudnnCreateConvolutionDescriptor(&conv_desc));
  CUDNN_CALL(cudnnSetConvolution2dDescriptor(
        conv_desc,
        pad_h, pad_w, str_h, str_w, dil_h, dil_w,
        CUDNN_CONVOLUTION, CUDNN_DATA_FLOAT));

  // output
  int out_n;
  int out_c;
  int out_h;
  int out_w;
  CUDNN_CALL(cudnnGetConvolution2dForwardOutputDim(
        conv_desc, in_desc, filt_desc,
        &out_n, &out_c, &out_h, &out_w));
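  // cuDNN computes each output spatial size as
  //   out = 1 + (in + 2 * pad - ((filt - 1) * dil + 1)) / stride,
  // so with a 5x5 input, 2x2 filter, padding 1, stride 1 and dilation 1
  // this gives 1 + (5 + 2 - 2) / 1 = 6, i.e. a 1x1x6x6 output.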
std::cout << "out_n: " << out_n << std::endl; | |
std::cout << "out_c: " << out_c << std::endl; | |
std::cout << "out_h: " << out_h << std::endl; | |
std::cout << "out_w: " << out_w << std::endl; | |
std::cout << std::endl; | |
cudnnTensorDescriptor_t out_desc; | |
CUDNN_CALL(cudnnCreateTensorDescriptor(&out_desc)); | |
CUDNN_CALL(cudnnSetTensor4dDescriptor( | |
out_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, | |
out_n, out_c, out_h, out_w)); | |
float *out_data; | |
CUDA_CALL(cudaMalloc( | |
&out_data, out_n * out_c * out_h * out_w * sizeof(float))); | |
  // algorithm (cuDNN 7 API; see the note after this listing for cuDNN 8 and later)
  cudnnConvolutionFwdAlgo_t algo;
  CUDNN_CALL(cudnnGetConvolutionForwardAlgorithm(
        cudnn,
        in_desc, filt_desc, conv_desc, out_desc,
        CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, 0, &algo));
  std::cout << "Convolution algorithm: " << algo << std::endl;
  std::cout << std::endl;
  // workspace
  size_t ws_size;
  CUDNN_CALL(cudnnGetConvolutionForwardWorkspaceSize(
        cudnn, in_desc, filt_desc, conv_desc, out_desc, algo, &ws_size));

  float *ws_data;
  CUDA_CALL(cudaMalloc(&ws_data, ws_size));

  std::cout << "Workspace size: " << ws_size << std::endl;
  std::cout << std::endl;
  // perform
  // Fill the input with 0, 1, 2, ... and the filter with ones, then run the
  // forward convolution; cuDNN computes out = alpha * conv(in, filt) + beta * out.
  float alpha = 1.f;
  float beta = 0.f;
  dev_iota<<<in_w * in_h, in_n * in_c>>>(in_data);
  dev_const<<<filt_w * filt_h, filt_k * filt_c>>>(filt_data, 1.f);
  CUDNN_CALL(cudnnConvolutionForward(
      cudnn,
      &alpha, in_desc, in_data, filt_desc, filt_data,
      conv_desc, algo, ws_data, ws_size,
      &beta, out_desc, out_data));

  // results
  std::cout << "in_data:" << std::endl;
  print(in_data, in_n, in_c, in_h, in_w);

  std::cout << "filt_data:" << std::endl;
  print(filt_data, filt_k, filt_c, filt_h, filt_w);

  std::cout << "out_data:" << std::endl;
  print(out_data, out_n, out_c, out_h, out_w);

  // finalizing
  CUDA_CALL(cudaFree(ws_data));
  CUDA_CALL(cudaFree(out_data));
  CUDNN_CALL(cudnnDestroyTensorDescriptor(out_desc));
  CUDNN_CALL(cudnnDestroyConvolutionDescriptor(conv_desc));
  CUDA_CALL(cudaFree(filt_data));
  CUDNN_CALL(cudnnDestroyFilterDescriptor(filt_desc));
  CUDA_CALL(cudaFree(in_data));
  CUDNN_CALL(cudnnDestroyTensorDescriptor(in_desc));
  CUDNN_CALL(cudnnDestroy(cudnn));

  return 0;
}
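Note on the algorithm-selection step: cudnnGetConvolutionForwardAlgorithm with CUDNN_CONVOLUTION_FWD_PREFER_FASTEST is the cuDNN 7-era API and is no longer available in cuDNN 8 and later. Below is a minimal sketch, not part of the original listing, of how the same choice can be made there with cudnnFindConvolutionForwardAlgorithm; it assumes the cudnn handle and the in_desc, filt_desc, conv_desc and out_desc descriptors created in main() above.

  // Sketch only (cuDNN 8+): benchmark the available forward algorithms and
  // take the fastest one that actually ran for this configuration.
  cudnnConvolutionFwdAlgoPerf_t perf[CUDNN_CONVOLUTION_FWD_ALGO_COUNT];
  int n_returned = 0;
  CUDNN_CALL(cudnnFindConvolutionForwardAlgorithm(
        cudnn, in_desc, filt_desc, conv_desc, out_desc,
        CUDNN_CONVOLUTION_FWD_ALGO_COUNT, &n_returned, perf));
  // Results are sorted by measured time; check that the best entry succeeded
  // before using it.
  CUDNN_CALL(perf[0].status);
  cudnnConvolutionFwdAlgo_t algo = perf[0].algo;

cudnnGetConvolutionForwardAlgorithm_v7 returns results in the same format and is a heuristic-based alternative that skips the benchmarking runs.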