Created
December 16, 2015 19:37
-
-
Save kevinrobinson/60b9364f83ffe44a7aaa to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// GPU specialization of LaunchConvOp.
//
// NOTE(review): this is an excerpt — several sections are elided with
// "..." and the following names are declared in the omitted code:
// no_transpose, n, m, k, a_ptr, b_ptr, c_ptr, input_ptr, filter_ptr,
// output_ptr. The enclosing template header (e.g. `template <typename T>`)
// is also outside this snippet.
struct LaunchConvOp<GPUDevice, T> {
  // Runs a 2-D convolution of `input_param` with `filter` on the GPU
  // stream owned by `ctx`, writing the result into `output`.
  //
  // ctx:          kernel context; supplies the GPU stream and receives
  //               an error status on launch failure.
  // use_cudnn:    when true, dispatch via StreamExecutor to cuBLAS/cuDNN;
  //               otherwise fall back to the (elided) Eigen path.
  // input_param:  input tensor; indexed here as
  //               (batch, height, width, feature maps) — NHWC.
  // filter:       filter tensor; indexed here as
  //               (height, width, in feature maps, out feature maps).
  // stride:       filter stride, applied to both spatial dimensions.
  // padding:      Eigen padding type (consumed in the elided code —
  //               not referenced in the visible lines).
  // output:       pre-shaped NHWC output tensor.
  static void launch(OpKernelContext* ctx, bool use_cudnn,
                     const Tensor& input_param, const Tensor& filter,
                     int stride, const Eigen::PaddingType& padding,
                     Tensor* output) {
    auto* stream = ctx->op_device_context<GPUDeviceContext>()->stream();
    // Choose between the StreamExecutor (cuDNN/cuBLAS) path and the
    // Eigen fallback. NOTE(review): the original comment mentions
    // checking CUDA platform registration; that check is not visible in
    // this excerpt — only the `use_cudnn` flag is consulted here.
    if (use_cudnn) {
      Tensor input = input_param;
      if (filter.dim_size(0) == 1 && filter.dim_size(1) == 1) {
        // A 1x1 convolution is exactly a matrix multiply over the
        // feature-map dimension, so skip cuDNN and call cuBLAS GEMM
        // directly. (no_transpose, n/m/k and the *_ptr operands are set
        // up in elided code; the launch status is presumably checked
        // there too — it is unused in the visible lines.)
        bool blas_launch_status = stream->ThenBlasGemm(no_transpose, no_transpose, n, m, k, 1.0f, b_ptr, n, a_ptr, k, 0.0f, &c_ptr, n).ok();
        return;
      }
      // Elided: padding of the input to align memory access before the
      // cuDNN call.
      // We're going to use a CUDA-optimized convolution method that can
      // perform a larger batch of work at once.
      // Describe the input and output tensors to StreamExecutor.
      // kBatchYXDepth corresponds to the NHWC indexing used above.
      perftools::gputools::dnn::BatchDescriptor input_desc;
      input_desc.set_count(input.dim_size(0))
          .set_height(input.dim_size(1))
          .set_width(input.dim_size(2))
          .set_feature_map_count(input.dim_size(3))
          .set_layout(perftools::gputools::dnn::DataLayout::kBatchYXDepth);
      perftools::gputools::dnn::BatchDescriptor output_desc;
      output_desc.set_count(output->dim_size(0))
          .set_height(output->dim_size(1))
          .set_width(output->dim_size(2))
          .set_feature_map_count(output->dim_size(3))
          .set_layout(perftools::gputools::dnn::DataLayout::kBatchYXDepth);
      // Describe the filter and the convolution itself. The single
      // `stride` argument is applied to both spatial dimensions; padding
      // is not set on conv_desc in the visible lines (presumably handled
      // in the elided padding step — TODO confirm).
      perftools::gputools::dnn::FilterDescriptor filter_desc;
      filter_desc.set_input_filter_height(filter.dim_size(0))
          .set_input_filter_width(filter.dim_size(1))
          .set_input_feature_map_count(filter.dim_size(2))
          .set_output_feature_map_count(filter.dim_size(3));
      perftools::gputools::dnn::ConvolutionDescriptor conv_desc;
      conv_desc.set_vertical_filter_stride(stride)
          .set_horizontal_filter_stride(stride);
      // Elided: allocate device memory and transform some shapes
      // (defines input_ptr / filter_ptr / output_ptr).
      // Launch the convolution operation on the CUDA GPU.
      // This will delegate in turn to the DnnSupport plugin (cuDNN).
      bool cudnn_launch_status = stream->ThenConvolve(input_desc, input_ptr, filter_desc, filter_ptr, conv_desc, output_desc, &output_ptr).ok();
      if (!cudnn_launch_status) {
        // Surface the failure through the kernel context rather than
        // crashing; the shapes are included to make the error actionable.
        ctx->SetStatus(errors::Internal("cuDNN launch failure : input shape(", input.shape().DebugString(),") filter shape(", filter.shape().DebugString(), ")"));
      }
    } else {
      // Elided: fallback that evaluates the convolution with Eigen on
      // the GPU device.
    }
  }
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment