Created
October 15, 2018 09:02
-
-
Save andrewssobral/42ccdd84b65b55f5b8b47ad30e9c82d5 to your computer and use it in GitHub Desktop.
SpatialUpSamplingBilinear.cu
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef THC_GENERIC_FILE | |
#define THC_GENERIC_FILE "generic/SpatialUpSamplingBilinear.cu" | |
#else | |
#include "../linear_upsampling.h" | |
//#define MAX_THREADS_PER_BLOCK 256 | |
//#define MIN_BLOCKS_PER_MP 2 | |
//__launch_bounds__(MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP) | |
//#define MAX_THREADS_PER_BLOCK 512 | |
//__launch_bounds__(MAX_THREADS_PER_BLOCK) | |
static inline void THNN_(SpatialUpSamplingBilinear_shapeCheck) | |
(THCState *state, | |
THCTensor *input, THCTensor *gradOutput, | |
int nBatch, int nChannels, | |
int inputHeight, int inputWidth, | |
int outputHeight, int outputWidth) { | |
THArgCheck(inputHeight > 0 && inputWidth > 0 | |
&& outputHeight > 0 && outputWidth > 0, 2, | |
"input and output sizes should be greater than 0," | |
" but got input (H: %d, W: %d) output (H: %d, W: %d)", | |
inputHeight, inputWidth, outputHeight, outputWidth); | |
if (input != NULL) { | |
THCUNN_argCheck(state, !input->is_empty() && input->dim() == 4, 2, input, | |
"non-empty 4D input tensor expected but got: %s"); | |
} | |
if (gradOutput != NULL) { | |
THCUNN_check_dim_size(state, gradOutput, 4, 0, nBatch); | |
THCUNN_check_dim_size(state, gradOutput, 4, 1, nChannels); | |
THCUNN_check_dim_size(state, gradOutput, 4, 2, outputHeight); | |
THCUNN_check_dim_size(state, gradOutput, 4, 3, outputWidth); | |
} | |
} | |
void THNN_(SpatialUpSamplingBilinear_updateOutput)( | |
THCState *state, | |
THCTensor *input, | |
THCTensor *output, | |
int outputHeight, | |
int outputWidth, | |
bool align_corners) | |
{ | |
int nbatch = THCTensor_(size)(state, input, 0); | |
int channels = THCTensor_(size)(state, input, 1); | |
int inputHeight = THCTensor_(size)(state, input, 2); | |
int inputWidth = THCTensor_(size)(state, input, 3); | |
THNN_(SpatialUpSamplingBilinear_shapeCheck) | |
(state, input, NULL, | |
nbatch, channels, | |
inputHeight, inputWidth, | |
outputHeight, outputWidth); | |
THCUNN_assertSameGPU(state, 2, input, output); | |
THCTensor_(resize4d)(state, output, | |
THCTensor_(size)(state, input, 0), | |
THCTensor_(size)(state, input, 1), | |
outputHeight, outputWidth); | |
THCTensor_(zero)(state, output); | |
THCDeviceTensor<real, 4> idata = toDeviceTensor<real, 4>(state, input); | |
THCDeviceTensor<real, 4> odata = toDeviceTensor<real, 4>(state, output); | |
THAssert(inputHeight > 0 && inputWidth > 0 && outputHeight > 0 && outputWidth > 0); | |
const accreal rheight = linear_upsampling_compute_scale<accreal>(inputHeight, outputHeight, align_corners); | |
const accreal rwidth = linear_upsampling_compute_scale<accreal>(inputWidth, outputWidth, align_corners); | |
const int num_kernels = outputHeight * outputWidth; | |
//const int num_threads = THCState_getCurrentDeviceProperties(state)->maxThreadsPerBlock; | |
const int num_threads = 512; | |
cudaStream_t stream = THCState_getCurrentStream(state); | |
caffe_gpu_interp2_kernel<real, accreal> <<<THCCeilDiv(num_kernels, num_threads), num_threads , | |
0 , stream>>>(num_kernels, rheight, rwidth, align_corners, idata, odata); | |
THCudaCheck(cudaGetLastError()); | |
} | |
void THNN_(SpatialUpSamplingBilinear_updateGradInput)( | |
THCState *state, | |
THCTensor *gradOutput, | |
THCTensor *gradInput, | |
int nbatch, | |
int nchannels, | |
int inputHeight, | |
int inputWidth, | |
int outputHeight, | |
int outputWidth, | |
bool align_corners) | |
{ | |
THNN_(SpatialUpSamplingBilinear_shapeCheck) | |
(state, NULL, gradOutput, | |
nbatch, nchannels, | |
inputHeight, inputWidth, | |
outputHeight, outputWidth); | |
gradOutput = THCTensor_(newContiguous)(state, gradOutput); | |
THCUNN_assertSameGPU(state, 2, gradOutput, gradInput); | |
THCTensor_(resize4d)(state, gradInput, nbatch, nchannels, inputHeight, inputWidth); | |
THCTensor_(zero)(state, gradInput); | |
THCDeviceTensor<real, 4> data1 = toDeviceTensor<real, 4>(state, gradInput); | |
THCDeviceTensor<real, 4> data2 = toDeviceTensor<real, 4>(state, gradOutput); | |
const accreal rheight = linear_upsampling_compute_scale<accreal>(inputHeight, outputHeight, align_corners); | |
const accreal rwidth = linear_upsampling_compute_scale<accreal>(inputWidth, outputWidth, align_corners); | |
const int num_kernels = outputHeight * outputWidth; | |
//const int num_threads = THCState_getCurrentDeviceProperties(state)->maxThreadsPerBlock; | |
const int num_threads = 512; | |
cudaStream_t stream = THCState_getCurrentStream(state); | |
caffe_gpu_interp2_kernel_backward<real ,accreal> <<<THCCeilDiv(num_kernels, num_threads), | |
num_threads, 0, stream>>>(num_kernels, rheight, rwidth, align_corners, data1, data2); | |
THCudaCheck(cudaGetLastError()); | |
THCTensor_(free)(state, gradOutput); | |
} | |
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment