CTC Torch test
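A minimal GPU smoke test for warp-ctc: it pushes a small hand-written batch of activations through compute_ctc_loss on a CUDA stream and prints the resulting per-sequence costs and gradients. The API used here (ctcOptions, get_workspace_size, compute_ctc_loss) is the C interface declared in warp-ctc's ctc.h.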
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <cuda_runtime.h>
#include "ctc.h"
#define CTC_CHECK(expr) abort_on_error((expr), __LINE__, __func__)
#define CUDA_CHECK(expr) cuda_abort_on_error((expr), __LINE__, __func__)
typedef struct ctcOptions ctcOptions;
static inline void abort_on_error(ctcStatus_t status, int line, const char * function_name)
{
    if (status != CTC_STATUS_SUCCESS)
    {
        const char * error_msg = ctcGetStatusString(status);
        fprintf(stderr, "[%s:%d] CTC error: %s\n", function_name, line, error_msg);
        exit(-1);
    }
}
static inline void cuda_abort_on_error(cudaError_t status, int line, const char * function_name)
{
    if (cudaSuccess != status)
    {
        const char * error_msg = cudaGetErrorString(status);
        fprintf(stderr, "[%s:%d] CUDA error: %s\n", function_name, line, error_msg);
        exit(-1);
    }
}
int main()
{
    const int alphabet_size = 5;
    const int minibatch_size = 3;
    const int num_activations = 3; // timesteps per sequence
    // 3 timesteps x 3 minibatch entries x 5-dimensional vectors, linearized
    const size_t activations_size = alphabet_size * minibatch_size * num_activations * sizeof(float);
    float activations_base[45] = {   0,   0,   0,   0,   0,
                                     1,   2,   3,   4,   5,
                                    -5,  -4,  -3,  -2,  -1,
                                     0,   0,   0,   0,   0,
                                     6,   7,   8,   9,  10,
                                   -10,  -9,  -8,  -7,  -6,
                                     0,   0,   0,   0,   0,
                                    11,  12,  13,  14,  15,
                                   -15, -14, -13, -12, -11 };
    float * activations = &(activations_base[0]);
    const int labels[5] = { 1, 3, 3, 2, 3 };
    const int label_lengths[3] = { 1, 2, 2 };
    int lengths[3] = { 1, 3, 3 }; // input length (timesteps) per minibatch entry
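    // labels is the concatenation of every sequence's label string, split
    // according to label_lengths: sequence 0 -> {1}, sequence 1 -> {3, 3},
    // sequence 2 -> {2, 3}. Label 0 is reserved for the CTC blank, so it
    // never appears here.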
    // Set up a CUDA stream for the computation
    cudaStream_t stream;
    CUDA_CHECK( cudaStreamCreate(&stream) );
    // Allocate GPU memory
    float * activations_gpu = NULL,
          * costs_gpu = NULL,
          * grads_gpu = NULL;
    CUDA_CHECK( cudaMalloc( &activations_gpu, activations_size ) );
    CUDA_CHECK( cudaMalloc( &costs_gpu, minibatch_size * sizeof(float) ) );
    CUDA_CHECK( cudaMalloc( &grads_gpu, activations_size ) );
    // Move activations data to the GPU
    CUDA_CHECK( cudaMemcpyAsync( activations_gpu, activations, activations_size,
                                 cudaMemcpyHostToDevice, stream ) );
    // Initialize warp-ctc options
    ctcOptions ctc_options;
    memset( &ctc_options, 0, sizeof(ctcOptions) );
    ctc_options.loc = CTC_GPU;
    ctc_options.stream = stream;
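    // The memset above zero-initializes the remaining fields, so blank_label
    // stays 0, warp-ctc's default blank symbol. A CPU run would instead set
    // ctc_options.loc = CTC_CPU and ctc_options.num_threads.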
    // Compute the CTC workspace size
    size_t gpu_alloc_bytes = 0;
    CTC_CHECK( get_workspace_size( &(label_lengths[0]), &(lengths[0]),
        alphabet_size, minibatch_size, ctc_options, &gpu_alloc_bytes ) );
    // Allocate the CTC workspace
    void * ctc_gpu_workspace = NULL;
    CUDA_CHECK( cudaMalloc( &ctc_gpu_workspace, gpu_alloc_bytes ) );
    // Compute the CTC loss
    CTC_CHECK( compute_ctc_loss( activations_gpu, grads_gpu, &(labels[0]),
        &(label_lengths[0]), &(lengths[0]), alphabet_size, minibatch_size, costs_gpu,
        ctc_gpu_workspace, ctc_options ) );
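    // compute_ctc_loss writes one negative log-likelihood per sequence into
    // costs_gpu, and the gradient with respect to the (pre-softmax)
    // activations, with the same layout as activations_gpu, into grads_gpu.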
    // Move results from the GPU to the host
    float * grads_host = NULL,
          * costs_host = NULL;
    CUDA_CHECK( cudaMallocHost( &grads_host, activations_size ) );
    CUDA_CHECK( cudaMallocHost( &costs_host, minibatch_size * sizeof(float) ) );
    CUDA_CHECK( cudaMemcpyAsync( grads_host, grads_gpu, activations_size,
                                 cudaMemcpyDeviceToHost, stream ) );
    CUDA_CHECK( cudaMemcpyAsync( costs_host, costs_gpu, minibatch_size * sizeof(float),
                                 cudaMemcpyDeviceToHost, stream ) );
    // Synchronize the stream to ensure the copies have finished
    CUDA_CHECK( cudaStreamSynchronize(stream) );
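    // grads_host mirrors the activations layout, time-major:
    // [num_activations][minibatch_size][alphabet_size].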
fprintf( stdout, "Costs: [ " );
for( size_t i = 0; i < minibatch_size; ++i )
{
fprintf( stdout, "%4.10g ", costs_host[i] );
}
fprintf(stdout, "]\n" );
fprintf( stdout, "Gradients: [\n" );
for( size_t k = 0; k < num_activations; ++k)
{
fprintf( stdout, "[" );
for( size_t j = 0; j < minibatch_size; ++j)
{
fprintf( stdout, "[ " );
for( size_t i = 0; i < alphabet_size; ++i )
{
fprintf( stdout, "%4.10g\t", grads_host[i + j*alphabet_size + k*minibatch_size*alphabet_size] );
}
fprintf(stdout, "]\n" );
}
fprintf( stdout, "]\n" );
}
fprintf(stdout, "]\n" );
    // Deallocate host memory
    CUDA_CHECK( cudaFreeHost( grads_host ) );
    CUDA_CHECK( cudaFreeHost( costs_host ) );
    // Deallocate GPU memory and destroy the stream
    CUDA_CHECK( cudaFree( activations_gpu ) );
    CUDA_CHECK( cudaFree( costs_gpu ) );
    CUDA_CHECK( cudaFree( grads_gpu ) );
    CUDA_CHECK( cudaFree( ctc_gpu_workspace ) );
    CUDA_CHECK( cudaStreamDestroy(stream) );
    return 0;
}