Skip to content

Instantly share code, notes, and snippets.

@csullivan
Created August 28, 2018 21:10
Show Gist options
  • Save csullivan/5681b7f5c83592e589b13f204d8987aa to your computer and use it in GitHub Desktop.
Save csullivan/5681b7f5c83592e589b13f204d8987aa to your computer and use it in GitHub Desktop.
// Generated by the nGraph GPU backend
#include <cublas_v2.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cudnn.h>
#include "ngraph/descriptor/input.hpp"
#include "ngraph/descriptor/layout/dense_tensor_view_layout.hpp"
#include "ngraph/descriptor/output.hpp"
#include "ngraph/descriptor/primary_tensor_view.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/function.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/node.hpp"
#include "ngraph/pass/assign_layout.hpp"
#include "ngraph/pass/dump_sorted.hpp"
#include "ngraph/pass/liveness.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/memory_layout.hpp"
#include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/runtime/gpu/cudnn_descriptors.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_ops.hpp"
#include "ngraph/runtime/gpu/gpu_invoke.hpp"
#include "ngraph/runtime/gpu/gpu_runtime_context.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp"
#include "ngraph/util.hpp"
using namespace ngraph;
using namespace ngraph::runtime;
using namespace std;
void *__dso_handle = 0;
static gpu::GPURuntimeContext* m_runtime_context = nullptr;
// Declare all constants
static size_t Constant_18_0_idx = 88;
static float* Constant_18_0 = nullptr;
static size_t Constant_14_0_idx = 89;
static float* Constant_14_0 = nullptr;
static size_t Constant_11_0_idx = 90;
static float* Constant_11_0 = nullptr;
static size_t Constant_12_0_idx = 91;
static float* Constant_12_0 = nullptr;
static bool is_constant_mem_ptr_null = true;
static void invoke_constant_mem_ptr()
{
if(is_constant_mem_ptr_null)
{
Constant_18_0 = reinterpret_cast<float*>(runtime::gpu::invoke_memory_primitive(m_runtime_context, Constant_18_0_idx));
Constant_14_0 = reinterpret_cast<float*>(runtime::gpu::invoke_memory_primitive(m_runtime_context, Constant_14_0_idx));
Constant_11_0 = reinterpret_cast<float*>(runtime::gpu::invoke_memory_primitive(m_runtime_context, Constant_11_0_idx));
Constant_12_0 = reinterpret_cast<float*>(runtime::gpu::invoke_memory_primitive(m_runtime_context, Constant_12_0_idx));
is_constant_mem_ptr_null = false;
}
}
// Declare all functions
extern "C" void Function_0(void** inputs, void** outputs, gpu::GPURuntimeContext* ctx);
static void func_Concat_1023(
float* _arg0,
float* _arg1,
float* _out2,
gpu::GPURuntimeContext* ctx
)
{
gpu::invoke_primitive(ctx, 1, std::vector<void*>{_arg0, _arg1}.data(), std::vector<void*>{_out2}.data());
}
static void func_Concat_1021(
float* _arg0,
float* _arg1,
float* _out2,
gpu::GPURuntimeContext* ctx
)
{
gpu::invoke_primitive(ctx, 2, std::vector<void*>{_arg0, _arg1}.data(), std::vector<void*>{_out2}.data());
}
static void func_Broadcast_174(
float* _arg0,
float* _out1,
gpu::GPURuntimeContext* ctx
)
{
gpu::invoke_primitive(ctx, 13, std::vector<void*>{_arg0}.data(), std::vector<void*>{_out1}.data());
}
static void func_Reshape_231(
float* _arg0,
float* _out1,
gpu::GPURuntimeContext* ctx
)
{
runtime::gpu::cuda_memcpyDtD(_out1, _arg0, 6400 * 4);
}
static void func_Concat_1019(
float* _arg0,
float* _arg1,
float* _out2,
gpu::GPURuntimeContext* ctx
)
{
gpu::invoke_primitive(ctx, 14, std::vector<void*>{_arg0, _arg1}.data(), std::vector<void*>{_out2}.data());
}
extern "C" void Function_0(void** inputs, void** outputs, gpu::GPURuntimeContext* ctx)
{
m_runtime_context = ctx;
invoke_constant_mem_ptr();
// Allocate the memory pool
void* pool_base_ptr = ngraph::runtime::gpu::invoke_memory_primitive(ctx, 0);
// Broadcast_75(Parameter_0_0, Broadcast_75_0)
gpu::invoke_primitive(ctx, 0, std::vector<void*>{((float*)(inputs[10]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 0))}.data());
// Reshape_76(Parameter_1_0, Reshape_76_0)
{
const float alpha = 1.0;
const float beta = 0;
CUBLAS_SAFE_CALL(cublasSetPointerMode(*ctx->cublas_handle, CUBLAS_POINTER_MODE_HOST));
CUBLAS_SAFE_CALL(cublasSgeam(*ctx->cublas_handle,CUBLAS_OP_T,CUBLAS_OP_T,10000,200,&alpha,((float*)(inputs[9])),200,&beta,((float*)(inputs[9])),200,((float*)((char *)pool_base_ptr + 12800000)),10000));
CUBLAS_SAFE_CALL(cublasSetPointerMode(*ctx->cublas_handle, CUBLAS_POINTER_MODE_DEVICE));
}
// Concat_1023(Parameter_6_0, Parameter_2_0, Concat_1023_0)
func_Concat_1023(((float*)(inputs[4])), ((float*)(inputs[8])), ((float*)((char *)pool_base_ptr + 20800000)), ctx);
// Concat_1021(Parameter_7_0, Parameter_3_0, Concat_1021_0)
func_Concat_1021(((float*)(inputs[3])), ((float*)(inputs[7])), ((float*)((char *)pool_base_ptr + 20806400)), ctx);
// Concat_1022(Parameter_8_0, Parameter_4_0, Concat_1022_0)
func_Concat_1023(((float*)(inputs[2])), ((float*)(inputs[6])), ((float*)((char *)pool_base_ptr + 22086400)), ctx);
// Concat_1020(Parameter_9_0, Parameter_5_0, Concat_1020_0)
func_Concat_1021(((float*)(inputs[1])), ((float*)(inputs[5])), ((float*)((char *)pool_base_ptr + 22092800)), ctx);
// Slice_157(Parameter_10_0, Slice_157_0)
{
gpu::invoke_primitive(ctx, 3, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23372800))}.data());
}
// Slice_158(Parameter_10_0, Slice_158_0)
{
gpu::invoke_primitive(ctx, 4, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23398400))}.data());
}
// Slice_159(Parameter_10_0, Slice_159_0)
{
gpu::invoke_primitive(ctx, 5, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23424000))}.data());
}
// Slice_160(Parameter_10_0, Slice_160_0)
{
gpu::invoke_primitive(ctx, 6, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23449600))}.data());
}
// Slice_161(Parameter_10_0, Slice_161_0)
{
gpu::invoke_primitive(ctx, 7, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23475200))}.data());
}
// Slice_162(Parameter_10_0, Slice_162_0)
{
gpu::invoke_primitive(ctx, 8, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23500800))}.data());
}
// Slice_163(Parameter_10_0, Slice_163_0)
{
gpu::invoke_primitive(ctx, 9, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23526400))}.data());
}
// Slice_164(Parameter_10_0, Slice_164_0)
{
gpu::invoke_primitive(ctx, 10, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23552000))}.data());
}
// Slice_165(Parameter_10_0, Slice_165_0)
{
gpu::invoke_primitive(ctx, 11, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23577600))}.data());
}
// Slice_166(Parameter_10_0, Slice_166_0)
{
gpu::invoke_primitive(ctx, 12, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23603200))}.data());
}
// Broadcast_174(Constant_18_0, Broadcast_174_0)
func_Broadcast_174(Constant_18_0, ((float*)((char *)pool_base_ptr + 23628800)), ctx);
// Broadcast_170(Constant_14_0, Broadcast_170_0)
func_Broadcast_174(Constant_14_0, ((float*)((char *)pool_base_ptr + 23654400)), ctx);
// Broadcast_167(Constant_11_0, Broadcast_167_0)
func_Broadcast_174(Constant_11_0, ((float*)((char *)pool_base_ptr + 23680000)), ctx);
// Broadcast_168(Constant_12_0, Broadcast_168_0)
func_Broadcast_174(Constant_12_0, ((float*)((char *)pool_base_ptr + 23705600)), ctx);
// Reshape_231(Slice_157_0, Reshape_231_0)
func_Reshape_231(((float*)((char *)pool_base_ptr + 23372800)), ((float*)((char *)pool_base_ptr + 23731200)), ctx);
// Reshape_232(Slice_158_0, Reshape_232_0)
func_Reshape_231(((float*)((char *)pool_base_ptr + 23398400)), ((float*)((char *)pool_base_ptr + 23372800)), ctx);
// Reshape_233(Slice_159_0, Reshape_233_0)
func_Reshape_231(((float*)((char *)pool_base_ptr + 23424000)), ((float*)((char *)pool_base_ptr + 23398400)), ctx);
// Reshape_234(Slice_160_0, Reshape_234_0)
func_Reshape_231(((float*)((char *)pool_base_ptr + 23449600)), ((float*)((char *)pool_base_ptr + 23424000)), ctx);
// Reshape_235(Slice_161_0, Reshape_235_0)
func_Reshape_231(((float*)((char *)pool_base_ptr + 23475200)), ((float*)((char *)pool_base_ptr + 23449600)), ctx);
// Reshape_236(Slice_162_0, Reshape_236_0)
func_Reshape_231(((float*)((char *)pool_base_ptr + 23500800)), ((float*)((char *)pool_base_ptr + 23475200)), ctx);
// Reshape_237(Slice_163_0, Reshape_237_0)
func_Reshape_231(((float*)((char *)pool_base_ptr + 23526400)), ((float*)((char *)pool_base_ptr + 23500800)), ctx);
// Reshape_238(Slice_164_0, Reshape_238_0)
func_Reshape_231(((float*)((char *)pool_base_ptr + 23552000)), ((float*)((char *)pool_base_ptr + 23526400)), ctx);
// Reshape_239(Slice_165_0, Reshape_239_0)
func_Reshape_231(((float*)((char *)pool_base_ptr + 23577600)), ((float*)((char *)pool_base_ptr + 23552000)), ctx);
// Reshape_240(Slice_166_0, Reshape_240_0)
func_Reshape_231(((float*)((char *)pool_base_ptr + 23603200)), ((float*)((char *)pool_base_ptr + 23577600)), ctx);
// Concat_1019(Broadcast_170_0, Broadcast_174_0, Concat_1019_0)
func_Concat_1019(((float*)((char *)pool_base_ptr + 23654400)), ((float*)((char *)pool_base_ptr + 23628800)), ((float*)((char *)pool_base_ptr + 23756800)), ctx);
// Concat_1018(Broadcast_168_0, Broadcast_167_0, Concat_1018_0)
func_Concat_1019(((float*)((char *)pool_base_ptr + 23705600)), ((float*)((char *)pool_base_ptr + 23680000)), ((float*)((char *)pool_base_ptr + 23603200)), ctx);
// Concat_989(Reshape_240_0, Reshape_231_0, Reshape_232_0, Reshape_233_0, Reshape_234_0, Reshape_235_0, Reshape_236_0, Reshape_237_0, Reshape_238_0, Reshape_239_0, Concat_989_0)
{
gpu::invoke_primitive(ctx, 15, std::vector<void*>{((float*)((char *)pool_base_ptr + 23577600)), ((float*)((char *)pool_base_ptr + 23731200)), ((float*)((char *)pool_base_ptr + 23372800)), ((float*)((char *)pool_base_ptr + 23398400)), ((float*)((char *)pool_base_ptr + 23424000)), ((float*)((char *)pool_base_ptr + 23449600)), ((float*)((char *)pool_base_ptr + 23475200)), ((float*)((char *)pool_base_ptr + 23500800)), ((float*)((char *)pool_base_ptr + 23526400)), ((float*)((char *)pool_base_ptr + 23552000))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23808000))}.data());
}
// Rnn_1024(Concat_989_0, Concat_1018_0, Concat_1020_0, Concat_1021_0, Concat_1022_0, Concat_1023_0, Concat_1019_0, Rnn_1024_0, Rnn_1024_1, Rnn_1024_2)
gpu::invoke_primitive(ctx, 16, std::vector<void*>{((float*)((char *)pool_base_ptr + 23808000)), ((float*)((char *)pool_base_ptr + 23603200)), ((float*)((char *)pool_base_ptr + 22092800)), ((float*)((char *)pool_base_ptr + 20806400)), ((float*)((char *)pool_base_ptr + 22086400)), ((float*)((char *)pool_base_ptr + 20800000)), ((float*)((char *)pool_base_ptr + 23756800))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 24064000)), ((float*)((char *)pool_base_ptr + 23424000)), ((float*)((char *)pool_base_ptr + 23372800))}.data());
// GetOutputElement_1025(Rnn_1024_0, Rnn_1024_1, Rnn_1024_2, GetOutputElement_1025_0)
{
runtime::gpu::cuda_memcpyDtH(((float*)((char *)pool_base_ptr + 20800000)), ((float*)((char *)pool_base_ptr + 24064000)), 256000);
}
// Slice_981(GetOutputElement_1025_0, Slice_981_0)
{
gpu::invoke_primitive(ctx, 17, std::vector<void*>{((float*)((char *)pool_base_ptr + 20800000))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 21056000))}.data());
}
// Slice_979(GetOutputElement_1025_0, Slice_979_0)
{
gpu::invoke_primitive(ctx, 18, std::vector<void*>{((float*)((char *)pool_base_ptr + 20800000))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 21081600))}.data());
}
// Slice_988(GetOutputElement_1025_0, Slice_988_0)
{
gpu::invoke_primitive(ctx, 19, std::vector<void*>{((float*)((char *)pool_base_ptr + 20800000))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 21107200))}.data());
}
// Slice_987(GetOutputElement_1025_0, Slice_987_0)
{
gpu::invoke_primitive(ctx, 20, std::vector<void*>{((float*)((char *)pool_base_ptr + 20800000))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 21132800))}.data());
}
// Slice_986(GetOutputElement_1025_0, Slice_986_0)
{
gpu::invoke_primitive(ctx, 21, std::vector<void*>{((float*)((char *)pool_base_ptr + 20800000))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 21158400))}.data());
}
// Slice_985(GetOutputElement_1025_0, Slice_985_0)
{
gpu::invoke_primitive(ctx, 22, std::vector<void*>{((float*)((char *)pool_base_ptr + 20800000))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 21184000))}.data());
}
// Slice_984(GetOutputElement_1025_0, Slice_984_0)
{
gpu::invoke_primitive(ctx, 23, std::vector<void*>{((float*)((char *)pool_base_ptr + 20800000))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 21209600))}.data());
}
// Slice_983(GetOutputElement_1025_0, Slice_983_0)
{
gpu::invoke_primitive(ctx, 24, std::vector<void*>{((float*)((char *)pool_base_ptr + 20800000))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 21235200))}.data());
}
// Slice_980(GetOutputElement_1025_0, Slice_980_0)
{
gpu::invoke_primitive(ctx, 25, std::vector<void*>{((float*)((char *)pool_base_ptr + 20800000))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 21260800))}.data());
}
// Slice_982(GetOutputElement_1025_0, Slice_982_0)
{
gpu::invoke_primitive(ctx, 26, std::vector<void*>{((float*)((char *)pool_base_ptr + 20800000))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 21286400))}.data());
}
// Reshape_444(Slice_981_0, Reshape_444_0)
func_Reshape_231(((float*)((char *)pool_base_ptr + 21056000)), ((float*)((char *)pool_base_ptr + 20800000)), ctx);
// Reshape_338(Slice_979_0, Reshape_338_0)
func_Reshape_231(((float*)((char *)pool_base_ptr + 21081600)), ((float*)((char *)pool_base_ptr + 20825600)), ctx);
// Reshape_790(Slice_988_0, Reshape_790_0)
func_Reshape_231(((float*)((char *)pool_base_ptr + 21107200)), ((float*)((char *)pool_base_ptr + 20851200)), ctx);
// Reshape_762(Slice_987_0, Reshape_762_0)
func_Reshape_231(((float*)((char *)pool_base_ptr + 21132800)), ((float*)((char *)pool_base_ptr + 20876800)), ctx);
// Reshape_709(Slice_986_0, Reshape_709_0)
func_Reshape_231(((float*)((char *)pool_base_ptr + 21158400)), ((float*)((char *)pool_base_ptr + 20902400)), ctx);
// Reshape_656(Slice_985_0, Reshape_656_0)
func_Reshape_231(((float*)((char *)pool_base_ptr + 21184000)), ((float*)((char *)pool_base_ptr + 20928000)), ctx);
// Reshape_603(Slice_984_0, Reshape_603_0)
func_Reshape_231(((float*)((char *)pool_base_ptr + 21209600)), ((float*)((char *)pool_base_ptr + 20953600)), ctx);
// Reshape_550(Slice_983_0, Reshape_550_0)
func_Reshape_231(((float*)((char *)pool_base_ptr + 21235200)), ((float*)((char *)pool_base_ptr + 20979200)), ctx);
// Reshape_391(Slice_980_0, Reshape_391_0)
func_Reshape_231(((float*)((char *)pool_base_ptr + 21260800)), ((float*)((char *)pool_base_ptr + 21004800)), ctx);
// Reshape_497(Slice_982_0, Reshape_497_0)
func_Reshape_231(((float*)((char *)pool_base_ptr + 21286400)), ((float*)((char *)pool_base_ptr + 21030400)), ctx);
// Concat_791(Reshape_338_0, Reshape_391_0, Reshape_444_0, Reshape_497_0, Reshape_550_0, Reshape_603_0, Reshape_656_0, Reshape_709_0, Reshape_762_0, Reshape_790_0, Concat_791_0)
{
gpu::invoke_primitive(ctx, 27, std::vector<void*>{((float*)((char *)pool_base_ptr + 20825600)), ((float*)((char *)pool_base_ptr + 21004800)), ((float*)((char *)pool_base_ptr + 20800000)), ((float*)((char *)pool_base_ptr + 21030400)), ((float*)((char *)pool_base_ptr + 20979200)), ((float*)((char *)pool_base_ptr + 20953600)), ((float*)((char *)pool_base_ptr + 20928000)), ((float*)((char *)pool_base_ptr + 20902400)), ((float*)((char *)pool_base_ptr + 20876800)), ((float*)((char *)pool_base_ptr + 20851200))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 21056000))}.data());
}
// Reshape_792(Concat_791_0, Reshape_792_0)
{
runtime::gpu::cuda_memcpyDtD(((float*)((char *)pool_base_ptr + 20800000)), ((float*)((char *)pool_base_ptr + 21056000)), 64000 * 4);
}
// Dot_793(Reshape_792_0, Reshape_76_0, Dot_793_0)
{
const float alpha = 1.0;
const float beta = 0.0;
int m = 320;
int n = 10000;
int k = 200;
CUBLAS_SAFE_CALL(cublasSetPointerMode(*ctx->cublas_handle, CUBLAS_POINTER_MODE_HOST));
CUBLAS_SAFE_CALL(cublasSgemm(*ctx->cublas_handle,CUBLAS_OP_N,CUBLAS_OP_N,n,m,k,&alpha,((float*)((char *)pool_base_ptr + 12800000)),n,((float*)((char *)pool_base_ptr + 20800000)),k,&beta,((float*)((char *)pool_base_ptr + 21056000)),n));
CUBLAS_SAFE_CALL(cublasSetPointerMode(*ctx->cublas_handle, CUBLAS_POINTER_MODE_DEVICE));
}
// Add_794(Dot_793_0, Broadcast_75_0, Add_794_0)
{
gpu::invoke_primitive(ctx, 28, std::vector<void*>{((float*)((char *)pool_base_ptr + 21056000)), ((float*)((char *)pool_base_ptr + 0))}.data(), std::vector<void*>{((float*)(outputs[0]))}.data());
}
// Result_795(Add_794_0, Result_795_0)
{
runtime::gpu::cuda_memcpyDtD(((float*)(outputs[0])), ((float*)(outputs[0])), 3200000 * 4);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment