Created
August 28, 2018 21:10
-
-
Save csullivan/5681b7f5c83592e589b13f204d8987aa to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Generated by the nGraph GPU backend | |
#include <cublas_v2.h> | |
#include <cuda.h> | |
#include <cuda_runtime.h> | |
#include <cudnn.h> | |
#include "ngraph/descriptor/input.hpp" | |
#include "ngraph/descriptor/layout/dense_tensor_view_layout.hpp" | |
#include "ngraph/descriptor/output.hpp" | |
#include "ngraph/descriptor/primary_tensor_view.hpp" | |
#include "ngraph/file_util.hpp" | |
#include "ngraph/function.hpp" | |
#include "ngraph/graph_util.hpp" | |
#include "ngraph/node.hpp" | |
#include "ngraph/pass/assign_layout.hpp" | |
#include "ngraph/pass/dump_sorted.hpp" | |
#include "ngraph/pass/liveness.hpp" | |
#include "ngraph/pass/manager.hpp" | |
#include "ngraph/pass/memory_layout.hpp" | |
#include "ngraph/runtime/aligned_buffer.hpp" | |
#include "ngraph/runtime/gpu/cudnn_descriptors.hpp" | |
#include "ngraph/runtime/gpu/gpu_cuda_kernel_ops.hpp" | |
#include "ngraph/runtime/gpu/gpu_invoke.hpp" | |
#include "ngraph/runtime/gpu/gpu_runtime_context.hpp" | |
#include "ngraph/runtime/gpu/gpu_util.hpp" | |
#include "ngraph/util.hpp" | |
using namespace ngraph; | |
using namespace ngraph::runtime; | |
using namespace std; | |
void *__dso_handle = 0; | |
static gpu::GPURuntimeContext* m_runtime_context = nullptr; | |
// Declare all constants | |
static size_t Constant_18_0_idx = 88; | |
static float* Constant_18_0 = nullptr; | |
static size_t Constant_14_0_idx = 89; | |
static float* Constant_14_0 = nullptr; | |
static size_t Constant_11_0_idx = 90; | |
static float* Constant_11_0 = nullptr; | |
static size_t Constant_12_0_idx = 91; | |
static float* Constant_12_0 = nullptr; | |
static bool is_constant_mem_ptr_null = true; | |
static void invoke_constant_mem_ptr() | |
{ | |
if(is_constant_mem_ptr_null) | |
{ | |
Constant_18_0 = reinterpret_cast<float*>(runtime::gpu::invoke_memory_primitive(m_runtime_context, Constant_18_0_idx)); | |
Constant_14_0 = reinterpret_cast<float*>(runtime::gpu::invoke_memory_primitive(m_runtime_context, Constant_14_0_idx)); | |
Constant_11_0 = reinterpret_cast<float*>(runtime::gpu::invoke_memory_primitive(m_runtime_context, Constant_11_0_idx)); | |
Constant_12_0 = reinterpret_cast<float*>(runtime::gpu::invoke_memory_primitive(m_runtime_context, Constant_12_0_idx)); | |
is_constant_mem_ptr_null = false; | |
} | |
} | |
// Declare all functions | |
extern "C" void Function_0(void** inputs, void** outputs, gpu::GPURuntimeContext* ctx); | |
static void func_Concat_1023( | |
float* _arg0, | |
float* _arg1, | |
float* _out2, | |
gpu::GPURuntimeContext* ctx | |
) | |
{ | |
gpu::invoke_primitive(ctx, 1, std::vector<void*>{_arg0, _arg1}.data(), std::vector<void*>{_out2}.data()); | |
} | |
static void func_Concat_1021( | |
float* _arg0, | |
float* _arg1, | |
float* _out2, | |
gpu::GPURuntimeContext* ctx | |
) | |
{ | |
gpu::invoke_primitive(ctx, 2, std::vector<void*>{_arg0, _arg1}.data(), std::vector<void*>{_out2}.data()); | |
} | |
static void func_Broadcast_174( | |
float* _arg0, | |
float* _out1, | |
gpu::GPURuntimeContext* ctx | |
) | |
{ | |
gpu::invoke_primitive(ctx, 13, std::vector<void*>{_arg0}.data(), std::vector<void*>{_out1}.data()); | |
} | |
static void func_Reshape_231( | |
float* _arg0, | |
float* _out1, | |
gpu::GPURuntimeContext* ctx | |
) | |
{ | |
runtime::gpu::cuda_memcpyDtD(_out1, _arg0, 6400 * 4); | |
} | |
static void func_Concat_1019( | |
float* _arg0, | |
float* _arg1, | |
float* _out2, | |
gpu::GPURuntimeContext* ctx | |
) | |
{ | |
gpu::invoke_primitive(ctx, 14, std::vector<void*>{_arg0, _arg1}.data(), std::vector<void*>{_out2}.data()); | |
} | |
extern "C" void Function_0(void** inputs, void** outputs, gpu::GPURuntimeContext* ctx) | |
{ | |
m_runtime_context = ctx; | |
invoke_constant_mem_ptr(); | |
// Allocate the memory pool | |
void* pool_base_ptr = ngraph::runtime::gpu::invoke_memory_primitive(ctx, 0); | |
// Broadcast_75(Parameter_0_0, Broadcast_75_0) | |
gpu::invoke_primitive(ctx, 0, std::vector<void*>{((float*)(inputs[10]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 0))}.data()); | |
// Reshape_76(Parameter_1_0, Reshape_76_0) | |
{ | |
const float alpha = 1.0; | |
const float beta = 0; | |
CUBLAS_SAFE_CALL(cublasSetPointerMode(*ctx->cublas_handle, CUBLAS_POINTER_MODE_HOST)); | |
CUBLAS_SAFE_CALL(cublasSgeam(*ctx->cublas_handle,CUBLAS_OP_T,CUBLAS_OP_T,10000,200,&alpha,((float*)(inputs[9])),200,&beta,((float*)(inputs[9])),200,((float*)((char *)pool_base_ptr + 12800000)),10000)); | |
CUBLAS_SAFE_CALL(cublasSetPointerMode(*ctx->cublas_handle, CUBLAS_POINTER_MODE_DEVICE)); | |
} | |
// Concat_1023(Parameter_6_0, Parameter_2_0, Concat_1023_0) | |
func_Concat_1023(((float*)(inputs[4])), ((float*)(inputs[8])), ((float*)((char *)pool_base_ptr + 20800000)), ctx); | |
// Concat_1021(Parameter_7_0, Parameter_3_0, Concat_1021_0) | |
func_Concat_1021(((float*)(inputs[3])), ((float*)(inputs[7])), ((float*)((char *)pool_base_ptr + 20806400)), ctx); | |
// Concat_1022(Parameter_8_0, Parameter_4_0, Concat_1022_0) | |
func_Concat_1023(((float*)(inputs[2])), ((float*)(inputs[6])), ((float*)((char *)pool_base_ptr + 22086400)), ctx); | |
// Concat_1020(Parameter_9_0, Parameter_5_0, Concat_1020_0) | |
func_Concat_1021(((float*)(inputs[1])), ((float*)(inputs[5])), ((float*)((char *)pool_base_ptr + 22092800)), ctx); | |
// Slice_157(Parameter_10_0, Slice_157_0) | |
{ | |
gpu::invoke_primitive(ctx, 3, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23372800))}.data()); | |
} | |
// Slice_158(Parameter_10_0, Slice_158_0) | |
{ | |
gpu::invoke_primitive(ctx, 4, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23398400))}.data()); | |
} | |
// Slice_159(Parameter_10_0, Slice_159_0) | |
{ | |
gpu::invoke_primitive(ctx, 5, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23424000))}.data()); | |
} | |
// Slice_160(Parameter_10_0, Slice_160_0) | |
{ | |
gpu::invoke_primitive(ctx, 6, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23449600))}.data()); | |
} | |
// Slice_161(Parameter_10_0, Slice_161_0) | |
{ | |
gpu::invoke_primitive(ctx, 7, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23475200))}.data()); | |
} | |
// Slice_162(Parameter_10_0, Slice_162_0) | |
{ | |
gpu::invoke_primitive(ctx, 8, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23500800))}.data()); | |
} | |
// Slice_163(Parameter_10_0, Slice_163_0) | |
{ | |
gpu::invoke_primitive(ctx, 9, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23526400))}.data()); | |
} | |
// Slice_164(Parameter_10_0, Slice_164_0) | |
{ | |
gpu::invoke_primitive(ctx, 10, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23552000))}.data()); | |
} | |
// Slice_165(Parameter_10_0, Slice_165_0) | |
{ | |
gpu::invoke_primitive(ctx, 11, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23577600))}.data()); | |
} | |
// Slice_166(Parameter_10_0, Slice_166_0) | |
{ | |
gpu::invoke_primitive(ctx, 12, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23603200))}.data()); | |
} | |
// Broadcast_174(Constant_18_0, Broadcast_174_0) | |
func_Broadcast_174(Constant_18_0, ((float*)((char *)pool_base_ptr + 23628800)), ctx); | |
// Broadcast_170(Constant_14_0, Broadcast_170_0) | |
func_Broadcast_174(Constant_14_0, ((float*)((char *)pool_base_ptr + 23654400)), ctx); | |
// Broadcast_167(Constant_11_0, Broadcast_167_0) | |
func_Broadcast_174(Constant_11_0, ((float*)((char *)pool_base_ptr + 23680000)), ctx); | |
// Broadcast_168(Constant_12_0, Broadcast_168_0) | |
func_Broadcast_174(Constant_12_0, ((float*)((char *)pool_base_ptr + 23705600)), ctx); | |
// Reshape_231(Slice_157_0, Reshape_231_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 23372800)), ((float*)((char *)pool_base_ptr + 23731200)), ctx); | |
// Reshape_232(Slice_158_0, Reshape_232_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 23398400)), ((float*)((char *)pool_base_ptr + 23372800)), ctx); | |
// Reshape_233(Slice_159_0, Reshape_233_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 23424000)), ((float*)((char *)pool_base_ptr + 23398400)), ctx); | |
// Reshape_234(Slice_160_0, Reshape_234_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 23449600)), ((float*)((char *)pool_base_ptr + 23424000)), ctx); | |
// Reshape_235(Slice_161_0, Reshape_235_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 23475200)), ((float*)((char *)pool_base_ptr + 23449600)), ctx); | |
// Reshape_236(Slice_162_0, Reshape_236_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 23500800)), ((float*)((char *)pool_base_ptr + 23475200)), ctx); | |
// Reshape_237(Slice_163_0, Reshape_237_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 23526400)), ((float*)((char *)pool_base_ptr + 23500800)), ctx); | |
// Reshape_238(Slice_164_0, Reshape_238_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 23552000)), ((float*)((char *)pool_base_ptr + 23526400)), ctx); | |
// Reshape_239(Slice_165_0, Reshape_239_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 23577600)), ((float*)((char *)pool_base_ptr + 23552000)), ctx); | |
// Reshape_240(Slice_166_0, Reshape_240_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 23603200)), ((float*)((char *)pool_base_ptr + 23577600)), ctx); | |
// Concat_1019(Broadcast_170_0, Broadcast_174_0, Concat_1019_0) | |
func_Concat_1019(((float*)((char *)pool_base_ptr + 23654400)), ((float*)((char *)pool_base_ptr + 23628800)), ((float*)((char *)pool_base_ptr + 23756800)), ctx); | |
// Concat_1018(Broadcast_168_0, Broadcast_167_0, Concat_1018_0) | |
func_Concat_1019(((float*)((char *)pool_base_ptr + 23705600)), ((float*)((char *)pool_base_ptr + 23680000)), ((float*)((char *)pool_base_ptr + 23603200)), ctx); | |
// Concat_989(Reshape_240_0, Reshape_231_0, Reshape_232_0, Reshape_233_0, Reshape_234_0, Reshape_235_0, Reshape_236_0, Reshape_237_0, Reshape_238_0, Reshape_239_0, Concat_989_0) | |
{ | |
gpu::invoke_primitive(ctx, 15, std::vector<void*>{((float*)((char *)pool_base_ptr + 23577600)), ((float*)((char *)pool_base_ptr + 23731200)), ((float*)((char *)pool_base_ptr + 23372800)), ((float*)((char *)pool_base_ptr + 23398400)), ((float*)((char *)pool_base_ptr + 23424000)), ((float*)((char *)pool_base_ptr + 23449600)), ((float*)((char *)pool_base_ptr + 23475200)), ((float*)((char *)pool_base_ptr + 23500800)), ((float*)((char *)pool_base_ptr + 23526400)), ((float*)((char *)pool_base_ptr + 23552000))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23808000))}.data()); | |
} | |
// Rnn_1024(Concat_989_0, Concat_1018_0, Concat_1020_0, Concat_1021_0, Concat_1022_0, Concat_1023_0, Concat_1019_0, Rnn_1024_0, Rnn_1024_1, Rnn_1024_2) | |
gpu::invoke_primitive(ctx, 16, std::vector<void*>{((float*)((char *)pool_base_ptr + 23808000)), ((float*)((char *)pool_base_ptr + 23603200)), ((float*)((char *)pool_base_ptr + 22092800)), ((float*)((char *)pool_base_ptr + 20806400)), ((float*)((char *)pool_base_ptr + 22086400)), ((float*)((char *)pool_base_ptr + 20800000)), ((float*)((char *)pool_base_ptr + 23756800))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 24064000)), ((float*)((char *)pool_base_ptr + 23424000)), ((float*)((char *)pool_base_ptr + 23372800))}.data()); | |
// GetOutputElement_1025(Rnn_1024_0, Rnn_1024_1, Rnn_1024_2, GetOutputElement_1025_0) | |
{ | |
runtime::gpu::cuda_memcpyDtH(((float*)((char *)pool_base_ptr + 20800000)), ((float*)((char *)pool_base_ptr + 24064000)), 256000); | |
} | |
// Slice_981(GetOutputElement_1025_0, Slice_981_0) | |
{ | |
gpu::invoke_primitive(ctx, 17, std::vector<void*>{((float*)((char *)pool_base_ptr + 20800000))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 21056000))}.data()); | |
} | |
// Slice_979(GetOutputElement_1025_0, Slice_979_0) | |
{ | |
gpu::invoke_primitive(ctx, 18, std::vector<void*>{((float*)((char *)pool_base_ptr + 20800000))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 21081600))}.data()); | |
} | |
// Slice_988(GetOutputElement_1025_0, Slice_988_0) | |
{ | |
gpu::invoke_primitive(ctx, 19, std::vector<void*>{((float*)((char *)pool_base_ptr + 20800000))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 21107200))}.data()); | |
} | |
// Slice_987(GetOutputElement_1025_0, Slice_987_0) | |
{ | |
gpu::invoke_primitive(ctx, 20, std::vector<void*>{((float*)((char *)pool_base_ptr + 20800000))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 21132800))}.data()); | |
} | |
// Slice_986(GetOutputElement_1025_0, Slice_986_0) | |
{ | |
gpu::invoke_primitive(ctx, 21, std::vector<void*>{((float*)((char *)pool_base_ptr + 20800000))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 21158400))}.data()); | |
} | |
// Slice_985(GetOutputElement_1025_0, Slice_985_0) | |
{ | |
gpu::invoke_primitive(ctx, 22, std::vector<void*>{((float*)((char *)pool_base_ptr + 20800000))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 21184000))}.data()); | |
} | |
// Slice_984(GetOutputElement_1025_0, Slice_984_0) | |
{ | |
gpu::invoke_primitive(ctx, 23, std::vector<void*>{((float*)((char *)pool_base_ptr + 20800000))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 21209600))}.data()); | |
} | |
// Slice_983(GetOutputElement_1025_0, Slice_983_0) | |
{ | |
gpu::invoke_primitive(ctx, 24, std::vector<void*>{((float*)((char *)pool_base_ptr + 20800000))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 21235200))}.data()); | |
} | |
// Slice_980(GetOutputElement_1025_0, Slice_980_0) | |
{ | |
gpu::invoke_primitive(ctx, 25, std::vector<void*>{((float*)((char *)pool_base_ptr + 20800000))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 21260800))}.data()); | |
} | |
// Slice_982(GetOutputElement_1025_0, Slice_982_0) | |
{ | |
gpu::invoke_primitive(ctx, 26, std::vector<void*>{((float*)((char *)pool_base_ptr + 20800000))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 21286400))}.data()); | |
} | |
// Reshape_444(Slice_981_0, Reshape_444_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 21056000)), ((float*)((char *)pool_base_ptr + 20800000)), ctx); | |
// Reshape_338(Slice_979_0, Reshape_338_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 21081600)), ((float*)((char *)pool_base_ptr + 20825600)), ctx); | |
// Reshape_790(Slice_988_0, Reshape_790_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 21107200)), ((float*)((char *)pool_base_ptr + 20851200)), ctx); | |
// Reshape_762(Slice_987_0, Reshape_762_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 21132800)), ((float*)((char *)pool_base_ptr + 20876800)), ctx); | |
// Reshape_709(Slice_986_0, Reshape_709_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 21158400)), ((float*)((char *)pool_base_ptr + 20902400)), ctx); | |
// Reshape_656(Slice_985_0, Reshape_656_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 21184000)), ((float*)((char *)pool_base_ptr + 20928000)), ctx); | |
// Reshape_603(Slice_984_0, Reshape_603_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 21209600)), ((float*)((char *)pool_base_ptr + 20953600)), ctx); | |
// Reshape_550(Slice_983_0, Reshape_550_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 21235200)), ((float*)((char *)pool_base_ptr + 20979200)), ctx); | |
// Reshape_391(Slice_980_0, Reshape_391_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 21260800)), ((float*)((char *)pool_base_ptr + 21004800)), ctx); | |
// Reshape_497(Slice_982_0, Reshape_497_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 21286400)), ((float*)((char *)pool_base_ptr + 21030400)), ctx); | |
// Concat_791(Reshape_338_0, Reshape_391_0, Reshape_444_0, Reshape_497_0, Reshape_550_0, Reshape_603_0, Reshape_656_0, Reshape_709_0, Reshape_762_0, Reshape_790_0, Concat_791_0) | |
{ | |
gpu::invoke_primitive(ctx, 27, std::vector<void*>{((float*)((char *)pool_base_ptr + 20825600)), ((float*)((char *)pool_base_ptr + 21004800)), ((float*)((char *)pool_base_ptr + 20800000)), ((float*)((char *)pool_base_ptr + 21030400)), ((float*)((char *)pool_base_ptr + 20979200)), ((float*)((char *)pool_base_ptr + 20953600)), ((float*)((char *)pool_base_ptr + 20928000)), ((float*)((char *)pool_base_ptr + 20902400)), ((float*)((char *)pool_base_ptr + 20876800)), ((float*)((char *)pool_base_ptr + 20851200))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 21056000))}.data()); | |
} | |
// Reshape_792(Concat_791_0, Reshape_792_0) | |
{ | |
runtime::gpu::cuda_memcpyDtD(((float*)((char *)pool_base_ptr + 20800000)), ((float*)((char *)pool_base_ptr + 21056000)), 64000 * 4); | |
} | |
// Dot_793(Reshape_792_0, Reshape_76_0, Dot_793_0) | |
{ | |
const float alpha = 1.0; | |
const float beta = 0.0; | |
int m = 320; | |
int n = 10000; | |
int k = 200; | |
CUBLAS_SAFE_CALL(cublasSetPointerMode(*ctx->cublas_handle, CUBLAS_POINTER_MODE_HOST)); | |
CUBLAS_SAFE_CALL(cublasSgemm(*ctx->cublas_handle,CUBLAS_OP_N,CUBLAS_OP_N,n,m,k,&alpha,((float*)((char *)pool_base_ptr + 12800000)),n,((float*)((char *)pool_base_ptr + 20800000)),k,&beta,((float*)((char *)pool_base_ptr + 21056000)),n)); | |
CUBLAS_SAFE_CALL(cublasSetPointerMode(*ctx->cublas_handle, CUBLAS_POINTER_MODE_DEVICE)); | |
} | |
// Add_794(Dot_793_0, Broadcast_75_0, Add_794_0) | |
{ | |
gpu::invoke_primitive(ctx, 28, std::vector<void*>{((float*)((char *)pool_base_ptr + 21056000)), ((float*)((char *)pool_base_ptr + 0))}.data(), std::vector<void*>{((float*)(outputs[0]))}.data()); | |
} | |
// Result_795(Add_794_0, Result_795_0) | |
{ | |
runtime::gpu::cuda_memcpyDtD(((float*)(outputs[0])), ((float*)(outputs[0])), 3200000 * 4); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment