Created
August 28, 2018 21:13
-
-
Save csullivan/4637382efbb3b4a34cdce0729cf21e41 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Generated by the nGraph GPU backend | |
#include <cublas_v2.h> | |
#include <cuda.h> | |
#include <cuda_runtime.h> | |
#include <cudnn.h> | |
#include "ngraph/descriptor/input.hpp" | |
#include "ngraph/descriptor/layout/dense_tensor_view_layout.hpp" | |
#include "ngraph/descriptor/output.hpp" | |
#include "ngraph/descriptor/primary_tensor_view.hpp" | |
#include "ngraph/file_util.hpp" | |
#include "ngraph/function.hpp" | |
#include "ngraph/graph_util.hpp" | |
#include "ngraph/node.hpp" | |
#include "ngraph/pass/assign_layout.hpp" | |
#include "ngraph/pass/dump_sorted.hpp" | |
#include "ngraph/pass/liveness.hpp" | |
#include "ngraph/pass/manager.hpp" | |
#include "ngraph/pass/memory_layout.hpp" | |
#include "ngraph/runtime/aligned_buffer.hpp" | |
#include "ngraph/runtime/gpu/cudnn_descriptors.hpp" | |
#include "ngraph/runtime/gpu/gpu_cuda_kernel_ops.hpp" | |
#include "ngraph/runtime/gpu/gpu_invoke.hpp" | |
#include "ngraph/runtime/gpu/gpu_runtime_context.hpp" | |
#include "ngraph/runtime/gpu/gpu_util.hpp" | |
#include "ngraph/util.hpp" | |
using namespace ngraph; | |
using namespace ngraph::runtime; | |
using namespace std; | |
void *__dso_handle = 0; | |
static gpu::GPURuntimeContext* m_runtime_context = nullptr; | |
// Declare all constants | |
static size_t Constant_18_0_idx = 47; | |
static float* Constant_18_0 = nullptr; | |
static size_t Constant_14_0_idx = 48; | |
static float* Constant_14_0 = nullptr; | |
static size_t Constant_11_0_idx = 49; | |
static float* Constant_11_0 = nullptr; | |
static size_t Constant_12_0_idx = 50; | |
static float* Constant_12_0 = nullptr; | |
static bool is_constant_mem_ptr_null = true; | |
static void invoke_constant_mem_ptr() | |
{ | |
if(is_constant_mem_ptr_null) | |
{ | |
Constant_18_0 = reinterpret_cast<float*>(runtime::gpu::invoke_memory_primitive(m_runtime_context, Constant_18_0_idx)); | |
Constant_14_0 = reinterpret_cast<float*>(runtime::gpu::invoke_memory_primitive(m_runtime_context, Constant_14_0_idx)); | |
Constant_11_0 = reinterpret_cast<float*>(runtime::gpu::invoke_memory_primitive(m_runtime_context, Constant_11_0_idx)); | |
Constant_12_0 = reinterpret_cast<float*>(runtime::gpu::invoke_memory_primitive(m_runtime_context, Constant_12_0_idx)); | |
is_constant_mem_ptr_null = false; | |
} | |
} | |
// Declare all functions | |
extern "C" void Function_0(void** inputs, void** outputs, gpu::GPURuntimeContext* ctx); | |
static void func_Concat_1024( | |
float* _arg0, | |
float* _arg1, | |
float* _out2, | |
gpu::GPURuntimeContext* ctx | |
) | |
{ | |
gpu::invoke_primitive(ctx, 1, std::vector<void*>{_arg0, _arg1}.data(), std::vector<void*>{_out2}.data()); | |
} | |
static void func_Concat_1022( | |
float* _arg0, | |
float* _arg1, | |
float* _out2, | |
gpu::GPURuntimeContext* ctx | |
) | |
{ | |
gpu::invoke_primitive(ctx, 2, std::vector<void*>{_arg0, _arg1}.data(), std::vector<void*>{_out2}.data()); | |
} | |
static void func_Broadcast_174( | |
float* _arg0, | |
float* _out1, | |
gpu::GPURuntimeContext* ctx | |
) | |
{ | |
gpu::invoke_primitive(ctx, 13, std::vector<void*>{_arg0}.data(), std::vector<void*>{_out1}.data()); | |
} | |
static void func_Reshape_231( | |
float* _arg0, | |
float* _out1, | |
gpu::GPURuntimeContext* ctx | |
) | |
{ | |
runtime::gpu::cuda_memcpyDtD(_out1, _arg0, 6400 * 4); | |
} | |
static void func_Concat_1020( | |
float* _arg0, | |
float* _arg1, | |
float* _out2, | |
gpu::GPURuntimeContext* ctx | |
) | |
{ | |
gpu::invoke_primitive(ctx, 14, std::vector<void*>{_arg0, _arg1}.data(), std::vector<void*>{_out2}.data()); | |
} | |
static void func_Reshape_1012( | |
float* _arg0, | |
float* _out1, | |
gpu::GPURuntimeContext* ctx | |
) | |
{ | |
runtime::gpu::cuda_memcpyDtD(_out1, _arg0, 64000 * 4); | |
} | |
extern "C" void Function_0(void** inputs, void** outputs, gpu::GPURuntimeContext* ctx) | |
{ | |
m_runtime_context = ctx; | |
invoke_constant_mem_ptr(); | |
// Allocate the memory pool | |
void* pool_base_ptr = ngraph::runtime::gpu::invoke_memory_primitive(ctx, 0); | |
// Broadcast_75(Parameter_0_0, Broadcast_75_0) | |
gpu::invoke_primitive(ctx, 0, std::vector<void*>{((float*)(inputs[10]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 0))}.data()); | |
// Reshape_76(Parameter_1_0, Reshape_76_0) | |
{ | |
const float alpha = 1.0; | |
const float beta = 0; | |
CUBLAS_SAFE_CALL(cublasSetPointerMode(*ctx->cublas_handle, CUBLAS_POINTER_MODE_HOST)); | |
CUBLAS_SAFE_CALL(cublasSgeam(*ctx->cublas_handle,CUBLAS_OP_T,CUBLAS_OP_T,10000,200,&alpha,((float*)(inputs[9])),200,&beta,((float*)(inputs[9])),200,((float*)((char *)pool_base_ptr + 12800000)),10000)); | |
CUBLAS_SAFE_CALL(cublasSetPointerMode(*ctx->cublas_handle, CUBLAS_POINTER_MODE_DEVICE)); | |
} | |
// Concat_1024(Parameter_6_0, Parameter_2_0, Concat_1024_0) | |
func_Concat_1024(((float*)(inputs[4])), ((float*)(inputs[8])), ((float*)((char *)pool_base_ptr + 20800000)), ctx); | |
// Concat_1022(Parameter_7_0, Parameter_3_0, Concat_1022_0) | |
func_Concat_1022(((float*)(inputs[3])), ((float*)(inputs[7])), ((float*)((char *)pool_base_ptr + 20806400)), ctx); | |
// Concat_1023(Parameter_8_0, Parameter_4_0, Concat_1023_0) | |
func_Concat_1024(((float*)(inputs[2])), ((float*)(inputs[6])), ((float*)((char *)pool_base_ptr + 22086400)), ctx); | |
// Concat_1021(Parameter_9_0, Parameter_5_0, Concat_1021_0) | |
func_Concat_1022(((float*)(inputs[1])), ((float*)(inputs[5])), ((float*)((char *)pool_base_ptr + 22092800)), ctx); | |
// Slice_157(Parameter_10_0, Slice_157_0) | |
{ | |
gpu::invoke_primitive(ctx, 3, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23372800))}.data()); | |
} | |
// Slice_158(Parameter_10_0, Slice_158_0) | |
{ | |
gpu::invoke_primitive(ctx, 4, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23398400))}.data()); | |
} | |
// Slice_159(Parameter_10_0, Slice_159_0) | |
{ | |
gpu::invoke_primitive(ctx, 5, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23424000))}.data()); | |
} | |
// Slice_160(Parameter_10_0, Slice_160_0) | |
{ | |
gpu::invoke_primitive(ctx, 6, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23449600))}.data()); | |
} | |
// Slice_161(Parameter_10_0, Slice_161_0) | |
{ | |
gpu::invoke_primitive(ctx, 7, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23475200))}.data()); | |
} | |
// Slice_162(Parameter_10_0, Slice_162_0) | |
{ | |
gpu::invoke_primitive(ctx, 8, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23500800))}.data()); | |
} | |
// Slice_163(Parameter_10_0, Slice_163_0) | |
{ | |
gpu::invoke_primitive(ctx, 9, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23526400))}.data()); | |
} | |
// Slice_164(Parameter_10_0, Slice_164_0) | |
{ | |
gpu::invoke_primitive(ctx, 10, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23552000))}.data()); | |
} | |
// Slice_165(Parameter_10_0, Slice_165_0) | |
{ | |
gpu::invoke_primitive(ctx, 11, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23577600))}.data()); | |
} | |
// Slice_166(Parameter_10_0, Slice_166_0) | |
{ | |
gpu::invoke_primitive(ctx, 12, std::vector<void*>{((float*)(inputs[0]))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23603200))}.data()); | |
} | |
// Broadcast_174(Constant_18_0, Broadcast_174_0) | |
func_Broadcast_174(Constant_18_0, ((float*)((char *)pool_base_ptr + 23628800)), ctx); | |
// Broadcast_170(Constant_14_0, Broadcast_170_0) | |
func_Broadcast_174(Constant_14_0, ((float*)((char *)pool_base_ptr + 23654400)), ctx); | |
// Broadcast_167(Constant_11_0, Broadcast_167_0) | |
func_Broadcast_174(Constant_11_0, ((float*)((char *)pool_base_ptr + 23680000)), ctx); | |
// Broadcast_168(Constant_12_0, Broadcast_168_0) | |
func_Broadcast_174(Constant_12_0, ((float*)((char *)pool_base_ptr + 23705600)), ctx); | |
// Reshape_231(Slice_157_0, Reshape_231_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 23372800)), ((float*)((char *)pool_base_ptr + 23731200)), ctx); | |
// Reshape_232(Slice_158_0, Reshape_232_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 23398400)), ((float*)((char *)pool_base_ptr + 23372800)), ctx); | |
// Reshape_233(Slice_159_0, Reshape_233_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 23424000)), ((float*)((char *)pool_base_ptr + 23398400)), ctx); | |
// Reshape_234(Slice_160_0, Reshape_234_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 23449600)), ((float*)((char *)pool_base_ptr + 23424000)), ctx); | |
// Reshape_235(Slice_161_0, Reshape_235_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 23475200)), ((float*)((char *)pool_base_ptr + 23449600)), ctx); | |
// Reshape_236(Slice_162_0, Reshape_236_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 23500800)), ((float*)((char *)pool_base_ptr + 23475200)), ctx); | |
// Reshape_237(Slice_163_0, Reshape_237_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 23526400)), ((float*)((char *)pool_base_ptr + 23500800)), ctx); | |
// Reshape_238(Slice_164_0, Reshape_238_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 23552000)), ((float*)((char *)pool_base_ptr + 23526400)), ctx); | |
// Reshape_239(Slice_165_0, Reshape_239_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 23577600)), ((float*)((char *)pool_base_ptr + 23552000)), ctx); | |
// Reshape_240(Slice_166_0, Reshape_240_0) | |
func_Reshape_231(((float*)((char *)pool_base_ptr + 23603200)), ((float*)((char *)pool_base_ptr + 23577600)), ctx); | |
// Concat_1020(Broadcast_170_0, Broadcast_174_0, Concat_1020_0) | |
func_Concat_1020(((float*)((char *)pool_base_ptr + 23654400)), ((float*)((char *)pool_base_ptr + 23628800)), ((float*)((char *)pool_base_ptr + 23756800)), ctx); | |
// Concat_1019(Broadcast_168_0, Broadcast_167_0, Concat_1019_0) | |
func_Concat_1020(((float*)((char *)pool_base_ptr + 23705600)), ((float*)((char *)pool_base_ptr + 23680000)), ((float*)((char *)pool_base_ptr + 23603200)), ctx); | |
// Concat_989(Reshape_240_0, Reshape_231_0, Reshape_232_0, Reshape_233_0, Reshape_234_0, Reshape_235_0, Reshape_236_0, Reshape_237_0, Reshape_238_0, Reshape_239_0, Concat_989_0) | |
{ | |
gpu::invoke_primitive(ctx, 15, std::vector<void*>{((float*)((char *)pool_base_ptr + 23577600)), ((float*)((char *)pool_base_ptr + 23731200)), ((float*)((char *)pool_base_ptr + 23372800)), ((float*)((char *)pool_base_ptr + 23398400)), ((float*)((char *)pool_base_ptr + 23424000)), ((float*)((char *)pool_base_ptr + 23449600)), ((float*)((char *)pool_base_ptr + 23475200)), ((float*)((char *)pool_base_ptr + 23500800)), ((float*)((char *)pool_base_ptr + 23526400)), ((float*)((char *)pool_base_ptr + 23552000))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 23808000))}.data()); | |
} | |
// Rnn_1025(Concat_989_0, Concat_1019_0, Concat_1021_0, Concat_1022_0, Concat_1023_0, Concat_1024_0, Concat_1020_0, Rnn_1025_0, Rnn_1025_1, Rnn_1025_2) | |
gpu::invoke_primitive(ctx, 16, std::vector<void*>{((float*)((char *)pool_base_ptr + 23808000)), ((float*)((char *)pool_base_ptr + 23603200)), ((float*)((char *)pool_base_ptr + 22092800)), ((float*)((char *)pool_base_ptr + 20806400)), ((float*)((char *)pool_base_ptr + 22086400)), ((float*)((char *)pool_base_ptr + 20800000)), ((float*)((char *)pool_base_ptr + 23756800))}.data(), std::vector<void*>{((float*)((char *)pool_base_ptr + 24064000)), ((float*)((char *)pool_base_ptr + 23372800)), ((float*)((char *)pool_base_ptr + 23424000))}.data()); | |
// GetOutputElement_1026(Rnn_1025_0, Rnn_1025_1, Rnn_1025_2, GetOutputElement_1026_0) | |
{ | |
runtime::gpu::cuda_memcpyDtH(((float*)((char *)pool_base_ptr + 20800000)), ((float*)((char *)pool_base_ptr + 24064000)), 256000); | |
} | |
// Reshape_1012(GetOutputElement_1026_0, Reshape_1012_0) | |
func_Reshape_1012(((float*)((char *)pool_base_ptr + 20800000)), ((float*)((char *)pool_base_ptr + 21056000)), ctx); | |
// Reshape_792(Reshape_1012_0, Reshape_792_0) | |
func_Reshape_1012(((float*)((char *)pool_base_ptr + 21056000)), ((float*)((char *)pool_base_ptr + 20800000)), ctx); | |
// Dot_793(Reshape_792_0, Reshape_76_0, Dot_793_0) | |
{ | |
const float alpha = 1.0; | |
const float beta = 0.0; | |
int m = 320; | |
int n = 10000; | |
int k = 200; | |
CUBLAS_SAFE_CALL(cublasSetPointerMode(*ctx->cublas_handle, CUBLAS_POINTER_MODE_HOST)); | |
CUBLAS_SAFE_CALL(cublasSgemm(*ctx->cublas_handle,CUBLAS_OP_N,CUBLAS_OP_N,n,m,k,&alpha,((float*)((char *)pool_base_ptr + 12800000)),n,((float*)((char *)pool_base_ptr + 20800000)),k,&beta,((float*)((char *)pool_base_ptr + 21056000)),n)); | |
CUBLAS_SAFE_CALL(cublasSetPointerMode(*ctx->cublas_handle, CUBLAS_POINTER_MODE_DEVICE)); | |
} | |
// Add_794(Dot_793_0, Broadcast_75_0, Add_794_0) | |
{ | |
gpu::invoke_primitive(ctx, 17, std::vector<void*>{((float*)((char *)pool_base_ptr + 21056000)), ((float*)((char *)pool_base_ptr + 0))}.data(), std::vector<void*>{((float*)(outputs[0]))}.data()); | |
} | |
// Result_795(Add_794_0, Result_795_0) | |
{ | |
runtime::gpu::cuda_memcpyDtD(((float*)(outputs[0])), ((float*)(outputs[0])), 3200000 * 4); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment