Skip to content

Instantly share code, notes, and snippets.

View csullivan's full-sized avatar

Chris Sullivan csullivan

  • NVIDIA
  • Portland
View GitHub Profile
[ RUN ] gpu_fusion.fuse_lstm_cells
[DEBUG] 2018-08-27T20:45:02z graph_rewrite.cpp 35 Running matcher Unnamed(Divide_77) on Parameter_109
[DEBUG] 2018-08-27T20:45:02z matcher.cpp 276 [MATCHER] Starting match pattern = Divide_77 , graph_node = Parameter_109
[DEBUG] 2018-08-27T20:45:02z matcher.cpp 150 [MATCHER] in match_node : pattern = Divide_77 matched Parameter_109
[DEBUG] 2018-08-27T20:45:02z graph_rewrite.cpp 35 Running matcher Unnamed(Multiply_107) on Parameter_109
[DEBUG] 2018-08-27T20:45:02z matcher.cpp 276 [MATCHER] Starting match pattern = Multiply_107 , graph_node = Parameter_109
[DEBUG] 2018-08-27T20:45:02z matcher.cpp 150 [MATCHER] in match_node : pattern = Multiply_107 matched Parameter_109
[DEBUG] 2018-08-27T20:45:02z graph_rewrite.cpp 35 Running matcher Unnamed(Divide_77) on Parameter_110
[DEBUG] 2018-08-27T20:45:02z matcher.cpp 276 [MATCHER] Starting match pattern = Divide_77 , graph_node = Parameter_110
[DEBUG] 2018-08-27T20:45:02z matcher.cpp 150 [MATCHER] in match_node : pattern = Divid
// Generated by the nGraph GPU backend
#include <cublas_v2.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cudnn.h>
#include "ngraph/descriptor/input.hpp"
#include "ngraph/descriptor/layout/dense_tensor_view_layout.hpp"
#include "ngraph/descriptor/output.hpp"
// Generated by the nGraph GPU backend
#include <cublas_v2.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cudnn.h>
#include "ngraph/descriptor/input.hpp"
#include "ngraph/descriptor/layout/dense_tensor_view_layout.hpp"
#include "ngraph/descriptor/output.hpp"
diff --git a/src/ngraph/runtime/gpu/cudnn_emitter.cpp b/src/ngraph/runtime/gpu/cudnn_emitter.cpp
index bfdc117a..bf0dece0 100644
--- a/src/ngraph/runtime/gpu/cudnn_emitter.cpp
+++ b/src/ngraph/runtime/gpu/cudnn_emitter.cpp
@@ -1229,14 +1229,16 @@ size_t runtime::gpu::CUDNNEmitter::build_batchnorm(const cudnnBatchNormMode_t& b
const Prop& direction,
const Shape& tensor_shape,
const Shape& param_shape,
- double epsilon)
+ double epsilon,
ALLOC: Parameter_0
ALLOC: Parameter_1
ALLOC: Parameter_2
ALLOC: Parameter_3
ALLOC: Parameter_4
ALLOC: Parameter_5
ALLOC: Parameter_6
ALLOC: Parameter_7
ALLOC: Parameter_8
ALLOC: Parameter_9
diff --git a/src/ngraph/runtime/gpu/CMakeLists.txt b/src/ngraph/runtime/gpu/CMakeLists.txt
index 04d96608..aaad210c 100644
--- a/src/ngraph/runtime/gpu/CMakeLists.txt
+++ b/src/ngraph/runtime/gpu/CMakeLists.txt
@@ -42,7 +42,6 @@ set(SRC
pass/tensor_memory_reservation.cpp
gpu_kernel_args.cpp
pass/gpu_rnn_fusion.cpp
- op/lstm.cpp
op/rnn.cpp
diff --git a/src/ngraph/runtime/cpu/cpu_external_function.cpp b/src/ngraph/runtime/cpu/cpu_external_function.cpp
index bc30f4d1..4fbd85aa 100644
--- a/src/ngraph/runtime/cpu/cpu_external_function.cpp
+++ b/src/ngraph/runtime/cpu/cpu_external_function.cpp
@@ -22,6 +22,7 @@
#include <typeindex>
#include <typeinfo>
#include <unordered_map>
+#include <algorithm>
INFO:root:start with arguments Namespace(batch_size=64, benchmark=0, brightness=0, contrast=0, data_nthreads=4, data_train='/dataset/mxnet_imagenet/train.rec', data_train_idx='', data_val='/dataset/mxnet_imagenet/val.rec', data_val_idx='', disp_batches=20, dtype='float32', fill_value=127, gc_threshold=0.5, gc_type='none', gpus='0', image_shape='3,224,224', initializer='default', is_nnp=False, kv_store='device', load_epoch=None, loss='', lr=0.1, lr_factor=0.1, lr_step_epochs='30,60', macrobatch_size=0, max_crop_size=-1, max_random_area=1, max_random_aspect_ratio=0, max_random_h=0, max_random_l=0, max_random_rotate_angle=0, max_random_s=0, max_random_scale=1, max_random_shear_ratio=0, min_crop_size=-1, min_random_area=1, min_random_aspect_ratio=None, min_random_scale=1, model_prefix=None, mom=0.9, monitor=0, network='resnet', num_classes=1000, num_epochs=80, num_examples=1281167, num_layers=50, optimizer='sgd', pad_size=0, pca_noise=0, profile_server_suffix='', profile_worker_suffix='', random_crop=0, random_mi
diff --git a/src/ngraph/runtime/gpu/cuda_emitter.cpp b/src/ngraph/runtime/gpu/cuda_emitter.cpp
index a9ef0e00..09fe458b 100644
--- a/src/ngraph/runtime/gpu/cuda_emitter.cpp
+++ b/src/ngraph/runtime/gpu/cuda_emitter.cpp
@@ -3096,11 +3096,11 @@ void* runtime::gpu::CUDAEmitter::get_init_reduce_val(std::string reduce_op, std:
{
if (reduce_op == "max")
{
- return m_host_parameters->min_by_datatype(data_type);
+ return TypeInfo::Get(data_type)->max_ptr();
diff --git a/src/ngraph/runtime/gpu/gpu_external_function.cpp b/src/ngraph/runtime/gpu/gpu_external_function.cpp
index 71cdd614..e836f16b 100644
--- a/src/ngraph/runtime/gpu/gpu_external_function.cpp
+++ b/src/ngraph/runtime/gpu/gpu_external_function.cpp
@@ -561,7 +561,7 @@ void runtime::gpu::GPU_ExternalFunction::compile()
m_shared_context->m_primitive_emitter->get_memory_allocator());
ngraph::pass::Manager pass_manager;
-#if CUDNN_VERSION >= 7200
+#if CUDNN_VERSION >= 9200