August 27, 2018 20:50 · August 28, 2018 21:10 · August 28, 2018 21:13 · September 10, 2018 23:00 · September 24, 2018 18:27 · September 25, 2018 16:27
 [ RUN      ] gpu_fusion.fuse_lstm_cells
 [DEBUG] 2018-08-27T20:45:02z graph_rewrite.cpp 35	Running matcher Unnamed(Divide_77) on Parameter_109
 [DEBUG] 2018-08-27T20:45:02z matcher.cpp 276	[MATCHER] Starting match pattern = Divide_77 , graph_node = Parameter_109
 [DEBUG] 2018-08-27T20:45:02z matcher.cpp 150	[MATCHER] in match_node : pattern = Divide_77 matched Parameter_109
 [DEBUG] 2018-08-27T20:45:02z graph_rewrite.cpp 35	Running matcher Unnamed(Multiply_107) on Parameter_109
 [DEBUG] 2018-08-27T20:45:02z matcher.cpp 276	[MATCHER] Starting match pattern = Multiply_107 , graph_node = Parameter_109
 [DEBUG] 2018-08-27T20:45:02z matcher.cpp 150	[MATCHER] in match_node : pattern = Multiply_107 matched Parameter_109
 [DEBUG] 2018-08-27T20:45:02z graph_rewrite.cpp 35	Running matcher Unnamed(Divide_77) on Parameter_110
 [DEBUG] 2018-08-27T20:45:02z matcher.cpp 276	[MATCHER] Starting match pattern = Divide_77 , graph_node = Parameter_110
 [DEBUG] 2018-08-27T20:45:02z matcher.cpp 150	[MATCHER] in match_node : pattern = Divid

 // Generated by the nGraph GPU backend
 #include <cublas_v2.h>
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <cudnn.h>

 #include "ngraph/descriptor/input.hpp"
 #include "ngraph/descriptor/layout/dense_tensor_view_layout.hpp"
 #include "ngraph/descriptor/output.hpp"

 // Generated by the nGraph GPU backend
 #include <cublas_v2.h>
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <cudnn.h>

 #include "ngraph/descriptor/input.hpp"
 #include "ngraph/descriptor/layout/dense_tensor_view_layout.hpp"
 #include "ngraph/descriptor/output.hpp"
 diff --git a/src/ngraph/runtime/gpu/cudnn_emitter.cpp b/src/ngraph/runtime/gpu/cudnn_emitter.cpp
 index bfdc117a..bf0dece0 100644
 --- a/src/ngraph/runtime/gpu/cudnn_emitter.cpp
 +++ b/src/ngraph/runtime/gpu/cudnn_emitter.cpp
 @@ -1229,14 +1229,16 @@ size_t runtime::gpu::CUDNNEmitter::build_batchnorm(const cudnnBatchNormMode_t& b
                                                    const Prop& direction,
                                                    const Shape& tensor_shape,
                                                    const Shape& param_shape,
 -                                                   double epsilon)
 +                                                   double epsilon,
 ALLOC: Parameter_0
 ALLOC: Parameter_1
 ALLOC: Parameter_2
 ALLOC: Parameter_3
 ALLOC: Parameter_4
 ALLOC: Parameter_5
 ALLOC: Parameter_6
 ALLOC: Parameter_7
 ALLOC: Parameter_8
 ALLOC: Parameter_9
 diff --git a/src/ngraph/runtime/gpu/CMakeLists.txt b/src/ngraph/runtime/gpu/CMakeLists.txt
 index 04d96608..aaad210c 100644
 --- a/src/ngraph/runtime/gpu/CMakeLists.txt
 +++ b/src/ngraph/runtime/gpu/CMakeLists.txt
 @@ -42,7 +42,6 @@ set(SRC
     pass/tensor_memory_reservation.cpp
     gpu_kernel_args.cpp
     pass/gpu_rnn_fusion.cpp
 -    op/lstm.cpp
     op/rnn.cpp
 diff --git a/src/ngraph/runtime/cpu/cpu_external_function.cpp b/src/ngraph/runtime/cpu/cpu_external_function.cpp
 index bc30f4d1..4fbd85aa 100644
 --- a/src/ngraph/runtime/cpu/cpu_external_function.cpp
 +++ b/src/ngraph/runtime/cpu/cpu_external_function.cpp
 @@ -22,6 +22,7 @@
 #include <typeindex>
 #include <typeinfo>
 #include <unordered_map>
 +#include <algorithm>
 
 INFO:root:start with arguments Namespace(batch_size=64, benchmark=0, brightness=0, contrast=0, data_nthreads=4, data_train='/dataset/mxnet_imagenet/train.rec', data_train_idx='', data_val='/dataset/mxnet_imagenet/val.rec', data_val_idx='', disp_batches=20, dtype='float32', fill_value=127, gc_threshold=0.5, gc_type='none', gpus='0', image_shape='3,224,224', initializer='default', is_nnp=False, kv_store='device', load_epoch=None, loss='', lr=0.1, lr_factor=0.1, lr_step_epochs='30,60', macrobatch_size=0, max_crop_size=-1, max_random_area=1, max_random_aspect_ratio=0, max_random_h=0, max_random_l=0, max_random_rotate_angle=0, max_random_s=0, max_random_scale=1, max_random_shear_ratio=0, min_crop_size=-1, min_random_area=1, min_random_aspect_ratio=None, min_random_scale=1, model_prefix=None, mom=0.9, monitor=0, network='resnet', num_classes=1000, num_epochs=80, num_examples=1281167, num_layers=50, optimizer='sgd', pad_size=0, pca_noise=0, profile_server_suffix='', profile_worker_suffix='', random_crop=0, random_mi
 diff --git a/src/ngraph/runtime/gpu/cuda_emitter.cpp b/src/ngraph/runtime/gpu/cuda_emitter.cpp
 index a9ef0e00..09fe458b 100644
 --- a/src/ngraph/runtime/gpu/cuda_emitter.cpp
 +++ b/src/ngraph/runtime/gpu/cuda_emitter.cpp
 @@ -3096,11 +3096,11 @@ void* runtime::gpu::CUDAEmitter::get_init_reduce_val(std::string reduce_op, std:
 {
     if (reduce_op == "max")
     {
 -        return m_host_parameters->min_by_datatype(data_type);
 +        return TypeInfo::Get(data_type)->max_ptr();
 diff --git a/src/ngraph/runtime/gpu/gpu_external_function.cpp b/src/ngraph/runtime/gpu/gpu_external_function.cpp
 index 71cdd614..e836f16b 100644
 --- a/src/ngraph/runtime/gpu/gpu_external_function.cpp
 +++ b/src/ngraph/runtime/gpu/gpu_external_function.cpp
 @@ -561,7 +561,7 @@ void runtime::gpu::GPU_ExternalFunction::compile()
         m_shared_context->m_primitive_emitter->get_memory_allocator());
 
     ngraph::pass::Manager pass_manager;
 -#if CUDNN_VERSION >= 7200
 +#if CUDNN_VERSION >= 9200
	[ RUN ] gpu_fusion.fuse_lstm_cells
	[DEBUG] 2018-08-27T20:45:02z graph_rewrite.cpp 35 Running matcher Unnamed(Divide_77) on Parameter_109
	[DEBUG] 2018-08-27T20:45:02z matcher.cpp 276 [MATCHER] Starting match pattern = Divide_77 , graph_node = Parameter_109
	[DEBUG] 2018-08-27T20:45:02z matcher.cpp 150 [MATCHER] in match_node : pattern = Divide_77 matched Parameter_109
	[DEBUG] 2018-08-27T20:45:02z graph_rewrite.cpp 35 Running matcher Unnamed(Multiply_107) on Parameter_109
	[DEBUG] 2018-08-27T20:45:02z matcher.cpp 276 [MATCHER] Starting match pattern = Multiply_107 , graph_node = Parameter_109
	[DEBUG] 2018-08-27T20:45:02z matcher.cpp 150 [MATCHER] in match_node : pattern = Multiply_107 matched Parameter_109
	[DEBUG] 2018-08-27T20:45:02z graph_rewrite.cpp 35 Running matcher Unnamed(Divide_77) on Parameter_110
	[DEBUG] 2018-08-27T20:45:02z matcher.cpp 276 [MATCHER] Starting match pattern = Divide_77 , graph_node = Parameter_110
	[DEBUG] 2018-08-27T20:45:02z matcher.cpp 150 [MATCHER] in match_node : pattern = Divid

	// Generated by the nGraph GPU backend
	#include <cublas_v2.h>
	#include <cuda.h>
	#include <cuda_runtime.h>
	#include <cudnn.h>

	#include "ngraph/descriptor/input.hpp"
	#include "ngraph/descriptor/layout/dense_tensor_view_layout.hpp"
	#include "ngraph/descriptor/output.hpp"
	diff --git a/src/ngraph/runtime/gpu/cudnn_emitter.cpp b/src/ngraph/runtime/gpu/cudnn_emitter.cpp
	index bfdc117a..bf0dece0 100644
	--- a/src/ngraph/runtime/gpu/cudnn_emitter.cpp
	+++ b/src/ngraph/runtime/gpu/cudnn_emitter.cpp
	@@ -1229,14 +1229,16 @@ size_t runtime::gpu::CUDNNEmitter::build_batchnorm(const cudnnBatchNormMode_t& b
	const Prop& direction,
	const Shape& tensor_shape,
	const Shape& param_shape,
	- double epsilon)
	+ double epsilon,
	ALLOC: Parameter_0
	ALLOC: Parameter_1
	ALLOC: Parameter_2
	ALLOC: Parameter_3
	ALLOC: Parameter_4
	ALLOC: Parameter_5
	ALLOC: Parameter_6
	ALLOC: Parameter_7
	ALLOC: Parameter_8
	ALLOC: Parameter_9
	diff --git a/src/ngraph/runtime/gpu/CMakeLists.txt b/src/ngraph/runtime/gpu/CMakeLists.txt
	index 04d96608..aaad210c 100644
	--- a/src/ngraph/runtime/gpu/CMakeLists.txt
	+++ b/src/ngraph/runtime/gpu/CMakeLists.txt
	@@ -42,7 +42,6 @@ set(SRC
	pass/tensor_memory_reservation.cpp
	gpu_kernel_args.cpp
	pass/gpu_rnn_fusion.cpp
	- op/lstm.cpp
	op/rnn.cpp
	diff --git a/src/ngraph/runtime/cpu/cpu_external_function.cpp b/src/ngraph/runtime/cpu/cpu_external_function.cpp
	index bc30f4d1..4fbd85aa 100644
	--- a/src/ngraph/runtime/cpu/cpu_external_function.cpp
	+++ b/src/ngraph/runtime/cpu/cpu_external_function.cpp
	@@ -22,6 +22,7 @@
	#include <typeindex>
	#include <typeinfo>
	#include <unordered_map>
	+#include <algorithm>
	diff --git a/src/ngraph/runtime/gpu/cuda_emitter.cpp b/src/ngraph/runtime/gpu/cuda_emitter.cpp
	index a9ef0e00..09fe458b 100644
	--- a/src/ngraph/runtime/gpu/cuda_emitter.cpp
	+++ b/src/ngraph/runtime/gpu/cuda_emitter.cpp
	@@ -3096,11 +3096,11 @@ void* runtime::gpu::CUDAEmitter::get_init_reduce_val(std::string reduce_op, std:
	{
	if (reduce_op == "max")
	{
	- return m_host_parameters->min_by_datatype(data_type);
	+ return TypeInfo::Get(data_type)->max_ptr();
	diff --git a/src/ngraph/runtime/gpu/gpu_external_function.cpp b/src/ngraph/runtime/gpu/gpu_external_function.cpp
	index 71cdd614..e836f16b 100644
	--- a/src/ngraph/runtime/gpu/gpu_external_function.cpp
	+++ b/src/ngraph/runtime/gpu/gpu_external_function.cpp
	@@ -561,7 +561,7 @@ void runtime::gpu::GPU_ExternalFunction::compile()
	m_shared_context->m_primitive_emitter->get_memory_allocator());

	ngraph::pass::Manager pass_manager;
	-#if CUDNN_VERSION >= 7200
	+#if CUDNN_VERSION >= 9200