Created September 10, 2018 23:00
diff --git a/src/ngraph/runtime/gpu/cudnn_emitter.cpp b/src/ngraph/runtime/gpu/cudnn_emitter.cpp
index bfdc117a..bf0dece0 100644
--- a/src/ngraph/runtime/gpu/cudnn_emitter.cpp
+++ b/src/ngraph/runtime/gpu/cudnn_emitter.cpp
@@ -1229,14 +1229,16 @@ size_t runtime::gpu::CUDNNEmitter::build_batchnorm(const cudnnBatchNormMode_t& b
const Prop& direction,
const Shape& tensor_shape,
const Shape& param_shape,
- double epsilon)
+ double epsilon,
+ bool global_stats)
{
// Assumes NC{d1...dN} format
std::stringstream ss;
ss.precision(std::numeric_limits<double>::digits10 + 2);
ss << "bn_op" << bn_op << "_dtype_" << dtype << "_dir" << static_cast<int>(direction) << "_ts"
- << join(tensor_shape, "_") << "_ps" << join(param_shape, "_") << "_eps" << epsilon;
+ << join(tensor_shape, "_") << "_ps" << join(param_shape, "_") << "_eps" << epsilon << "_g"
+ << global_stats;
std::string hash = ss.str();
std::replace(hash.begin(), hash.end(), '.', '_');
@@ -1300,6 +1302,8 @@ size_t runtime::gpu::CUDNNEmitter::build_batchnorm(const cudnnBatchNormMode_t& b
void* bias_factor = m_host_parameters.allocate_by_datatype(data_type, (m - 1) / m);
batchnorm.reset(new gpu::primitive{
[=, &op_desc, &tensor_desc, &derived_param_desc](void** inputs, void** outputs) {
+ auto mean = (global_stats ? inputs[3] : outputs[1]);
+ auto variance = (global_stats ? inputs[4] : outputs[2]);
CUDNN_SAFE_CALL(cudnnBatchNormalizationForwardTraining(*m_ctx->cudnn_handle,
bn_op,
alpha,
@@ -1312,8 +1316,8 @@ size_t runtime::gpu::CUDNNEmitter::build_batchnorm(const cudnnBatchNormMode_t& b
inputs[0],
inputs[1],
exp_avg_factor,
- outputs[1],
- outputs[2],
+ mean,
+ variance,
epsilon,
NULL,
NULL));
@@ -1324,13 +1328,13 @@ size_t runtime::gpu::CUDNNEmitter::build_batchnorm(const cudnnBatchNormMode_t& b
op_desc,
beta,
derived_param_desc,
- outputs[2],
+ variance,
beta,
derived_param_desc,
- outputs[2],
+ variance,
bias_factor,
derived_param_desc,
- outputs[2]));
+ variance));
debug_sync();
}});
break;
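
The important piece of the cudnn_emitter.cpp change is the buffer selection at the top of the primitive: with global_stats set, the kernel uses the running mean and variance handed in as extra op inputs instead of writing freshly accumulated statistics into the outputs. A minimal sketch of that selection, assuming the five-input layout (gamma, beta, input, mean, variance) implied by the emitter change further below:

    // Sketch only; inputs/outputs are the raw device-pointer arrays the
    // primitive lambda receives, as in the hunk above.
    void* mean = global_stats ? inputs[3] : outputs[1];     // running mean
    void* variance = global_stats ? inputs[4] : outputs[2]; // running variance
    // These pointers replace outputs[1]/outputs[2] both in the
    // cudnnBatchNormalizationForwardTraining call and in the cudnnOpTensor
    // rescaling of the variance by bias_factor.

Since the selected buffers feed the same training-mode call either way, the header change below only adds the flag, defaulting to false so existing callers keep their behavior.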
diff --git a/src/ngraph/runtime/gpu/cudnn_emitter.hpp b/src/ngraph/runtime/gpu/cudnn_emitter.hpp
index 34437e8a..3ba03da9 100644
--- a/src/ngraph/runtime/gpu/cudnn_emitter.hpp
+++ b/src/ngraph/runtime/gpu/cudnn_emitter.hpp
@@ -121,7 +121,8 @@ namespace ngraph
const Prop& direction,
const Shape& tensor_shape,
const Shape& param_shape,
- double epsilon);
+ double epsilon,
+ bool global_stats = false);
size_t build_softmax(const cudnnSoftmaxAlgorithm_t& algorithm,
const cudnnSoftmaxMode_t& mode,
diff --git a/src/ngraph/runtime/gpu/gpu_emitter.cpp b/src/ngraph/runtime/gpu/gpu_emitter.cpp
index d3583021..38228e24 100644
--- a/src/ngraph/runtime/gpu/gpu_emitter.cpp
+++ b/src/ngraph/runtime/gpu/gpu_emitter.cpp
@@ -1254,41 +1254,37 @@ namespace ngraph
auto& cudnn_emitter =
external_function->get_primitive_emitter()->get_cudnn_emitter();
- CUDNNEmitter::Prop direction;
- if (batchnorm->get_training_flag() && args.size() == 3)
- {
- direction = CUDNNEmitter::Prop::Forward;
+ size_t bn_index;
+ if (batchnorm->get_training_flag())
+ {
+ bn_index =
+ cudnn_emitter->build_batchnorm(CUDNN_BATCHNORM_SPATIAL,
+ out[0].get_type(),
+ CUDNNEmitter::Prop::Forward,
+ args[2].get_shape(),
+ args[0].get_shape(),
+ batchnorm->get_eps_value(),
+ batchnorm->get_arguments().size() == 5);
}
else
{
- direction = CUDNNEmitter::Prop::Inference;
+ bn_index =
+ cudnn_emitter->build_batchnorm(CUDNN_BATCHNORM_SPATIAL,
+ out[0].get_type(),
+ CUDNNEmitter::Prop::Inference,
+ args[2].get_shape(),
+ args[0].get_shape(),
+ batchnorm->get_eps_value());
}
- auto bn_index = cudnn_emitter->build_batchnorm(CUDNN_BATCHNORM_SPATIAL,
- out[0].get_type(),
- direction,
- args[2].get_shape(),
- args[0].get_shape(),
- batchnorm->get_eps_value());
-
writer.block_begin();
{
- writer << "gpu::invoke_primitive(ctx, " << bn_index << ", ";
- writer << "std::vector<void*>{" << args.front().get_name();
- for (size_t i = 1; i < args.size(); i++)
- {
- writer << ", " << args[i].get_name();
- }
- writer << "}.data(), ";
- writer << "std::vector<void*>{" << out.front().get_name();
- for (size_t i = 1; i < out.size(); i++)
- {
- writer << ", " << out[i].get_name();
- }
- writer << "}.data()";
- writer << ");\n";
+ writer << "static void* input[] = {" << node_names(args) << "};\n";
+ writer << "static void* output[] = {" << node_names(out) << "};\n";
+ writer << "gpu::invoke_primitive(ctx, " << bn_index << ", input, output);\n";
}
writer.block_end();
+
}
template <>
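
With the refactor in gpu_emitter.cpp, the training path passes global_stats = (get_arguments().size() == 5) to build_batchnorm, and the per-call std::vector construction in the generated code is replaced by two static pointer arrays built from node_names. A sketch of what the emitted code now looks like for a training BatchNorm, with hypothetical tensor names and primitive index:

    // Hypothetical emitter output; the tensor names and the index 42 are
    // invented for illustration.
    static void* input[] = {gamma_12, beta_13, data_14};
    static void* output[] = {bn_output_15, bn_mean_16, bn_variance_17};
    gpu::invoke_primitive(ctx, 42, input, output);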
diff --git a/src/ngraph/runtime/gpu/gpu_emitter.hpp b/src/ngraph/runtime/gpu/gpu_emitter.hpp
index c9f46546..c4570b18 100644
--- a/src/ngraph/runtime/gpu/gpu_emitter.hpp
+++ b/src/ngraph/runtime/gpu/gpu_emitter.hpp
@@ -99,6 +103,16 @@ namespace ngraph
}
writer.block_end();
}
+ private:
+ static std::string node_names(const std::vector<GPU_TensorViewWrapper>& args)
+ {
+ std::vector<std::string> names;
+ for (const GPU_TensorViewWrapper& tv : args)
+ {
+ names.push_back(tv.get_name());
+ }
+ return ngraph::join(names);
+ }
};
Shape get_padded_shape(const Shape& input_shape,
const Shape& padding_below,
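
The node_names helper added to gpu_emitter.hpp just collects each wrapper's tensor name and joins them; the brace-enclosed initializer lists emitted above rely on ngraph::join producing a comma-separated list (assumed default separator of ", "). A tiny usage sketch with made-up names:

    // If the wrappers' get_name() calls return "arg0", "arg1", "arg2":
    std::vector<std::string> names{"arg0", "arg1", "arg2"};
    std::string list = ngraph::join(names); // "arg0, arg1, arg2"
    // which the emitter splices into: static void* input[] = {arg0, arg1, arg2};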
diff --git a/src/tools/nbench/benchmark.cpp b/src/tools/nbench/benchmark.cpp
index a02e9d38..dce22daf 100644
--- a/src/tools/nbench/benchmark.cpp
+++ b/src/tools/nbench/benchmark.cpp
@@ -103,7 +103,7 @@ static void random_init(shared_ptr<runtime::TensorView> tv)
}
else if (et == element::f32)
{
- init_real_tv<float>(tv, -1, 1);
+ init_real_tv<float>(tv, 0, 1);
}
else if (et == element::f64)
{
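
The nbench tweak narrows the float32 random initialization from [-1, 1] to [0, 1]. Assuming init_real_tv draws from a uniform real distribution over its bounds (its definition is not part of this diff), an equivalent standalone initializer would be:

    #include <random>
    #include <vector>

    // Hypothetical stand-in for init_real_tv<float>(tv, 0, 1): uniform floats
    // in [0, 1), written into a plain host buffer for illustration.
    std::vector<float> uniform_floats(size_t n, float lo = 0.0f, float hi = 1.0f)
    {
        static std::mt19937 gen{std::random_device{}()};
        std::uniform_real_distribution<float> dist{lo, hi};
        std::vector<float> values(n);
        for (float& v : values)
        {
            v = dist(gen);
        }
        return values;
    }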