Skip to content

Instantly share code, notes, and snippets.

@csullivan
Created September 10, 2018 23:00
Show Gist options
  • Save csullivan/c05b7f6da21496cc7cf6ef04b1eebe8e to your computer and use it in GitHub Desktop.
diff --git a/src/ngraph/runtime/gpu/cudnn_emitter.cpp b/src/ngraph/runtime/gpu/cudnn_emitter.cpp
index bfdc117a..bf0dece0 100644
--- a/src/ngraph/runtime/gpu/cudnn_emitter.cpp
+++ b/src/ngraph/runtime/gpu/cudnn_emitter.cpp
@@ -1229,14 +1229,16 @@ size_t runtime::gpu::CUDNNEmitter::build_batchnorm(const cudnnBatchNormMode_t& b
const Prop& direction,
const Shape& tensor_shape,
const Shape& param_shape,
- double epsilon)
+ double epsilon,
+ bool global_stats)
{
// Assumes NC{d1...dN} format
std::stringstream ss;
ss.precision(std::numeric_limits<double>::digits10 + 2);
ss << "bn_op" << bn_op << "_dtype_" << dtype << "_dir" << static_cast<int>(direction) << "_ts"
- << join(tensor_shape, "_") << "_ps" << join(param_shape, "_") << "_eps" << epsilon;
+ << join(tensor_shape, "_") << "_ps" << join(param_shape, "_") << "_eps" << epsilon << "_g"
+ << global_stats;
std::string hash = ss.str();
std::replace(hash.begin(), hash.end(), '.', '_');
@@ -1300,6 +1302,8 @@ size_t runtime::gpu::CUDNNEmitter::build_batchnorm(const cudnnBatchNormMode_t& b
void* bias_factor = m_host_parameters.allocate_by_datatype(data_type, (m - 1) / m);
batchnorm.reset(new gpu::primitive{
[=, &op_desc, &tensor_desc, &derived_param_desc](void** inputs, void** outputs) {
+ auto mean = (global_stats ? inputs[3] : outputs[1]);
+ auto variance = (global_stats ? inputs[4] : outputs[2]);
CUDNN_SAFE_CALL(cudnnBatchNormalizationForwardTraining(*m_ctx->cudnn_handle,
bn_op,
alpha,
@@ -1312,8 +1316,8 @@ size_t runtime::gpu::CUDNNEmitter::build_batchnorm(const cudnnBatchNormMode_t& b
inputs[0],
inputs[1],
exp_avg_factor,
- outputs[1],
- outputs[2],
+ mean,
+ variance,
epsilon,
NULL,
NULL));
@@ -1324,13 +1328,13 @@ size_t runtime::gpu::CUDNNEmitter::build_batchnorm(const cudnnBatchNormMode_t& b
op_desc,
beta,
derived_param_desc,
- outputs[2],
+ variance,
beta,
derived_param_desc,
- outputs[2],
+ variance,
bias_factor,
derived_param_desc,
- outputs[2]));
+ variance));
debug_sync();
}});
break;
diff --git a/src/ngraph/runtime/gpu/cudnn_emitter.hpp b/src/ngraph/runtime/gpu/cudnn_emitter.hpp
index 34437e8a..3ba03da9 100644
--- a/src/ngraph/runtime/gpu/cudnn_emitter.hpp
+++ b/src/ngraph/runtime/gpu/cudnn_emitter.hpp
@@ -121,7 +121,8 @@ namespace ngraph
const Prop& direction,
const Shape& tensor_shape,
const Shape& param_shape,
- double epsilon);
+ double epsilon,
+ bool global_stats = false);
size_t build_softmax(const cudnnSoftmaxAlgorithm_t& algorithm,
const cudnnSoftmaxMode_t& mode,
diff --git a/src/ngraph/runtime/gpu/gpu_emitter.cpp b/src/ngraph/runtime/gpu/gpu_emitter.cpp
index d3583021..38228e24 100644
--- a/src/ngraph/runtime/gpu/gpu_emitter.cpp
+++ b/src/ngraph/runtime/gpu/gpu_emitter.cpp
@@ -1254,41 +1254,37 @@ namespace ngraph
auto& cudnn_emitter =
external_function->get_primitive_emitter()->get_cudnn_emitter();
- CUDNNEmitter::Prop direction;
- if (batchnorm->get_training_flag() && args.size() == 3)
- {
- direction = CUDNNEmitter::Prop::Forward;
+ size_t bn_index;
+ if (batchnorm->get_training_flag())
+ {
+ bn_index =
+ cudnn_emitter->build_batchnorm(CUDNN_BATCHNORM_SPATIAL,
+ out[0].get_type(),
+ CUDNNEmitter::Prop::Forward,
+ args[2].get_shape(),
+ args[0].get_shape(),
+ batchnorm->get_eps_value(),
+ batchnorm->get_arguments().size() == 5);
}
else
{
- direction = CUDNNEmitter::Prop::Inference;
+ bn_index =
+ cudnn_emitter->build_batchnorm(CUDNN_BATCHNORM_SPATIAL,
+ out[0].get_type(),
+ CUDNNEmitter::Prop::Inference,
+ args[2].get_shape(),
+ args[0].get_shape(),
+ batchnorm->get_eps_value());
}
- auto bn_index = cudnn_emitter->build_batchnorm(CUDNN_BATCHNORM_SPATIAL,
- out[0].get_type(),
- direction,
- args[2].get_shape(),
- args[0].get_shape(),
- batchnorm->get_eps_value());
-
writer.block_begin();
{
- writer << "gpu::invoke_primitive(ctx, " << bn_index << ", ";
- writer << "std::vector<void*>{" << args.front().get_name();
- for (size_t i = 1; i < args.size(); i++)
- {
- writer << ", " << args[i].get_name();
- }
- writer << "}.data(), ";
- writer << "std::vector<void*>{" << out.front().get_name();
- for (size_t i = 1; i < out.size(); i++)
- {
- writer << ", " << out[i].get_name();
- }
- writer << "}.data()";
- writer << ");\n";
+ writer << "static void* input[] = {" << node_names(args) << "};\n";
+ writer << "static void* output[] = {" << node_names(out) << "};\n";
+ writer << "gpu::invoke_primitive(ctx, " << bn_index << ", input, output);\n";
}
writer.block_end();
+
}
template <>
diff --git a/src/ngraph/runtime/gpu/gpu_emitter.hpp b/src/ngraph/runtime/gpu/gpu_emitter.hpp
index c9f46546..c4570b18 100644
--- a/src/ngraph/runtime/gpu/gpu_emitter.hpp
+++ b/src/ngraph/runtime/gpu/gpu_emitter.hpp
@@ -99,6 +103,16 @@ namespace ngraph
}
writer.block_end();
}
+ private:
+ static std::string node_names(const std::vector<GPU_TensorViewWrapper>& args)
+ {
+ std::vector<std::string> names;
+ for (const GPU_TensorViewWrapper& tv : args)
+ {
+ names.push_back(tv.get_name());
+ }
+ return ngraph::join(names);
+ }
};
Shape get_padded_shape(const Shape& input_shape,
const Shape& padding_below,
diff --git a/src/tools/nbench/benchmark.cpp b/src/tools/nbench/benchmark.cpp
index a02e9d38..dce22daf 100644
--- a/src/tools/nbench/benchmark.cpp
+++ b/src/tools/nbench/benchmark.cpp
@@ -103,7 +103,7 @@ static void random_init(shared_ptr<runtime::TensorView> tv)
}
else if (et == element::f32)
{
- init_real_tv<float>(tv, -1, 1);
+ init_real_tv<float>(tv, 0, 1);
}
else if (et == element::f64)
{
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment