tensorflow_r1.8_rc1_macOS_patch.diff
------------------------------------
Notes for building TensorFlow r1.8 RC1 with CUDA support on macOS, plus the patch needed to make the build work.

exports
-------
export CUDA_HOME=/usr/local/cuda
export DYLD_LIBRARY_PATH=/Users/dadler/lib:/usr/local/cuda/lib:/usr/local/cuda/extras/CUPTI/lib
export LD_LIBRARY_PATH=$DYLD_LIBRARY_PATH
export PATH=$DYLD_LIBRARY_PATH:$PATH
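
Optional sanity check that the toolkit and cuDNN are where the configure step below expects them (this assumes cuDNN was copied into /usr/local/cuda; /Users/dadler/lib is my own extra library directory, drop or replace it as needed):
ls /usr/local/cuda/lib/libcudart*.dylib /usr/local/cuda/lib/libcudnn*.dylib
nvcc --version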

bazel
-----
I used bazel 0.10.0 (installed from bazel-0.10.0-installer-darwin-x86_64.sh)

clang
-----
$ clang --version
Apple LLVM version 8.1.0 (clang-802.0.42)
Target: x86_64-apple-darwin17.5.0
Thread model: posix
InstalledDir: /Library/Developer/CommandLineTools/usr/bin

configure
---------
CUDA_TOOLKIT_PATH="/usr/local/cuda" TF_UNOFFICIAL_SETTING=1 TF_NEED_CUDA=1 CUDNN_INSTALL_PATH="/usr/local/cuda" TF_CUDA_COMPUTE_CAPABILITIES="3.0" TF_CUDNN_VERSION="7.0" TF_CUDA_VERSION="9.1" TF_CUDA_VERSION_TOOLKIT="9.1" ./configure
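
TF_CUDA_COMPUTE_CAPABILITIES="3.0" matches the Kepler GPUs in the CUDA-capable MacBook Pros (GeForce GT 650M/750M); if you are building for a different GPU (e.g. an external one), check the chipset and set the matching compute capability:
system_profiler SPDisplaysDataType | grep "Chipset Model"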

builds
------
bazel build --config=cuda --config=opt --verbose_failures --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" --action_env PATH --action_env LD_LIBRARY_PATH --action_env DYLD_LIBRARY_PATH //tensorflow/cc:tutorials_example_trainer
bazel build --config=cuda --config=opt --verbose_failures --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" --action_env PATH --action_env LD_LIBRARY_PATH --action_env DYLD_LIBRARY_PATH //tensorflow/tools/pip_package:build_pip_package
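
If the first target builds, the example trainer can be used as a quick GPU smoke test (the --use_gpu flag is taken from the upstream example; adjust if your checkout differs):
bazel-bin/tensorflow/cc/tutorials_example_trainer --use_gpu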

pack and install
----------------
bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
pip install /tmp/tensorflow_pkg/tensorflow-1.8.0rc1-cp36-cp36m-macosx_10_13_x86_64.whl
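
To confirm the installed wheel actually runs on the GPU, a tiny session with device placement logging is enough (plain TF 1.x API, nothing specific to this build):
python -c "import tensorflow as tf; sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)); print(sess.run(tf.matmul(tf.random_normal([2, 2]), tf.random_normal([2, 2]))))"
With log_device_placement=True the MatMul op should be reported on the GPU device (/device:GPU:0) if the CUDA build is working.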

patch (tensorflow_r1.8_rc1_macOS_patch.diff)
--------------------------------------------
The patch drops the __align__(sizeof(T)) qualifier from the extern __shared__ buffers in three CUDA kernel files (the macOS toolchain otherwise rejects these templated declarations), points the protobuf_archive dependency at dtrebbien's protobuf fork, and comments out the -lgomp link flag, which Apple's clang does not provide.
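
Apply it to the r1.8 (1.8.0rc1) source tree before running configure; since it is git format-patch output, either of these should work from the tensorflow source root (assuming it is saved as tensorflow_r1.8_rc1_macOS_patch.diff):
git am tensorflow_r1.8_rc1_macOS_patch.diff
git apply tensorflow_r1.8_rc1_macOS_patch.diff
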
From b1639e15f212e9cc188276cf20d586f1be035bdb Mon Sep 17 00:00:00 2001
From: Doron Adler <[email protected]>
Date: Wed, 25 Apr 2018 20:40:27 +0300
Subject: [PATCH 1/1] tensorflow_r1.8_rc1_macOS_patch

diff --git a/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc b/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc
index a561d918bd..785e0ddf4e 100644
--- a/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc
+++ b/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc
@@ -69,7 +69,7 @@ __global__ void concat_variable_kernel(
IntType num_inputs = input_ptr_data.size;
// verbose declaration needed due to template
- extern __shared__ __align__(sizeof(T)) unsigned char smem[];
+ extern __shared__ unsigned char smem[];
IntType* smem_col_scan = reinterpret_cast<IntType*>(smem);
if (useSmem) {
diff --git a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
index 94989089ec..a2e3e8bc87 100644
--- a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
@@ -172,7 +172,7 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNHWCSmall(
const DepthwiseArgs args, const T* input, const T* filter, T* output) {
assert(CanLaunchDepthwiseConv2dGPUSmall(args));
// Holds block plus halo and filter data for blockDim.x depths.
- extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[];
+ extern __shared__ unsigned char shared_memory[];
T* const shared_data = reinterpret_cast<T*>(shared_memory);
const int num_batches = args.batch;
@@ -452,7 +452,7 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNCHWSmall(
const DepthwiseArgs args, const T* input, const T* filter, T* output) {
assert(CanLaunchDepthwiseConv2dGPUSmall(args));
// Holds block plus halo and filter data for blockDim.z depths.
- extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[];
+ extern __shared__ unsigned char shared_memory[];
T* const shared_data = reinterpret_cast<T*>(shared_memory);
const int num_batches = args.batch;
@@ -1118,7 +1118,7 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNHWCSmall(
const DepthwiseArgs args, const T* output, const T* input, T* filter) {
assert(CanLaunchDepthwiseConv2dBackpropFilterGPUSmall(args, blockDim.z));
// Holds block plus halo and filter data for blockDim.x depths.
- extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[];
+ extern __shared__ unsigned char shared_memory[];
T* const shared_data = reinterpret_cast<T*>(shared_memory);
const int num_batches = args.batch;
@@ -1388,7 +1388,7 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNCHWSmall(
const DepthwiseArgs args, const T* output, const T* input, T* filter) {
assert(CanLaunchDepthwiseConv2dBackpropFilterGPUSmall(args, blockDim.x));
// Holds block plus halo and filter data for blockDim.z depths.
- extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[];
+ extern __shared__ unsigned char shared_memory[];
T* const shared_data = reinterpret_cast<T*>(shared_memory);
const int num_batches = args.batch;
diff --git a/tensorflow/core/kernels/split_lib_gpu.cu.cc b/tensorflow/core/kernels/split_lib_gpu.cu.cc
index 393818730b..a7d9e02853 100644
--- a/tensorflow/core/kernels/split_lib_gpu.cu.cc
+++ b/tensorflow/core/kernels/split_lib_gpu.cu.cc
@@ -121,7 +121,7 @@ __global__ void split_v_kernel(const T* input_ptr,
int num_outputs = output_ptr_data.size;
// verbose declaration needed due to template
- extern __shared__ __align__(sizeof(T)) unsigned char smem[];
+ extern __shared__ unsigned char smem[];
IntType* smem_col_scan = reinterpret_cast<IntType*>(smem);
if (useSmem) {
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 48728ac131..7696d30dd8 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -330,11 +330,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
tf_http_archive(
name = "protobuf_archive",
urls = [
- "https://mirror.bazel.build/github.com/google/protobuf/archive/396336eb961b75f03b25824fe86cf6490fb75e3a.tar.gz",
- "https://github.com/google/protobuf/archive/396336eb961b75f03b25824fe86cf6490fb75e3a.tar.gz",
+ "https://mirror.bazel.build/github.com/dtrebbien/protobuf/archive/50f552646ba1de79e07562b41f3999fe036b4fd0.tar.gz",
+ "https://github.com/dtrebbien/protobuf/archive/50f552646ba1de79e07562b41f3999fe036b4fd0.tar.gz",
],
- sha256 = "846d907acf472ae233ec0882ef3a2d24edbbe834b80c305e867ac65a1f2c59e3",
- strip_prefix = "protobuf-396336eb961b75f03b25824fe86cf6490fb75e3a",
+ sha256 = "eb16b33431b91fe8cee479575cee8de202f3626aaf00d9bf1783c6e62b4ffbc7",
+ strip_prefix = "protobuf-50f552646ba1de79e07562b41f3999fe036b4fd0",
)
# We need to import the protobuf library under the names com_google_protobuf
diff --git a/third_party/gpus/cuda/BUILD.tpl b/third_party/gpus/cuda/BUILD.tpl
index 2a37c65bc7..61b203e005 100644
--- a/third_party/gpus/cuda/BUILD.tpl
+++ b/third_party/gpus/cuda/BUILD.tpl
@@ -110,7 +110,7 @@ cc_library(
".",
"cuda/include",
],
- linkopts = ["-lgomp"],
+ # linkopts = ["-lgomp"],
linkstatic = 1,
visibility = ["//visibility:public"],
)
--
2.17.0