tensorflow_r1.8_rc1_macOS_patch.diff
------------------------------------
Notes for building TensorFlow r1.8 RC1 with CUDA support on macOS, plus the patch needed to make the build work.

exports
-------
export CUDA_HOME=/usr/local/cuda
export DYLD_LIBRARY_PATH=/Users/dadler/lib:/usr/local/cuda/lib:/usr/local/cuda/extras/CUPTI/lib
export LD_LIBRARY_PATH=$DYLD_LIBRARY_PATH
export PATH=$DYLD_LIBRARY_PATH:$PATH
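
Optional sanity check that the toolkit and cuDNN are where the configure step below expects them (this assumes cuDNN was copied into /usr/local/cuda; /Users/dadler/lib is my own extra library directory, drop or replace it as needed):
ls /usr/local/cuda/lib/libcudart*.dylib /usr/local/cuda/lib/libcudnn*.dylib
nvcc --version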

bazel
-----
I used bazel 0.10.0 (installed from bazel-0.10.0-installer-darwin-x86_64.sh)

clang
-----
$ clang --version
Apple LLVM version 8.1.0 (clang-802.0.42)
Target: x86_64-apple-darwin17.5.0
Thread model: posix
InstalledDir: /Library/Developer/CommandLineTools/usr/bin

configure
---------
CUDA_TOOLKIT_PATH="/usr/local/cuda" TF_UNOFFICIAL_SETTING=1 TF_NEED_CUDA=1 CUDNN_INSTALL_PATH="/usr/local/cuda" TF_CUDA_COMPUTE_CAPABILITIES="3.0" TF_CUDNN_VERSION="7.0" TF_CUDA_VERSION="9.1" TF_CUDA_VERSION_TOOLKIT="9.1" ./configure
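
TF_CUDA_COMPUTE_CAPABILITIES="3.0" matches the Kepler GPUs in the CUDA-capable MacBook Pros (GeForce GT 650M/750M); if you are building for a different GPU (e.g. an external one), check the chipset and set the matching compute capability:
system_profiler SPDisplaysDataType | grep "Chipset Model"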

builds
------
bazel build --config=cuda --config=opt --verbose_failures --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" --action_env PATH --action_env LD_LIBRARY_PATH --action_env DYLD_LIBRARY_PATH //tensorflow/cc:tutorials_example_trainer
bazel build --config=cuda --config=opt --verbose_failures --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" --action_env PATH --action_env LD_LIBRARY_PATH --action_env DYLD_LIBRARY_PATH //tensorflow/tools/pip_package:build_pip_package
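
If the first target builds, the example trainer can be used as a quick GPU smoke test (the --use_gpu flag is taken from the upstream example; adjust if your checkout differs):
bazel-bin/tensorflow/cc/tutorials_example_trainer --use_gpu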

pack and install
----------------
bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
pip install /tmp/tensorflow_pkg/tensorflow-1.8.0rc1-cp36-cp36m-macosx_10_13_x86_64.whl
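
To confirm the installed wheel actually runs on the GPU, a tiny session with device placement logging is enough (plain TF 1.x API, nothing specific to this build):
python -c "import tensorflow as tf; sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)); print(sess.run(tf.matmul(tf.random_normal([2, 2]), tf.random_normal([2, 2]))))"
With log_device_placement=True the MatMul op should be reported on the GPU device (/device:GPU:0) if the CUDA build is working.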

patch (tensorflow_r1.8_rc1_macOS_patch.diff)
--------------------------------------------
The patch drops the __align__(sizeof(T)) qualifier from the extern __shared__ buffers in three CUDA kernel files (the macOS toolchain otherwise rejects these templated declarations), points the protobuf_archive dependency at dtrebbien's protobuf fork, and comments out the -lgomp link flag, which Apple's clang does not provide.
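
Apply it to the r1.8 (1.8.0rc1) source tree before running configure; since it is git format-patch output, either of these should work from the tensorflow source root (assuming it is saved as tensorflow_r1.8_rc1_macOS_patch.diff):
git am tensorflow_r1.8_rc1_macOS_patch.diff
git apply tensorflow_r1.8_rc1_macOS_patch.diff
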
From b1639e15f212e9cc188276cf20d586f1be035bdb Mon Sep 17 00:00:00 2001
From: Doron Adler <[email protected]>
Date: Wed, 25 Apr 2018 20:40:27 +0300
Subject: [PATCH 1/1] tensorflow_r1.8_rc1_macOS_patch

diff --git a/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc b/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc
index a561d918bd..785e0ddf4e 100644
--- a/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc
+++ b/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc
@@ -69,7 +69,7 @@ __global__ void concat_variable_kernel(
IntType num_inputs = input_ptr_data.size;
// verbose declaration needed due to template
- extern __shared__ __align__(sizeof(T)) unsigned char smem[];
+ extern __shared__ unsigned char smem[];
IntType* smem_col_scan = reinterpret_cast<IntType*>(smem);
if (useSmem) {
diff --git a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
index 94989089ec..a2e3e8bc87 100644
--- a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
@@ -172,7 +172,7 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNHWCSmall(
const DepthwiseArgs args, const T* input, const T* filter, T* output) {
assert(CanLaunchDepthwiseConv2dGPUSmall(args));
// Holds block plus halo and filter data for blockDim.x depths.
- extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[];
+ extern __shared__ unsigned char shared_memory[];
T* const shared_data = reinterpret_cast<T*>(shared_memory);
const int num_batches = args.batch;
@@ -452,7 +452,7 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNCHWSmall(
const DepthwiseArgs args, const T* input, const T* filter, T* output) {
assert(CanLaunchDepthwiseConv2dGPUSmall(args));
// Holds block plus halo and filter data for blockDim.z depths.
- extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[];
+ extern __shared__ unsigned char shared_memory[];
T* const shared_data = reinterpret_cast<T*>(shared_memory);
const int num_batches = args.batch;
@@ -1118,7 +1118,7 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNHWCSmall(
const DepthwiseArgs args, const T* output, const T* input, T* filter) {
assert(CanLaunchDepthwiseConv2dBackpropFilterGPUSmall(args, blockDim.z));
// Holds block plus halo and filter data for blockDim.x depths.
- extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[];
+ extern __shared__ unsigned char shared_memory[];
T* const shared_data = reinterpret_cast<T*>(shared_memory);
const int num_batches = args.batch;
@@ -1388,7 +1388,7 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNCHWSmall(
const DepthwiseArgs args, const T* output, const T* input, T* filter) {
assert(CanLaunchDepthwiseConv2dBackpropFilterGPUSmall(args, blockDim.x));
// Holds block plus halo and filter data for blockDim.z depths.
- extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[];
+ extern __shared__ unsigned char shared_memory[];
T* const shared_data = reinterpret_cast<T*>(shared_memory);
const int num_batches = args.batch;
diff --git a/tensorflow/core/kernels/split_lib_gpu.cu.cc b/tensorflow/core/kernels/split_lib_gpu.cu.cc
index 393818730b..a7d9e02853 100644
--- a/tensorflow/core/kernels/split_lib_gpu.cu.cc
+++ b/tensorflow/core/kernels/split_lib_gpu.cu.cc
@@ -121,7 +121,7 @@ __global__ void split_v_kernel(const T* input_ptr,
int num_outputs = output_ptr_data.size;
// verbose declaration needed due to template
- extern __shared__ __align__(sizeof(T)) unsigned char smem[];
+ extern __shared__ unsigned char smem[];
IntType* smem_col_scan = reinterpret_cast<IntType*>(smem);
if (useSmem) {
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 48728ac131..7696d30dd8 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -330,11 +330,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
tf_http_archive(
name = "protobuf_archive",
urls = [
- "https://mirror.bazel.build/github.com/google/protobuf/archive/396336eb961b75f03b25824fe86cf6490fb75e3a.tar.gz",
- "https://github.com/google/protobuf/archive/396336eb961b75f03b25824fe86cf6490fb75e3a.tar.gz",
+ "https://mirror.bazel.build/github.com/dtrebbien/protobuf/archive/50f552646ba1de79e07562b41f3999fe036b4fd0.tar.gz",
+ "https://github.com/dtrebbien/protobuf/archive/50f552646ba1de79e07562b41f3999fe036b4fd0.tar.gz",
],
- sha256 = "846d907acf472ae233ec0882ef3a2d24edbbe834b80c305e867ac65a1f2c59e3",
- strip_prefix = "protobuf-396336eb961b75f03b25824fe86cf6490fb75e3a",
+ sha256 = "eb16b33431b91fe8cee479575cee8de202f3626aaf00d9bf1783c6e62b4ffbc7",
+ strip_prefix = "protobuf-50f552646ba1de79e07562b41f3999fe036b4fd0",
)
# We need to import the protobuf library under the names com_google_protobuf
diff --git a/third_party/gpus/cuda/BUILD.tpl b/third_party/gpus/cuda/BUILD.tpl
index 2a37c65bc7..61b203e005 100644
--- a/third_party/gpus/cuda/BUILD.tpl
+++ b/third_party/gpus/cuda/BUILD.tpl
@@ -110,7 +110,7 @@ cc_library(
".",
"cuda/include",
],
- linkopts = ["-lgomp"],
+ # linkopts = ["-lgomp"],
linkstatic = 1,
visibility = ["//visibility:public"],
)
--
2.17.0