Created
August 7, 2017 19:35
-
-
Save lissyx/c008b43fd808d132989ec4d238d664fb to your computer and use it in GitHub Desktop.
RPi3 ARMv8
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl | |
index 48ef8dfa8..be831d5c0 100644 | |
--- a/tensorflow/core/platform/default/build_config.bzl | |
+++ b/tensorflow/core/platform/default/build_config.bzl | |
@@ -8,7 +8,7 @@ load("//tensorflow:tensorflow.bzl", "if_not_mobile") | |
WITH_GCP_SUPPORT = False | |
WITH_HDFS_SUPPORT = False | |
WITH_XLA_SUPPORT = False | |
-WITH_JEMALLOC = True | |
+WITH_JEMALLOC = False | |
# Appends a suffix to a list of deps. | |
def tf_deps(deps, suffix): | |
diff --git a/tensorflow/tools/graph_transforms/quantize_nodes.cc b/tensorflow/tools/graph_transforms/quantize_nodes.cc | |
index f460f31d3..7e7b4abe0 100644 | |
--- a/tensorflow/tools/graph_transforms/quantize_nodes.cc | |
+++ b/tensorflow/tools/graph_transforms/quantize_nodes.cc | |
@@ -684,6 +684,11 @@ Status QuantizeNodes(const GraphDef& input_graph_def, | |
const NodeDef& float_node = match.node; | |
const QuantizedOpInfo& op_info = op_map[float_node.op()]; | |
+ bool are_under_while_if = false; | |
+ if ( (float_node.name().find("/while/") != string::npos) || (float_node.name().find("/if/") != string::npos) ) { | |
+ are_under_while_if = true; | |
+ } | |
+ | |
DataTypeVector input_types; | |
DataTypeVector output_types; | |
TF_RETURN_IF_ERROR( | |
@@ -723,10 +728,15 @@ Status QuantizeNodes(const GraphDef& input_graph_def, | |
string unique_input_name = | |
namespace_prefix + "/" + UniqueNodeNameFromInput(input_name); | |
+ fprintf(stderr, "float_node.op()=%s input_name=%s are_under_while_if=%d\n", float_node.op().c_str(), input_name.c_str(), are_under_while_if); | |
+ | |
// Add some common constants we need for reshaping inputs. | |
NodeDef reshape_dims; | |
reshape_dims.set_op("Const"); | |
reshape_dims.set_name(unique_input_name + "/reshape_dims"); | |
+ if (are_under_while_if) { | |
+ AddNodeInput("^" + input_name, &reshape_dims); | |
+ } | |
SetNodeAttr("dtype", DT_INT32, &reshape_dims); | |
Tensor reshape_dims_tensor(DT_INT32, {1}); | |
reshape_dims_tensor.flat<int32>()(0) = -1; | |
@@ -736,6 +746,9 @@ Status QuantizeNodes(const GraphDef& input_graph_def, | |
NodeDef reduction_dims; | |
reduction_dims.set_op("Const"); | |
reduction_dims.set_name(unique_input_name + "/reduction_dims"); | |
+ if (are_under_while_if) { | |
+ AddNodeInput("^" + input_name, &reduction_dims); | |
+ } | |
SetNodeAttr("dtype", DT_INT32, &reduction_dims); | |
Tensor reduction_dims_tensor(DT_INT32, {1}); | |
reduction_dims_tensor.flat<int32>()(0) = 0; | |
diff --git a/tools/arm_compiler/BUILD b/tools/arm_compiler/BUILD | |
index 92699753f..58fa200b4 100644 | |
--- a/tools/arm_compiler/BUILD | |
+++ b/tools/arm_compiler/BUILD | |
@@ -13,6 +13,7 @@ cc_toolchain_suite( | |
"linaro-armeabi|gcc": ":cc-compiler-linaro-armeabi", | |
"linaro64-armeabi|gcc": ":cc-compiler-linaro64-armeabi", | |
"rpi-armeabi|gcc": ":cc-compiler-rpi-armeabi", | |
+ "rpi3-aarch64|gcc": ":cc-compiler-rpi3-aarch64", | |
}, | |
) | |
@@ -146,3 +147,18 @@ cc_toolchain( | |
supports_param_files = 1, | |
visibility = ["//visibility:public"], | |
) | |
+ | |
+cc_toolchain( | |
+ name = "cc-compiler-rpi3-aarch64",  # referenced by the "rpi3-aarch64|gcc" entry added to cc_toolchain_suite | |
+ all_files = ":gcc_linux_all_files", | |
+ compiler_files = ":gcc_linux_compiler_files", | |
+ cpu = "gcc-aarch64",  # NOTE(review): CROSSTOOL target_cpu is "rpi3-aarch64" -- confirm this value is intended | |
+ dwp_files = ":empty", | |
+ dynamic_runtime_libs = [":empty"], | |
+ linker_files = ":gcc_linux_linker_files", | |
+ objcopy_files = "//tools/arm_compiler/gcc_arm_rpi:objcopy",  # NOTE(review): gcc_arm_rpi target, but CROSSTOOL uses /usr/bin/aarch64-linux-gnu-objcopy -- confirm | |
+ static_runtime_libs = [":empty"], | |
+ strip_files = "//tools/arm_compiler/gcc_arm_rpi:strip",  # NOTE(review): gcc_arm_rpi target, but CROSSTOOL uses /usr/bin/aarch64-linux-gnu-strip -- confirm | |
+ supports_param_files = 1, | |
+ visibility = ["//visibility:public"], | |
+) | |
diff --git a/tools/arm_compiler/CROSSTOOL b/tools/arm_compiler/CROSSTOOL | |
index e8f855b8a..c09182065 100644 | |
--- a/tools/arm_compiler/CROSSTOOL | |
+++ b/tools/arm_compiler/CROSSTOOL | |
@@ -17,6 +17,11 @@ default_toolchain { | |
toolchain_identifier: "gcc_rpi_linux_armhf" | |
} | |
+default_toolchain {  # maps the "rpi3-aarch64" cpu to the toolchain added at the end of this CROSSTOOL | |
+ cpu: "rpi3-aarch64"  # same string as target_cpu in the gcc_rpi3_linux_aarch64 toolchain | |
+ toolchain_identifier: "gcc_rpi3_linux_aarch64"  # same string as that toolchain's toolchain_identifier | |
+} | |
+ | |
toolchain { | |
abi_version: "armeabi" | |
abi_libc_version: "glibc_2.13" | |
@@ -352,8 +357,6 @@ toolchain { | |
linker_flag: "-Wl,--gc-sections" | |
} | |
} | |
- | |
- | |
toolchain { | |
abi_version: "armeabi" | |
abi_libc_version: "glibc_2.19" | |
@@ -412,6 +415,8 @@ toolchain { | |
compiler_flag: "external/GccArmRpi/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/arm-linux-gnueabihf/sysroot/usr/include/arm-linux-gnueabihf" | |
compiler_flag: "-isystem" | |
compiler_flag: "external/GccArmRpi/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/lib/gcc/arm-linux-gnueabihf/4.9.3/include" | |
+ compiler_flag: "-isystem" | |
+ compiler_flag: "DEEPSPEECH_ROOT/multistrap-raspbian-jessie/usr/include/" | |
cxx_flag: "-std=c++11" | |
cxx_flag: "-isystem" | |
@@ -433,6 +438,7 @@ toolchain { | |
cxx_builtin_include_directory: "%package(@GccArmRpi//arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/lib/gcc/arm-linux-gnueabihf/4.9.3/include)%" | |
cxx_builtin_include_directory: "%package(@GccArmRpi//arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/lib/gcc/arm-linux-gnueabihf/4.9.3/include-fixed)%" | |
cxx_builtin_include_directory: "%package(@GccArmRpi//arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/arm-linux-gnueabihf/include)%/c++/4.9.3" | |
+ cxx_builtin_include_directory: "DEEPSPEECH_ROOT/multistrap-raspbian-jessie/usr/include/" | |
# Anticipated future default. | |
# This makes GCC and Clang do what we want when called through symlinks. | |
@@ -473,6 +479,7 @@ toolchain { | |
linker_flag: "-Lexternal/GccArmRpi/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/arm-linux-gnueabihf/sysroot/lib" | |
linker_flag: "-Lexternal/GccArmRpi/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/arm-linux-gnueabihf/sysroot/usr/lib" | |
linker_flag: "-Bexternal/GccArmRpi/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/arm-linux-gnueabihf/bin" | |
+ linker_flag: "-LDEEPSPEECH_ROOT/multistrap-raspbian-jessie/usr/lib" | |
linker_flag: "-pie" | |
linker_flag: "-lstdc++" | |
# linker_flag: "-lm" | |
@@ -497,7 +504,7 @@ toolchain { | |
# Maybe we should enable https://gcc.gnu.org/wiki/DebugFission for opt or | |
# even generally? However, that can't happen here, as it requires special | |
# handling in Bazel. | |
- compiler_flag: "-g0" | |
+ compiler_flag: "-g" | |
# Disable assertions | |
compiler_flag: "-DNDEBUG" | |
@@ -530,3 +537,155 @@ toolchain { | |
} | |
linking_mode_flags { mode: DYNAMIC } | |
} | |
+ | |
+toolchain {  # Cross toolchain for target_cpu "rpi3-aarch64", driven by the host's /usr/bin/aarch64-linux-gnu-* tools. | |
+ abi_version: "aarch64" | |
+ abi_libc_version: "glibc_2.19" | |
+ builtin_sysroot: "" | |
+ compiler: "gcc" | |
+ host_system_name: "aarch64" | |
+ needsPic: true | |
+ supports_gold_linker: false | |
+ supports_incremental_linker: false | |
+ supports_fission: false | |
+ supports_interface_shared_objects: false | |
+ supports_normalizing_ar: false | |
+ supports_start_end_lib: false | |
+ supports_thin_archives: false | |
+ target_libc: "glibc_2.19" | |
+ target_cpu: "rpi3-aarch64" | |
+ target_system_name: "arm64-v8a" | |
+ toolchain_identifier: "gcc_rpi3_linux_aarch64" | |
+ | |
+ tool_path { name: "ar" path: "/usr/bin/aarch64-linux-gnu-ar" } | |
+ tool_path { name: "compat-ld" path: "/usr/bin/aarch64-linux-gnu-ld" } | |
+ tool_path { name: "cpp" path: "/usr/bin/aarch64-linux-gnu-cpp" } | |
+ tool_path { name: "dwp" path: "/usr/bin/aarch64-linux-gnu-dwp" } | |
+ tool_path { name: "gcc" path: "/usr/bin/aarch64-linux-gnu-gcc" } | |
+ tool_path { name: "gcov" path: "/usr/bin/aarch64-linux-gnu-gcov" } | |
+ # C(++) compiles invoke the compiler (as that is the one knowing where | |
+ # to find libraries), but we provide LD so other rules can invoke the linker. | |
+ tool_path { name: "ld" path: "/usr/bin/aarch64-linux-gnu-ld" } | |
+ tool_path { name: "nm" path: "/usr/bin/aarch64-linux-gnu-nm" } | |
+ tool_path { name: "objcopy" path: "/usr/bin/aarch64-linux-gnu-objcopy" } | |
+ objcopy_embed_flag: "-I" | |
+ objcopy_embed_flag: "binary" | |
+ tool_path { name: "objdump" path: "/usr/bin/aarch64-linux-gnu-objdump" } | |
+ tool_path { name: "strip" path: "/usr/bin/aarch64-linux-gnu-strip" } | |
+ | |
+ compiler_flag: "-march=armv8-a+crc" | |
+ #compiler_flag: "--sysroot=external/GccArmRpi/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/arm-linux-gnueabihf/sysroot" | |
+ compiler_flag: "--sysroot=DEEPSPEECH_ROOT/multistrap-debian_arm64-sid/"  # NOTE(review): DEEPSPEECH_ROOT looks like a placeholder to substitute before building -- confirm | |
+ #compiler_flag: "-Wl,--sysroot=external/GccArmRpi/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/arm-linux-gnueabihf/sysroot" | |
+ #compiler_flag: "-mfloat-abi=hard" | |
+ # Local change to disable IS_MOBILE_PLATFORM | |
+ compiler_flag: "-D__ARM_RPI__" | |
+ # compiler_flag: "-nostdinc" | |
+ # compiler_flag: "-isystem" | |
+ cxx_flag: "-std=c++11" | |
+ cxx_builtin_include_directory: "DEEPSPEECH_ROOT/multistrap-debian_arm64-sid/usr/include/" | |
+ | |
+ # Anticipated future default. | |
+ # This makes GCC and Clang do what we want when called through symlinks. | |
+ unfiltered_cxx_flag: "-no-canonical-prefixes" | |
+ | |
+ # Make C++ compilation deterministic. Use linkstamping instead of these | |
+ # compiler symbols. | |
+ unfiltered_cxx_flag: "-Wno-builtin-macro-redefined" | |
+ unfiltered_cxx_flag: "-D__DATE__=\"redacted\"" | |
+ unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\"" | |
+ unfiltered_cxx_flag: "-D__TIME__=\"redacted\"" | |
+ | |
+ # Security hardening on by default. | |
+ compiler_flag: "-fstack-protector" | |
+ compiler_flag: "-fPIE" | |
+ # All warnings are enabled. Maybe enable -Werror as well? | |
+ compiler_flag: "-Wall" | |
+ # Enable a few more warnings that aren't part of -Wall. | |
+ compiler_flag: "-Wunused-but-set-parameter" | |
+ # But disable some that are problematic. | |
+ compiler_flag: "-Wno-free-nonheap-object" # has false positives | |
+ # Keep stack frames for debugging, even in opt mode. | |
+ compiler_flag: "-fno-omit-frame-pointer" | |
+ # Enable coloring even if there's no attached terminal. Bazel removes the | |
+ # escape sequences if --nocolor is specified. | |
+ compiler_flag: "-fdiagnostics-color=always" | |
+ | |
+ # compiler_flag: "-D__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1" | |
+ # compiler_flag: "-D__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2" | |
+ # compiler_flag: "-D__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4" | |
+ # compiler_flag: "-D__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8" | |
+ | |
+ cxx_builtin_include_directory: "/usr/aarch64-linux-gnu/include/" | |
+ cxx_builtin_include_directory: "/usr/lib/gcc-cross/aarch64-linux-gnu/6/include/" | |
+ cxx_builtin_include_directory: "/usr/lib/gcc-cross/aarch64-linux-gnu/6/include-fixed/" | |
+ | |
+ # linker_flag: "-target" | |
+ # linker_flag: "arm-linux-gnueabihf" | |
+ linker_flag: "--sysroot=DEEPSPEECH_ROOT/multistrap-debian_arm64-sid" | |
+ linker_flag: "-pass-exit-codes" | |
+ #linker_flag: "-Lexternal/GccArmRpi/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/arm-linux-gnueabihf/lib" | |
+ #linker_flag: "-Lexternal/GccArmRpi/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/arm-linux-gnueabihf/sysroot/lib" | |
+ #linker_flag: "-Lexternal/GccArmRpi/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/arm-linux-gnueabihf/sysroot/usr/lib" | |
+ #linker_flag: "-LDEEPSPEECH_ROOT/multistrap-debian_arm64-sid/lib" | |
+ #linker_flag: "-LDEEPSPEECH_ROOT/multistrap-debian_arm64-sid/usr/lib" | |
+ #linker_flag: "-Bexternal/GccArmRpi/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/arm-linux-gnueabihf/bin" | |
+ linker_flag: "-pie" | |
+ linker_flag: "-lstdc++" | |
+ # linker_flag: "-lm" | |
+ # linker_flag: "-lpthread" | |
+ linker_flag: "-Wl,--dynamic-linker=/lib/ld-linux-aarch64.so.1"  # AArch64 glibc loader; ld-linux-armhf.so.3 is the 32-bit hard-float ARM loader | |
+ linker_flag: "-Wl,-no-as-needed" | |
+ linker_flag: "-Wl,-z,relro,-z,now" | |
+ linker_flag: "-no-canonical-prefixes" | |
+ # Stamp the binary with a unique identifier. | |
+ linker_flag: "-Wl,--build-id=md5" | |
+ linker_flag: "-Wl,--hash-style=gnu" | |
+ | |
+ compilation_mode_flags { | |
+ mode: DBG | |
+ # Enable debug symbols. | |
+ compiler_flag: "-g" | |
+ } | |
+ compilation_mode_flags { | |
+ mode: OPT | |
+ | |
+ # Debug symbols are kept in opt builds as well (-g, not -g0). | |
+ # Maybe we should enable https://gcc.gnu.org/wiki/DebugFission for opt or | |
+ # even generally? However, that can't happen here, as it requires special | |
+ # handling in Bazel. | |
+ compiler_flag: "-g" | |
+ | |
+ # Disable assertions | |
+ compiler_flag: "-DNDEBUG" | |
+ | |
+ # Removal of unused code and data at link time (can this increase binary size in some cases?). | |
+ compiler_flag: "-ffunction-sections" | |
+ compiler_flag: "-fdata-sections" | |
+ linker_flag: "-Wl,--gc-sections" | |
+ | |
+ # Conservative choice for -O | |
+ # -O3 can increase binary size and even slow down the resulting binaries. | |
+ # Profile first and / or use FDO if you need better performance than this. | |
+ compiler_flag: "-O2" | |
+ | |
+ # Conservative choice; -D_FORTIFY_SOURCE=2 may be unsafe in some cases. | |
+ # We need to undef it before redefining it as some distributions now have | |
+ # it enabled by default. | |
+ # Also depends on -O. | |
+ compiler_flag: "-U_FORTIFY_SOURCE" | |
+ compiler_flag: "-D_FORTIFY_SOURCE=1" | |
+ | |
+ # Basic optims for RPi3, from Gentoo/ArchLinux Wiki | |
+ compiler_flag: "-mtune=cortex-a53" | |
+ #compiler_flag: "-mfpu=crypto-neon-fp-armv8" | |
+ #compiler_flag: "-mfloat-abi=hard" | |
+ # Inference time on small frozen LDC93S1 model goes from 30s to 20s with | |
+ # that flag, but generates instructions that valgrind chokes on. | |
+ #compiler_flag: "-mfpu=neon-fp-armv8" | |
+ #compiler_flag: "-funsafe-math-optimizations" | |
+ compiler_flag: "-ftree-vectorize" | |
+ compiler_flag: "-pipe" | |
+ } | |
+ linking_mode_flags { mode: DYNAMIC } | |
+} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
thanks for help !