Jetson MXNet build recipe

arm.crosscompile.mk:
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#-------------------------------------------------------------------------------
# Template configuration for compiling mxnet
#
# If you want to change the configuration, please use the following
# steps. Assume you are in the root directory of mxnet. First copy this
# file so that any local changes will be ignored by git
#
# $ cp make/config.mk .
#
# Next, modify the relevant entries, and then compile with
#
# $ make
#
# or build in parallel with 8 threads
#
# $ make -j8
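#
# In this Jetson recipe the copy step is handled by the Dockerfile below, which
# ADDs this file into the MXNet tree as make/config.mk before running make.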
#-------------------------------------------------------------------------------
#---------------------
# We do not assign compilers here. When cross-compiling, these are usually
# already set correctly (in this recipe the Dockerfile below exports CC, CXX,
# and FC).
#--------------------
export NVCC = nvcc
# whether to compile with options for MXNet developers
DEV = 0
# whether to compile in debug mode
DEBUG = 0
# whether to compile with the profiler
USE_PROFILER =
# the additional link flags you want to add
# TODO: Move flags here
ADD_LDFLAGS=-static-libstdc++
# the additional compile flags you want to add
ADD_CFLAGS =
#---------------------------------------------
# matrix computation libraries for CPU/GPU
#---------------------------------------------
# whether to use CUDA during compilation
USE_CUDA = 0
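# NOTE: the Jetson Dockerfile below overrides this at build time
# ('make $BUILD_OPTS' passes USE_CUDA=1 etc.); variables given on the make
# command line take precedence over the values in this file.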
# add the path to the CUDA library to the link and compile flags
# if you have already added it to your environment variables, leave this as NONE
# USE_CUDA_PATH = /usr/local/cuda
USE_CUDA_PATH = NONE
# whether to use the cuDNN library
USE_CUDNN = 0
# whether to use CUDA runtime compilation for writing kernels in a native language (i.e. Python)
USE_NVRTC = 0
# whether to use OpenCV during compilation
# you can disable it, but then you will not be able to use
# the imbin iterator
USE_OPENCV = 0
# use OpenMP for parallelization
USE_OPENMP = 1
# MKL ML Library for Intel CPU/Xeon Phi
# Please refer to MKL_README.md for details
# MKL ML Library folder; installing to /usr/local requires root
# (change to your home directory as a standard user)
# only used when USE_BLAS != mkl
MKLML_ROOT=/usr/local
# whether to use the MKL2017 library
USE_MKL2017 = 0
# whether to use MKL2017 experimental features for higher performance
# (requires USE_MKL2017 = 1)
USE_MKL2017_EXPERIMENTAL = 0
# whether to use the NNPACK library
USE_NNPACK = 0
# For ARM builds we use OpenBLAS
USE_BLAS = openblas
# whether to use LAPACK during compilation
# only effective when compiled with one of the BLAS versions openblas/apple/atlas/mkl
USE_LAPACK = 1
# path to lapack library in case of a non-standard installation
USE_LAPACK_PATH =
# add the path to the Intel library; you may need it for MKL if you did not
# add the path to your environment variables
USE_INTEL_PATH = NONE
# If MKL is used only for BLAS, choose static linking automatically so the Python wrapper works
ifeq ($(USE_MKL2017), 0)
ifeq ($(USE_BLAS), mkl)
USE_STATIC_MKL = 1
endif
else
USE_STATIC_MKL = NONE
endif
#----------------------------
# distributed computing
#----------------------------
# whether to enable multi-machine support
USE_DIST_KVSTORE = 0
# whether to allow reading and writing HDFS directly. If yes, Hadoop is
# required
USE_HDFS = 0
# path to libjvm.so. required if USE_HDFS=1
LIBJVM=$(JAVA_HOME)/jre/lib/amd64/server
# whether to allow reading and writing AWS S3 directly. If yes,
# libcurl4-openssl-dev is required; it can be installed on Ubuntu with
# sudo apt-get install -y libcurl4-openssl-dev
USE_S3 = 0
#----------------------------
# additional operators
#----------------------------
# path to folders containing project-specific operators that you don't want to put in src/operators
EXTRA_OPERATORS =
#----------------------------
# other features
#----------------------------
# Create C++ interface package
USE_CPP_PACKAGE = 0
#----------------------------
# plugins
#----------------------------
# whether to use caffe integration. This requires installing caffe.
# You also need to add CAFFE_PATH/build/lib to your LD_LIBRARY_PATH
# CAFFE_PATH = $(HOME)/caffe
# MXNET_PLUGINS += plugin/caffe/caffe.mk
# whether to use torch integration. This requires installing torch.
# You also need to add TORCH_PATH/install/lib to your LD_LIBRARY_PATH
# TORCH_PATH = $(HOME)/torch
# MXNET_PLUGINS += plugin/torch/torch.mk
# WARPCTC_PATH = $(HOME)/warp-ctc
# MXNET_PLUGINS += plugin/warpctc/warpctc.mk
# whether to use SFrame integration. This requires building SFrame from
# git@github.com:dato-code/SFrame.git
# SFRAME_PATH = $(HOME)/SFrame
# MXNET_PLUGINS += plugin/sframe/plugin.mk
Dockerfile.build.master.jetson:

# -*- mode: dockerfile -*-
# Work in progress, some of the manual steps below will be fixed in a subsequent release.
# Dockerfile to build libmxnet.so and a Python wheel for the Jetson TX1 and TX2
# Builds from the GitHub MXNet master branch
# Once complete, copy artifacts from /work/build to the target device.
# Install by running 'pip install name_of_wheel.whl' and copying the .so to a folder on your LD_LIBRARY_PATH
FROM nvidia/cuda:8.0-cudnn5-devel as cudabuilder
FROM dockcross/linux-arm64
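# Two-stage setup: the first image supplies the x86_64 CUDA 8.0 toolchain (nvcc
# and headers), the second the aarch64 cross-compilation toolchain; the CUDA
# tree is copied out of the first stage further down.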
ENV ARCH aarch64
ENV NVCCFLAGS "-m64"
ENV CUDA_ARCH "-gencode arch=compute_53,code=sm_53 -gencode arch=compute_62,code=sm_62"
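# sm_53 targets the Jetson TX1 (Tegra X1); sm_62 targets the Jetson TX2 (Tegra Parker)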
ENV BUILD_OPTS "USE_OPENCV=0 USE_BLAS=openblas USE_SSE=0 USE_CUDA=1 USE_CUDNN=1 ENABLE_CUDA_RTC=0 USE_NCCL=0 USE_CUDA_PATH=/usr/local/cuda/"
ENV CC /usr/bin/aarch64-linux-gnu-gcc
ENV CXX /usr/bin/aarch64-linux-gnu-g++
ENV FC /usr/bin/aarch64-linux-gnu-gfortran-4.9
ENV HOSTCC gcc
WORKDIR /work
# Build OpenBLAS
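# Fetching the branch ref first busts the Docker build cache whenever OpenBLAS
# master moves, so the clone below is re-run on upstream changes.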
ADD https://api.github.com/repos/xianyi/OpenBLAS/git/refs/heads/master /tmp/openblas_version.json
RUN git clone https://github.com/xianyi/OpenBLAS.git && \
cd OpenBLAS && \
make -j$(nproc) TARGET=ARMV8 && \
make install && \
ln -s /opt/OpenBLAS/lib/libopenblas.so /usr/lib/libopenblas.so && \
ln -s /opt/OpenBLAS/lib/libopenblas.a /usr/lib/libopenblas.a && \
ln -s /opt/OpenBLAS/lib/libopenblas.a /usr/lib/liblapack.a
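# OpenBLAS's 'make install' defaults to PREFIX=/opt/OpenBLAS; the symlinks above
# let MXNet's -lopenblas/-llapack linker flags resolve without extra configuration.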
ENV LD_LIBRARY_PATH $LD_LIBRARY_PATH:/opt/OpenBLAS/lib
ENV CPLUS_INCLUDE_PATH /opt/OpenBLAS/include
# Setup CUDA build env (including configuring and copying nvcc)
COPY --from=cudabuilder /usr/local/cuda /usr/local/cuda
COPY --from=cudabuilder /usr/include/cudnn.h /usr/include/cudnn.h
ENV PATH $PATH:/usr/local/cuda/bin
ENV TARGET_ARCH aarch64
ENV TARGET_OS linux
# Install ARM dependencies based on JetPack 3.1
RUN wget http://developer.download.nvidia.com/devzone/devcenter/mobile/jetpack_l4t/013/linux-x64/cuda-repo-l4t-8-0-local_8.0.84-1_arm64.deb && \
wget http://developer.download.nvidia.com/devzone/devcenter/mobile/jetpack_l4t/013/linux-x64/libcudnn6_6.0.21-1+cuda8.0_arm64.deb && \
dpkg -i cuda-repo-l4t-8-0-local_8.0.84-1_arm64.deb && \
dpkg -i libcudnn6_6.0.21-1+cuda8.0_arm64.deb && \
apt update -y && \
apt install cuda-cudart-cross-aarch64-8-0 cuda-cublas-cross-aarch64-8-0 \
cuda-nvml-cross-aarch64-8-0 cuda-nvrtc-cross-aarch64-8-0 cuda-cufft-cross-aarch64-8-0 \
cuda-curand-cross-aarch64-8-0 cuda-cusolver-cross-aarch64-8-0 cuda-cusparse-cross-aarch64-8-0 \
cuda-misc-headers-cross-aarch64-8-0 cuda-npp-cross-aarch64-8-0 libcudnn6 -y && \
cp /usr/local/cuda-8.0/targets/aarch64-linux/lib/*.so /usr/local/cuda/lib64/ && \
cp /usr/local/cuda-8.0/targets/aarch64-linux/lib/stubs/*.so /usr/local/cuda/lib64/stubs/ && \
cp /usr/lib/aarch64-linux-gnu/libcudnn.* /usr/local/cuda/lib64/ && \
ln -s /usr/local/cuda/lib64/libcudnn.so.6 /usr/local/cuda/lib64/libcudnn.so && \
cp -r /usr/local/cuda-8.0/targets/aarch64-linux/include/ /usr/local/cuda/include/ && \
rm cuda-repo-l4t-8-0-local_8.0.84-1_arm64.deb && rm libcudnn6_6.0.21-1+cuda8.0_arm64.deb
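# The cross packages install aarch64 CUDA libraries under
# /usr/local/cuda-8.0/targets/aarch64-linux; copying them into /usr/local/cuda
# (taken from the x86_64 stage) gives nvcc and the linker one consistent CUDA
# root for the target architecture.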
# Build MXNet
RUN git clone --recursive https://github.com/apache/incubator-mxnet.git mxnet
WORKDIR /work/mxnet
# Add ARM specific settings
ADD arm.crosscompile.mk make/config.mk
# Build and link
RUN make -j$(nproc) $BUILD_OPTS
# Create a binary wheel for easy installation.
# When using tool.py, output will be in the jetson folder.
# Scp the .whl file to your target device, and install via
# pip install
WORKDIR /work/mxnet/python
RUN python setup.py bdist_wheel --universal
# Copy build artifacts to output folder for tool.py script
RUN mkdir -p /work/build && cp dist/*.whl /work/build && cp ../lib/* /work/build
Note: requires a version of Docker with multi-stage build support (17.05 or newer).
HOST:
wget https://gist.githubusercontent.com/KellenSunderland/659f31c283a1ad2c04e9852eabed111c/raw/18f2fd0dc6f5539d94699de41a31b666dc432f9a/arm.crosscompile.mk
wget https://gist.githubusercontent.com/KellenSunderland/659f31c283a1ad2c04e9852eabed111c/raw/18f2fd0dc6f5539d94699de41a31b666dc432f9a/Dockerfile.build.master.jetson
docker build -f Dockerfile.build.master.jetson -t mxnet_jetson .
docker run --rm -v $(pwd)/build:/tmp mxnet_jetson:latest sh -c "cp /work/build/* /tmp"
Copy build artifacts to device.
DEVICE:
pip install mxnet-1.0.1-py2.py3-none-any.whl
cp libmxnet.so /usr/lib64 (or a folder on your LD_LIBRARY_PATH, or the mxnet site-packages folder. This step will be fixed soon.)
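To verify the install on the device, a quick smoke test along these lines should work (a sketch; adjust the python binary and the context to your setup):

python -c "import mxnet as mx; print(mx.__version__); print(mx.nd.ones((2, 3), ctx=mx.gpu(0)))"

If creating the GPU array fails, check that libmxnet.so is on your LD_LIBRARY_PATH and that the device is running the JetPack 3.1 CUDA 8.0 / cuDNN 6 libraries the wheel was built against.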