Skip to content

Instantly share code, notes, and snippets.

@mengdong
Last active November 10, 2021 02:01
Show Gist options
  • Save mengdong/12a3ff847033a4f662eb8ed593aad3be to your computer and use it in GitHub Desktop.
Save mengdong/12a3ff847033a4f662eb8ed593aad3be to your computer and use it in GitHub Desktop.
dockerfile for merlin nightly
# syntax=docker/dockerfile:1
# Base image selection. Both ARGs are declared before FROM, so they are only
# visible to the FROM line; TRITON_VERSION is re-declared later in the stage
# (with default 21.10) before the Triton backend builds use it.
ARG TRITON_VERSION=21.10-tf2-python
ARG IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3
FROM ${IMAGE}
# Component version selectors. Each clone step below checks out
# tags/<*_VER> only when RELEASE is "true" and the value is not "vnightly";
# otherwise it checks out a fixed development branch.
ARG RMM_VER=vnightly
ARG CUDF_VER=vnightly
ARG HUGECTR_VER=vnightly
ARG NVTAB_VER=vnightly
ARG TF4REC_VER=vnightly
ARG RELEASE=false
# Value passed to the HugeCTR CMake configure step as -DSM.
ARG SM="70;75;80"
# NOTE(review): later steps install cupy-cuda114 and patch rmm's setup.py for
# "11.4" — confirm that 11.2 is the intended value here.
ENV CUDA_SHORT_VERSION=11.2
# All following RUN instructions execute under bash (pushd/popd and `==`
# inside `[ ]` are used below).
SHELL ["/bin/bash", "-c"]
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/lib:/repos/dist/lib
# Persisted as ENV, so it also remains set in containers started from the image.
ENV DEBIAN_FRONTEND=noninteractive
# CUDA locations. CUDA_PATH and CUDA_CUDA_LIBRARY are derived from CUDA_HOME,
# so CUDA_HOME must be set by an earlier instruction than the ones that use it.
ENV CUDA_HOME=/usr/local/cuda
ENV CUDA_PATH=$CUDA_HOME
# Directory of CUDA stub libraries; handed to CMake as CMAKE_LIBRARY_PATH by
# the arrow and cudf builds below.
ENV CUDA_CUDA_LIBRARY=${CUDA_HOME}/lib64/stubs
ENV PATH=${CUDA_HOME}/lib64/:${PATH}:${CUDA_HOME}/bin
# Build env variables for rmm
ENV INSTALL_PREFIX=/usr
# Install the build toolchain, the Python 3.8 headers and bootstrap pip.
#
# The package index refresh and the install run in ONE layer: when they are
# split across RUN instructions, a cached `update` layer can be paired with a
# newer `install` and resolve against stale package lists. apt-get is used
# instead of apt because apt's CLI is not meant for scripted use.
# Packages are listed one per line in alphabetical order to keep diffs small.
#
# get-pip.py is fetched from the Python-3.8-specific URL because the generic
# script only supports currently maintained interpreters; the script is removed
# in the same layer so it does not stay in the image.
RUN apt-get update -y --fix-missing && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends software-properties-common && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get update -y --fix-missing && \
    apt-get install -y --no-install-recommends \
        aptitude \
        autoconf \
        bison \
        build-essential \
        clang-format \
        flex \
        git \
        hwloc \
        libaio-dev \
        libboost-all-dev \
        libboost-serialization-dev \
        libibverbs-dev \
        libjemalloc-dev \
        libnuma-dev \
        libssl-dev \
        libtool \
        numactl \
        protobuf-compiler \
        python3.8-dev \
        wget && \
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* && \
    update-alternatives --install /usr/bin/python python /usr/bin/python3.8 1 && \
    wget -O get-pip.py https://bootstrap.pypa.io/pip/3.8/get-pip.py && \
    python get-pip.py && \
    rm -f get-pip.py && \
    pip install pip==21.0.1
# Replace whatever protobuf the base image ships with a fresh install.
# `;` is kept on purpose: the uninstall may fail when protobuf is absent and
# that must not abort the build.
RUN pip uninstall protobuf -y; pip install protobuf
# Install gevent from source at a pinned tag.
# Fixes relative to the previous line:
#   * the duplicated "pip install pip install" made pip also try to install
#     packages literally named "pip" and "install";
#   * git:// is no longer served by GitHub, so https:// is used;
#   * the ref had been mangled by e-mail obfuscation ("[email protected]").
# NOTE(review): the tag 21.8.0 is reconstructed from the obfuscated text —
# confirm it is the intended gevent release.
RUN pip install "git+https://github.com/gevent/gevent.git@21.8.0#egg=gevent"
# Install CMake from the Kitware APT repository.
# The ASCII-armored Kitware key is downloaded (wget errors silenced),
# converted to a binary keyring by gpg and written to
# /etc/apt/trusted.gpg.d/kitware.gpg before the focal repository is added.
RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null \
        | gpg --dearmor - > /etc/apt/trusted.gpg.d/kitware.gpg && \
    apt-add-repository 'deb https://apt.kitware.com/ubuntu/ focal main' && \
    apt-get update && \
    apt-get install -y cmake
# Install spdlog v1.9.2 from source.
# The checkout lives in ./build-env and is deleted in the same layer.
# The make job count is bounded by the number of CPUs: a bare `make -j` spawns
# an unlimited number of jobs and can exhaust memory on the build host.
RUN git clone --branch v1.9.2 https://github.com/gabime/spdlog.git build-env && \
    pushd build-env && \
    mkdir build && cd build && \
    cmake .. && \
    make -j"$(nproc)" && \
    make install && \
    popd && \
    rm -rf build-env
# Build Apache Arrow 5.0.0 from source: first the C++ libraries, then the
# pyarrow wheel. ARROW_HOME is the CMake install prefix.
ENV ARROW_HOME=/usr/local
# Steps: clone with submodules -> install the Python build requirements ->
# configure/build/install the C++ tree in cpp/release -> build the pyarrow
# wheel with the matching PYARROW_WITH_* switches -> install the wheel ->
# delete the checkout in the same layer.
RUN git clone --branch apache-arrow-5.0.0 --recurse-submodules https://github.com/apache/arrow.git build-env && \
    pushd build-env && \
    export ARROW_TEST_DATA="${PWD}/testing/data" && \
    export PARQUET_TEST_DATA="${PWD}/cpp/submodules/parquet-testing/data" && \
    pip install -r python/requirements-build.txt && \
    mkdir cpp/release && \
    pushd cpp/release && \
    cmake \
        -DCMAKE_INSTALL_PREFIX=${ARROW_HOME} \
        -DCMAKE_INSTALL_LIBDIR=lib \
        -DCMAKE_LIBRARY_PATH=${CUDA_CUDA_LIBRARY} \
        -DARROW_BUILD_TESTS=ON \
        -DARROW_CUDA=ON \
        -DARROW_DATASET=ON \
        -DARROW_FLIGHT=ON \
        -DARROW_GANDIVA=OFF \
        -DARROW_ORC=ON \
        -DARROW_PARQUET=ON \
        -DARROW_PLASMA=ON \
        -DARROW_PYTHON=ON \
        -DARROW_WITH_BROTLI=ON \
        -DARROW_WITH_BZ2=ON \
        -DARROW_WITH_LZ4=ON \
        -DARROW_WITH_SNAPPY=ON \
        -DARROW_WITH_ZLIB=ON \
        -DARROW_WITH_ZSTD=ON \
        .. && \
    make -j$(nproc) && \
    make install && \
    popd && \
    pushd python && \
    export PYARROW_WITH_CUDA=ON \
           PYARROW_WITH_DATASET=ON \
           PYARROW_WITH_ORC=ON \
           PYARROW_WITH_PARQUET=ON && \
    python setup.py build_ext --build-type=release bdist_wheel && \
    pip install dist/*.whl && \
    popd && \
    popd && \
    rm -rf build-env
# Install rmm from source.
# Checks out tags/${RMM_VER} when RELEASE is "true" and RMM_VER is not
# "vnightly"; otherwise builds branch-21.10.
# Every step is chained with `&&`: previously the `;` after `fi` discarded the
# checkout's exit status, so a missing tag silently built whatever branch the
# clone defaulted to. The test operands are quoted so an empty RMM_VER does not
# turn into a `[` syntax error.
# The sed call appends a `"11.4": "11.x",` line after every line of
# python/setup.py that matches 11.2.
RUN git clone https://github.com/rapidsai/rmm.git build-env && \
    pushd build-env && \
    if [ "${RELEASE}" == "true" ] && [ "${RMM_VER}" != "vnightly" ]; then \
        git fetch --all --tags && git checkout "tags/${RMM_VER}"; \
    else \
        git checkout branch-21.10; \
    fi && \
    sed -i '/11.2/ a "11.4": "11.x",' python/setup.py && \
    ./build.sh librmm && \
    pip install python/. && \
    popd && \
    rm -rf build-env
# Build and install cudf (libcudf, cudf, dask_cudf) from source.
# Checks out tags/${CUDF_VER} when RELEASE is "true" and CUDF_VER is not
# "vnightly"; otherwise builds branch-21.12.
# NOTE(review): rmm is built from branch-21.10 while cudf uses branch-21.12 —
# confirm this pairing is intended.
# Steps are chained with `&&` so that a failed checkout aborts the build
# instead of silently building the clone's default branch, and the test
# operands are quoted so an empty CUDF_VER does not break `[`.
# After the build, protoc regenerates the Python bindings for
# orc_column_statistics.proto into the installed cudf package.
RUN git clone https://github.com/rapidsai/cudf.git build-env && \
    pushd build-env && \
    if [ "${RELEASE}" == "true" ] && [ "${CUDF_VER}" != "vnightly" ]; then \
        git fetch --all --tags && git checkout "tags/${CUDF_VER}"; \
    else \
        git checkout branch-21.12; \
    fi && \
    git submodule update --init --recursive && \
    export CUDF_HOME=${PWD} && \
    export CUDF_ROOT=${PWD}/cpp/build/ && \
    export CMAKE_LIBRARY_PATH=${CUDA_CUDA_LIBRARY} && \
    ./build.sh libcudf cudf dask_cudf --allgpuarch --cmake-args=\"-DCUDF_ENABLE_ARROW_S3=OFF\" && \
    protoc -I=python/cudf/cudf/utils/metadata --python_out=/usr/local/lib/python3.8/dist-packages/cudf/utils/metadata python/cudf/cudf/utils/metadata/orc_column_statistics.proto && \
    popd && \
    rm -rf build-env
# Select the pure-Python protobuf implementation.
ENV PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION='python'
SHELL ["/bin/bash", "-c"]
# Python runtime dependencies.
RUN pip install cupy-cuda114 cachetools fastavro typing_extensions nvtx pandas==1.1.5
RUN pip install pybind11
# Refresh the package index in the same layer as the install so the install
# never runs against stale lists, and drop the lists afterwards.
RUN apt-get update -y && \
    apt-get install -y rapidjson-dev && \
    rm -rf /var/lib/apt/lists/*
# need for testing fixtures must be available for nvt testing
# The requirement is quoted: unquoted, bash parses `>=1.21.6` as an output
# redirection, which installs an unconstrained botocore and writes pip's
# output to a file named "=1.21.6".
RUN pip install "botocore>=1.21.6"
# asvdb is installed from a checkout kept under /repos/asvdb.
RUN git clone https://github.com/rapidsai/asvdb.git /repos/asvdb && \
    cd /repos/asvdb && \
    python setup.py install
# Install NVTabular in development mode from /nvtabular.
# Checks out tags/${NVTAB_VER} when RELEASE is "true" and NVTAB_VER is not
# "vnightly"; otherwise uses main.
# All steps are chained with `&&`: with the previous `;` separators a failed
# `cd` or checkout was ignored and the build carried on with the wrong tree.
RUN git clone https://github.com/NVIDIA-Merlin/NVTabular.git /nvtabular/ && \
    cd /nvtabular/ && \
    if [ "${RELEASE}" == "true" ] && [ "${NVTAB_VER}" != "vnightly" ]; then \
        git fetch --all --tags && git checkout "tags/${NVTAB_VER}"; \
    else \
        git checkout main; \
    fi && \
    python setup.py develop
# Install Transformers4Rec (editable) with its tensorflow, pytorch and
# nvtabular extras.
# Checks out tags/${TF4REC_VER} when RELEASE is "true" and TF4REC_VER is not
# "vnightly"; otherwise uses main.
# Steps are chained with `&&` so a failed checkout aborts the build, and the
# extras specifier is quoted so bash cannot treat `[...]` as a glob pattern.
RUN git clone https://github.com/NVIDIA-Merlin/Transformers4Rec.git /transformers4rec && \
    cd /transformers4rec/ && \
    if [ "${RELEASE}" == "true" ] && [ "${TF4REC_VER}" != "vnightly" ]; then \
        git fetch --all --tags && git checkout "tags/${TF4REC_VER}"; \
    else \
        git checkout main; \
    fi && \
    pip install -e ".[tensorflow,pytorch,nvtabular]"
# Install NVTabular Triton Backend
# TRITON_VERSION is (re)declared inside the stage so RUN steps can read it; it
# selects the r<version> tags of the Triton common/core/backend repositories.
ARG TRITON_VERSION=21.10
# Checks out tags/${NVTAB_VER} when RELEASE is "true" and NVTAB_VER is not
# "vnightly"; otherwise uses main. The built shared library is copied to
# /opt/tritonserver/backends/nvtabular and the checkout is deleted.
# Steps are chained with `&&` so a failed checkout aborts the build rather than
# silently building the default branch; `mkdir -p` keeps the step from failing
# when the backend directory already exists.
RUN git clone https://github.com/NVIDIA-Merlin/nvtabular_triton_backend.git build-env && \
    pushd build-env && \
    if [ "${RELEASE}" == "true" ] && [ "${NVTAB_VER}" != "vnightly" ]; then \
        git fetch --all --tags && git checkout "tags/${NVTAB_VER}"; \
    else \
        git checkout main; \
    fi && \
    mkdir build && \
    cd build && \
    cmake -Dpybind11_DIR=/usr/local/lib/python3.8/dist-packages/pybind11/share/cmake/pybind11 \
        -D TRITON_COMMON_REPO_TAG="r$TRITON_VERSION" \
        -D TRITON_CORE_REPO_TAG="r$TRITON_VERSION" \
        -D TRITON_BACKEND_REPO_TAG="r$TRITON_VERSION" .. && \
    make -j 4 && \
    mkdir -p /opt/tritonserver/backends/nvtabular && \
    cp libtriton_nvtabular.so /opt/tritonserver/backends/nvtabular/ && \
    popd && \
    rm -rf build-env
# Dask stack, pinned to 2021.09.1. The extras requirement is quoted so bash
# cannot interpret `[dataframe]` as a glob pattern.
RUN pip install dask==2021.09.1 distributed==2021.09.1 "dask[dataframe]==2021.09.1" dask-cuda
# Test and analysis tooling (dask-cuda is already installed by the step above).
RUN pip install pynvml pytest graphviz scikit-learn==0.24.2 scipy matplotlib
# nvidia-pyindex is installed first, in its own pip invocation; `&&` makes a
# failure of that step abort the build instead of being ignored.
RUN pip install nvidia-pyindex && pip install "tritonclient[all]" grpcio-channelz
# Graphviz binaries; the index refresh and install are chained so a failed
# refresh is not ignored.
RUN apt-get update && apt-get install -y graphviz
# Hiredis: shallow-clone into /var/tmp/hiredis, build out-of-tree with CMake,
# install, and delete the checkout in the same layer.
RUN mkdir -p /var/tmp && \
    git clone --depth=1 https://github.com/redis/hiredis.git /var/tmp/hiredis && \
    mkdir /var/tmp/hiredis/build && \
    cd /var/tmp/hiredis/build && \
    cmake .. && \
    make -j$(nproc) && \
    make install && \
    rm -rf /var/tmp/hiredis
# RocksDB: shallow-clone into /var/tmp/rocksdb, build and install the shared
# library, and delete the checkout in the same layer.
RUN mkdir -p /var/tmp && \
    git clone --depth=1 https://github.com/facebook/rocksdb.git /var/tmp/rocksdb && \
    cd /var/tmp/rocksdb && \
    make -j$(nproc) shared_lib && \
    make install-shared && \
    rm -rf /var/tmp/rocksdb
# Put /usr/local/include at the front of the compiler's CPATH search list.
ENV CPATH=/usr/local/include:$CPATH
# Install HugeCTR (inference build) and the HugeCTR Triton backend.
RUN apt-get update -y && \
    apt-get install -y rapidjson-dev
# Both repositories are checked out under /repos. Each checks out
# tags/${HUGECTR_VER} when RELEASE is "true" and HUGECTR_VER is not "vnightly";
# otherwise HugeCTR uses master and the backend uses main.
# Fixes relative to the previous version:
#   * the backend's `else git checkout main` line had no trailing backslash, so
#     the following `; fi && \` line was parsed as a separate (invalid)
#     Dockerfile instruction;
#   * test operands and -DSM are quoted;
#   * /repos is removed in this same layer, so the sources are not stored in
#     the image (a later `RUN rm -rf` only hides them).
RUN git clone https://github.com/NVIDIA-Merlin/HugeCTR.git /repos/HugeCTR && \
    cd /repos/HugeCTR && \
    if [ "${RELEASE}" == "true" ] && [ "${HUGECTR_VER}" != "vnightly" ]; then \
        git fetch --all --tags && git checkout "tags/${HUGECTR_VER}"; \
    else \
        git checkout master; \
    fi && \
    git submodule update --init --recursive && \
    mkdir -p build && cd build && \
    cmake -DCMAKE_BUILD_TYPE=Release -DSM="${SM}" -DENABLE_INFERENCE=ON .. && \
    make -j$(nproc) && make install && \
    export CPATH=/usr/local/hugectr/include:$CPATH && \
    export LIBRARY_PATH=/usr/local/hugectr/lib:$LIBRARY_PATH && \
    git clone https://github.com/triton-inference-server/hugectr_backend /repos/hugectr_inference_backend && \
    cd /repos/hugectr_inference_backend && \
    if [ "${RELEASE}" == "true" ] && [ "${HUGECTR_VER}" != "vnightly" ]; then \
        git fetch --all --tags && git checkout "tags/${HUGECTR_VER}"; \
    else \
        git checkout main; \
    fi && \
    mkdir -p build && cd build && \
    cmake -DCMAKE_INSTALL_PREFIX:PATH=/usr/local/hugectr \
        -DTRITON_COMMON_REPO_TAG="r$TRITON_VERSION" \
        -DTRITON_CORE_REPO_TAG="r$TRITON_VERSION" \
        -DTRITON_BACKEND_REPO_TAG="r$TRITON_VERSION" .. && \
    make -j$(nproc) && make install && \
    rm -rf /repos
# Expose the HugeCTR headers, libraries and binaries to later build steps and
# to containers started from the image.
ENV CPATH=/usr/local/hugectr/include:$CPATH
ENV LIBRARY_PATH=/usr/local/hugectr/lib:$LIBRARY_PATH
ENV LD_LIBRARY_PATH=/usr/local/hugectr/lib:$LD_LIBRARY_PATH
ENV PATH=/usr/local/hugectr/bin:$PATH
# Link the installed backend into Triton's backends directory.
RUN ln -s /usr/local/hugectr/backends/hugectr /opt/tritonserver/backends/
# Final Python additions.
RUN pip install tqdm
RUN pip install --upgrade numba numpy
# Print the size of each top-level directory into the build log.
RUN echo $(du --max-depth=1 -h /)
# No health check, no entrypoint; containers start an interactive bash.
HEALTHCHECK NONE
ENTRYPOINT []
CMD ["/bin/bash"]
# syntax=docker/dockerfile:1
# Arguments declared before FROM are only visible to FROM lines.
# NOTE(review): CUDA_VERSION and CUDNN_VERSION are not referenced by any
# instruction visible in this Dockerfile — confirm whether they are still needed.
ARG CUDA_VERSION=11.4.1
ARG CUDNN_VERSION=8
ARG IMAGE=nvcr.io/nvidia/tensorflow:21.10-tf2-py3
# phase1: OS packages, gevent, CMake and Apache Arrow.
FROM ${IMAGE} AS phase1
# Used by the HugeCTR build step to extend PATH with
# /usr/local/cuda-<version>/compat/.
ENV CUDA_SHORT_VERSION=11.4
# Component version selectors. The clone steps in later stages check out
# tags/<*_VER> only when RELEASE is "true" and the value is not "vnightly".
# Later stages re-declare the ARGs they read.
ARG RELEASE=false
ARG RMM_VER=vnightly
ARG CUDF_VER=vnightly
ARG NVTAB_VER=vnightly
ARG HUGECTR_VER=vnightly
ARG HWLOC_VER=2.4.1
# All following RUN instructions execute under bash (pushd/popd and `==`
# inside `[ ]` are used below).
SHELL ["/bin/bash", "-c"]
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/lib:/repos/dist/lib
# Persisted as ENV, so it also remains set in containers started from the image.
ENV DEBIAN_FRONTEND=noninteractive
# CUDA locations. CUDA_PATH and CUDA_CUDA_LIBRARY are derived from CUDA_HOME,
# so CUDA_HOME must be set by an earlier instruction than the ones that use it.
ENV CUDA_HOME=/usr/local/cuda
ENV CUDA_PATH=$CUDA_HOME
# Directory of CUDA stub libraries; handed to CMake as CMAKE_LIBRARY_PATH by
# the arrow and cudf builds below.
ENV CUDA_CUDA_LIBRARY=${CUDA_HOME}/lib64/stubs
ENV PATH=${CUDA_HOME}/lib64/:${PATH}:${CUDA_HOME}/bin
# Select the pure-Python protobuf implementation.
ENV PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION='python'
# Build env variables for rmm
ENV INSTALL_PREFIX=/usr
# Install OS-level build and runtime packages, then register the deadsnakes
# PPA and refresh the package index.
# Fix: `zip` was followed directly by the line-continuation backslash. Because
# the next line carried no leading whitespace, the joined command contained the
# single word "ziplibssl-dev", which is not a package name. A space now
# separates the two.
# apt-get is used throughout because apt's CLI is not meant for scripted use.
RUN apt-get update -y --fix-missing && \
    apt-get upgrade -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        vim gdb git wget unzip tar \
        zlib1g-dev lsb-release clang-format \
        libboost-serialization-dev \
        openssl curl zip \
        libssl-dev \
        protobuf-compiler \
        libtbb-dev \
        numactl \
        libspdlog-dev \
        libnuma-dev \
        libaio-dev \
        libibverbs-dev \
        slapd && \
    apt-get install -y --no-install-recommends software-properties-common && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get update -y --fix-missing
# Install gevent from source at a pinned tag.
# git:// is no longer served by GitHub, so https:// is used, and the ref —
# which had been mangled by e-mail obfuscation ("[email protected]") and
# contained a space that split the URL into two shell words — is restored.
# NOTE(review): the tag 21.8.0 is reconstructed from the obfuscated text —
# confirm it is the intended gevent release.
RUN pip install "git+https://github.com/gevent/gevent.git@21.8.0#egg=gevent"
# Replace the distribution CMake with CMake 3.21.1 built from source.
# Changes relative to the previous version:
#   * the tarball is fetched over https instead of plain http;
#   * the build uses all CPUs instead of a serial `make`;
#   * the tarball and the extracted source tree are removed in the same layer
#     so they do not stay in the image.
RUN apt-get remove --purge cmake -y && \
    wget https://cmake.org/files/v3.21/cmake-3.21.1.tar.gz && \
    tar xf cmake-3.21.1.tar.gz && \
    cd cmake-3.21.1 && \
    ./configure && \
    make -j"$(nproc)" && \
    make install && \
    cd .. && \
    rm -rf cmake-3.21.1 cmake-3.21.1.tar.gz
# Build Apache Arrow 5.0.0 from source: first the C++ libraries, then the
# pyarrow wheel. ARROW_HOME is the CMake install prefix.
ENV ARROW_HOME=/usr/local
# Steps: clone with submodules -> install the Python build requirements ->
# configure/build/install the C++ tree in cpp/release -> build the pyarrow
# wheel with the matching PYARROW_WITH_* switches -> install the wheel ->
# delete the checkout in the same layer.
RUN git clone --branch apache-arrow-5.0.0 --recurse-submodules https://github.com/apache/arrow.git build-env && \
    pushd build-env && \
    export ARROW_TEST_DATA="${PWD}/testing/data" && \
    export PARQUET_TEST_DATA="${PWD}/cpp/submodules/parquet-testing/data" && \
    pip install -r python/requirements-build.txt && \
    mkdir cpp/release && \
    pushd cpp/release && \
    cmake \
        -DCMAKE_INSTALL_PREFIX=${ARROW_HOME} \
        -DCMAKE_INSTALL_LIBDIR=lib \
        -DCMAKE_LIBRARY_PATH=${CUDA_CUDA_LIBRARY} \
        -DARROW_BUILD_TESTS=ON \
        -DARROW_CUDA=ON \
        -DARROW_DATASET=ON \
        -DARROW_FLIGHT=ON \
        -DARROW_GANDIVA=OFF \
        -DARROW_ORC=ON \
        -DARROW_PARQUET=ON \
        -DARROW_PLASMA=ON \
        -DARROW_PYTHON=ON \
        -DARROW_WITH_BROTLI=ON \
        -DARROW_WITH_BZ2=ON \
        -DARROW_WITH_LZ4=ON \
        -DARROW_WITH_SNAPPY=ON \
        -DARROW_WITH_ZLIB=ON \
        -DARROW_WITH_ZSTD=ON \
        .. && \
    make -j$(nproc) && \
    make install && \
    popd && \
    pushd python && \
    export PYARROW_WITH_CUDA=ON \
           PYARROW_WITH_DATASET=ON \
           PYARROW_WITH_ORC=ON \
           PYARROW_WITH_PARQUET=ON && \
    python setup.py build_ext --build-type=release bdist_wheel && \
    pip install dist/*.whl && \
    popd && \
    popd && \
    rm -rf build-env
# phase2: RAPIDS libraries (rmm, cudf) on top of phase1.
FROM phase1 AS phase2
# ARGs are re-declared here so this stage's RUN steps can read them.
ARG RELEASE=false
ARG RMM_VER=vnightly
ARG CUDF_VER=vnightly
# Install rmm from source
# Checks out tags/${RMM_VER} when RELEASE is "true" and RMM_VER is not
# "vnightly"; otherwise builds branch-21.10.
# Every step is chained with `&&`: previously the `;` after `fi` discarded the
# checkout's exit status, so a missing tag silently built whatever branch the
# clone defaulted to. The test operands are quoted so an empty RMM_VER does not
# turn into a `[` syntax error.
# The sed call appends a `"11.4": "11.x",` line after every line of
# python/setup.py that matches 11.2.
RUN git clone https://github.com/rapidsai/rmm.git build-env && \
    pushd build-env && \
    if [ "${RELEASE}" == "true" ] && [ "${RMM_VER}" != "vnightly" ]; then \
        git fetch --all --tags && git checkout "tags/${RMM_VER}"; \
    else \
        git checkout branch-21.10; \
    fi && \
    sed -i '/11.2/ a "11.4": "11.x",' python/setup.py && \
    ./build.sh librmm && \
    pip install python/. && \
    popd && \
    rm -rf build-env
# Build and install cudf (libcudf, cudf, dask_cudf) from source.
# Checks out tags/${CUDF_VER} when RELEASE is "true" and CUDF_VER is not
# "vnightly"; otherwise builds branch-21.12.
# NOTE(review): rmm is built from branch-21.10 while cudf uses branch-21.12 —
# confirm this pairing is intended.
# Steps are chained with `&&` so that a failed checkout aborts the build
# instead of silently building the clone's default branch, and the test
# operands are quoted so an empty CUDF_VER does not break `[`.
# After the build, protoc regenerates the Python bindings for
# orc_column_statistics.proto into the installed cudf package.
RUN git clone https://github.com/rapidsai/cudf.git build-env && \
    pushd build-env && \
    if [ "${RELEASE}" == "true" ] && [ "${CUDF_VER}" != "vnightly" ]; then \
        git fetch --all --tags && git checkout "tags/${CUDF_VER}"; \
    else \
        git checkout branch-21.12; \
    fi && \
    git submodule update --init --recursive && \
    export CUDF_HOME=${PWD} && \
    export CUDF_ROOT=${PWD}/cpp/build/ && \
    export CMAKE_LIBRARY_PATH=${CUDA_CUDA_LIBRARY} && \
    ./build.sh libcudf cudf dask_cudf --allgpuarch --cmake-args=\"-DCUDF_ENABLE_ARROW_S3=OFF\" && \
    protoc -I=python/cudf/cudf/utils/metadata --python_out=/usr/local/lib/python3.8/dist-packages/cudf/utils/metadata python/cudf/cudf/utils/metadata/orc_column_statistics.proto && \
    popd && \
    rm -rf build-env
# phase3: Python tooling, NVTabular and native helper libraries.
FROM phase2 AS phase3
# Re-declared so this stage's RUN steps can read them.
ARG RELEASE=false
ARG NVTAB_VER=vnightly
# Python packages, grouped into the same four pip invocations as before and
# listed one per line.
RUN pip install \
        nvtx \
        pandas \
        cupy-cuda114 \
        cachetools \
        typing_extensions \
        fastavro
RUN pip install \
        pynvml \
        pytest \
        graphviz \
        scikit-learn==0.24.2 \
        scipy \
        matplotlib \
        tqdm \
        pydot \
        nvidia-pyindex
RUN pip install \
        tritonclient[all] \
        grpcio-channelz
RUN pip install \
        pybind11 \
        jupyterlab \
        onnx \
        onnxruntime
ENV PATH=$PATH:/usr/lib/x86_64-linux-gnu/
ENV NCCL_LAUNCH_MODE=PARALLEL
# asvdb is installed from a checkout kept under /repos/asvdb.
RUN git clone https://github.com/rapidsai/asvdb.git /repos/asvdb && \
    cd /repos/asvdb && \
    python setup.py install
# Install NVTabular in development mode from /nvtabular.
# Checks out tags/${NVTAB_VER} when RELEASE is "true" and NVTAB_VER is not
# "vnightly"; otherwise uses main.
# All steps are chained with `&&`: with the previous `;` separators a failed
# `cd` or checkout was ignored and the build carried on with the wrong tree.
RUN git clone https://github.com/NVIDIA-Merlin/NVTabular.git /nvtabular/ && \
    cd /nvtabular/ && \
    if [ "${RELEASE}" == "true" ] && [ "${NVTAB_VER}" != "vnightly" ]; then \
        git fetch --all --tags && git checkout "tags/${NVTAB_VER}"; \
    else \
        git checkout main; \
    fi && \
    python setup.py develop
# Dask stack, pinned to 2021.09.1. The extras requirement is quoted so bash
# cannot interpret `[dataframe]` as a glob pattern.
RUN pip install dask==2021.09.1 distributed==2021.09.1 "dask[dataframe]==2021.09.1" dask-cuda
# Locations later build steps are pointed at for cudf and the conda-style prefix.
ENV CUDF_HOME=/usr/include \
    CUDF_ROOT=/usr/include/libcudf \
    CONDA_PREFIX=/usr
# Symlinks needed by the builds that follow:
#   * libcuda.so.1 -> the CUDA stub libcuda.so, which helps with installs that
#     require libcuda.so;
#   * libcudf_base.so / libcudf_io.so -> libcudf.so, the sub-module names
#     expected by the HugeCTR CMake files;
#   * an unversioned libibverbs.so -> libibverbs.so.1.
RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
    ln -s /usr/lib/libcudf.so /usr/lib/libcudf_base.so && \
    ln -s /usr/lib/libcudf.so /usr/lib/libcudf_io.so && \
    ln -s /usr/lib/x86_64-linux-gnu/libibverbs.so.1 /usr/lib/x86_64-linux-gnu/libibverbs.so
# Install CUDA-Aware hwloc from source
ARG HWLOC_VER=2.4.1
# Remove the hwloc headers bundled with the HPC-X Open MPI tree.
RUN cd /opt/hpcx/ompi/include/openmpi/opal/mca/hwloc/hwloc201 && rm -rfv hwloc201.h hwloc/include/hwloc.h
# Download, build (with --enable-cuda) and install hwloc, then delete the
# sources and the tarball in the same layer.
# Changes relative to the previous version:
#   * --no-check-certificate is dropped so the download is TLS-verified;
#   * the release directory is derived from HWLOC_VER (2.4.1 -> v2.4) instead
#     of being hard-coded, so overriding HWLOC_VER keeps the URL consistent;
#   * the repeated `mkdir -p /var/tmp` is removed.
RUN mkdir -p /var/tmp && \
    wget -q -nc -P /var/tmp "https://download.open-mpi.org/release/hwloc/v${HWLOC_VER%.*}/hwloc-${HWLOC_VER}.tar.gz" && \
    tar -x -f /var/tmp/hwloc-${HWLOC_VER}.tar.gz -C /var/tmp && \
    cd /var/tmp/hwloc-${HWLOC_VER} && \
    ./configure CPPFLAGS="-I/usr/local/cuda/include/ -L/usr/local/cuda/lib64/" LDFLAGS="-L/usr/local/cuda/lib64" --enable-cuda && \
    make -j$(nproc) && make install && \
    rm -rf /var/tmp/hwloc-${HWLOC_VER} /var/tmp/hwloc-${HWLOC_VER}.tar.gz
# Hiredis: shallow-clone into /var/tmp/hiredis, build out-of-tree with CMake,
# install, and delete the checkout in the same layer.
RUN mkdir -p /var/tmp && \
    git clone --depth=1 https://github.com/redis/hiredis.git /var/tmp/hiredis && \
    mkdir /var/tmp/hiredis/build && \
    cd /var/tmp/hiredis/build && \
    cmake .. && \
    make -j$(nproc) && \
    make install && \
    rm -rf /var/tmp/hiredis
# RocksDB: shallow-clone into /var/tmp/rocksdb, build and install the shared
# library, and delete the checkout in the same layer.
RUN mkdir -p /var/tmp && \
    git clone --depth=1 https://github.com/facebook/rocksdb.git /var/tmp/rocksdb && \
    cd /var/tmp/rocksdb && \
    make -j$(nproc) shared_lib && \
    make install-shared && \
    rm -rf /var/tmp/rocksdb
# Put /usr/local/include at the front of the compiler's CPATH search list.
ENV CPATH=/usr/local/include:$CPATH
# phase4: HugeCTR training build on top of phase3.
FROM phase3 AS phase4
# Re-declared so this stage's RUN steps can read them.
ARG RELEASE=false
ARG HUGECTR_VER=vnightly
# Python packages used alongside HugeCTR.
# `scikit-learn` replaces `sklearn`: the `sklearn` name on PyPI is a deprecated
# placeholder whose installation is rejected, while `scikit-learn` is the real
# distribution (already pinned to 0.24.2 by an earlier stage, so this does not
# change the installed version).
RUN pip3 install --no-cache-dir mpi4py ortools scikit-learn onnx onnxruntime
# Runtime settings for Open MPI, NCCL, SHARP and HCOLL.
ENV OMPI_MCA_plm_rsh_agent=sh
ENV OMPI_MCA_opal_cuda_support=true
ENV NCCL_LAUNCH_MODE=PARALLEL
ENV NCCL_COLLNET_ENABLE=0
ENV SHARP_COLL_NUM_COLL_GROUP_RESOURCE_ALLOC_THRESHOLD=0
ENV SHARP_COLL_LOCK_ON_COMM_INIT=1
ENV SHARP_COLL_LOG_LEVEL=3
ENV HCOLL_ENABLE_MCAST=0
# Install hugectr
# Clone, build (multi-node enabled, SM 70/75/80), install, install the ONNX
# converter, and delete the checkout — all in ONE layer. Previously the clone
# ran in its own RUN instruction, so the full source tree stayed in that layer
# even though a later layer deleted it.
# Checks out tags/${HUGECTR_VER} when RELEASE is "true" and HUGECTR_VER is not
# "vnightly"; otherwise builds master. The test operands are quoted so an empty
# HUGECTR_VER does not break `[`.
# The CUDA stub directory is put on LD_LIBRARY_PATH (explicitly exported) and
# the CUDA compat directory on PATH for the duration of this build only.
RUN mkdir -p /var/tmp && \
    git clone https://github.com/NVIDIA-Merlin/HugeCTR.git /var/tmp/HugeCTR && \
    cd /var/tmp/HugeCTR && \
    if [ "${RELEASE}" == "true" ] && [ "${HUGECTR_VER}" != "vnightly" ]; then \
        git fetch --all --tags && git checkout "tags/${HUGECTR_VER}"; \
    else \
        git checkout master; \
    fi && \
    git submodule update --init --recursive && \
    mkdir build && cd build && \
    export LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs/:$LD_LIBRARY_PATH && \
    export PATH=$PATH:/usr/local/cuda-${CUDA_SHORT_VERSION}/compat/ && \
    cmake -DCMAKE_CXX_COMPILER=/usr/bin/g++ -DCMAKE_C_COMPILER=/usr/bin/gcc -DCMAKE_BUILD_TYPE=Release -DSM="70;75;80" \
        -DENABLE_MULTINODES=ON .. && \
    make -j$(nproc) && make install && \
    chmod +x /usr/local/hugectr/bin/* && \
    chmod +x /usr/local/hugectr/lib/* && \
    cd /var/tmp/HugeCTR/onnx_converter && \
    python3 setup.py install && \
    rm -rf /var/tmp/HugeCTR
# Expose the installed HugeCTR binaries, libraries and Python modules.
# PYTHONPATH ends up with /hugectr/onnx_converter first, then
# /usr/local/hugectr/lib, then whatever was set before.
ENV PATH=/usr/local/hugectr/bin:$PATH \
    LIBRARY_PATH=/usr/local/hugectr/lib:$LIBRARY_PATH \
    LD_LIBRARY_PATH=/usr/local/hugectr/lib:$LD_LIBRARY_PATH \
    PYTHONPATH=/hugectr/onnx_converter:/usr/local/hugectr/lib:$PYTHONPATH
# Remove the libcuda.so.1 stub symlink that was created for the builds.
RUN rm /usr/local/cuda/lib64/stubs/libcuda.so.1
RUN rm -rf /repos
RUN pip install --upgrade numba numpy
# Print the size of each top-level directory into the build log.
RUN echo $(du --max-depth=1 -h /)
# No health check, no entrypoint; containers start an interactive bash.
HEALTHCHECK NONE
ENTRYPOINT []
CMD ["/bin/bash"]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment