Skip to content

Instantly share code, notes, and snippets.

@jglaser
Last active January 11, 2021 16:29
Show Gist options
  • Save jglaser/2c4f25e182213a36d5c0fc9265baca8e to your computer and use it in GitHub Desktop.
Save jglaser/2c4f25e182213a36d5c0fc9265baca8e to your computer and use it in GitHub Desktop.
Build RAPIDS + BlazingSQL on Summit
# Build BlazingSQL + RAPIDS on Summit
# Jens Glaser <[email protected]> July 28 2020
# wherever bif128 occurs below, this refers to the project ID
# replace with yours, e.g. abc123
# est. "pure compile time" 3-4h
# record of the Summit module environment this recipe was built against
$ module list
Currently Loaded Modules:
1) hsi/5.0.2.p5 4) darshan-runtime/3.1.7 7) cuda/10.1.243 10) spectrum-mpi/10.3.1.2-20200121
2) xalt/1.2.0 5) DefApps 8) gcc/7.4.0 11) boost/1.66.0
3) lsf-tools/2.0 6) python/3.7.0 9) cmake/3.17.3
# make the modulefile's gcc the default compiler for every build below
echo export CC=\$OLCF_GCC_ROOT/bin/gcc >> ~/.bashrc
echo export CXX=\$OLCF_GCC_ROOT/bin/g++ >> ~/.bashrc
source ~/.bashrc
#
# general python environment setup
#
python -m venv $WORLDWORK/bif128/rapids-env
# add/remove environment to/from LD_LIBRARY_PATH whenever it is activated/deactivated
# create a file activate.patch in the home directory with this content
cd
----- BEGIN activate.patch
--- activate.old 2020-07-11 16:38:24.436411000 -0400
+++ activate 2020-07-11 16:39:30.320331000 -0400
@@ -2,6 +2,12 @@
# you cannot run it directly
deactivate () {
+ # https://stackoverflow.com/questions/22771204/virtualenv-that-can-find-relocated-libraires-like-mysqlclient-lib-for-mysqldb
+ if ! [ -z ${_OLD_LD_LIBRARY_PATH+x} ] ; then
+ LD_LIBRARY_PATH="$_OLD_LD_LIBRARY_PATH"
+ export LD_LIBRARY_PATH
+ unset _OLD_LD_LIBRARY_PATH
+ fi
# reset old environment variables
if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
PATH="${_OLD_VIRTUAL_PATH:-}"
@@ -44,6 +50,10 @@
PATH="$VIRTUAL_ENV/bin:$PATH"
export PATH
+_OLD_LD_LIBRARY_PATH="$LD_LIBRARY_PATH"
+LD_LIBRARY_PATH="$VIRTUAL_ENV/lib:$VIRTUAL_ENV/lib64:$LD_LIBRARY_PATH"
+export LD_LIBRARY_PATH
+
# unset PYTHONHOME if set
# this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
# could use `if (set -u; : $PYTHONHOME) ;` in bash
----- END activate.patch
# apply the LD_LIBRARY_PATH patch to the venv's activate script
cd $WORLDWORK/bif128/rapids-env/bin
patch -p0 < ~/activate.patch
echo alias activate_rapids=". $WORLDWORK/bif128/rapids-env/bin/activate" >> ~/.bashrc
source ~/.bashrc
activate_rapids
# directory to hold sources
cd $MEMBERWORK/bif128
mkdir rapids
cd rapids
export RAPIDS_SRC=$PWD
# let cmake resolve all dependencies installed into the venv prefix
export CMAKE_PREFIX_PATH=$VIRTUAL_ENV
#
# 1. build google-cloud-cpp
#
# google abseil
cd $RAPIDS_SRC
# NOTE(review): the original line read "abseil https://..." -- the "git clone" verb was missing
git clone https://github.com/abseil/abseil-cpp
cd abseil-cpp
export ABSEIL_SRC=$PWD
mkdir -p /tmp/$USER/abseil-build
cd /tmp/$USER/abseil-build
cmake -DBUILD_SHARED_LIBS=ON -D CMAKE_INSTALL_PREFIX=$VIRTUAL_ENV $ABSEIL_SRC
# NOTE(review): build/install step was missing in the original transcript
make -j 8 install
# google test (shared libs, installed into the venv prefix)
cd $RAPIDS_SRC
git clone https://github.com/google/googletest.git
cd googletest
git checkout release-1.8.0 # for compatibility with internal gtest in cudf and Apache ORC
export GTEST_SRC=$PWD
mkdir -p /tmp/$USER/gtest-build
cd /tmp/$USER/gtest-build
cmake -DBUILD_SHARED_LIBS=ON -D CMAKE_INSTALL_PREFIX=$VIRTUAL_ENV $GTEST_SRC
make -j 8 install
# google protobuf (comes with grpc)
module load zlib
cd $RAPIDS_SRC
git clone https://github.com/protocolbuffers/protobuf.git
cd protobuf
# NOTE(review): the checkout must happen inside the clone; the original ran it one
# directory too early. Tag fixed from "v3.12.2.0" (no such protobuf tag) to v3.12.2.
git checkout v3.12.2 # grpc master (internal thirdparty), (arrow 1.7.1)
git submodule update --init --recursive
export PROTOBUF_SRC=$PWD
mkdir -p /tmp/$USER/protobuf-build
cd /tmp/$USER/protobuf-build
cmake -D protobuf_BUILD_TESTS=OFF -D CMAKE_INSTALL_PREFIX=$VIRTUAL_ENV -D CMAKE_BUILD_TYPE=Release $PROTOBUF_SRC/cmake
make -j 8 install
# c-ares (autotools build; buildconf generates ./configure)
cd $RAPIDS_SRC
git clone https://github.com/c-ares/c-ares
cd c-ares
./buildconf
./configure --prefix=$VIRTUAL_ENV
make -j 8 install
# re2
cd $RAPIDS_SRC
git clone https://github.com/google/re2.git
cd re2
export RE2_SRC=$PWD
# -p added for idempotence, consistent with every other build-dir mkdir in this recipe
mkdir -p /tmp/$USER/re2-build
cd /tmp/$USER/re2-build
cmake -D CMAKE_INSTALL_PREFIX=$VIRTUAL_ENV -DCMAKE_BUILD_TYPE=RELEASE $RE2_SRC
make -j8 install
# google grpc++
module unload zlib
cd $RAPIDS_SRC
git clone https://github.com/grpc/grpc
cd grpc
git submodule update --init --recursive
export GRPC_SRC=$PWD
# NOTE(review): grpc's submodule directory is third_party/ (underscore), not third-party/
cd third_party/abseil-cpp
# swap in a ppc64le-fixed abseil, see https://github.com/abseil/abseil-cpp/pull/739
git remote add ppc_fix https://github.com/jglaser/abseil-cpp.git
git fetch ppc_fix fix_ppc_build
git checkout fix_ppc_build
mkdir -p /tmp/$USER/grpc-build
cd /tmp/$USER/grpc-build
cmake -DBUILD_SHARED_LIBS=ON -D CMAKE_INSTALL_PREFIX=$VIRTUAL_ENV -DCMAKE_BUILD_TYPE=Release $GRPC_SRC
make -j8 install
# google crc32c
cd $RAPIDS_SRC
git clone https://github.com/google/crc32c.git
cd crc32c
git submodule update --init --recursive
export CRC32C_SRC=$PWD
mkdir -p /tmp/$USER/crc32-build
cd /tmp/$USER/crc32-build
# NOTE(review): option prefix fixed -- the project's cache variables are CRC32C_*
# (the original -DCRC32_BUILD_BENCHMARKS=OFF was silently ignored)
cmake -DCRC32C_BUILD_BENCHMARKS=OFF -DCRC32C_BUILD_TESTS=OFF -DCRC32C_USE_GLOG=OFF -DCMAKE_INSTALL_PREFIX=$VIRTUAL_ENV -D CMAKE_BUILD_TYPE=Release $CRC32C_SRC
make -j 8 install
# google-cloud-cpp
cd $RAPIDS_SRC
git clone https://github.com/googleapis/google-cloud-cpp
cd google-cloud-cpp
# don't know why this is necessary, probably the openssl cmake config on Summit is not correct
# (forces an extra -lcrypto onto the storage client link line)
echo "diff --git a/google/cloud/storage/CMakeLists.txt b/google/cloud/storage/CMakeLists.txt
index 690c292..435f308 100644
--- a/google/cloud/storage/CMakeLists.txt
+++ b/google/cloud/storage/CMakeLists.txt
@@ -232,6 +232,7 @@ target_link_libraries(
Threads::Threads
OpenSSL::SSL
OpenSSL::Crypto
+ crypto
ZLIB::ZLIB)
google_cloud_cpp_add_common_options(storage_client)" | patch
export GCLOUDCPP_SRC=$PWD
mkdir -p /tmp/$USER/google-cloud-cpp-build
cd /tmp/$USER/google-cloud-cpp-build
cmake -DBUILD_SHARED_LIBS=ON -DBUILD_TESTING=OFF -D CMAKE_INSTALL_PREFIX=$VIRTUAL_ENV $GCLOUDCPP_SRC
make -j 16 install
#
# 2. build AWS SDK C++
#
cd $RAPIDS_SRC
git clone https://github.com/aws/aws-sdk-cpp.git
cd aws-sdk-cpp
export AWS_SRC_DIR=$PWD
mkdir -p $MEMBERWORK/bif128/aws-build
cd $MEMBERWORK/bif128/aws-build
cmake -DENABLE_TESTING=OFF -D CMAKE_INSTALL_PREFIX=$VIRTUAL_ENV $AWS_SRC_DIR
make -j4 install
#
# Apache arrow & pyarrow
#
# cython (build-time dependency of pyarrow/cudf)
cd $RAPIDS_SRC
git clone https://github.com/cython/cython.git
cd cython
python setup.py install
# rapidjson
cd $RAPIDS_SRC
git clone https://github.com/Tencent/rapidjson.git
cd rapidjson
git submodule update --init
export RJSON_SRC=$PWD
mkdir -p /tmp/$USER/rapid-json-build
cd /tmp/$USER/rapid-json-build
cmake -D CMAKE_BUILD_TYPE=Release -D RAPIDJSON_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX=$VIRTUAL_ENV $RJSON_SRC
make -j 16 install
# utf8proc
cd $RAPIDS_SRC
git clone https://github.com/JuliaStrings/utf8proc.git
cd utf8proc
export UTF8PROC_SRC=$PWD
mkdir -p /tmp/$USER/utf8proc-build
cd /tmp/$USER/utf8proc-build
cmake -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=$VIRTUAL_ENV $UTF8PROC_SRC
make -j 8 install
# orc
cd $RAPIDS_SRC
git clone https://github.com/apache/orc
cd orc
# vi cmake_modules/ThirdpartyToolchain.cmake, change protobuf version to 3.12.2
export ORC_SRC=$PWD
mkdir -p /tmp/$USER/orc-build
cd /tmp/$USER/orc-build
cmake -D BUILD_SHARED_LIBS=ON -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=$VIRTUAL_ENV -D BUILD_CPP_TESTS=OFF -D BUILD_JAVA=OFF $ORC_SRC
make -j16 install
## if it errors in snappy test, modify snappy_ep-prefix/src/snappy_ep-build/CMakeCache.txt -> SNAPPY_BUILD_TESTS:BOOL=OFF
# google snappy
cd $RAPIDS_SRC
git clone -b 1.1.7 https://github.com/google/snappy.git # Apache ORC compatibility
cd snappy
export SNAPPY_SRC=$PWD
mkdir -p /tmp/$USER/snappy-build
cd /tmp/$USER/snappy-build
cmake -DBUILD_SHARED_LIBS=ON -DSNAPPY_BUILD_TESTS=OFF -D CMAKE_INSTALL_PREFIX=$VIRTUAL_ENV $SNAPPY_SRC
make -j 8 install
# thrift C++ library
module load boost/1.66.0
cd $RAPIDS_SRC
git clone https://github.com/apache/thrift.git
cd thrift
./bootstrap.sh
# PY_PREFIX steers thrift's python lib into the venv
export PY_PREFIX=$VIRTUAL_ENV
./configure --prefix=$VIRTUAL_ENV --enable-libs --with-cpp=yes --with-haskell=no
make -j 16 install
# lz4
cd $RAPIDS_SRC
git clone https://github.com/lz4/lz4.git
cd lz4
make
PREFIX=$VIRTUAL_ENV make install
# facebook zstd
cd $RAPIDS_SRC
git clone https://github.com/facebook/zstd.git
cd zstd
export ZSTD_SRC=$PWD
mkdir -p /tmp/$USER/zstd-build
cd /tmp/$USER/zstd-build
cmake -D CMAKE_INSTALL_PREFIX=$VIRTUAL_ENV $ZSTD_SRC/build/cmake
make -j 8 install
# numpy (build from source so it links against this toolchain)
pip install --no-binary numpy numpy
cd $RAPIDS_SRC
# git clone https://github.com/apache/arrow
# see https://github.com/apache/arrow/pull/7711 (merged)
# need 0.17.1 because cudf and blazingsql depend on this specific version
git clone -b fix_ppc_release https://github.com/jglaser/arrow # patched release 0.17.1
cd arrow
cd cpp
export ARROW_SRC=$PWD
mkdir -p /tmp/$USER/arrow_build
cd /tmp/$USER/arrow_build
module load boost/1.66.0
cmake -DARROW_PARQUET=ON -DARROW_ORC=ON -D ARROW_PYTHON=ON -DARROW_DATASET=ON -DARROW_CUDA=ON -D ARROW_IPC=ON -D CMAKE_INSTALL_PREFIX=$VIRTUAL_ENV -DLZ4_ROOT=$VIRTUAL_ENV -D ARROW_BUILD_SHARED=ON $ARROW_SRC
make -j16 install
# pyarrow: the PYARROW_WITH_* flags must match the C++ features built above
cd $RAPIDS_SRC/arrow/python
export PYARROW_WITH_PARQUET=1
export PYARROW_WITH_ORC=1
export PYARROW_WITH_CUDA=1
export PYARROW_WITH_DATASET=1
python setup.py install
cd ..
python -c "import pyarrow" # test
# spdlog
cd $RAPIDS_SRC
git clone https://github.com/gabime/spdlog.git
cd spdlog
export SPDLOG_SRC=$PWD
mkdir -p /tmp/$USER/spdlog-build
cd /tmp/$USER/spdlog-build
cmake -DCMAKE_INSTALL_PREFIX=$VIRTUAL_ENV $SPDLOG_SRC
make -j 8 install
#
# Rapids Memory Management (rmm)
#
cd $RAPIDS_SRC
git clone https://github.com/rapidsai/rmm.git
cd rmm
export RMM_SRC=$PWD
mkdir -p /tmp/$USER/rmm-build
cd /tmp/$USER/rmm-build
cmake -D CMAKE_INSTALL_PREFIX=$VIRTUAL_ENV $RMM_SRC
make -j 8 install
cd $RAPIDS_SRC/rmm/python
python setup.py install
#
# Rapids DLPack (Deep Learning)
#
module load boost/1.66.0
cd $RAPIDS_SRC
git clone https://github.com/rapidsai/dlpack.git
cd dlpack
export DLPACK_SRC=$PWD
mkdir -p /tmp/$USER/dlpack-build
cd /tmp/$USER/dlpack-build
cmake -DCMAKE_INSTALL_PREFIX=$VIRTUAL_ENV $DLPACK_SRC
make -j16 install
#
# cudf
#
# pandas
cd $RAPIDS_SRC
git clone https://github.com/pandas-dev/pandas.git
cd pandas
python setup.py install
# cupy python dependency (limit device code to Volta sm_70)
cd $RAPIDS_SRC
git clone --recurse-submodules https://github.com/cupy/cupy.git
cd cupy
CUPY_NVCC_GENERATE_CODE="arch=compute_70,code=sm_70" python setup.py clean install
# ignore the cudnn/cutensor/nccl errors, they are non-fatal
# gold & llvm are for getting llvmlite up and running
# install gold linker for LLVM -lto
module load texinfo
cd $RAPIDS_SRC
git clone --depth 1 git://sourceware.org/git/binutils-gdb.git binutils
cd binutils
./configure --prefix=$VIRTUAL_ENV --enable-gold --enable-plugins --disable-werror
make all-gold -j16
cp gold/ld-new $VIRTUAL_ENV/bin/ld
make -j16
cp binutils/ar $VIRTUAL_ENV/bin/ar
cp binutils/nm-new $VIRTUAL_ENV/bin/nm
# compile patched LLVM 9 (static libs) using gold linker (LLVMgold.so req'd by llvmlite 0.33)
# https://llvm.org/docs/GoldPlugin.html (see there for a simple test case)
# not sure if all of this patching and linker setup is REALLY necessary
cd $RAPIDS_SRC
wget https://raw.githubusercontent.com/numba/llvmlite/master/conda-recipes/0001-Revert-Limit-size-of-non-GlobalValue-name.patch
git clone https://github.com/llvm/llvm-project
cd llvm-project
git checkout release/9.x
export LLVM_SRC=$PWD
cd llvm
patch -p1 < ../../0001-Revert-Limit-size-of-non-GlobalValue-name.patch
mkdir -p $MEMBERWORK/bif128/llvm-build
cd $MEMBERWORK/bif128/llvm-build
cmake -D CMAKE_INSTALL_PREFIX=$VIRTUAL_ENV -DLLVM_ENABLE_PROJECTS=clang -DCMAKE_BUILD_TYPE=Release -DLLVM_TARGETS_TO_BUILD="PowerPC" -D LLVM_BINUTILS_INCDIR=$RAPIDS_SRC/binutils/include $LLVM_SRC/llvm
make -j 16 install
# alternatively build on a compute node:
# bsub -W 2:00 -P BIF128 -nnodes 1 -Is $SHELL
# jsrun -n 1 -c 42 -b none make -j 140 install
# install LLVMgold.so as plugin to 'ar'
mkdir -p $VIRTUAL_ENV/lib/bfd-plugins
cp $VIRTUAL_ENV/lib/LLVMgold.so $VIRTUAL_ENV/lib/bfd-plugins
# llvmlite python dependency (via numba)
pip uninstall llvmlite
cd $RAPIDS_SRC
git clone https://github.com/numba/llvmlite.git
cd llvmlite
# https://lists.llvm.org/pipermail/llvm-dev/2019-November/137322.html
python setup.py install
cd ..
# NOTE(review): original tested "import numba" here, but numba is only installed
# in the next step -- test the package that was just built instead
python -c "import llvmlite" # test
# numba
pip uninstall numba
cd $RAPIDS_SRC
git clone https://github.com/numba/numba.git
cd numba
python setup.py install
# restore default linker (rename the gold/binutils copies out of the way)
cd $VIRTUAL_ENV/bin
mv ar ar-new
mv nm nm-new
mv ld ld.gold
# python snappy
# fsspec
cd $RAPIDS_SRC
pip install --no-binary fsspec fsspec
# snappy bindings (link against the snappy built above)
cd $RAPIDS_SRC
git clone https://github.com/andrix/python-snappy.git
cd python-snappy
CFLAGS=-L$VIRTUAL_ENV/lib python setup.py install
cd $RAPIDS_SRC
CUDF_HOME=$(pwd)/cudf
git clone https://github.com/rapidsai/cudf.git $CUDF_HOME
cd $CUDF_HOME
git submodule update --init --remote --recursive
mkdir -p $MEMBERWORK/bif128/cudf-build
cd $MEMBERWORK/bif128/cudf-build
module load boost/1.66.0
cmake -D GPU_ARCHS=70 -D CMAKE_CUDA_ARCHITECTURES=70 -DBUILD_TESTS=ON -DCMAKE_INSTALL_PREFIX=$VIRTUAL_ENV -DCMAKE_CXX11_ABI=ON ../cpp
make -j 2 install
# alternatively build on a high-memory compute node:
# bsub -W 2:00 -P BIF128 -nnodes 1 -q batch-hm -Is $SHELL
# jsrun -n 1 -c 42 -b none make -j 140 install
# freestanding STL (check out source-only)
cd $RAPIDS_SRC
git clone --recurse-submodules https://github.com/rapidsai/thirdparty-freestanding.git
# cudf python packages
cd $RAPIDS_SRC/cudf/python/cudf
PARALLEL_LEVEL=16 CFLAGS=-I$RAPIDS_SRC/thirdparty-freestanding/include python setup.py install
cd ..
python -c "import cudf" # test
cd $RAPIDS_SRC/cudf/python/dask_cudf
python setup.py install
# zmq
cd $RAPIDS_SRC
git clone https://github.com/zeromq/libzmq.git
cd libzmq
export ZMQ_SRC=$PWD
mkdir -p /tmp/$USER/zmq-build
cd /tmp/$USER/zmq-build
cmake -D CMAKE_INSTALL_PREFIX=$VIRTUAL_ENV -D ZMQ_BUILD_TESTS=OFF $ZMQ_SRC
make -j16 install
# cppzmq (header-only C++ bindings)
cd $RAPIDS_SRC
git clone https://github.com/zeromq/cppzmq
cd cppzmq
export CPPZMQ_SRC=$PWD
mkdir -p /tmp/$USER/cppzmq-build
cd /tmp/$USER/cppzmq-build
cmake -D CMAKE_INSTALL_PREFIX=$VIRTUAL_ENV $CPPZMQ_SRC
make -j 8 install
#
# JDK / maven
#
# download IBM Java 8 SDK
# go to https://developer.ibm.com/javasdk/downloads/sdk8/
# under "Linux on Power Systems 64-bit LE", select "Simple unzip with license (InstallAnywhere root not required)"
# accept license and copy download link
cd $RAPIDS_SRC
wget http://public.dhe.ibm.com/ibmdl/export/pub/systems/cloud/runtimes/java/8.0.6.11/linux/ppc64le/ibm-java-sdk-8.0-6.11-ppc64le-archive.bin
# installation location should be $WORLDWORK/bif128/rapids-env/ibm-java-ppc64le-80
echo $WORLDWORK/bif128/rapids-env/ibm-java-ppc64le-80
# NOTE(review): wget does not set the executable bit, so make the installer runnable first
chmod +x ibm-java-sdk-8.0-6.11-ppc64le-archive.bin
./ibm-java-sdk-8.0-6.11-ppc64le-archive.bin
# confirm prompts
echo export PATH=$WORLDWORK/bif128/rapids-env/ibm-java-ppc64le-80/bin:\$PATH >> ~/.bashrc
echo export JAVA_HOME=$WORLDWORK/bif128/rapids-env/ibm-java-ppc64le-80 >> ~/.bashrc
# ... add
source ~/.bashrc
# Apache maven (binary)
cd $RAPIDS_SRC
wget https://downloads.apache.org/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz
tar xvfz apache-maven-3.6.3-bin.tar.gz
mv apache-maven-3.6.3 $VIRTUAL_ENV/
# NOTE(review): PATH fixed -- maven was moved into $VIRTUAL_ENV (= $WORLDWORK/bif128/rapids-env),
# but the original PATH entry pointed at $WORLDWORK/bif128/envs/rapids/, which does not exist
echo export PATH=$WORLDWORK/bif128/rapids-env/apache-maven-3.6.3/bin:\$PATH >> ~/.bashrc
source ~/.bashrc
#
# blazingSQL
# have to circumvent the build.sh script, as it expects thirdparty libraries in a special location
#
cd $RAPIDS_SRC
git clone https://github.com/BlazingDB/blazingsql
# io component
cd $RAPIDS_SRC
cd blazingsql/io
mkdir -p build
cd build
# CONDA_PREFIX is what blazingsql's cmake uses to locate dependencies; point it at the venv
CONDA_PREFIX=$VIRTUAL_ENV cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=$VIRTUAL_ENV -D BUILD_TESTING=OFF ..
make -j 16 install
# comms component
cd $RAPIDS_SRC/blazingsql/comms
mkdir -p build
cd build
# need to point to cudart.so
CONDA_PREFIX=$VIRTUAL_ENV cmake -DCMAKE_CXX_STANDARD_LIBRARIES="-L${OLCF_CUDA_ROOT}/lib64" -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=$VIRTUAL_ENV ..
make -j 8 install
# bsql engine
module load boost/1.66.0
cd $RAPIDS_SRC/blazingsql/engine
mkdir -p build
cd build
CXXFLAGS="-I${CUDF_HOME}/cpp -L${OLCF_CUDA_ROOT}/lib64 -L$VIRTUAL_ENV/lib" CONDA_PREFIX=$VIRTUAL_ENV cmake -D CMAKE_CXX11_ABI=ON -DBUILD_TESTING=OFF -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=$VIRTUAL_ENV ..
make -j 16 install
# python engine
cd $RAPIDS_SRC/blazingsql/engine
CONDA_PREFIX=$VIRTUAL_ENV python setup.py install
# pyblazing
cd $RAPIDS_SRC/blazingsql/pyblazing
python setup.py install
# algebra (java side; uses a venv-local maven repository)
cd $RAPIDS_SRC/blazingsql/algebra
mvn clean install -Dmaven.test.skip=true -f pom.xml -Dmaven.repo.local=$VIRTUAL_ENV/blazing-protocol-mvn/
cp blazingdb-calcite-application/target/BlazingCalcite.jar $VIRTUAL_ENV/lib/blazingsql-algebra.jar
cp blazingdb-calcite-core/target/blazingdb-calcite-core.jar $VIRTUAL_ENV/lib/blazingsql-algebra-core.jar
# pyHIVE (SQL)
pip install pyhive
# jpype (java)
cd $RAPIDS_SRC
git clone https://github.com/jpype-project/jpype.git
cd jpype
python setup.py install
# make the IBM JVM discoverable where pyblazing expects $CONDA_PREFIX/lib/server/libjvm.so
ln -s $VIRTUAL_ENV/ibm-java-ppc64le-80/jre/lib/ppc64le/compressedrefs/ $VIRTUAL_ENV/lib/server
# netifaces
cd $RAPIDS_SRC
git clone https://github.com/al45tair/netifaces.git
cd netifaces
python setup.py install
# would be great if CONDA_PREFIX was not hardcoded
export CONDA_PREFIX=$VIRTUAL_ENV
# need to patch pyblazing (apply the diff below by hand)
cd $RAPIDS_SRC/blazingsql
diff --git a/pyblazing/pyblazing/apiv2/context.py b/pyblazing/pyblazing/apiv2/context.py
index 1004ce9..727f256 100644
--- a/pyblazing/pyblazing/apiv2/context.py
+++ b/pyblazing/pyblazing/apiv2/context.py
@@ -59,7 +59,8 @@ if not os.path.isfile(jvm_path):
# (for newer java versions e.g. 11.x)
jvm_path = os.environ["CONDA_PREFIX"] + "/lib/server/libjvm.so"
-jpype.startJVM("-ea", convertStrings=False, jvmpath=jvm_path)
+#jpype.startJVM("-ea", convertStrings=False, jvmpath=jvm_path)
+jpype.startJVM()
ArrayClass = jpype.JClass("java.util.ArrayList")
ColumnTypeClass = jpype.JClass(
# (apply the above patch)
cd pyblazing
python setup.py install
# test installation
python -c "from blazingsql import BlazingContext"
# nccl 2.7.6
# download local installer from https://developer.nvidia.com/nccl
# (have to register and answer questionnaire)
# Power "O/S agnostic local installer"
cd $RAPIDS_SRC
tar xvfk nccl_2.7.6-1+cuda10.1_ppc64le.txz
cp -r nccl_2.7.6-1+cuda10.1_ppc64le/lib/* $VIRTUAL_ENV/lib/
cp -r nccl_2.7.6-1+cuda10.1_ppc64le/include/* $VIRTUAL_ENV/include
# rapidjson (not needed -- already built and installed further above)
#cd $RAPIDS_SRC
#git clone https://github.com/Tencent/rapidjson
#cd rapidjson
# treelite
cd $RAPIDS_SRC
git clone https://github.com/dmlc/treelite
cd treelite
export TREELITE_SRC=$PWD
mkdir -p /tmp/$USER/treelite-build
cd /tmp/$USER/treelite-build
# NOTE(review): fixed $TREELIE_SRC typo -- it would expand empty and cmake the wrong source dir
CXXFLAGS="-I$VIRTUAL_ENV/include -L$VIRTUAL_ENV/lib" cmake -D CMAKE_INSTALL_PREFIX=$VIRTUAL_ENV $TREELITE_SRC
make -j 32 install
# python treelite (bindings for the library installed above)
cd $RAPIDS_SRC/treelite
cd python
python setup.py clean install
# joblib (runtime dependency of scikit-learn / cuml)
pip install --no-binary joblib joblib
# scikit-learn
# NOTE(review): cd added -- without it the clone lands inside treelite/python from the previous step
cd $RAPIDS_SRC
git clone https://github.com/scikit-learn/scikit-learn
cd scikit-learn
python setup.py install
# swig (build-time dependency of faiss python bindings)
cd $RAPIDS_SRC
git clone https://github.com/swig/swig.git
cd swig
./autogen.sh
./configure --with-python3 --prefix=$VIRTUAL_ENV
make -j16
make -j16 install
# facebook FAISS
module load openblas
cd $RAPIDS_SRC
git clone https://github.com/facebookresearch/faiss.git
cd faiss
#git checkout v1.6.3 # for cuml 0.16 compatibility
git checkout a93a4b39571db0ab6ad0b4ef42a6b8734ca05135
./configure --with-cuda=$OLCF_CUDA_ROOT --with-python=`which python` --prefix=$VIRTUAL_ENV --with-cuda-arch=-gencode="arch=compute_70,code=sm_70"
CXXFLAGS=-fPIC make -j 16
make -j 16 install
#make -C python
# cumlprims (only needed with multi-GPU build of cuml!)
cd $RAPIDS_SRC
wget https://public.dhe.ibm.com/ibmdl/export/pub/software/server/ibm-ai/conda-early-access/linux-ppc64le/libcumlprims-0.14.0a-640.gf76300c.tar.bz2
mkdir tmp
cd tmp
tar xvfk ../libcumlprims-0.14.0a-640.gf76300c.tar.bz2
cp -r include/* $VIRTUAL_ENV/include
# NOTE(review): fixed $VIRTUA_ENV typo -- it expanded empty, i.e. a copy to /lib
cp -r lib/* $VIRTUAL_ENV/lib
# doxygen (needed by the cuml build)
# NOTE(review): cd added -- without it doxygen is cloned inside the tmp/ dir of the previous step
cd $RAPIDS_SRC
git clone https://github.com/doxygen/doxygen
cd doxygen
mkdir -p build
cd build
cmake -D CMAKE_INSTALL_PREFIX=$VIRTUAL_ENV ..
make -j16 install
# cuml (single GPU build)
module load openblas
module load ucx
cd $RAPIDS_SRC
git clone https://github.com/rapidsai/cuml.git
cd cuml
# NOTE(review): mkdir added -- a fresh clone has no build/ directory
mkdir -p build
cd build
cmake -DCMAKE_INSTALL_PREFIX=$VIRTUAL_ENV \
-DCMAKE_CXX11_ABI=ON \
-DBLAS_LIBRARIES=$OLCF_OPENBLAS_ROOT/lib/libopenblas.so.0 \
-DGPU_ARCHS=70 \
-DCMAKE_CUDA_ARCHITECTURES=70 \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_CUML_C_LIBRARY=ON \
-DSINGLEGPU=ON \
-DWITH_UCX=ON \
-DBUILD_CUML_MPI_COMMS=OFF \
-DBUILD_CUML_MG_TESTS=OFF \
-DBUILD_STATIC_FAISS=OFF \
-DNVTX=OFF \
-DBUILD_CUML_TESTS=OFF \
-DBUILD_PRIMS_TESTS=OFF \
../cpp
# NOTE(review): build/install step added -- configuring alone installs nothing
make -j 16 install
# python component
cd $RAPIDS_SRC/cuml
cd python
CFLAGS="-I$VIRTUAL_ENV/include -L$VIRTUAL_ENV/lib" python setup.py clean --all build_ext --singlegpu install
# test it
python -c "import cuml"
# (continuing from the blazingSQL installation)
# dask-cuda utils
cd $RAPIDS_SRC
git clone https://github.com/rapidsai/dask-cuda.git
cd dask-cuda
python setup.py install
# dask
cd $RAPIDS_SRC
git clone https://github.com/dask/dask.git
cd dask
python setup.py install
# gdrcopy (userspace library; ucx below is configured --with-gdrcopy against this prefix)
# NOTE(review): the clone/cd steps were missing from the transcript -- the make line
# below assumes it runs inside a gdrcopy checkout
cd $RAPIDS_SRC
git clone https://github.com/NVIDIA/gdrcopy.git
cd gdrcopy
CXXFLAGS="-L$VIRTUAL_ENV/lib -I$VIRTUAL_ENV/include -lcheck" make PREFIX=$VIRTUAL_ENV CUDA=$OLCF_CUDA_ROOT all install
# ucx
module load hwloc
module load gdrcopy
cd $RAPIDS_SRC
git clone https://github.com/openucx/ucx
cd ucx
#git checkout v1.8.1
git checkout master
./autogen.sh
mkdir build
cd build
# NOTE(review): run only ONE of the two configure lines below -- as originally written,
# the devel (debug) configure overrides the release configure before make runs
# Performance build
../contrib/configure-release --with-gdrcopy=$VIRTUAL_ENV --prefix=$VIRTUAL_ENV --with-cuda=$OLCF_CUDA_ROOT --enable-mt CPPFLAGS="-I/$OLCF_CUDA_ROOT/include" --without-java --with-mpi
#../contrib/configure-release --prefix=$VIRTUAL_ENV --with-cuda=$OLCF_CUDA_ROOT --enable-mt CPPFLAGS="-I/$OLCF_CUDA_ROOT/include" --without-java
# Debug build
#../contrib/configure-devel --with-gdrcopy=$VIRTUAL_ENV --prefix=$VIRTUAL_ENV --with-cuda=$OLCF_CUDA_ROOT --enable-mt CPPFLAGS="-I/$OLCF_CUDA_ROOT/include" --without-java --with-mpi --without-valgrind
make -j16 install
# ucx-py
module load hwloc
cd $RAPIDS_SRC
git clone https://github.com/rapidsai/ucx-py.git
cd ucx-py
pip install .
# lz4 bindings
cd $RAPIDS_SRC
git clone https://github.com/python-lz4/python-lz4
cd python-lz4
python setup.py install
# jupyterlab
pip install --no-binary jupyterlab jupyterlab
# bokeh for dask scheduler
pip install --no-binary bokeh bokeh
# forward dask dashboard
pip install --no-binary jupyter-server-proxy jupyter-server-proxy
jupyter serverextension enable --sys-prefix jupyter_server_proxy
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment