Created
October 30, 2019 12:38
-
-
Save thvasilo/e3948f5b347502639350627612051080 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script to set up the environment and files for training XGBoost jobs
# on the master of an MPI cluster created using AWS ParallelCluster.
# Requires sudo rights and network access; run it with bash.

# apt-get is used instead of apt because apt's command-line interface is
# not guaranteed to be stable for scripted use.
# Refresh the package index first so the installs below do not fail on
# stale package lists.
sudo apt-get update
# Install personal choice packages
sudo apt-get install -y tmux emacs-nox htop parallel
# Needed for dmlc-core (?)
sudo apt-get install -y libcurl4-openssl-dev libssl-dev
# Parallel compress/decompress because we work with large bzipped files
sudo apt-get install -y lbzip2 pigz
# Make the usual bzip2/gzip command names resolve to the parallel
# implementations by placing symlinks in /usr/local/bin.
# NOTE(review): this assumes /usr/local/bin comes before /usr/bin and /bin
# on PATH, and relies on lbzip2/pigz choosing their mode from the name they
# are invoked under (see their man pages).
# Absolute link paths are used so nothing depends on the current directory,
# and -f replaces existing links so the script can be re-run.
for tool_name in bzip2 bunzip2 bzcat; do
  sudo ln -sf /usr/bin/lbzip2 "/usr/local/bin/${tool_name}"
done
for tool_name in gzip gunzip gzcat zcat; do
  sudo ln -sf /usr/bin/pigz "/usr/local/bin/${tool_name}"
done
# Get and set up CMake
cd "${HOME}" || exit 1
wget https://github.com/Kitware/CMake/releases/download/v3.15.3/cmake-3.15.3-Linux-x86_64.tar.gz
tar -xzf cmake-3.15.3-Linux-x86_64.tar.gz
# Single quotes on purpose: ${PATH} and ${HOME} must be written literally so
# they are expanded at each login, instead of freezing this shell's current
# PATH into .bashrc.
# shellcheck disable=SC2016
echo 'export PATH=${PATH}:${HOME}/cmake-3.15.3-Linux-x86_64/bin/' >> "${HOME}/.bashrc"
# Put cmake on PATH for the rest of this script directly. Sourcing .bashrc
# from a non-interactive script is unreliable, because stock .bashrc files
# commonly return early when the shell is not interactive.
export PATH="${PATH}:${HOME}/cmake-3.15.3-Linux-x86_64/bin/"
# Install pip
# -f makes curl fail on an HTTP error instead of saving the error page as
# get-pip.py; -L follows redirects.
curl -fL -O https://bootstrap.pypa.io/get-pip.py
# Use python3 to install pip
sudo python3 get-pip.py
# Install kubernetes because dmlc tracker requires it, for both Py2 and 3.
# The PATH assignment must be double-quoted: in single quotes env receives
# the literal text "$PATH", so pip3 would run with a PATH made of that bogus
# entry plus /usr/local/bin only.
sudo env "PATH=${PATH}:/usr/local/bin" pip3 install kubernetes --ignore-installed PyYAML
# NOTE(review): pip2 is not installed by this script and runs here without
# sudo; confirm it is present on the image and that a per-user install is
# what is wanted.
pip2 install kubernetes
# Build xgboost
# Assuming the default shared directory for ParallelCluster
cd /shared/ || exit 1
git clone --recursive https://github.com/thvasilo/xgboost
# -p so that an existing build directory (e.g. on a re-run) is not an error
mkdir -p xgboost/build
# Stop here if the clone failed, rather than running cmake in /shared
cd xgboost/build || exit 1
cmake ..
# One make job per available processing unit
make -j"$(nproc)"
# Get and prepare some test data
mkdir -p /shared/data
# Everything below uses relative file names, so a failed cd must stop the
# script instead of downloading into whatever directory we happen to be in.
cd /shared/data || exit 1

wget https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/rcv1_train.binary.bz2
bunzip2 rcv1_train.binary.bz2
# Fix class indicators: rewrite a leading "+1" label to "1" and a leading
# "-1" label to "0". Remove the source file only if the conversion succeeded.
sed -e 's/^+1/1/' -e 's/^-1/0/' rcv1_train.binary > rcv1 && rm rcv1_train.binary

wget https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/url_combined.bz2
bunzip2 url_combined.bz2
sed -e 's/^+1/1/' -e 's/^-1/0/' url_combined > url && rm url_combined

# These two datasets are only decompressed; their labels are left as-is.
wget https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/avazu-app.val.bz2
bunzip2 avazu-app.val.bz2
wget https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/kdda.t.bz2
bunzip2 kdda.t.bz2
# Number of workers/nodes for the test run; defaults to 3 and can be
# overridden through the NUM_WORKERS environment variable.
num_workers="${NUM_WORKERS:-3}"

# Get hostnames: run `hostname` as one task on each of the nodes and collect
# the output as the host file for the tracker.
srun -n "${num_workers}" -N "${num_workers}" hostname > "${HOME}/hostnames.txt"

# Test run on mushroom
mkdir -p /shared/model
cd /shared/xgboost/demo/distributed-training/ || exit 1
# Make sure the slurm tracker code has been added back to dmlc-core, as in
# this commit: https://github.com/thvasilo/dmlc-core/commit/9af9579ed28fc2d51723c2fe023c8832330e645f
# 'eval[test]=...' is quoted because the square brackets would otherwise
# make the word a glob pattern subject to pathname expansion.
../../dmlc-core/tracker/dmlc-submit \
  --cluster slurm \
  --num-workers "${num_workers}" \
  --worker-memory 8g \
  --worker-cores 1 \
  --host-file "${HOME}/hostnames.txt" \
  ../../xgboost mushroom.aws.conf \
  nthread=1 \
  data=/shared/xgboost/demo/data/agaricus.txt.train \
  'eval[test]=/shared/xgboost/demo/data/agaricus.txt.test' \
  model_dir=/shared/model
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment