Google Cloud GPU Setup: Instructions & bash-script
#!/bin/bash
cd ~/
### CUDA
echo "\n\nChecking for CUDA and installing."
if ! dpkg-query -W cuda; then
    curl -O http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/cuda-repo-ubuntu1604_8.0.61-1_amd64.deb
    sudo dpkg -i ./cuda-repo-ubuntu1604_8.0.61-1_amd64.deb
    sudo apt-get update
    sudo apt-get install cuda -y
fi
### Anaconda
echo "\n\nAnaconda Install: Interactive"
wget https://repo.continuum.io/archive/Anaconda3-4.3.0-Linux-x86_64.sh
bash Anaconda3-4.3.0-Linux-x86_64.sh
### bashrc
mv ~/.bashrc ~/.bashrc.bak
echo "
#
# ADDITIONAL BASH
#
alias vibash='vi ~/.bashrc'
alias sourcebash='. ~/.bashrc'
lsports(){ sudo lsof -n -i | grep $1 | grep LISTEN ; }
#### EXPORTS/DIRS
cdusf(){ cd $HOME/usf/$1; }
# config
export DATA=$HOME/data
export WEIGHTS=$HOME/weights
cddata(){ cd $DATA/$1; }
cdweights(){ cd $WEIGHTS/$1; }
# cuda
export CUDA_HOME=/usr/local/cuda-8.0
export LD_LIBRARY_PATH=$CUDA_HOME/lib64
PATH=$CUDA_HOME/bin:$PATH
export PATH
#### HELPERS
# gcloud
alias gconfig='gcloud config configurations activate'
alias gssh='gcloud compute ssh'
# files
alias sampletree='mkdir -p sample/{train,test,valid}'
lsn(){ matchdir=`pwd`/$2; find $matchdir -type f | grep -v sample | shuf -n $1 | awk -F`pwd` '{print "."$NF}' ; }
cpn(){ matchdir=`pwd`/$2; find $matchdir -type f | grep -v sample | shuf -n $1 | awk -F`pwd` '{print "."$NF" sample"$NF}' | xargs -t -n2 cp ; }
mvn(){ matchdir=`pwd`/$2; todir=`pwd`/$3; find $matchdir -type f | grep -v sample | shuf -n $1 | awk -F`pwd` -v todir="$todir" '{print $0" "todir}' | xargs -t -n2 mv ; }
cpnh(){ matchdir=`pwd`/$2; find $matchdir -type f | grep -v sample | head -n $1 | awk -F`pwd` '{print "."$NF" sample"$NF}' | xargs -t -n2 cp ; }
mvnh(){ matchdir=`pwd`/$2; todir=`pwd`/$3; find $matchdir -type f | grep -v sample | head -n $1 | awk -F`pwd` -v todir="$todir" '{print $0" "todir}' | xargs -t -n2 mv ; }
cpnt(){ matchdir=`pwd`/$2; find $matchdir -type f | grep -v sample | tail -n $1 | awk -F`pwd` '{print "."$NF" sample"$NF}' | xargs -t -n2 cp ; }
mvnt(){ matchdir=`pwd`/$2; todir=`pwd`/$3; find $matchdir -type f | grep -v sample | tail -n $1 | awk -F`pwd` -v todir="$todir" '{print $0" "todir}' | xargs -t -n2 mv ; }
# anaconda
alias sd='source deactivate'
sa(){ source activate $1; }
alias jnb='jupyter notebook --ip=0.0.0.0 --port=8888 --no-browser &'
# keras
kerastf() {
rm -f ~/.keras/keras.json
cp ~/.keras/keras.json.tensor ~/.keras/keras.json
cat ~/.keras/keras.json
}
kerasth() {
rm -f ~/.keras/keras.json
cp ~/.keras/keras.json.theano ~/.keras/keras.json
cat ~/.keras/keras.json
}
kerastfth() {
rm -f ~/.keras/keras.json
cp ~/.keras/keras.json.tensorth ~/.keras/keras.json
cat ~/.keras/keras.json
}
#
# INITIAL BASH
#
" > ~/.bashrc
cat ~/.bashrc.bak >> ~/.bashrc
source ~/.bashrc
### DIRECTORIES
mkdir -p "$DATA"
mkdir -p "$WEIGHTS"
### ML
conda install -y pytorch torchvision cuda80 -c soumith
### THEANO
conda install -y theano
echo '
[cuda]
root = /usr/local/cuda-8.0
' >> ~/.theanorc
### Tensorflow
echo "\n\nTensorFlow Install:"
TF=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.0.0-cp36-cp36m-linux_x86_64.whl
pip install --ignore-installed --upgrade $TF
### KERAS
pip install keras
# ensure the config dir exists before writing the backend configs
mkdir -p ~/.keras
# keras tensorflow setup
echo '{
"image_dim_ordering": "tf",
"epsilon": 1e-07,
"floatx": "float32",
"backend": "tensorflow"
}' > ~/.keras/keras.json.tensor
# keras theano setup
echo '{
"image_dim_ordering": "th",
"epsilon": 1e-07,
"floatx": "float32",
"backend": "theano"
}' > ~/.keras/keras.json.theano
# keras tensorflow-th setup
echo '{
"image_dim_ordering": "th",
"epsilon": 1e-07,
"floatx": "float32",
"backend": "tensorflow"
}' > ~/.keras/keras.json.tensorth
### TOOLS
sudo apt-get install -y unzip
sudo apt-get install -y tree
pip install kaggle-cli
conda install libgcc
pip install bcolz

GCLOUD GPU

The number of virtual CPUs and the storage size have little effect on cost. However, prices increase dramatically with the number of GPUs (a quick way to check what a zone offers is shown after the list).

  • n1-standard-8: 8 virtual CPUs and 30 GB of memory.
  • count=4,2,1: # of GPUs
  • boot-disk-size: 200GB
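To see which GPU types a zone actually offers before picking one of the configurations below, the accelerator list can be queried (standard gcloud command; the zone matches the scripts here):

gcloud compute accelerator-types list --filter="zone:us-east1-d"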
NOTE ON SNAPSHOTS

If creating from a snapshot, you must first create the disk:

gcloud compute disks create DISK_NAME --source-snapshot SNAPSHOT_NAME

and then add the --disk name=... option to the scripts below:

    --disk name=[DISK_NAME],boot=yes

Note that you must also remove --boot-disk-size, since the size is determined by the snapshot. The CPU script below shows an example of creating an instance from a snapshot.
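For reference, the snapshot itself can be created from an existing boot disk like so (a hedged sketch; the disk and snapshot names match the CPU example below):

gcloud compute disks snapshot gpu-84 --snapshot-names gpu-84-20170428 --zone us-east1-d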

4 GPU ( $3.08 / hour !!! )
gcloud beta compute instances create gpu-84 \
    --machine-type n1-standard-8 --zone us-east1-d \
    --accelerator type=nvidia-tesla-k80,count=4 \
    --image-family ubuntu-1604-lts --image-project ubuntu-os-cloud \
    --maintenance-policy TERMINATE --restart-on-failure \
    --boot-disk-size 200GB
2 GPU ( $1.68 / hour !!! )
gcloud beta compute instances create gpu-82 \
    --machine-type n1-standard-8 --zone us-east1-d \
    --accelerator type=nvidia-tesla-k80,count=2 \
    --image-family ubuntu-1604-lts --image-project ubuntu-os-cloud \
    --maintenance-policy TERMINATE --restart-on-failure \
    --boot-disk-size 200GB
1 GPU ( $0.98 / hour !!! )
gcloud beta compute instances create gpu-81 \
    --machine-type n1-standard-8 --zone us-east1-d \
    --accelerator type=nvidia-tesla-k80,count=1 \
    --image-family ubuntu-1604-lts --image-project ubuntu-os-cloud \
    --maintenance-policy TERMINATE --restart-on-failure \
    --boot-disk-size 200GB
CHEAP DEV CPU (from snapshot | $0.28 / hour)

NOTE: This script boots from a GPU disk snapshot, so after creating the instance you'll need to remove the GPU dependencies (steps below).

# create disk from gpu-84
gcloud compute disks create cpu8source --source-snapshot gpu-84-20170428

# create instance
gcloud compute instances create cpu-8 \
    --machine-type n1-standard-8 --zone us-east1-d \
    --disk name=cpu8source,boot=yes \
    --maintenance-policy TERMINATE --restart-on-failure
# (no --image-family/--boot-disk-size here: the boot disk supplies the image and size)
# remove cuda
rm -rf ~/cuda
rm -rf NVIDIA_CUDA-8.0_Samples
sudo rm -rf /usr/local/cuda
conda uninstall cuda80

# tensorflow
TF=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.1.0-cp36-cp36m-linux_x86_64.whl
pip install --ignore-installed --upgrade $TF

# theano: update ~/.theanorc
# - in [global], set:
#     device = cpu
# - remove the [cuda] section

# .bashrc -- remove from PATH
# export CUDA_HOME=/usr/local/cuda-8.0 
# export LD_LIBRARY_PATH=$CUDA_HOME/lib64
# PATH=$CUDA_HOME/bin:$PATH 
# export PATH
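A sketch of making the .bashrc edit non-interactively (assumes the lines appear exactly as written by the setup script; GNU sed on Ubuntu, writes a .bak backup):

# comment out the cuda lines added by the setup script
sed -i.bak -e 's|^export CUDA_HOME=/usr/local/cuda-8.0|# &|' \
           -e 's|^export LD_LIBRARY_PATH=\$CUDA_HOME/lib64|# &|' \
           -e 's|^PATH=\$CUDA_HOME/bin:\$PATH|# &|' ~/.bashrc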

   

INSTANCE SETUP

gcloud-gpu-setup.sh

# copy the setup script to the instance
gcloud compute copy-files gcloud-gpu-setup.sh gpu-84:~/

# INSTALL PIP
sudo apt-get install -y python-pip

# ssh into the instance and execute (** the Anaconda install is interactive **)
. gcloud-gpu-setup.sh
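For reference, the ssh step uses the zone from the create commands above (or the gssh alias once .bashrc is in place):

gcloud compute ssh gpu-84 --zone us-east1-d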
CUDNN

Locally, download cuDNN from https://developer.nvidia.com/rdp/cudnn-download, then upload the contents to the instance. I first tried to upload the tar file, but there was an error when unpacking, so I uploaded the untarred directory:

# gcloud instance
cuda-install-samples-8.0.sh ~
mkdir ~/cuda

# local machine 
gcloud compute copy-files ~/Downloads/cuda/* gpu-84:~/cuda

Now move the files to the appropriate /usr/local/cuda locations:

sudo mv cuda/lib64/* /usr/local/cuda/lib64/
sudo mv cuda/include/* /usr/local/cuda/include/
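After moving the files, refreshing the dynamic linker cache helps the new libraries get picked up (a standard Ubuntu step, not in the original notes):

sudo ldconfig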
INSTALL CHECKS

# check cuda install
pushd NVIDIA_CUDA-8.0_Samples/1_Utilities/deviceQuery
make
./deviceQuery 
popd

# check TF install
python -c "import tensorflow as tf;print(tf.Session().run(tf.constant('Hi TF')))"
EXTERNAL IP

POSSIBLE CHANGES:

  • keep the boot disk when the instance is deleted (see the sketch below)
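A sketch of both ideas, using real gcloud flags but untested in these notes: reserve a static external IP, and keep the boot disk on delete.

# reserve a static external IP (region must match the instance's zone)
gcloud compute addresses create gpu-84-ip --region us-east1
# attach it at create time with: --address gpu-84-ip
# keep the boot disk when deleting the instance
gcloud compute instances delete gpu-84 --keep-disks boot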
JUPYTER
# set password  *** copy the sha output ***
# (the next two lines run in a python shell)
from notebook.auth import passwd
passwd()
# back in bash
jupyter notebook --generate-config

vi ~/.jupyter/jupyter_notebook_config.py
# uncomment/update the line
# c.NotebookApp.password = 'sha1...'
# full cmd
jupyter notebook --ip=0.0.0.0 --port=8888 --no-browser &

# alias set in .bashrc by gcloud-gpu-setup.sh
jnb
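The hash can also be generated without opening a python shell (same notebook.auth call wrapped in python -c; it still prompts for the password):

python -c "from notebook.auth import passwd; print(passwd())"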
GIT (password cached for 12 hours)
git config --global credential.helper 'cache --timeout=43200'
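To confirm the helper took effect:

git config --global --get credential.helper
# -> cache --timeout=43200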
TMUX
https://github.com/brookisme/tmux-setup
GDAL
...
PY2
conda create -y -n py2 python=2 anaconda
sa py2
python -c "print 'i am python 2'"
# tensorflow
TF=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.0.1-cp27-none-linux_x86_64.whl
pip install --ignore-installed --upgrade $TF
# keras 1 ( to be consistent with usf notebooks )
pip install 'keras<2' 
# checks
python -c "import keras; import tensorflow as tf;print(tf.Session().run(tf.constant('Hi TF')))"
# other
conda install libgcc
pip install bcolz
SUBLIME

NOTE: If you don't reserve a static IP, you'll have to update the IP address each time. With a new IP, first log in through the gcloud CLI to accept the key.
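To grab the instance's current external IP for the config below (a standard gcloud format string):

gcloud compute instances describe gpu-84 \
    --zone us-east1-d \
    --format='get(networkInterfaces[0].accessConfigs[0].natIP)'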

# example sftp-config.json
{
    "type": "sftp",
    "sync_down_on_open": false,
    "upload_on_save":true,
    "host": "35.185.66.147",
    "remote_path": "/home/brook/play/dvc1",
    "user": "brook",
    "port":22,
    "ignore_regexes":[
        "/data/","/.kaggle-cli/",
        "\\.sublime-(project|workspace)", "sftp-config(-alt\\d?)?\\.json", "sftp-settings\\.json", "/venv/", "\\.svn", "\\.hg", "\\.git", "\\.bzr", "_darcs", "CVS", "\\.DS_Store", "Thumbs\\.db", "desktop\\.ini"],
    "connect_timeout": 30,
    "ssh_key_file": "~/.ssh/google_compute_engine"
}