TensorFlow Installation Log
# Note – this is not a bash script (some of the steps require reboot)
# I named it .sh just so GitHub does correct syntax highlighting.
#
# This is also available as an AMI in us-east-1 (virginia): ami-cf5028a5
#
# The CUDA part is mostly based on this excellent blog post:
# http://tleyden.github.io/blog/2014/10/25/cuda-6-dot-5-on-aws-gpu-instance-running-ubuntu-14-dot-04/
# Install various packages
sudo apt-get update
sudo apt-get upgrade -y # choose "install the package maintainer's version"
sudo apt-get install -y build-essential python-pip python-dev git python-numpy swig default-jdk zip zlib1g-dev
export LC_ALL="en_US.UTF-8"
export LC_CTYPE="en_US.UTF-8"
sudo dpkg-reconfigure locales
pip install --upgrade pip
# Blacklist Nouveau, which has some kind of conflict with the nvidia driver
echo -e "blacklist nouveau\nblacklist lbm-nouveau\noptions nouveau modeset=0\nalias nouveau off\nalias lbm-nouveau off\n" | sudo tee /etc/modprobe.d/blacklist-nouveau.conf
echo options nouveau modeset=0 | sudo tee -a /etc/modprobe.d/nouveau-kms.conf
sudo update-initramfs -u
sudo reboot # Reboot (annoying you have to do this in 2015!)
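# Optional check after the reboot: nouveau should no longer be loaded (this should print nothing)
lsmod | grep nouveau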
# Some other annoying thing we have to do
sudo apt-get install -y linux-image-extra-virtual
sudo reboot # Not sure why this is needed
# Install latest Linux headers
sudo apt-get install -y linux-source linux-headers-`uname -r`
# The NVIDIA driver installer may complain: "please install the `pkg-config` utility and the X.Org SDK/development package"
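# Installing these packages should satisfy that requirement (an assumed fix, not one of the original steps):
sudo apt-get install -y pkg-config xserver-xorg-dev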
# Install CUDA 8.0
wget https://developer.nvidia.com/compute/cuda/8.0/prod/local_installers/cuda_8.0.44_linux-run
chmod +x cuda_8.0.44_linux-run
./cuda_8.0.44_linux-run -extract=`pwd`/nvidia_installers
cd nvidia_installers
sudo ./NVIDIA-Linux-x86_64-367.48.run
sudo modprobe nvidia
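# Optional check: the driver should now be loaded and able to see the GPU(s)
nvidia-smi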
sudo ./cuda-linux64-rel-8.0.44-21122537.run
sudo ./cuda-samples-linux-8.0.44-21122537.run
# Verify the install under /usr/local (there should be a cuda-8.0 directory)
# ADD the following lines to ~/.bashrc
```
# CUDA Toolkit
export CUDA_HOME=/usr/local/cuda-8.0
export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:$LD_LIBRARY_PATH
export PATH=${CUDA_HOME}/bin:${PATH}
```
source ~/.bashrc
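# Optional check: confirm the toolkit is on the PATH, and (assuming the samples installer used
# its default target of ~/NVIDIA_CUDA-8.0_Samples) build and run deviceQuery
nvcc --version
# cd ~/NVIDIA_CUDA-8.0_Samples/1_Utilities/deviceQuery && make && ./deviceQuery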
# Install CUDNN 5.1 for CUDA 8.0
# YOU NEED TO SCP THIS ONE FROM SOMEWHERE ELSE – it's not available online.
# You need to register and get approved to get a download link. Very annoying.
# download cudnn 5.1 for cuda 8.0 at https://developer.nvidia.com/rdp/cudnn-download
# From your local machine, copy the archive onto the instance:
scp -i tokyo_test.pem cudnn-8.0-linux-x64-v5.1.tgz ubuntu@<instance-public-ip>:~
cd ~
tar -xzf cudnn-8.0-linux-x64-v5.1.tgz
cd cuda
sudo cp lib64/* /usr/local/cuda/lib64/
sudo cp include/* /usr/local/cuda/include/
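# Optional check: the cuDNN header should now report major version 5, minor version 1
grep -A 2 "#define CUDNN_MAJOR" /usr/local/cuda/include/cudnn.h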
## Cleanup
cd ~
rm -rf cuda nvidia_installers
rm -f cuda_8.0.44_linux-run cudnn-8.0-linux-x64-v5.1.tgz
# At this point the root mount is getting a bit full
# I had a lot of issues where the disk would fill up and then Bazel would end up in a weird state complaining about random things
# Make sure you don't run out of disk space when building TensorFlow!
sudo mkdir /mnt/tmp
sudo chmod 777 /mnt/tmp
sudo rm -rf /tmp
sudo ln -s /mnt/tmp /tmp
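# Optional check: /tmp should now be a symlink onto the (much larger) instance-store volume
ls -ld /tmp
df -h /mnt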
# Note that /mnt is not saved when building an AMI, so don't put anything crucial on it
# Install Bazel
cd /mnt/tmp
git clone https://github.com/bazelbuild/bazel.git
cd bazel
./compile.sh
sudo cp output/bazel /usr/bin
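# Optional check: make sure the freshly built Bazel is the one being picked up
bazel version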
# Install TensorFlow
cd /mnt/tmp
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64"
export CUDA_HOME=/usr/local/cuda
git clone --recurse-submodules https://github.com/tensorflow/tensorflow
cd tensorflow
# Patch to support older K520 devices on AWS
# wget "https://gist.githubusercontent.com/infojunkie/cb6d1a4e8bf674c6e38e/raw/5e01e5b2b1f7afd3def83810f8373fbcf6e47e02/cuda_30.patch"
# git apply cuda_30.patch
# According to https://github.com/tensorflow/tensorflow/issues/25#issuecomment-156234658 this patch is no longer needed
# Instead, you need to run ./configure like below (not tested yet)
TF_UNOFFICIAL_SETTING=1 ./configure
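# A rough sketch of the answers the interactive ./configure prompts expect for this setup
# (a guide, not an exhaustive list of prompts):
#   - build with GPU (CUDA) support:  y
#   - CUDA toolkit path / version:    /usr/local/cuda, 8.0
#   - cuDNN version / path:           5, /usr/local/cuda
#   - CUDA compute capability:        3.0 (the K520 on g2 instances is compute capability 3.0,
#                                     which is what TF_UNOFFICIAL_SETTING=1 is for)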
bazel build -c opt --config=cuda //tensorflow/cc:tutorials_example_trainer
# Build Python package
bazel build -c opt --config=cuda //tensorflow/tools/pip_package:build_pip_package
bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
# Install the pre-built 0.11.0rc2 GPU wheel (or the wheel you just built – commented out below)
export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc2-cp27-none-linux_x86_64.whl
sudo pip install --upgrade $TF_BINARY_URL
#sudo pip install /tmp/tensorflow_pkg/tensorflow-0.11.0-cp27-none-linux_x86_64.whl
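# Quick smoke test of the installed wheel:
python -c "import tensorflow as tf; print(tf.__version__)"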
# Test it!
cd ~
git clone https://gist.github.com/j-min/baae1aa56e861cab9831b3722755ae6d
python baae1aa56e861cab9831b3722755ae6d/test_gpu.py
# On a g2.2xlarge: step 100, loss = 4.50 (325.2 examples/sec; 0.394 sec/batch)
# On a g2.8xlarge: step 100, loss = 4.49 (337.9 examples/sec; 0.379 sec/batch)
# doesn't seem like it is able to use the 4 GPU cards unfortunately :(
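# In case the gist above is unavailable, a minimal stand-in GPU check (not the same script as
# test_gpu.py, just a sketch using log_device_placement to confirm ops land on the GPU):
python - <<'EOF'
import tensorflow as tf
a = tf.constant([[1.0, 2.0], [3.0, 4.0]])
b = tf.constant([[1.0, 1.0], [0.0, 1.0]])
c = tf.matmul(a, b)
# The device placement log printed to stderr should show MatMul assigned to gpu:0
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    print(sess.run(c))
EOF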