-
-
Save mfcabrera/d921b9bcb5789e2d3f50393831f3f7b8 to your computer and use it in GitHub Desktop.
Installing TensorFlow on EC2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Note – this is not a bash script (some of the steps require a reboot).
# I named it .sh just so that GitHub applies the correct syntax highlighting.
# This installs TensorFlow 0.11, CUDA 8.0 and cuDNN 5.0 (for CUDA 8.0).
# The CUDA part is mostly based on this excellent blog post:
# http://tleyden.github.io/blog/2014/10/25/cuda-6-dot-5-on-aws-gpu-instance-running-ubuntu-14-dot-04/
# I extended Erick's instructions using additional steps from http://ramhiser.com/2016/01/05/installing-tensorflow-on-an-aws-ec2-instance-with-gpu-support/
# Install various packages
sudo apt-get update
sudo apt-get upgrade -y # choose “install package maintainers version”
# Build tools, Python scientific stack, BLAS/LAPACK libraries and the kernel
# header/extra packages. The list is the de-duplicated union of the two
# install commands this step used to run.
sudo apt-get install -y \
  build-essential default-jdk g++ gfortran git \
  libatlas-base-dev libblas-dev libfreetype6-dev liblapack-dev \
  libncurses-dev libopenblas-dev libxft-dev \
  linux-headers-generic linux-image-extra-virtual \
  pkg-config python-dev python-matplotlib python-numpy python-pandas \
  python-pip python-pydot python-sklearn \
  swig unzip wget zip zlib1g-dev
# Blacklist nouveau, which has some kind of conflict with the NVIDIA driver.
# printf is used instead of `echo -e` because echo's escape handling is not
# portable; each argument becomes one line of the config file.
printf '%s\n' \
  'blacklist nouveau' \
  'blacklist lbm-nouveau' \
  'options nouveau modeset=0' \
  'alias nouveau off' \
  'alias lbm-nouveau off' \
  | sudo tee /etc/modprobe.d/blacklist-nouveau.conf
echo options nouveau modeset=0 | sudo tee -a /etc/modprobe.d/nouveau-kms.conf
# Rebuild the initramfs after changing the modprobe configuration.
sudo update-initramfs -u
sudo reboot # Reboot (annoying you have to do this in 2015!)
# Some other annoying thing we have to do
# sudo apt-get install -y linux-image-extra-virtual
# sudo reboot # Not sure why this is needed
# Install the Linux source and the headers matching the running kernel
# (uname -r prints the running kernel's release string).
sudo apt-get install -y linux-source "linux-headers-$(uname -r)"
# Install CUDA 8.0 (note – don't use any other version)
# -p makes this step repeatable if the directory already exists.
mkdir -p packages
cd packages
wget https://developer.nvidia.com/compute/cuda/8.0/prod/local_installers/cuda-repo-ubuntu1604-8-0-local_8.0.44-1_amd64-deb
sudo dpkg -i cuda-repo-ubuntu1604-8-0-local_8.0.44-1_amd64-deb
# The package is installed at this point, so delete the download to save
# disk space. The name must match the wget target exactly.
rm cuda-repo-ubuntu1604-8-0-local_8.0.44-1_amd64-deb
sudo apt-get update
sudo apt-get install -y cuda
# Older runfile-based steps for CUDA 7.0, left disabled:
# chmod +x cuda_7.0.28_linux.run
# ./cuda_7.0.28_linux.run -extract=`pwd`/nvidia_installers
# cd nvidia_installers
# sudo ./NVIDIA-Linux-x86_64-346.46.run
# sudo modprobe nvidia
# sudo ./cuda-linux64-rel-7.0.28-19326674.run
# Return to the home directory for the next step.
cd
# After filling out an annoying questionnaire, you’ll download a file named cudnn-8.0-linux-x64-v5.0-ga.tgz. You need to transfer it to your EC2 instance: I did this by adding it to my Dropbox folder and using wget on the instance to download it. Once it is in your home directory, run the following:
# Install cuDNN for CUDA 8.0
tar -vxzf cudnn-8.0-linux-x64-v5.0-ga.tgz
# -P preserves any symlinks among the libcudnn* files instead of copying the
# link target once per name.
sudo cp -P cuda/lib64/libcudnn* /usr/local/cuda/lib64
sudo cp cuda/include/cudnn.h /usr/local/cuda/include/
# Next up, we’ll add some environment variables. You may wish to add these to your ~/.bashrc.
export CUDA_HOME=/usr/local/cuda
export CUDA_ROOT=/usr/local/cuda
export PATH="$PATH:$CUDA_ROOT/bin"
# Only prepend the old value (plus ':') when LD_LIBRARY_PATH is already set and
# non-empty. A bare leading ':' would add an empty entry, which the dynamic
# loader treats as the current working directory.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}$CUDA_ROOT/lib64"
# 1. Install JDK 8
# Note: if you don't have the add-apt-repository command, install it first:
#   sudo apt-get install -y software-properties-common
sudo add-apt-repository -y ppa:webupd8team/java
sudo apt-get update
# Hack to silently agree to the license agreement: pre-seed the debconf
# answers so the installer does not prompt.
echo debconf shared/accepted-oracle-license-v1-1 select true | sudo debconf-set-selections
echo debconf shared/accepted-oracle-license-v1-1 seen true | sudo debconf-set-selections
sudo apt-get install -y oracle-java8-installer
# Left disabled:
# sudo apt-get install openjdk-8-jdk
# Install all other required packages (-y so apt does not stop to ask)
sudo apt-get install -y pkg-config zip g++ zlib1g-dev unzip
# At this point the root mount is getting a bit full
# I had a lot of issues where the disk would fill up and then Bazel would end up in this weird state complaining about random things
# Make sure you don't run out of disk space when building Tensorflow!
# -p makes this step repeatable if /mnt/tmp already exists.
sudo mkdir -p /mnt/tmp
# Mode 1777 = world-writable plus the sticky bit, so users can only delete
# their own files in the directory that is about to become /tmp.
sudo chmod 1777 /mnt/tmp
# Replace /tmp with a symlink to the directory on /mnt.
sudo rm -rf /tmp
sudo ln -s /mnt/tmp /tmp
# Note that /mnt is not saved when building an AMI, so don't put anything crucial on it
# Install Bazel 0.4.0 from the release installer script
cd /tmp
wget https://github.com/bazelbuild/bazel/releases/download/0.4.0/bazel-0.4.0-jdk7-installer-linux-x86_64.sh
# Run the downloaded installer as root.
sudo bash bazel-0.4.0-jdk7-installer-linux-x86_64.sh
# Install TensorFlow (built from source at the v0.11.0 tag)
sudo apt-get install -y pkg-config zip g++ zlib1g-dev
# Work in /mnt/tmp. /tmp is a symlink to it, so the repository is cloned
# exactly once here; a second clone into the same directory would fail.
cd /mnt/tmp
# Append the CUDA libraries without creating an empty (= current directory)
# entry when LD_LIBRARY_PATH was previously unset.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}/usr/local/cuda/lib64"
export CUDA_HOME=/usr/local/cuda
git clone --recurse-submodules https://github.com/tensorflow/tensorflow
cd tensorflow
# Check out the release tag inside the clone; the wheel filename used in the
# pip install below is for 0.11.0.
git checkout tags/v0.11.0
# Patch to support older K520 devices on AWS
# wget "https://gist.githubusercontent.com/infojunkie/cb6d1a4e8bf674c6e38e/raw/5e01e5b2b1f7afd3def83810f8373fbcf6e47e02/cuda_30.patch"
# git apply cuda_30.patch
# According to https://github.com/tensorflow/tensorflow/issues/25#issuecomment-156234658 this patch is no longer needed
# Instead, you need to run ./configure like below (not tested yet)
TF_UNOFFICIAL_SETTING=1 ./configure
# When ./configure asks:
#   Please specify a list of comma-separated Cuda compute capabilities you want to build with.
#   You can find the compute capability of your device at: https://developer.nvidia.com/cuda-gpus.
#   Please note that each additional compute capability significantly increases your build time and binary size.
#   [Default is: "3.5,5.2"]: 3.0
# answer 3.0, as shown on the last line.
bazel build -c opt --config=cuda //tensorflow/cc:tutorials_example_trainer
bazel build -c opt --config=cuda //tensorflow/tools/pip_package:build_pip_package
# Write the pip wheel to /tmp/tensorflow_pkg, then install it.
bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
sudo pip install --upgrade /tmp/tensorflow_pkg/tensorflow-0.11.0-cp27-cp27mu-linux_x86_64.whl
# Test it in a Python shell:
# import tensorflow as tf
# tf_session = tf.Session()
# x = tf.constant(1)
# y = tf.constant(1)
# tf_session.run(x + y)
# Test it with CIFAR
# The repository was cloned into /mnt/tmp/tensorflow, so absolute paths are
# used here to work from any current directory.
cd /mnt/tmp/tensorflow/tensorflow/models/image/cifar10/
python cifar10_multi_gpu_train.py
# You can also check that TensorFlow is working by training a CNN on the MNIST data set.
python /mnt/tmp/tensorflow/tensorflow/models/image/mnist/convolutional.py
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment