mfcabrera · November 20, 2016 20:20
diff --git a/install-tensorflow.sh b/install-tensorflow.sh
 # Note – this is not a bash script (some of the steps require reboot)
 # I named it .sh just so Github does correct syntax highlighting.
 # This installs TensorFlow 0.11, CUDA 8.0 and cudnn-8.0
 # The CUDA part is mostly based on this excellent blog post:
 # http://tleyden.github.io/blog/2014/10/25/cuda-6-dot-5-on-aws-gpu-instance-running-ubuntu-14-dot-04/
 # I extended Erick's version using additional instructions from http://ramhiser.com/2016/01/05/installing-tensorflow-on-an-aws-ec2-instance-with-gpu-support/

 # Refresh the package index, upgrade the system, then pull in the build
 # toolchain and Python packages used by the later steps.
 sudo apt-get update
 # If the upgrade asks about a modified config file, choose
 # "install package maintainers version".
 sudo apt-get upgrade -y
 # Compiler toolchain, Python headers, git, swig and a JDK.
 sudo apt-get install -y \
   build-essential python-pip python-dev git python-numpy swig python-dev \
   default-jdk zip zlib1g-dev

 # Numeric/plotting libraries, kernel headers and archive utilities.
 sudo apt-get install -y \
   build-essential git python-pip libfreetype6-dev libxft-dev libncurses-dev \
   libopenblas-dev gfortran python-matplotlib libblas-dev liblapack-dev \
   libatlas-base-dev python-dev python-pydot linux-headers-generic \
   linux-image-extra-virtual unzip python-numpy swig python-pandas \
   python-sklearn unzip wget pkg-config zip g++ zlib1g-dev


 # Nouveau conflicts with the NVIDIA driver, so blacklist it and switch off
 # its kernel mode-setting. The trailing empty argument reproduces the blank
 # line at the end of the generated file.
 printf '%s\n' \
   'blacklist nouveau' \
   'blacklist lbm-nouveau' \
   'options nouveau modeset=0' \
   'alias nouveau off' \
   'alias lbm-nouveau off' \
   '' |
   sudo tee /etc/modprobe.d/blacklist-nouveau.conf
 printf '%s\n' 'options nouveau modeset=0' | sudo tee -a /etc/modprobe.d/nouveau-kms.conf
 # Regenerate the initramfs, then reboot (annoying you have to do this in 2015!).
 # Continue with the next section once the machine is back up.
 sudo update-initramfs -u
 sudo reboot

 # Earlier workaround, kept for reference (no longer run):
 # sudo apt-get install -y linux-image-extra-virtual
 # sudo reboot # Not sure why this is needed

 # Kernel sources plus the headers that match the currently running kernel.
 sudo apt-get install -y linux-source "linux-headers-$(uname -r)"

 # Install CUDA 8.0 (note – don't use any other version)
 # Work inside ./packages; -p makes re-running this section harmless.
 mkdir -p packages && cd packages
 wget https://developer.nvidia.com/compute/cuda/8.0/prod/local_installers/cuda-repo-ubuntu1604-8-0-local_8.0.44-1_amd64-deb
 sudo dpkg -i cuda-repo-ubuntu1604-8-0-local_8.0.44-1_amd64-deb
 # The downloaded package is no longer needed once dpkg has installed it.
 # (The file name really ends in "-deb", exactly as served by the URL above.)
 rm cuda-repo-ubuntu1604-8-0-local_8.0.44-1_amd64-deb
 sudo apt-get update
 sudo apt-get install -y cuda

 # Old CUDA 7.0 runfile-based procedure, kept for reference only:
 # chmod +x cuda_7.0.28_linux.run
 # ./cuda_7.0.28_linux.run -extract=`pwd`/nvidia_installers
 # cd nvidia_installers
 # sudo ./NVIDIA-Linux-x86_64-346.46.run
 # sudo modprobe nvidia
 # sudo ./cuda-linux64-rel-7.0.28-19326674.run

 # Back to the home directory for the next section.
 cd

 # After filling out an annoying questionnaire on NVIDIA's site, you'll download the cuDNN archive
 # (the commands below expect cudnn-8.0-linux-x64-v5.0-ga.tgz). You need to transfer it to your EC2
 # instance: I did this by adding it to my Dropbox folder and using wget to fetch it from there.
 # Once the archive is in your home directory, run the following:
 # Install cuDNN for CUDA 8.0
 tar -vxzf cudnn-8.0-linux-x64-v5.0-ga.tgz
 # -P copies any symlinks in the archive as symlinks instead of following them.
 sudo cp -P cuda/lib64/libcudnn* /usr/local/cuda/lib64
 sudo cp cuda/include/cudnn.h /usr/local/cuda/include/
 # Make the copied header and libraries readable by all users.
 sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn*

 # Next up, we'll add some environment variables. You may wish to add these to your ~/.bashrc.
 export CUDA_HOME=/usr/local/cuda
 export CUDA_ROOT=/usr/local/cuda
 export PATH="${PATH}:${CUDA_ROOT}/bin"
 # Only prepend the old value (and its ':') when it is non-empty: a leading
 # ':' would create an empty entry, which the dynamic loader treats as the
 # current working directory.
 export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}${CUDA_ROOT}/lib64"

 # 1. Install JDK 8 (Oracle installer from the webupd8team PPA)
 # Note: if the add-apt-repository command is missing, first run
 #   sudo apt-get install -y software-properties-common
 sudo add-apt-repository -y ppa:webupd8team/java
 sudo apt-get update
 # Hack to silently accept the license agreement: pre-seed the debconf answers
 echo debconf shared/accepted-oracle-license-v1-1 select true | sudo debconf-set-selections
 echo debconf shared/accepted-oracle-license-v1-1 seen true | sudo debconf-set-selections
 sudo apt-get install -y oracle-java8-installer

 # Alternative (not used here): sudo apt-get install openjdk-8-jdk

 # Install all other required packages (-y so it does not stop for confirmation,
 # matching the other install commands)
 sudo apt-get install -y pkg-config zip g++ zlib1g-dev unzip



 # At this point the root mount is getting a bit full.
 # I had a lot of issues where the disk would fill up and then Bazel would end up in a weird state,
 # complaining about random things. Make sure you don't run out of disk space when building TensorFlow!
 # So /tmp is moved onto /mnt and replaced by a symlink.
 # WARNING: this deletes everything currently in /tmp.
 sudo mkdir -p /mnt/tmp
 # 1777 = world-writable plus the sticky bit, the usual mode for a shared temp
 # directory (users cannot delete each other's files).
 sudo chmod 1777 /mnt/tmp
 sudo rm -rf /tmp
 sudo ln -s /mnt/tmp /tmp
 # Note that /mnt is not saved when building an AMI, so don't put anything crucial on it

 # Download the Bazel 0.4.0 installer into /tmp and run it as root.
 cd /tmp
 bazel_installer=bazel-0.4.0-jdk7-installer-linux-x86_64.sh
 wget "https://github.com/bazelbuild/bazel/releases/download/0.4.0/${bazel_installer}"
 sudo bash "${bazel_installer}"

 # Build and install TensorFlow 0.11.0 from source
 sudo apt-get install -y pkg-config zip g++ zlib1g-dev
 # Append the CUDA libraries without creating an empty (= current directory)
 # entry when LD_LIBRARY_PATH was previously unset.
 export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/cuda/lib64"
 export CUDA_HOME=/usr/local/cuda

 # Clone once, directly at the v0.11.0 tag, and work inside the checkout.
 cd /mnt/tmp
 git clone --recurse-submodules --branch v0.11.0 https://github.com/tensorflow/tensorflow
 cd tensorflow

 # Patch to support older K520 devices on AWS
 # wget "https://gist.githubusercontent.com/infojunkie/cb6d1a4e8bf674c6e38e/raw/5e01e5b2b1f7afd3def83810f8373fbcf6e47e02/cuda_30.patch"
 # git apply cuda_30.patch
 # According to https://github.com/tensorflow/tensorflow/issues/25#issuecomment-156234658 this patch is no longer needed
 # Instead, you need to run ./configure like below (not tested yet)
 TF_UNOFFICIAL_SETTING=1 ./configure

 # When configure asks for the compute capabilities, answer 3.0:
 # Please specify a list of comma-separated Cuda compute capabilities you want to build with.
 # You can find the compute capability of your device at: https://developer.nvidia.com/cuda-gpus.
 # Please note that each additional compute capability significantly increases your build time and binary size.
 # [Default is: "3.5,5.2"]: 3.0

 bazel build -c opt --config=cuda //tensorflow/cc:tutorials_example_trainer
 bazel build -c opt --config=cuda //tensorflow/tools/pip_package:build_pip_package
 bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
 # Glob on the wheel name so the install does not depend on the exact Python ABI tag.
 sudo pip install --upgrade /tmp/tensorflow_pkg/tensorflow-0.11.0-*.whl

 # Quick check in a Python shell:
 # import tensorflow as tf
 # tf_session = tf.Session()
 # x = tf.constant(1)
 # y = tf.constant(1)
 # tf_session.run(x + y)

 # Test it with CIFAR (absolute path: the repository was cloned under /mnt/tmp)
 cd /mnt/tmp/tensorflow/tensorflow/models/image/cifar10/
 python cifar10_multi_gpu_train.py

 # You can also check that TensorFlow is working by training a CNN on the MNIST data set.
 python /mnt/tmp/tensorflow/tensorflow/models/image/mnist/convolutional.py
	# Note – this is not a bash script (some of the steps require reboot)
	# I named it .sh just so Github does correct syntax highlighting.
	# This installs TensorFlow 0.11, CUDA 8.0 and cudnn-8.0
	# The CUDA part is mostly based on this excellent blog post:
	# http://tleyden.github.io/blog/2014/10/25/cuda-6-dot-5-on-aws-gpu-instance-running-ubuntu-14-dot-04/
	# I extended Erick's version using additional instructions from http://ramhiser.com/2016/01/05/installing-tensorflow-on-an-aws-ec2-instance-with-gpu-support/

	# Refresh the package index, upgrade the system, then pull in the build
	# toolchain and Python packages used by the later steps.
	sudo apt-get update
	# If the upgrade asks about a modified config file, choose
	# "install package maintainers version".
	sudo apt-get upgrade -y
	# Compiler toolchain, Python headers, git, swig and a JDK.
	sudo apt-get install -y \
	  build-essential python-pip python-dev git python-numpy swig python-dev \
	  default-jdk zip zlib1g-dev

	# Numeric/plotting libraries, kernel headers and archive utilities.
	sudo apt-get install -y \
	  build-essential git python-pip libfreetype6-dev libxft-dev libncurses-dev \
	  libopenblas-dev gfortran python-matplotlib libblas-dev liblapack-dev \
	  libatlas-base-dev python-dev python-pydot linux-headers-generic \
	  linux-image-extra-virtual unzip python-numpy swig python-pandas \
	  python-sklearn unzip wget pkg-config zip g++ zlib1g-dev


	# Blacklist nouveau, which conflicts with the NVIDIA driver, and switch off
	# its kernel mode-setting. The pipes must be real (unescaped) pipes so the
	# text reaches tee and is written to the modprobe configuration files.
	# The trailing empty argument leaves a blank line at the end of the file.
	printf '%s\n' \
	  'blacklist nouveau' \
	  'blacklist lbm-nouveau' \
	  'options nouveau modeset=0' \
	  'alias nouveau off' \
	  'alias lbm-nouveau off' \
	  '' |
	  sudo tee /etc/modprobe.d/blacklist-nouveau.conf
	printf '%s\n' 'options nouveau modeset=0' | sudo tee -a /etc/modprobe.d/nouveau-kms.conf
	# Regenerate the initramfs, then reboot (annoying you have to do this in 2015!).
	# Continue with the next section once the machine is back up.
	sudo update-initramfs -u
	sudo reboot

	# Earlier workaround, kept for reference (no longer run):
	# sudo apt-get install -y linux-image-extra-virtual
	# sudo reboot # Not sure why this is needed

	# Kernel sources plus the headers that match the currently running kernel.
	sudo apt-get install -y linux-source "linux-headers-$(uname -r)"

	# Install CUDA 8.0 (note – don't use any other version)
	# Work inside ./packages; -p makes re-running this section harmless.
	mkdir -p packages && cd packages
	wget https://developer.nvidia.com/compute/cuda/8.0/prod/local_installers/cuda-repo-ubuntu1604-8-0-local_8.0.44-1_amd64-deb
	sudo dpkg -i cuda-repo-ubuntu1604-8-0-local_8.0.44-1_amd64-deb
	# The downloaded package is no longer needed once dpkg has installed it.
	# (The file name really ends in "-deb", exactly as served by the URL above.)
	rm cuda-repo-ubuntu1604-8-0-local_8.0.44-1_amd64-deb
	sudo apt-get update
	sudo apt-get install -y cuda

	# Old CUDA 7.0 runfile-based procedure, kept for reference only:
	# chmod +x cuda_7.0.28_linux.run
	# ./cuda_7.0.28_linux.run -extract=`pwd`/nvidia_installers
	# cd nvidia_installers
	# sudo ./NVIDIA-Linux-x86_64-346.46.run
	# sudo modprobe nvidia
	# sudo ./cuda-linux64-rel-7.0.28-19326674.run

	# Back to the home directory for the next section.
	cd

	# After filling out an annoying questionnaire on NVIDIA's site, you'll download the cuDNN archive
	# (the commands below expect cudnn-8.0-linux-x64-v5.0-ga.tgz). You need to transfer it to your EC2
	# instance: I did this by adding it to my Dropbox folder and using wget to fetch it from there.
	# Once the archive is in your home directory, run the following:
	# Install cuDNN for CUDA 8.0
	tar -vxzf cudnn-8.0-linux-x64-v5.0-ga.tgz
	# -P copies any symlinks in the archive as symlinks instead of following them.
	sudo cp -P cuda/lib64/libcudnn* /usr/local/cuda/lib64
	sudo cp cuda/include/cudnn.h /usr/local/cuda/include/
	# Make the copied header and libraries readable by all users.
	sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn*

	# Next up, we'll add some environment variables. You may wish to add these to your ~/.bashrc.
	export CUDA_HOME=/usr/local/cuda
	export CUDA_ROOT=/usr/local/cuda
	export PATH="${PATH}:${CUDA_ROOT}/bin"
	# Only prepend the old value (and its ':') when it is non-empty: a leading
	# ':' would create an empty entry, which the dynamic loader treats as the
	# current working directory.
	export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}${CUDA_ROOT}/lib64"

	# 1. Install JDK 8 (Oracle installer from the webupd8team PPA)
	# Note: if the add-apt-repository command is missing, first run
	#   sudo apt-get install -y software-properties-common
	sudo add-apt-repository -y ppa:webupd8team/java
	sudo apt-get update
	# Hack to silently accept the license agreement: pre-seed the debconf answers.
	# The pipes must be real (unescaped) pipes so the selections reach debconf.
	echo debconf shared/accepted-oracle-license-v1-1 select true | sudo debconf-set-selections
	echo debconf shared/accepted-oracle-license-v1-1 seen true | sudo debconf-set-selections
	sudo apt-get install -y oracle-java8-installer

	# Alternative (not used here): sudo apt-get install openjdk-8-jdk

	# Install all other required packages (-y so it does not stop for confirmation,
	# matching the other install commands)
	sudo apt-get install -y pkg-config zip g++ zlib1g-dev unzip



	# At this point the root mount is getting a bit full.
	# I had a lot of issues where the disk would fill up and then Bazel would end up in a weird state,
	# complaining about random things. Make sure you don't run out of disk space when building TensorFlow!
	# So /tmp is moved onto /mnt and replaced by a symlink.
	# WARNING: this deletes everything currently in /tmp.
	sudo mkdir -p /mnt/tmp
	# 1777 = world-writable plus the sticky bit, the usual mode for a shared temp
	# directory (users cannot delete each other's files).
	sudo chmod 1777 /mnt/tmp
	sudo rm -rf /tmp
	sudo ln -s /mnt/tmp /tmp
	# Note that /mnt is not saved when building an AMI, so don't put anything crucial on it

	# Download the Bazel 0.4.0 installer into /tmp and run it as root.
	cd /tmp
	bazel_installer=bazel-0.4.0-jdk7-installer-linux-x86_64.sh
	wget "https://github.com/bazelbuild/bazel/releases/download/0.4.0/${bazel_installer}"
	sudo bash "${bazel_installer}"

	# Build and install TensorFlow 0.11.0 from source
	sudo apt-get install -y pkg-config zip g++ zlib1g-dev
	# Append the CUDA libraries without creating an empty (= current directory)
	# entry when LD_LIBRARY_PATH was previously unset.
	export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/cuda/lib64"
	export CUDA_HOME=/usr/local/cuda

	# Clone once, directly at the v0.11.0 tag, and work inside the checkout.
	cd /mnt/tmp
	git clone --recurse-submodules --branch v0.11.0 https://github.com/tensorflow/tensorflow
	cd tensorflow

	# Patch to support older K520 devices on AWS
	# wget "https://gist.githubusercontent.com/infojunkie/cb6d1a4e8bf674c6e38e/raw/5e01e5b2b1f7afd3def83810f8373fbcf6e47e02/cuda_30.patch"
	# git apply cuda_30.patch
	# According to https://github.com/tensorflow/tensorflow/issues/25#issuecomment-156234658 this patch is no longer needed
	# Instead, you need to run ./configure like below (not tested yet)
	TF_UNOFFICIAL_SETTING=1 ./configure

	# When configure asks for the compute capabilities, answer 3.0:
	# Please specify a list of comma-separated Cuda compute capabilities you want to build with.
	# You can find the compute capability of your device at: https://developer.nvidia.com/cuda-gpus.
	# Please note that each additional compute capability significantly increases your build time and binary size.
	# [Default is: "3.5,5.2"]: 3.0

	bazel build -c opt --config=cuda //tensorflow/cc:tutorials_example_trainer
	bazel build -c opt --config=cuda //tensorflow/tools/pip_package:build_pip_package
	bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
	# Glob on the wheel name so the install does not depend on the exact Python ABI tag.
	sudo pip install --upgrade /tmp/tensorflow_pkg/tensorflow-0.11.0-*.whl

	# Quick check in a Python shell:
	# import tensorflow as tf
	# tf_session = tf.Session()
	# x = tf.constant(1)
	# y = tf.constant(1)
	# tf_session.run(x + y)

	# Test it with CIFAR (absolute path: the repository was cloned under /mnt/tmp)
	cd /mnt/tmp/tensorflow/tensorflow/models/image/cifar10/
	python cifar10_multi_gpu_train.py

	# You can also check that TensorFlow is working by training a CNN on the MNIST data set.
	python /mnt/tmp/tensorflow/tensorflow/models/image/mnist/convolutional.py