Last active
April 7, 2021 16:22
-
-
Save tsuchm/bf61370ccb2fac639c811d77d0c61fa7 to your computer and use it in GitHub Desktop.
Ansible Playbook to install nvidia-docker2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Explanation: APT pin records for the NVIDIA machine-learning libraries.
Explanation: Each record gives priority 999 to versions of one package
Explanation: whose version string matches the given glob. Records are
Explanation: separated by blank lines, as apt_preferences(5) requires.
Package: libnccl-dev
Pin: version *cuda9.1
Pin-Priority: 999

Package: libnccl-dev
Pin: version *cuda9.0
Pin-Priority: 999

Package: libnccl2
Pin: version *cuda9.1
Pin-Priority: 999

Package: libnccl2
Pin: version *cuda9.0
Pin-Priority: 999

Explanation: cuDNN 7 is pinned to the cuda9.1 and cuda9.0 builds.
Package: libcudnn7
Pin: version *cuda9.1
Pin-Priority: 999

Package: libcudnn7
Pin: version *cuda9.0
Pin-Priority: 999

Package: libcudnn7-dev
Pin: version *cuda9.1
Pin-Priority: 999

Package: libcudnn7-dev
Pin: version *cuda9.0
Pin-Priority: 999

Explanation: cuDNN 6 and cuDNN 5 are pinned to the cuda8.0 builds.
Package: libcudnn6
Pin: version *cuda8.0
Pin-Priority: 999

Package: libcudnn6-dev
Pin: version *cuda8.0
Pin-Priority: 999

Package: libcudnn5
Pin: version *cuda8.0
Pin-Priority: 999

Package: libcudnn5-dev
Pin: version *cuda8.0
Pin-Priority: 999
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Stretch hosts only: register the stretch-backports suite in its own
# sources file and refresh the apt cache right away.
- name: Enable Backports repository
  when: ansible_distribution_release == 'stretch'
  apt_repository:
    filename: stretch-backports
    repo: 'deb http://httpredir.debian.org/debian stretch-backports main contrib non-free'
    state: present
    update_cache: true
# Install (or upgrade to the latest) NVIDIA driver packages with
# stretch-backports as the default release. The package list is handed to
# the apt module directly, so everything is resolved in one transaction
# instead of one module call per item.
# The result is registered as nvidia_driver_apt; later tasks test its
# `changed` flag to decide whether a reboot is needed.
- name: Install NVidia driver
  apt:
    name:
      - nvidia-egl-icd
      - nvidia-driver
      - nvidia-smi
    state: latest
    default_release: stretch-backports
    update_cache: true
  register: nvidia_driver_apt
# Runs only when the driver install reported a change. The reboot is
# launched fire-and-forget (async with poll 0) after a 2-second sleep.
- name: Reboot system to refresh NVidia driver
  when: nvidia_driver_apt.changed
  async: 1
  poll: 0
  shell: sleep 2 && reboot
# After a driver change, wait from the control machine (no privilege
# escalation) until port 22 of the target accepts connections again.
# The first probe is delayed by 30 seconds.
- name: Wait system resume
  when: nvidia_driver_apt.changed
  become: false
  delegate_to: localhost
  wait_for:
    host: "{{ inventory_hostname }}"
    port: 22
    delay: 30
# Smoke-test the driver after it changed. `|| true` keeps the shell exit
# status at zero; the task fails only when the captured stdout contains
# the text 'has failed'.
# This is a read-only probe, so it is marked as never changing the host.
- name: Check NVidia driver
  when: nvidia_driver_apt.changed
  shell: nvidia-smi || true
  register: nvidia_smi_output
  changed_when: false
  failed_when: "'has failed' in nvidia_smi_output.stdout"
# Stretch hosts only: add the three Jessie suites (main, updates, security)
# to a single sources file named "jessie". The apt cache is deliberately
# not refreshed by this task.
- name: Enable Jessie repositories
  when: ansible_distribution_release == 'stretch'
  apt_repository:
    filename: jessie
    repo: "{{ item }}"
    state: present
    update_cache: false
  with_items:
    - deb http://ftp.jp.debian.org/debian/ jessie main contrib non-free
    - deb http://ftp.jp.debian.org/debian/ jessie-updates main contrib non-free
    - deb http://security.debian.org/debian-security jessie/updates main contrib non-free
# Install the GCC/G++ 4.9 toolchain.
# The Jessie repositories are added just before this task without
# refreshing the apt cache, so the cache is refreshed here; otherwise
# packages that exist only in those repositories are unknown to apt on a
# first run. (Presumably gcc-4.9 is such a package on Stretch -- confirm.)
# The package list is passed to apt directly so both are installed in a
# single transaction.
- name: Install GCC-4.9
  apt:
    name:
      - gcc-4.9
      - g++-4.9
    state: latest
    update_cache: true
# Install (or upgrade to the latest) CUDA toolkit package, using
# stretch-backports as the default release.
- name: Install CUDA
  apt:
    default_release: stretch-backports
    name: nvidia-cuda-toolkit
    state: latest
# Trust the signing key of the NVIDIA machine-learning apt repository.
# The key is downloaded over HTTPS rather than plain HTTP so that it cannot
# be replaced in transit. The download is done by the apt_key module
# itself, not by apt.
- name: Add Nvidia Machine Learning repository key
  apt_key:
    url: 'https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/7fa2af80.pub'
    state: present
# Register the NVIDIA machine-learning repository (flat layout, "/" suite)
# in its own sources file and refresh the apt cache.
- name: Enable Nvidia Machine Learning repository
  apt_repository:
    filename: nvidia-machine-learning
    repo: 'deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /'
    state: present
    update_cache: true
# Copy the local `apt_preferences` file to /etc/apt/preferences.d/nvidia,
# owned by root and world-readable.
- name: Put preferences for Nvidia Machine Learning repository.
  copy:
    src: apt_preferences
    dest: /etc/apt/preferences.d/nvidia
    owner: root
    group: root
    mode: "0644"
# Install (or upgrade to the latest) cuDNN 5/6/7 and NCCL 2 runtime and
# development packages after refreshing the apt cache.
# The whole list is passed to the apt module at once, so apt resolves it in
# a single transaction instead of one module call per package.
- name: Install cuDNN and NCCL
  apt:
    name:
      - libcudnn7
      - libcudnn7-dev
      - libcudnn6
      - libcudnn6-dev
      - libcudnn5
      - libcudnn5-dev
      - libnccl2
      - libnccl-dev
    state: latest
    update_cache: true
# Install (or upgrade to the latest) CUPTI packages with stretch-backports
# as the default release; per the task name they are needed to build
# TensorFlow. Both packages are passed to apt in a single call.
- name: Install cupti libraries, which is necessary to build TensorFlow.
  apt:
    name:
      - libcupti-dev
      - libcupti9.1
    state: latest
    default_release: stretch-backports
    update_cache: true
# The Docker repository and the NVidia-Docker repository are both served
# over HTTPS, so the apt-transport-https package has to be present before
# apt can use them.
- name: Install apt-transport-https
  apt:
    name: apt-transport-https
    state: present
# Trust the signing key published for Docker's Debian repository.
- name: Add Docker repository key
  apt_key:
    state: present
    url: 'https://download.docker.com/linux/debian/gpg'
# Register Docker's "stable" amd64 repository for the host's release
# codename in a sources file named "docker". The apt cache is not refreshed
# by this task.
- name: Add Docker repository source
  apt_repository:
    filename: docker
    repo: "deb [arch=amd64] https://download.docker.com/linux/debian {{ ansible_distribution_release }} stable"
    state: present
    update_cache: false
# Trust the signing key published for the nvidia-docker repository.
- name: Add NVidia-Docker repository key
  apt_key:
    state: present
    url: 'https://nvidia.github.io/nvidia-docker/gpgkey'
# Download the ready-made sources list for this Debian major version and
# store it under /etc/apt/sources.list.d, owned by root and world-readable.
- name: Add NVidia-Docker repository source
  get_url:
    dest: /etc/apt/sources.list.d/nvidia-docker.list
    url: "https://nvidia.github.io/nvidia-docker/debian{{ ansible_distribution_major_version }}/nvidia-docker.list"
    mode: "0644"
    owner: root
    group: root
# Refresh the apt cache, then install (or upgrade to the latest)
# nvidia-docker2. The result is registered as nvidia_docker_apt so a
# following task can react to a change.
- name: Install nvidia-docker2
  apt:
    name: nvidia-docker2
    state: latest
    update_cache: true
  register: nvidia_docker_apt
# Ask the docker service to reload its configuration, but only when the
# nvidia-docker2 installation actually changed something.
- name: Reload Docker configuration
  when: nvidia_docker_apt.changed
  service:
    name: docker
    state: reloaded
## Test nvidia-smi with the latest official CUDA image:
# docker run --runtime=nvidia --rm nvidia/cuda nvidia-smi
## When the host machine runs Debian GNU/Linux 9.4 (Stretch), a CUDA
## version mismatch causes an error. To avoid this problem, specify
## the tag of the CUDA image explicitly, as follows:
# docker run --runtime=nvidia --rm nvidia/cuda:8.0-devel nvidia-smi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment