## Remove any previously installed NVIDIA drivers:
sudo dpkg -P $(dpkg -l | grep nvidia-driver | awk '{print $2}')
sudo apt autoremove
sudo lshw -C display
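- Optionally, verify the purge removed everything; this should print nothing:
dpkg -l | grep nvidia-driver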
NVIDIA Ampere cards, including the 3070, 3080 and 3090, do not work with CUDA 10.
You have to use CUDA 11.0 or higher.
When Ampere first shipped, the only way to do so was installing tf-nightly or building TensorFlow yourself; stable releases from TensorFlow 2.4 onward support CUDA 11.
This setup works with TensorFlow version 2.5.
## Install proper NVIDIA driver:
sudo ubuntu-drivers devices
sudo ubuntu-drivers autoinstall
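- Reboot, then confirm the driver loaded and the card is detected (exact driver version will vary per machine):
sudo reboot
nvidia-smi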
## Install CUDA 10.1:
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
sudo mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
sudo apt-get update
sudo apt-get -y install cuda-10.1
## Install CUDA 11.2 (repository setup is identical to the above):
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
sudo mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
sudo apt-get update
sudo apt-get -y install cuda-11.2
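- Both toolkits install side by side under /usr/local; the listing below should show cuda-10.1 and cuda-11.2 (and typically a cuda symlink pointing at the most recently installed version):
ls -d /usr/local/cuda*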
- Download CUDNN from: [Google Drive](https://drive.google.com/file/d/1tepluwCf-5FgKQy8DvK4uivzSHZehkHa/view?usp=sharing)
tar -xzvf cudnn-10.1-linux-x64-v7.6.5.32.tgz
sudo cp cuda/include/cudnn*.h /usr/local/cuda/include
sudo cp -P cuda/lib64/libcudnn* /usr/local/cuda/lib64
sudo chmod a+r /usr/local/cuda/include/cudnn*.h /usr/local/cuda/lib64/libcudnn*
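- To confirm the copy worked, the version macros in the header should report 7.6.5 (for cuDNN 8.x the macros live in cudnn_version.h instead):
grep CUDNN_MAJOR -A 2 /usr/local/cuda/include/cudnn.h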
- Create a virtual environment 'cuda':
conda create -n cuda -c nvidia -c conda-forge -c defaults python=3.8 cudatoolkit=10.1
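- Activate the environment so the cuDNN install below (which is presumably meant for it) lands in the right place:
conda activate cuda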
- Install a compatible version of CUDNN:
conda install -c anaconda cudnn=7.6.5
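- A quick check that both packages landed in the environment:
conda list | grep -E 'cudatoolkit|cudnn'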
- Open ~/.bashrc and append the following lines:
vim ~/.bashrc
# CUDA related exports
export PATH=/usr/local/cuda-10.1/bin${PATH:+:${PATH}}
export LD_LIBRARY_PATH=/usr/local/cuda-10.1/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
export PATH=/usr/local/cuda-11.2/bin${PATH:+:${PATH}}
export LD_LIBRARY_PATH=/usr/local/cuda-11.2/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
# cuBLAS (the apt CUDA 10.1 packages are known to place cuBLAS under /usr/local/cuda-10.2)
export CPLUS_INCLUDE_PATH=/usr/local/cuda-10.2/targets/x86_64-linux/include${CPLUS_INCLUDE_PATH:+:${CPLUS_INCLUDE_PATH}}
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
export PATH=/usr/local/cuda/bin${PATH:+:${PATH}}
export LIBRARY_PATH=/usr/local/cuda-10.1/lib64${LIBRARY_PATH:+:${LIBRARY_PATH}}
- Create the CUDA library path file /etc/profile.d/cuda.sh and append the following text:
sudo touch /etc/profile.d/cuda.sh
export PATH=/usr/local/cuda-11.2/bin:$PATH
export CUDADIR=/usr/local/cuda-11.2
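- Note that touch only creates the file; one way to actually append the two lines is a heredoc (any editor works equally well):
sudo tee -a /etc/profile.d/cuda.sh > /dev/null <<'EOF'
export PATH=/usr/local/cuda-11.2/bin:$PATH
export CUDADIR=/usr/local/cuda-11.2
EOF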
- Update changes to the ENV variables:
source ~/.bashrc
source /etc/profile.d/cuda.sh
nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2019 NVIDIA Corporation
Built on Sun_Jul_28_19:07:16_PDT_2019
Cuda compilation tools, release 10.1, V10.1.243
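- nvcc reports whichever toolkit directory comes first on PATH, so the release shown depends on the export order above; to see which binary is being picked up:
which nvcc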
- Create an anaconda environment:
conda create -n tf pip python=3.8
- Install dependencies into the tf environment:
conda activate tf
pip install --ignore-installed --upgrade tensorflow==2.5
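- Before running the full benchmark, a one-line check that TensorFlow can see the GPU (should print a non-empty device list):
python -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))"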
- Run the following python code:
import tensorflow as tf
import timeit

device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    print(
        '\n\nThis error most likely means that this notebook is not '
        'configured to use a GPU. Change this in Notebook Settings via the '
        'command palette (cmd/ctrl-shift-P) or the Edit menu.\n\n')
    raise SystemError('GPU device not found')

def cpu():
    with tf.device('/cpu:0'):
        random_image_cpu = tf.random.normal((100, 100, 100, 3))
        net_cpu = tf.keras.layers.Conv2D(32, 7)(random_image_cpu)
        return tf.math.reduce_sum(net_cpu)

def gpu():
    with tf.device('/device:GPU:0'):
        random_image_gpu = tf.random.normal((100, 100, 100, 3))
        net_gpu = tf.keras.layers.Conv2D(32, 7)(random_image_gpu)
        return tf.math.reduce_sum(net_gpu)

# We run each op once to warm up; see: https://stackoverflow.com/a/45067900
cpu()
gpu()

# Run the op several times.
print('Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images '
      '(batch x height x width x channel). Sum of ten runs.')
print('CPU (s):')
cpu_time = timeit.timeit('cpu()', number=10, setup="from __main__ import cpu")
print(cpu_time)
print('GPU (s):')
gpu_time = timeit.timeit('gpu()', number=10, setup="from __main__ import gpu")
print(gpu_time)
print('GPU speedup over CPU: {}x'.format(int(cpu_time / gpu_time)))
print("TensorFlow Version: " + str(tf.__version__))
- Output for a Ryzen Threadripper 1900X and GeForce RTX 3070:
CPU (s):
0.766528123000171
GPU (s):
0.025774184992769733
GPU speedup over CPU: 29x
TensorFlow Version: 2.5.0