Skip to content

Instantly share code, notes, and snippets.

@DocMinus
Last active March 22, 2021 09:29
Show Gist options
  • Save DocMinus/06d4c3689dba10962353d76a59edecea to your computer and use it in GitHub Desktop.
Save DocMinus/06d4c3689dba10962353d76a59edecea to your computer and use it in GitHub Desktop.
Tensorflow Cuda GPU vs CPU
#!/usr/bin/env python3
# coding: utf-8
# Test Cuda availability and benchmark CPU vs GPU
#
# see for example: https://colab.research.google.com/notebooks/gpu.ipynb
import tensorflow as tf
import timeit
# Name TensorFlow reports for the GPU (e.g. '/device:GPU:0'); only used in
# the confirmation message printed below.
device_name = tf.test.gpu_device_name()
# Abort before benchmarking when TensorFlow lists no physical GPU: the
# CPU-vs-GPU comparison further down needs one.
if not tf.config.list_physical_devices('GPU'):
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))
# Test CPU vs GPU
# Two functions that each convolve a batch of random images (one on the CPU, one on the GPU) and sum the result
def cpu():
    """Convolve a batch of random images on the CPU and return the sum.

    Draws a normally distributed tensor of shape (100, 100, 100, 3), passes
    it through a newly created ``Conv2D(32, 7)`` layer and reduces the
    output to a single value. The layer is constructed inside the function,
    so a new layer is built on every call.

    Returns:
        The result of ``tf.math.reduce_sum`` over the convolution output.
    """
    # Pin every op created in this scope to the first CPU device.
    with tf.device('/cpu:0'):
        random_image_cpu = tf.random.normal((100, 100, 100, 3))
        net_cpu = tf.keras.layers.Conv2D(32, 7)(random_image_cpu)
        return tf.math.reduce_sum(net_cpu)
def gpu():
    """Convolve a batch of random images on the GPU and return the sum.

    Draws a normally distributed tensor of shape (100, 100, 100, 3), passes
    it through a newly created ``Conv2D(32, 7)`` layer and reduces the
    output to a single value. The layer is constructed inside the function,
    so a new layer is built on every call.

    Returns:
        The result of ``tf.math.reduce_sum`` over the convolution output.
    """
    # Pin every op created in this scope to the first GPU device.
    with tf.device('/device:GPU:0'):
        random_image_gpu = tf.random.normal((100, 100, 100, 3))
        net_gpu = tf.keras.layers.Conv2D(32, 7)(random_image_gpu)
        return tf.math.reduce_sum(net_gpu)
# Run each op once to "warm up"; see: https://stackoverflow.com/a/45067900
# These first calls are not timed, which keeps first-use overhead out of the
# measured runs below.
cpu()
gpu()

# Finally, run the op several times & output formatted result, the latter a matter of taste
print('\nTime (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images '
      '(batch x height x width x channel). \n Sum of ten runs.')

# The functions are passed to timeit as callables, so no setup import is
# needed; omitting it also lets the script run when it is not `__main__`.
# Each figure is the total wall time of 10 consecutive calls, not a mean.
# NOTE(review): the returned tensors are never read back on the host --
# confirm that the GPU figure includes kernel completion.
print('CPU (s):', end=" ")
cpu_time = timeit.timeit(stmt=cpu, number=10)
print("{:.4f}".format(cpu_time))

print('GPU (s):', end=" ")
gpu_time = timeit.timeit(stmt=gpu, number=10)
print("{:.4f}".format(gpu_time))

# int() truncates, so the speedup is reported as a whole number rounded down.
print('GPU speedup over CPU: {}x'.format(int(cpu_time / gpu_time)))
'''
comparison outputs
Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images (batch x height x width x channel).
Sum of ten runs.
Mx150, win10 (vs i7):
CPU (s): 2.0285
GPU (s): 0.4343
GPU speedup over CPU: 4x
2060S, win10 (vs Ryzen 5 3600):
CPU (s): 0.7966
GPU (s): 0.0775
GPU speedup over CPU: 10x
1050Ti, Ubuntu20 (vs Xeon W3550):
CPU (s): 4.0058
GPU (s): 0.1734
GPU speedup over CPU: 23x
1080Ti, Ubuntu18 (vs Xeon E5 2620 V3):
CPU (s): 0.7239
GPU (s): 0.0535
GPU speedup over CPU: 13x
'''
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment