Last active
March 22, 2021 09:29
-
-
Save DocMinus/06d4c3689dba10962353d76a59edecea to your computer and use it in GitHub Desktop.
Tensorflow Cuda GPU vs CPU
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# coding: utf-8
# Test CUDA availability and benchmark CPU vs GPU
#
# see for example: https://colab.research.google.com/notebooks/gpu.ipynb
import tensorflow as tf | |
import timeit | |
# Abort early when TensorFlow reports no physical GPU. The physical device
# list is checked rather than comparing the name against '/device:GPU:0'.
device_name = tf.test.gpu_device_name()
visible_gpus = tf.config.list_physical_devices('GPU')
if not visible_gpus:
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))
# Test CPU vs GPU
# Two functions that convolve a batch of random images, one pinned to each device
def cpu():
    """Convolve a batch of random images on the CPU and return the summed result.

    A (100, 100, 100, 3) tensor of normally distributed values is generated
    (batch x height x width x channel), passed through a freshly created
    Conv2D layer with 32 filters and a kernel size of 7, and the layer output
    is reduced with ``tf.math.reduce_sum``. Everything runs inside the
    '/cpu:0' device scope.
    """
    with tf.device('/cpu:0'):
        image_batch = tf.random.normal((100, 100, 100, 3))
        # A new layer is built on every call, exactly as in the original.
        conv_layer = tf.keras.layers.Conv2D(filters=32, kernel_size=7)
        return tf.math.reduce_sum(conv_layer(image_batch))
def gpu():
    """Convolve a batch of random images on the GPU and return the summed result.

    A (100, 100, 100, 3) tensor of normally distributed values is generated
    (batch x height x width x channel), passed through a freshly created
    Conv2D layer with 32 filters and a kernel size of 7, and the layer output
    is reduced with ``tf.math.reduce_sum``. Everything runs inside the
    '/device:GPU:0' device scope.
    """
    with tf.device('/device:GPU:0'):
        image_batch = tf.random.normal((100, 100, 100, 3))
        # A new layer is built on every call, exactly as in the original.
        conv_layer = tf.keras.layers.Conv2D(filters=32, kernel_size=7)
        return tf.math.reduce_sum(conv_layer(image_batch))
# Run each op once to "warm up" before timing; see:
# https://stackoverflow.com/a/45067900
cpu()
gpu()

# Finally, time ten calls of each function and print the totals. The output
# formatting is a matter of taste.
print('\nTime (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images '
      '(batch x height x width x channel). \n Sum of ten runs.')

# The functions are handed to timeit as callables, so no ``setup`` import
# string is needed. The former "from __main__ import ..." setup also failed
# whenever this file was not executed as ``__main__``.
print('CPU (s):', end=" ")
cpu_time = timeit.timeit(stmt=cpu, number=10)
print("{:.4f}".format(cpu_time))

print('GPU (s):', end=" ")
gpu_time = timeit.timeit(stmt=gpu, number=10)
print("{:.4f}".format(gpu_time))

# int() truncates the ratio, so e.g. 4.67 is reported as "4x".
print('GPU speedup over CPU: {}x'.format(int(cpu_time / gpu_time)))
'''
comparison outputs
Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images (batch x height x width x channel).
Sum of ten runs.
Mx150, win10 (vs i7):
CPU (s): 2.0285
GPU (s): 0.4343
GPU speedup over CPU: 4x
2060S, win10 (vs Ryzen 5 3600):
CPU (s): 0.7966
GPU (s): 0.0775
GPU speedup over CPU: 10x
1050Ti, Ubuntu20 (vs Xeon W3550):
CPU (s): 4.0058
GPU (s): 0.1734
GPU speedup over CPU: 23x
1080Ti, Ubuntu18 (vs Xeon E5 2620 V3):
CPU (s): 0.7239
GPU (s): 0.0535
GPU speedup over CPU: 13x
'''
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment