DocMinus · March 22, 2021 09:29
diff --git a/tf_GPU_vs_CPU.py b/tf_GPU_vs_CPU.py
 #!/usr/bin/env python3
 # coding: utf-8

 # Test Cuda availability and benchmark CPU vs GPU
 #
 # see for example: https://colab.research.google.com/notebooks/gpu.ipynb

 import tensorflow as tf
 import timeit

 # Ask TensorFlow for the GPU device name up front; it is only used in the
 # report line below. Availability itself is decided from the physical-device
 # list rather than by comparing device_name against '/device:GPU:0'.
 device_name = tf.test.gpu_device_name()
 if tf.config.list_physical_devices('GPU'):
    print('Found GPU at: {}'.format(device_name))
 else:
    # Abort early: the benchmark below needs a GPU to compare against.
    raise SystemError('GPU device not found')


 # Test CPU vs GPU
 # Two functions that run the same convolution over a random image batch, one per device

 def cpu():
    """Convolve a random image batch under the '/cpu:0' device scope.

    Draws a (100, 100, 100, 3) tensor from tf.random.normal, passes it through
    a freshly constructed Conv2D(32, 7) layer and returns the result of
    tf.math.reduce_sum over the layer output.
    """
    with tf.device('/cpu:0'):
        images = tf.random.normal((100, 100, 100, 3))
        # A new layer is built on every call, exactly as in the timed workload.
        conv_layer = tf.keras.layers.Conv2D(32, 7)
        feature_maps = conv_layer(images)
        total = tf.math.reduce_sum(feature_maps)
    return total


 def gpu():
    """Convolve a random image batch under the '/device:GPU:0' device scope.

    Draws a (100, 100, 100, 3) tensor from tf.random.normal, passes it through
    a freshly constructed Conv2D(32, 7) layer and returns the result of
    tf.math.reduce_sum over the layer output.
    """
    with tf.device('/device:GPU:0'):
        images = tf.random.normal((100, 100, 100, 3))
        # A new layer is built on every call, exactly as in the timed workload.
        conv_layer = tf.keras.layers.Conv2D(32, 7)
        feature_maps = conv_layer(images)
        total = tf.math.reduce_sum(feature_maps)
    return total


 # Run each op once to "warm up"; see: https://stackoverflow.com/a/45067900
 cpu()
 gpu()

 # Finally, run the op several times & output formatted result, the latter a matter of taste
 print('\nTime (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images '
       '(batch x height x width x channel). \n Sum of ten runs.')

 # The functions are handed to timeit as callables, so no setup import is
 # required. A setup of "from __main__ import cpu" would fail with ImportError
 # whenever this file is not the __main__ module (e.g. when it is imported).
 print('CPU (s):', end=" ")
 cpu_time = timeit.timeit(stmt=cpu, number=10)
 print("{:.4f}".format(cpu_time))

 print('GPU (s):', end=" ")
 gpu_time = timeit.timeit(stmt=gpu, number=10)
 print("{:.4f}".format(gpu_time))
 # int() truncates, so the speedup is reported rounded down to a whole factor.
 print('GPU speedup over CPU: {}x'.format(int(cpu_time / gpu_time)))

 '''
 comparison outputs
 Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images (batch x height x width x channel).
 Sum of ten runs.

 Mx150, win10 (vs i7):
 CPU (s): 2.0285
 GPU (s): 0.4343
 GPU speedup over CPU: 4x

 2060S, win10 (vs Ryzen 5 3600):
 CPU (s): 0.7966
 GPU (s): 0.0775
 GPU speedup over CPU: 10x

 1050Ti, Ubuntu20 (vs Xeon W3550):
 CPU (s): 4.0058
 GPU (s): 0.1734
 GPU speedup over CPU: 23x

 1080Ti, Ubuntu18 (vs Xeon E5 2620 V3):
 CPU (s): 0.7239
 GPU (s): 0.0535
 GPU speedup over CPU: 13x
 '''
	#!/usr/bin/env python3
	# coding: utf-8

	# Test Cuda availability and benchmark CPU vs GPU
	#
	# see for example: https://colab.research.google.com/notebooks/gpu.ipynb

	import tensorflow as tf
	import timeit

	# Ask TensorFlow for the GPU device name; it is only used in the report
	# line below. Availability is decided from the physical-device list rather
	# than by comparing device_name against '/device:GPU:0'.
	device_name = tf.test.gpu_device_name()
	if not tf.config.list_physical_devices('GPU'):
	    # Abort early: the benchmark below needs a GPU to compare against.
	    raise SystemError('GPU device not found')

	print('Found GPU at: {}'.format(device_name))


	# Test CPU vs GPU
	# Two functions that run the same convolution over a random image batch, one per device

	def cpu():
	    """Convolve a random image batch under the '/cpu:0' device scope.

	    Draws a (100, 100, 100, 3) tensor from tf.random.normal, passes it
	    through a freshly constructed Conv2D(32, 7) layer and returns the
	    result of tf.math.reduce_sum over the layer output.
	    """
	    with tf.device('/cpu:0'):
	        random_image_cpu = tf.random.normal((100, 100, 100, 3))
	        # A new layer is built on every call; this is part of the timed work.
	        net_cpu = tf.keras.layers.Conv2D(32, 7)(random_image_cpu)
	        return tf.math.reduce_sum(net_cpu)


	def gpu():
	    """Convolve a random image batch under the '/device:GPU:0' device scope.

	    Draws a (100, 100, 100, 3) tensor from tf.random.normal, passes it
	    through a freshly constructed Conv2D(32, 7) layer and returns the
	    result of tf.math.reduce_sum over the layer output.
	    """
	    with tf.device('/device:GPU:0'):
	        random_image_gpu = tf.random.normal((100, 100, 100, 3))
	        # A new layer is built on every call; this is part of the timed work.
	        net_gpu = tf.keras.layers.Conv2D(32, 7)(random_image_gpu)
	        return tf.math.reduce_sum(net_gpu)


	# Run each op once to "warm up"; see: https://stackoverflow.com/a/45067900
	cpu()
	gpu()

	# Finally, run the op several times & output formatted result, the latter a matter of taste
	print('\nTime (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images '
	      '(batch x height x width x channel). \n Sum of ten runs.')

	# The functions are handed to timeit as callables, so no setup import is
	# required. A setup of "from __main__ import cpu" would fail with ImportError
	# whenever this file is not the __main__ module (e.g. when it is imported).
	print('CPU (s):', end=" ")
	cpu_time = timeit.timeit(stmt=cpu, number=10)
	print("{:.4f}".format(cpu_time))

	print('GPU (s):', end=" ")
	gpu_time = timeit.timeit(stmt=gpu, number=10)
	print("{:.4f}".format(gpu_time))
	# int() truncates, so the speedup is reported rounded down to a whole factor.
	print('GPU speedup over CPU: {}x'.format(int(cpu_time / gpu_time)))

	'''
	comparison outputs
	Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images (batch x height x width x channel).
	Sum of ten runs.

	Mx150, win10 (vs i7):
	CPU (s): 2.0285
	GPU (s): 0.4343
	GPU speedup over CPU: 4x

	2060S, win10 (vs Ryzen 5 3600):
	CPU (s): 0.7966
	GPU (s): 0.0775
	GPU speedup over CPU: 10x

	1050Ti, Ubuntu20 (vs Xeon W3550):
	CPU (s): 4.0058
	GPU (s): 0.1734
	GPU speedup over CPU: 23x

	1080Ti, Ubuntu18 (vs Xeon E5 2620 V3):
	CPU (s): 0.7239
	GPU (s): 0.0535
	GPU speedup over CPU: 13x
	'''