Created
January 6, 2017 08:54
-
-
Save jaeoh2/8fbd3276292c7b84ffd50485611bd0c3 to your computer and use it in GitHub Desktop.
Tensorflow Basic performance comparison (CPU vs GPU vs Multi-GPU) based on https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/5_MultiGPU/multigpu_basics.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function
'''
Basic Multi GPU computation example using TensorFlow library.
Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
'''
'''
This tutorial requires your machine to have 2 GPUs
"/cpu:0": The CPU of your machine.
"/gpu:0": The first GPU of your machine
"/gpu:1": The second GPU of your machine
'''
import numpy as np
import tensorflow as tf
import datetime

# When True, TensorFlow logs which device every op was placed on.
log_device_placement = True
# Number of chained matrix multiplications per operand.
n = 10

'''
Example: compute A^n + B^n on 2 GPUs
Results on 8 cores with 2 GTX-980:
 * Single GPU computation time: 0:00:11.277449
 * Multi GPU computation time: 0:00:07.131701
'''
# Large random operand matrices, single precision to match the placeholders.
A = np.random.rand(10000, 10000).astype(np.float32)
B = np.random.rand(10000, 10000).astype(np.float32)

# Containers for the graph nodes whose sum each benchmark evaluates.
c1 = []
c2 = []
def matpow(M, n):
    """Return the matrix power M^n as a chain of tf.matmul ops.

    M -- a square matrix (e.g. a tf placeholder/tensor)
    n -- integer exponent; for n <= 1 M itself is returned

    Fix: the original base case was `n < 1: return M`, which made
    matpow(M, k) build M^(k+1) — an off-by-one in the exponent versus
    the documented "compute A^n" intent. `n <= 1` yields exactly M^n
    for n >= 1.
    """
    if n <= 1:
        return M
    # Unroll the power into n-1 chained matmul nodes.
    return tf.matmul(M, matpow(M, n - 1))
'''
Single CPU computing
'''
with tf.device('/cpu:0'):
    a = tf.placeholder(tf.float32, [10000, 10000])
    b = tf.placeholder(tf.float32, [10000, 10000])
    # Compute A^n and B^n and collect the result nodes in c1.
    c1.append(matpow(a, n))
    c1.append(matpow(b, n))

with tf.device('/cpu:0'):
    # A^n + B^n. Named `addition` rather than `sum` so the builtin
    # `sum` is not shadowed at module level.
    addition = tf.add_n(c1)

t1_0 = datetime.datetime.now()
with tf.Session(config=tf.ConfigProto(log_device_placement=log_device_placement)) as sess:
    # Run the op, feeding the concrete matrices into the placeholders.
    sess.run(addition, {a: A, b: B})
t2_0 = datetime.datetime.now()
'''
Single GPU computing
'''
# Fix: the original appended these ops to the same `c1` the CPU section
# already filled, so tf.add_n(c1) re-executed the CPU ops inside the
# "Single GPU" timing and inflated it. Use a fresh list instead.
gpu_ops = []
with tf.device('/gpu:0'):
    a = tf.placeholder(tf.float32, [10000, 10000])
    b = tf.placeholder(tf.float32, [10000, 10000])
    # Compute A^n and B^n on the single GPU.
    gpu_ops.append(matpow(a, n))
    gpu_ops.append(matpow(b, n))

with tf.device('/cpu:0'):
    # A^n + B^n; `addition` avoids shadowing the builtin `sum`.
    addition = tf.add_n(gpu_ops)

t1_1 = datetime.datetime.now()
with tf.Session(config=tf.ConfigProto(log_device_placement=log_device_placement)) as sess:
    # Run the op, feeding the concrete matrices into the placeholders.
    sess.run(addition, {a: A, b: B})
t2_1 = datetime.datetime.now()
'''
Multi GPU computing
'''
# GPU:0 computes A^n
with tf.device('/gpu:0'):
    a = tf.placeholder(tf.float32, [10000, 10000])
    c2.append(matpow(a, n))

# GPU:1 computes B^n (in parallel with GPU:0)
with tf.device('/gpu:1'):
    b = tf.placeholder(tf.float32, [10000, 10000])
    c2.append(matpow(b, n))

with tf.device('/cpu:0'):
    # A^n + B^n; `addition` avoids shadowing the builtin `sum`.
    addition = tf.add_n(c2)

t1_2 = datetime.datetime.now()
with tf.Session(config=tf.ConfigProto(log_device_placement=log_device_placement)) as sess:
    # Run the op, feeding the concrete matrices into the placeholders.
    sess.run(addition, {a: A, b: B})
t2_2 = datetime.datetime.now()

# Report wall-clock durations of the three runs.
print("Single CPU computation time: " + str(t2_0 - t1_0))
print("Single GPU computation time: " + str(t2_1 - t1_1))
print("Multi GPU computation time: " + str(t2_2 - t1_2))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment