wget https://gist.github.com/Willian-Zhang/290dceb96679c8f413e42491c92722b0/raw/mnist-cnn.py
python3 mnist-cnn.py
Last active
May 11, 2018 08:55
-
-
Save Willian-Zhang/290dceb96679c8f413e42491c92722b0 to your computer and use it in GitHub Desktop.
Tensorflow Benchmark
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Train a simple convnet on the MNIST dataset.

Gets to 99.25% test accuracy after 12 epochs
(there is still a lot of margin for parameter tuning).
16 seconds per epoch on a GRID K520 GPU.
"""
from __future__ import print_function

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

# Training hyper-parameters.
batch_size = 128
num_classes = 10
epochs = 12

# input image dimensions
img_rows, img_cols = 28, 28

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Insert the single channel axis at the position the active backend
# reports through `image_data_format()`.
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

# Convert to float32 and divide by 255
# (presumably maps 8-bit pixel values into [0, 1] -- confirm dataset dtype).
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Network: two 3x3 convolutions, 2x2 max-pooling, dropout, then a dense
# classifier head with a softmax over `num_classes` outputs.
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

# NOTE: the test split doubles as the validation set during training.
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))

# `evaluate` returns the loss followed by the compiled metrics
# (here: accuracy).
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
self-compiled version
Using TensorFlow backend.
x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples
WARNING:tensorflow:From /Users/willian/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:1064: calling reduce_prod (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version.
Instructions for updating:
keep_dims is deprecated, use keepdims instead
WARNING:tensorflow:From /Users/willian/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:2578: calling reduce_sum (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version.
Instructions for updating:
keep_dims is deprecated, use keepdims instead
WARNING:tensorflow:From /Users/willian/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:1153: calling reduce_mean (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version.
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Train on 60000 samples, validate on 10000 samples
Epoch 1/12
60000/60000 [==============================] - 86s - loss: 0.3290 - acc: 0.9008 - val_loss: 0.0741 - val_acc: 0.9773
Epoch 2/12
60000/60000 [==============================] - 91s - loss: 0.1098 - acc: 0.9674 - val_loss: 0.0500 - val_acc: 0.9838
TestResult
CPU rMBP13'
Using TensorFlow backend.
Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
11493376/11490434 [==============================] - 1s 0us/step
x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples
Train on 60000 samples, validate on 10000 samples
Epoch 1/12
2018-02-16 20:59:20.859538: I tensorflow/core/platform/cpu_feature_guard.cc:137] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.2 AVX AVX2 FMA
60000/60000 [==============================] - 188s 3ms/step - loss: 0.2558 - acc: 0.9210 - val_loss: 0.0608 - val_acc: 0.9807
Epoch 2/12
60000/60000 [==============================] - 182s 3ms/step - loss: 0.0866 - acc: 0.9742 - val_loss: 0.0391 - val_acc: 0.9863
eGPU with GTX1080Ti
Using TensorFlow backend.
x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples
WARNING:tensorflow:From /Users/willian/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:1064: calling reduce_prod (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version.
Instructions for updating:
keep_dims is deprecated, use keepdims instead
WARNING:tensorflow:From /Users/willian/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:2578: calling reduce_sum (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version.
Instructions for updating:
keep_dims is deprecated, use keepdims instead
WARNING:tensorflow:From /Users/willian/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:1153: calling reduce_mean (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version.
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Train on 60000 samples, validate on 10000 samples
Epoch 1/12
2018-02-17 00:52:02.848657: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:859] OS X does not support NUMA - returning NUMA node zero
2018-02-17 00:52:02.848804: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1105] Found device 0 with properties:
name: GeForce GTX 1080 Ti major: 6 minor: 1 memoryClockRate(GHz): 1.645
pciBusID: 0000:46:00.0
totalMemory: 11.00GiB freeMemory: 10.81GiB
2018-02-17 00:52:02.848823: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1195] Creating TensorFlow device (/device:GPU:0) -> (device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:46:00.0, compute capability: 6.1)
2018-02-17 00:52:03.195981: E tensorflow/stream_executor/cuda/cuda_driver.cc:936] failed to allocate 10.22G (10976612096 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY
2018-02-17 00:52:03.196116: E tensorflow/stream_executor/cuda/cuda_driver.cc:936] failed to allocate 9.20G (9878950912 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY
60000/60000 [==============================] - 8s - loss: 0.3565 - acc: 0.8917 - val_loss: 0.0835 - val_acc: 0.9748
Epoch 2/12
60000/60000 [==============================] - 3s - loss: 0.1216 - acc: 0.9645 - val_loss: 0.0545 - val_acc: 0.9832
Epoch 3/12
60000/60000 [==============================] - 3s - loss: 0.0903 - acc: 0.9733 - val_loss: 0.0449 - val_acc: 0.9853
Epoch 4/12
60000/60000 [==============================] - 3s - loss: 0.0762 - acc: 0.9772 - val_loss: 0.0430 - val_acc: 0.9858
Epoch 5/12
60000/60000 [==============================] - 3s - loss: 0.0644 - acc: 0.9808 - val_loss: 0.0367 - val_acc: 0.9875
Epoch 6/12
60000/60000 [==============================] - 3s - loss: 0.0591 - acc: 0.9827 - val_loss: 0.0356 - val_acc: 0.9887
Epoch 7/12
60000/60000 [==============================] - 3s - loss: 0.0534 - acc: 0.9844 - val_loss: 0.0380 - val_acc: 0.9868
Epoch 8/12
60000/60000 [==============================] - 3s - loss: 0.0486 - acc: 0.9860 - val_loss: 0.0318 - val_acc: 0.9894
Epoch 9/12
60000/60000 [==============================] - 3s - loss: 0.0467 - acc: 0.9861 - val_loss: 0.0306 - val_acc: 0.9899
Epoch 10/12
60000/60000 [==============================] - 3s - loss: 0.0426 - acc: 0.9874 - val_loss: 0.0304 - val_acc: 0.9901
Epoch 11/12
60000/60000 [==============================] - 3s - loss: 0.0408 - acc: 0.9875 - val_loss: 0.0302 - val_acc: 0.9904
Epoch 12/12
60000/60000 [==============================] - 3s - loss: 0.0399 - acc: 0.9883 - val_loss: 0.0282 - val_acc: 0.9907
Test loss: 0.028239598831439797
Test accuracy: 0.9907
Multiple GPU version
"""Train a simple convnet on the MNIST dataset (multi-GPU variant).

Gets to 99.25% test accuracy after 12 epochs
(there is still a lot of margin for parameter tuning).
16 seconds per epoch on a GRID K520 GPU.

The template model is built under a CPU device scope and then wrapped
with `keras.utils.multi_gpu_model` before compiling and training.
"""
from __future__ import print_function

import tensorflow as tf
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.utils import multi_gpu_model

# Training hyper-parameters.
batch_size = 128
num_classes = 10
epochs = 12

# input image dimensions
img_rows, img_cols = 28, 28

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Insert the single channel axis at the position the active backend
# reports through `image_data_format()`.
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

# Convert to float32 and divide by 255
# (presumably maps 8-bit pixel values into [0, 1] -- confirm dataset dtype).
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Build the template model under the CPU device scope.
# NOTE(review): the extent of this `with` body was reconstructed from a
# paste that lost its indentation -- confirm it matches the original.
with tf.device('/cpu:0'):
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3),
                     activation='relu',
                     input_shape=input_shape))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

# Replicates `model` on the GPUs.
# NOTE(review): `gpus=None` presumably lets Keras pick every visible GPU;
# confirm against the installed Keras version.
try:
    parallel_model = multi_gpu_model(model, gpus=None)
except Exception:
    # Catch `Exception` rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit are not re-labelled as a GPU error.
    raise AssertionError('GPU allocate error')

parallel_model.compile(loss=keras.losses.categorical_crossentropy,
                       optimizer=keras.optimizers.Adadelta(),
                       metrics=['accuracy'])

# This `fit` call runs on the replicated model with batches of
# `batch_size` (128) samples; the test split doubles as the validation set.
parallel_model.fit(x_train, y_train,
                   batch_size=batch_size,
                   epochs=epochs,
                   verbose=1,
                   validation_data=(x_test, y_test))

# `evaluate` returns the loss followed by the compiled metrics
# (here: accuracy).
score = parallel_model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
eGPU w/ 1080Ti w/ TF1.8
Using TensorFlow backend.
x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples
Train on 60000 samples, validate on 10000 samples
Epoch 1/12
2018-05-11 04:51:10.335377: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:859] OS X does not support NUMA - returning NUMA node zero
2018-05-11 04:51:10.336052: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1356] Found device 0 with properties:
name: GeForce GTX 1080 Ti major: 6 minor: 1 memoryClockRate(GHz): 1.645
pciBusID: 0000:c4:00.0
totalMemory: 11.00GiB freeMemory: 9.37GiB
2018-05-11 04:51:10.336075: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1435] Adding visible gpu devices: 0
2018-05-11 04:51:11.063831: I tensorflow/core/common_runtime/gpu/gpu_device.cc:923] Device interconnect StreamExecutor with strength 1 edge matrix:
2018-05-11 04:51:11.063856: I tensorflow/core/common_runtime/gpu/gpu_device.cc:929] 0
2018-05-11 04:51:11.063864: I tensorflow/core/common_runtime/gpu/gpu_device.cc:942] 0: N
2018-05-11 04:51:11.064768: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1053] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 9065 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:c4:00.0, compute capability: 6.1)
2018-05-11 04:51:11.534095: E tensorflow/core/grappler/clusters/utils.cc:127] Not found: TF GPU device with id 0 was not registered
2018-05-11 04:51:11.579370: E tensorflow/core/grappler/clusters/utils.cc:127] Not found: TF GPU device with id 0 was not registered
2018-05-11 04:51:11.644835: E tensorflow/core/grappler/clusters/utils.cc:127] Not found: TF GPU device with id 0 was not registered
59264/60000 [============================>.] - ETA: 0s - loss: 0.2604 - acc: 0.9208
2018-05-11 04:51:19.228205: E tensorflow/core/grappler/clusters/utils.cc:127] Not found: TF GPU device with id 0 was not registered
60000/60000 [==============================] - 10s 159us/step - loss: 0.2588 - acc: 0.9213 - val_loss: 0.0561 - val_acc: 0.9829
Epoch 2/12
60000/60000 [==============================] - 4s 66us/step - loss: 0.0875 - acc: 0.9742 - val_loss: 0.0427 - val_acc: 0.9857
Epoch 3/12
60000/60000 [==============================] - 4s 67us/step - loss: 0.0662 - acc: 0.9803 - val_loss: 0.0356 - val_acc: 0.9875
Epoch 4/12
60000/60000 [==============================] - 4s 67us/step - loss: 0.0549 - acc: 0.9839 - val_loss: 0.0325 - val_acc: 0.9896
Epoch 5/12
60000/60000 [==============================] - 4s 67us/step - loss: 0.0471 - acc: 0.9859 - val_loss: 0.0309 - val_acc: 0.9901
Epoch 6/12
60000/60000 [==============================] - 4s 68us/step - loss: 0.0421 - acc: 0.9873 - val_loss: 0.0297 - val_acc: 0.9903
Epoch 7/12
60000/60000 [==============================] - 4s 67us/step - loss: 0.0377 - acc: 0.9884 - val_loss: 0.0259 - val_acc: 0.9908
Epoch 8/12
60000/60000 [==============================] - 4s 67us/step - loss: 0.0357 - acc: 0.9883 - val_loss: 0.0285 - val_acc: 0.9908
Epoch 9/12
60000/60000 [==============================] - 4s 68us/step - loss: 0.0315 - acc: 0.9904 - val_loss: 0.0327 - val_acc: 0.9901
Epoch 10/12
60000/60000 [==============================] - 4s 67us/step - loss: 0.0288 - acc: 0.9910 - val_loss: 0.0272 - val_acc: 0.9911
Epoch 11/12
60000/60000 [==============================] - 4s 67us/step - loss: 0.0282 - acc: 0.9912 - val_loss: 0.0248 - val_acc: 0.9920
Epoch 12/12
60000/60000 [==============================] - 4s 66us/step - loss: 0.0255 - acc: 0.9923 - val_loss: 0.0283 - val_acc: 0.9912
Test loss: 0.028254894825743667
Test accuracy: 0.9912
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
pip version