Last active
May 1, 2020 06:57
-
-
Save fo40225/3abde68ef95b0678ff9154059c23cd3d to your computer and use it in GitHub Desktop.
Radeon VII ROCm tensorflow
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Host setup for a Radeon VII: install the ROCm stack from AMD's apt
# repository, then install Docker and start the ROCm TensorFlow container.

sudo apt update
sudo apt install libnuma-dev

# Fetch the repository signing key over HTTPS so the key that will be trusted
# for package verification cannot be swapped in transit.
wget -q -O - https://repo.radeon.com/rocm/apt/debian/rocm.gpg.key | sudo apt-key add -
echo 'deb [arch=amd64] http://repo.radeon.com/rocm/apt/debian/ xenial main' | sudo tee /etc/apt/sources.list.d/rocm.list
sudo apt update
sudo apt install rocm-dkms

# Add the current user to the "video" group.
sudo usermod -a -G video "$LOGNAME"

# NOTE(review): execution stops here when the machine reboots; the remaining
# commands are presumably meant to be run in a fresh session afterwards.
sudo reboot

export PATH="/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64:$PATH"
rocminfo
clinfo
rocm-smi

# install docker
# -f makes curl exit non-zero on an HTTP error instead of piping an error
# page into sh.
curl -fsSL https://get.docker.com/ | sh

# Launch a container with the GPU device nodes (/dev/kfd, /dev/dri) passed
# through and $HOME/dockerx mounted at /dockerx.
# Defined as a function rather than an alias so it also works when these
# lines are executed as a script (bash does not expand aliases in
# non-interactive shells). Arguments: image name followed by any extra
# `docker run` arguments.
drun() {
  sudo docker run -it --network=host --device=/dev/kfd --device=/dev/dri \
    --ipc=host --shm-size 16G --group-add video --cap-add=SYS_PTRACE \
    --security-opt seccomp=unconfined -v "$HOME/dockerx:/dockerx" "$@"
}

drun rocm/tensorflow:rocm3.1-tf2.1-python3
root@ubuntu:/root# cd benchmarks/scripts/tf_cnn_benchmarks/
root@ubuntu:/root/benchmarks/scripts/tf_cnn_benchmarks# python3 tf_cnn_benchmarks.py \
> --data_format=NHWC --batch_size=64 --num_batches=100 \
> --model=resnet50 --optimizer=sgd --variable_update=replicated \
> --use_fp16=False --distortions=False --local_parameter_device=gpu \
> --num_gpus=1 --display_every=10
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/compat/v2_compat.py:88: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
non-resource variables are not supported in the long term | |
2020-04-24 14:56:17.566798: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA | |
2020-04-24 14:56:17.570801: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3593430000 Hz | |
2020-04-24 14:56:17.570880: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x434cce0 initialized for platform Host (this does not guarantee that XLA will be used). Devices: | |
2020-04-24 14:56:17.570893: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version | |
2020-04-24 14:56:17.571748: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libhip_hcc.so | |
2020-04-24 14:56:17.598090: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1573] Found device 0 with properties: | |
pciBusID: 0000:0a:00.0 name: Vega 20 ROCm AMD GPU ISA: gfx906 | |
coreClock: 1.801GHz coreCount: 60 deviceMemorySize: 15.98GiB deviceMemoryBandwidth: -1B/s | |
2020-04-24 14:56:17.623219: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so | |
2020-04-24 14:56:17.624137: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so | |
2020-04-24 14:56:17.624872: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocfft.so | |
2020-04-24 14:56:17.625008: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocrand.so | |
2020-04-24 14:56:17.625081: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0 | |
2020-04-24 14:56:17.625151: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: | |
2020-04-24 14:56:17.625160: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 0 | |
2020-04-24 14:56:17.625165: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0: N | |
2020-04-24 14:56:17.625290: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 15306 MB memory) -> physical GPU (device: 0, name: Vega 20, pci bus id: 0000:0a:00.0) | |
TensorFlow: 2.1 | |
Model: resnet50 | |
Dataset: imagenet (synthetic) | |
Mode: training | |
SingleSess: False | |
Batch size: 64 global | |
64 per device | |
Num batches: 100 | |
Num epochs: 0.00 | |
Devices: ['/gpu:0'] | |
NUMA bind: False | |
Data format: NHWC | |
Optimizer: sgd | |
Variables: replicated | |
AllReduce: None | |
========== | |
Generating training model | |
WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Use `tf.keras.layers.Conv2D` instead. | |
W0424 14:56:17.650676 139734056560448 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Use `tf.keras.layers.Conv2D` instead. | |
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Please use `layer.__call__` method instead. | |
W0424 14:56:17.652061 139734056560448 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Please use `layer.__call__` method instead. | |
WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Use keras.layers.MaxPooling2D instead. | |
W0424 14:56:17.666535 139734056560448 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Use keras.layers.MaxPooling2D instead. | |
Initializing graph | |
WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2267: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Please switch to tf.train.MonitoredTrainingSession | |
W0424 14:56:18.737229 139734056560448 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2267: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Please switch to tf.train.MonitoredTrainingSession | |
2020-04-24 14:56:18.924195: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1573] Found device 0 with properties: | |
pciBusID: 0000:0a:00.0 name: Vega 20 ROCm AMD GPU ISA: gfx906 | |
coreClock: 1.801GHz coreCount: 60 deviceMemorySize: 15.98GiB deviceMemoryBandwidth: -1B/s | |
2020-04-24 14:56:18.924246: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so | |
2020-04-24 14:56:18.924254: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so | |
2020-04-24 14:56:18.924260: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocfft.so | |
2020-04-24 14:56:18.924266: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocrand.so | |
2020-04-24 14:56:18.924307: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0 | |
2020-04-24 14:56:18.924317: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: | |
2020-04-24 14:56:18.924320: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 0 | |
2020-04-24 14:56:18.924322: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0: N | |
2020-04-24 14:56:18.924367: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 15306 MB memory) -> physical GPU (device: 0, name: Vega 20, pci bus id: 0000:0a:00.0) | |
INFO:tensorflow:Running local_init_op. | |
I0424 14:56:23.854503 139734056560448 session_manager.py:504] Running local_init_op. | |
INFO:tensorflow:Done running local_init_op. | |
I0424 14:56:23.888032 139734056560448 session_manager.py:507] Done running local_init_op. | |
Running warm up | |
2020-04-24 14:56:24.726787: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so | |
2020-04-24 14:56:24.732073: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so | |
MIOpen(HIP): Warning [GetValues] Perf db record is obsolete or corrupt: 2,32,2,32,1,1. Performance may degrade. | |
MIOpen(HIP): Warning [GetValues] Perf db record is obsolete or corrupt: 1,16,2,4,4,1,8,4. Performance may degrade. | |
Done warm up | |
Step Img/sec total_loss | |
1 images/sec: 283.5 +/- 0.0 (jitter = 0.0) 7.695 | |
10 images/sec: 283.4 +/- 0.2 (jitter = 0.4) 8.122 | |
20 images/sec: 283.3 +/- 0.1 (jitter = 0.4) 8.039 | |
30 images/sec: 283.3 +/- 0.1 (jitter = 0.4) 7.974 | |
40 images/sec: 283.3 +/- 0.1 (jitter = 0.4) 7.808 | |
50 images/sec: 283.2 +/- 0.1 (jitter = 0.4) 7.640 | |
60 images/sec: 283.2 +/- 0.1 (jitter = 0.4) 7.610 | |
70 images/sec: 283.2 +/- 0.1 (jitter = 0.4) 8.212 | |
80 images/sec: 283.1 +/- 0.1 (jitter = 0.4) 7.701 | |
90 images/sec: 283.1 +/- 0.1 (jitter = 0.4) 7.823 | |
100 images/sec: 283.0 +/- 0.1 (jitter = 0.4) 7.737 | |
---------------------------------------------------------------- | |
total images/sec: 282.96 | |
---------------------------------------------------------------- | |
root@ubuntu:/root/benchmarks/scripts/tf_cnn_benchmarks# python3 tf_cnn_benchmarks.py \
> --data_format=NHWC --batch_size=128 --num_batches=100 \
> --model=resnet50 --optimizer=sgd --variable_update=replicated \
> --use_fp16=False --distortions=False --local_parameter_device=gpu \
> --num_gpus=1 --display_every=10
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/compat/v2_compat.py:88: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
non-resource variables are not supported in the long term | |
2020-04-24 15:05:14.210578: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA | |
2020-04-24 15:05:14.214322: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3593430000 Hz | |
2020-04-24 15:05:14.214402: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x3754c40 initialized for platform Host (this does not guarantee that XLA will be used). Devices: | |
2020-04-24 15:05:14.214412: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version | |
2020-04-24 15:05:14.215307: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libhip_hcc.so | |
2020-04-24 15:05:14.241858: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1573] Found device 0 with properties: | |
pciBusID: 0000:0a:00.0 name: Vega 20 ROCm AMD GPU ISA: gfx906 | |
coreClock: 1.801GHz coreCount: 60 deviceMemorySize: 15.98GiB deviceMemoryBandwidth: -1B/s | |
2020-04-24 15:05:14.267235: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so | |
2020-04-24 15:05:14.268097: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so | |
2020-04-24 15:05:14.268802: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocfft.so | |
2020-04-24 15:05:14.268928: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocrand.so | |
2020-04-24 15:05:14.269003: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0 | |
2020-04-24 15:05:14.269070: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: | |
2020-04-24 15:05:14.269076: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 0 | |
2020-04-24 15:05:14.269079: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0: N | |
2020-04-24 15:05:14.269180: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 15306 MB memory) -> physical GPU (device: 0, name: Vega 20, pci bus id: 0000:0a:00.0) | |
TensorFlow: 2.1 | |
Model: resnet50 | |
Dataset: imagenet (synthetic) | |
Mode: training | |
SingleSess: False | |
Batch size: 128 global | |
128 per device | |
Num batches: 100 | |
Num epochs: 0.01 | |
Devices: ['/gpu:0'] | |
NUMA bind: False | |
Data format: NHWC | |
Optimizer: sgd | |
Variables: replicated | |
AllReduce: None | |
========== | |
Generating training model | |
WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Use `tf.keras.layers.Conv2D` instead. | |
W0424 15:05:14.293022 140533809186624 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Use `tf.keras.layers.Conv2D` instead. | |
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Please use `layer.__call__` method instead. | |
W0424 15:05:14.293999 140533809186624 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Please use `layer.__call__` method instead. | |
WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Use keras.layers.MaxPooling2D instead. | |
W0424 15:05:14.308464 140533809186624 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Use keras.layers.MaxPooling2D instead. | |
Initializing graph | |
WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2267: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Please switch to tf.train.MonitoredTrainingSession | |
W0424 15:05:15.393247 140533809186624 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2267: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Please switch to tf.train.MonitoredTrainingSession | |
2020-04-24 15:05:15.582519: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1573] Found device 0 with properties: | |
pciBusID: 0000:0a:00.0 name: Vega 20 ROCm AMD GPU ISA: gfx906 | |
coreClock: 1.801GHz coreCount: 60 deviceMemorySize: 15.98GiB deviceMemoryBandwidth: -1B/s | |
2020-04-24 15:05:15.582579: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so | |
2020-04-24 15:05:15.582590: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so | |
2020-04-24 15:05:15.582598: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocfft.so | |
2020-04-24 15:05:15.582607: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocrand.so | |
2020-04-24 15:05:15.582652: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0 | |
2020-04-24 15:05:15.582664: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: | |
2020-04-24 15:05:15.582668: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 0 | |
2020-04-24 15:05:15.582672: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0: N | |
2020-04-24 15:05:15.582721: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 15306 MB memory) -> physical GPU (device: 0, name: Vega 20, pci bus id: 0000:0a:00.0) | |
INFO:tensorflow:Running local_init_op. | |
I0424 15:05:20.376449 140533809186624 session_manager.py:504] Running local_init_op. | |
INFO:tensorflow:Done running local_init_op. | |
I0424 15:05:20.408860 140533809186624 session_manager.py:507] Done running local_init_op. | |
Running warm up | |
2020-04-24 15:05:21.253770: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so | |
2020-04-24 15:05:21.264017: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so | |
MIOpen(HIP): Warning [GetValues] Perf db record is obsolete or corrupt: 1,32,1,16,1,1. Performance may degrade. | |
MIOpen(HIP): Warning [GetValues] Perf db record is obsolete or corrupt: 1,16,2,4,4,1,8,4. Performance may degrade. | |
Done warm up | |
Step Img/sec total_loss | |
1 images/sec: 293.3 +/- 0.0 (jitter = 0.0) 7.973 | |
10 images/sec: 293.2 +/- 0.1 (jitter = 0.2) 7.882 | |
20 images/sec: 293.0 +/- 0.1 (jitter = 0.3) 7.909 | |
30 images/sec: 292.9 +/- 0.1 (jitter = 0.3) 7.804 | |
40 images/sec: 292.9 +/- 0.1 (jitter = 0.3) 7.991 | |
50 images/sec: 292.8 +/- 0.1 (jitter = 0.3) 7.870 | |
60 images/sec: 292.7 +/- 0.1 (jitter = 0.3) 7.937 | |
70 images/sec: 292.7 +/- 0.0 (jitter = 0.3) 7.759 | |
80 images/sec: 292.7 +/- 0.0 (jitter = 0.4) 7.825 | |
90 images/sec: 292.7 +/- 0.0 (jitter = 0.4) 7.951 | |
100 images/sec: 292.6 +/- 0.0 (jitter = 0.4) 7.785 | |
---------------------------------------------------------------- | |
total images/sec: 292.60 | |
---------------------------------------------------------------- | |
root@ubuntu:/root/benchmarks/scripts/tf_cnn_benchmarks# python3 tf_cnn_benchmarks.py \
> --data_format=NHWC --batch_size=128 --num_batches=100 \
> --model=resnet50 --optimizer=sgd --variable_update=replicated \
> --use_fp16=True --distortions=False --local_parameter_device=gpu \
> --num_gpus=1 --display_every=10
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/compat/v2_compat.py:88: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
non-resource variables are not supported in the long term | |
2020-04-24 15:12:40.404005: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA | |
2020-04-24 15:12:40.407715: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3593430000 Hz | |
2020-04-24 15:12:40.407788: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4339fd0 initialized for platform Host (this does not guarantee that XLA will be used). Devices: | |
2020-04-24 15:12:40.407798: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version | |
2020-04-24 15:12:40.408653: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libhip_hcc.so | |
2020-04-24 15:12:40.435867: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1573] Found device 0 with properties: | |
pciBusID: 0000:0a:00.0 name: Vega 20 ROCm AMD GPU ISA: gfx906 | |
coreClock: 1.801GHz coreCount: 60 deviceMemorySize: 15.98GiB deviceMemoryBandwidth: -1B/s | |
2020-04-24 15:12:40.461057: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so | |
2020-04-24 15:12:40.461940: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so | |
2020-04-24 15:12:40.462691: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocfft.so | |
2020-04-24 15:12:40.462831: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocrand.so | |
2020-04-24 15:12:40.462918: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0 | |
2020-04-24 15:12:40.462987: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: | |
2020-04-24 15:12:40.462994: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 0 | |
2020-04-24 15:12:40.462999: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0: N | |
2020-04-24 15:12:40.463119: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 15306 MB memory) -> physical GPU (device: 0, name: Vega 20, pci bus id: 0000:0a:00.0) | |
TensorFlow: 2.1 | |
Model: resnet50 | |
Dataset: imagenet (synthetic) | |
Mode: training | |
SingleSess: False | |
Batch size: 128 global | |
128 per device | |
Num batches: 100 | |
Num epochs: 0.01 | |
Devices: ['/gpu:0'] | |
NUMA bind: False | |
Data format: NHWC | |
Optimizer: sgd | |
Variables: replicated | |
AllReduce: None | |
========== | |
Generating training model | |
WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Use `tf.keras.layers.Conv2D` instead. | |
W0424 15:12:40.486687 140033467434816 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Use `tf.keras.layers.Conv2D` instead. | |
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Please use `layer.__call__` method instead. | |
W0424 15:12:40.487761 140033467434816 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Please use `layer.__call__` method instead. | |
WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Use keras.layers.MaxPooling2D instead. | |
W0424 15:12:40.501842 140033467434816 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Use keras.layers.MaxPooling2D instead. | |
Initializing graph | |
WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2267: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Please switch to tf.train.MonitoredTrainingSession | |
W0424 15:12:41.723154 140033467434816 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2267: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Please switch to tf.train.MonitoredTrainingSession | |
2020-04-24 15:12:41.931076: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1573] Found device 0 with properties: | |
pciBusID: 0000:0a:00.0 name: Vega 20 ROCm AMD GPU ISA: gfx906 | |
coreClock: 1.801GHz coreCount: 60 deviceMemorySize: 15.98GiB deviceMemoryBandwidth: -1B/s | |
2020-04-24 15:12:41.931126: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so | |
2020-04-24 15:12:41.931133: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so | |
2020-04-24 15:12:41.931138: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocfft.so | |
2020-04-24 15:12:41.931143: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocrand.so | |
2020-04-24 15:12:41.931182: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0 | |
2020-04-24 15:12:41.931193: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: | |
2020-04-24 15:12:41.931196: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 0 | |
2020-04-24 15:12:41.931199: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0: N | |
2020-04-24 15:12:41.931243: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 15306 MB memory) -> physical GPU (device: 0, name: Vega 20, pci bus id: 0000:0a:00.0) | |
INFO:tensorflow:Running local_init_op. | |
I0424 15:12:46.866378 140033467434816 session_manager.py:504] Running local_init_op. | |
INFO:tensorflow:Done running local_init_op. | |
I0424 15:12:46.903136 140033467434816 session_manager.py:507] Done running local_init_op. | |
Running warm up | |
2020-04-24 15:12:47.946906: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so | |
2020-04-24 15:12:47.964438: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so | |
warning: <unknown>:0:0: loop not unrolled: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering | |
warning: <unknown>:0:0: loop not unrolled: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering | |
Done warm up | |
Step Img/sec total_loss | |
1 images/sec: 424.6 +/- 0.0 (jitter = 0.0) 7.830 | |
10 images/sec: 424.2 +/- 0.3 (jitter = 0.9) 7.928 | |
20 images/sec: 424.0 +/- 0.2 (jitter = 1.0) 7.880 | |
30 images/sec: 423.9 +/- 0.1 (jitter = 0.8) 7.788 | |
40 images/sec: 423.7 +/- 0.1 (jitter = 0.8) 7.769 | |
50 images/sec: 423.7 +/- 0.1 (jitter = 0.8) 7.842 | |
60 images/sec: 423.6 +/- 0.1 (jitter = 0.7) 7.803 | |
70 images/sec: 423.4 +/- 0.1 (jitter = 0.9) 7.781 | |
80 images/sec: 423.4 +/- 0.1 (jitter = 0.9) 7.743 | |
90 images/sec: 423.3 +/- 0.1 (jitter = 0.9) 7.882 | |
100 images/sec: 423.2 +/- 0.1 (jitter = 1.0) 7.922 | |
---------------------------------------------------------------- | |
total images/sec: 423.17 | |
---------------------------------------------------------------- | |
root@ubuntu:/root/benchmarks/scripts/tf_cnn_benchmarks# python3 tf_cnn_benchmarks.py \
> --data_format=NHWC --batch_size=256 --num_batches=100 \
> --model=resnet50 --optimizer=sgd --variable_update=replicated \
> --use_fp16=True --distortions=False --local_parameter_device=gpu \
> --num_gpus=1 --display_every=10
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/compat/v2_compat.py:88: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
non-resource variables are not supported in the long term | |
2020-04-24 15:18:27.043334: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA | |
2020-04-24 15:18:27.047223: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3593430000 Hz | |
2020-04-24 15:18:27.047298: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4bc35d0 initialized for platform Host (this does not guarantee that XLA will be used). Devices: | |
2020-04-24 15:18:27.047310: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version | |
2020-04-24 15:18:27.048169: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libhip_hcc.so | |
2020-04-24 15:18:27.075585: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1573] Found device 0 with properties: | |
pciBusID: 0000:0a:00.0 name: Vega 20 ROCm AMD GPU ISA: gfx906 | |
coreClock: 1.801GHz coreCount: 60 deviceMemorySize: 15.98GiB deviceMemoryBandwidth: -1B/s | |
2020-04-24 15:18:27.101432: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so | |
2020-04-24 15:18:27.102312: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so | |
2020-04-24 15:18:27.103067: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocfft.so | |
2020-04-24 15:18:27.103203: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocrand.so | |
2020-04-24 15:18:27.103287: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0 | |
2020-04-24 15:18:27.103364: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: | |
2020-04-24 15:18:27.103372: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 0 | |
2020-04-24 15:18:27.103376: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0: N | |
2020-04-24 15:18:27.103480: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 15306 MB memory) -> physical GPU (device: 0, name: Vega 20, pci bus id: 0000:0a:00.0) | |
TensorFlow: 2.1 | |
Model: resnet50 | |
Dataset: imagenet (synthetic) | |
Mode: training | |
SingleSess: False | |
Batch size: 256 global | |
256 per device | |
Num batches: 100 | |
Num epochs: 0.02 | |
Devices: ['/gpu:0'] | |
NUMA bind: False | |
Data format: NHWC | |
Optimizer: sgd | |
Variables: replicated | |
AllReduce: None | |
========== | |
Generating training model | |
WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Use `tf.keras.layers.Conv2D` instead. | |
W0424 15:18:27.126988 139747064190784 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Use `tf.keras.layers.Conv2D` instead. | |
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Please use `layer.__call__` method instead. | |
W0424 15:18:27.128048 139747064190784 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Please use `layer.__call__` method instead. | |
WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Use keras.layers.MaxPooling2D instead. | |
W0424 15:18:27.143269 139747064190784 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Use keras.layers.MaxPooling2D instead. | |
Initializing graph | |
WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2267: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Please switch to tf.train.MonitoredTrainingSession | |
W0424 15:18:28.423727 139747064190784 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2267: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Please switch to tf.train.MonitoredTrainingSession | |
2020-04-24 15:18:28.641239: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1573] Found device 0 with properties: | |
pciBusID: 0000:0a:00.0 name: Vega 20 ROCm AMD GPU ISA: gfx906 | |
coreClock: 1.801GHz coreCount: 60 deviceMemorySize: 15.98GiB deviceMemoryBandwidth: -1B/s | |
2020-04-24 15:18:28.641290: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so | |
2020-04-24 15:18:28.641298: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so | |
2020-04-24 15:18:28.641304: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocfft.so | |
2020-04-24 15:18:28.641311: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocrand.so | |
2020-04-24 15:18:28.641354: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0 | |
2020-04-24 15:18:28.641364: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: | |
2020-04-24 15:18:28.641368: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 0 | |
2020-04-24 15:18:28.641370: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0: N | |
2020-04-24 15:18:28.641417: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 15306 MB memory) -> physical GPU (device: 0, name: Vega 20, pci bus id: 0000:0a:00.0) | |
INFO:tensorflow:Running local_init_op. | |
I0424 15:18:33.533480 139747064190784 session_manager.py:504] Running local_init_op. | |
INFO:tensorflow:Done running local_init_op. | |
I0424 15:18:33.571991 139747064190784 session_manager.py:507] Done running local_init_op. | |
Running warm up | |
2020-04-24 15:18:34.626119: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so | |
2020-04-24 15:18:34.640218: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so | |
Done warm up | |
Step Img/sec total_loss | |
1 images/sec: 444.6 +/- 0.0 (jitter = 0.0) 7.930 | |
10 images/sec: 445.0 +/- 0.2 (jitter = 0.5) 7.887 | |
20 images/sec: 444.8 +/- 0.1 (jitter = 0.6) 7.833 | |
30 images/sec: 444.6 +/- 0.1 (jitter = 0.5) 7.849 | |
40 images/sec: 444.4 +/- 0.1 (jitter = 0.6) 7.932 | |
50 images/sec: 444.2 +/- 0.1 (jitter = 0.7) 7.951 | |
60 images/sec: 444.1 +/- 0.1 (jitter = 0.7) 7.858 | |
70 images/sec: 444.1 +/- 0.1 (jitter = 0.6) 7.816 | |
80 images/sec: 444.0 +/- 0.1 (jitter = 0.5) 7.830 | |
90 images/sec: 443.9 +/- 0.1 (jitter = 0.5) 7.865 | |
100 images/sec: 443.8 +/- 0.1 (jitter = 0.5) 7.846 | |
---------------------------------------------------------------- | |
total images/sec: 443.81 | |
---------------------------------------------------------------- | |
root@ubuntu:/root/benchmarks/scripts/tf_cnn_benchmarks# |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment