fo40225 · May 1, 2020 06:57
diff --git a/RadeonVII-tensorflow.txt b/RadeonVII-tensorflow.txt
 # Host setup notes for running the ROCm TensorFlow container.
 # These are manual steps to be run in order; they are not a single script,
 # because the machine is rebooted partway through.

 # Refresh the package index and install libnuma-dev before adding the ROCm repo.
 sudo apt update
 sudo apt install libnuma-dev
 # Add the ROCm repository signing key to apt's trusted keys.
 # NOTE(review): the key is downloaded over plain http:// and piped straight
 # into apt-key; consider https if the mirror supports it -- TODO confirm.
 wget -q -O - http://repo.radeon.com/rocm/apt/debian/rocm.gpg.key | sudo apt-key add -
 # Register the ROCm apt source (amd64 only, "xenial" suite) in its own list file.
 echo 'deb [arch=amd64] http://repo.radeon.com/rocm/apt/debian/ xenial main' | sudo tee /etc/apt/sources.list.d/rocm.list
 # Pick up the newly added source, then install the rocm-dkms package.
 sudo apt update
 sudo apt install rocm-dkms
 # Append the current login user to the "video" group (-a keeps existing groups).
 sudo usermod -a -G video $LOGNAME
 # Reboot; presumably needed so the new kernel module and group membership
 # take effect -- TODO confirm.
 sudo reboot

 # After the reboot: put the ROCm tool directories at the front of PATH.
 # This only affects the current shell session.
 export PATH=/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64:$PATH

 # Run the ROCm / OpenCL query tools; presumably a sanity check that the GPU
 # is visible to the stack before continuing -- TODO confirm.
 rocminfo
 clinfo
 rocm-smi

 # install docker
 # NOTE(review): this executes the downloaded script directly with sh,
 # without inspecting it first.
 curl -sSL https://get.docker.com/ | sh

 # Shorthand for launching a container interactively with:
 #   - host networking and host IPC, 16G of shared memory
 #   - the /dev/kfd and /dev/dri device nodes passed through
 #   - the "video" group added, SYS_PTRACE capability, seccomp unconfined
 #   - $HOME/dockerx mounted at /dockerx
 # The alias body is single-quoted, so $HOME is expanded when the alias is
 # used rather than when it is defined.
 alias drun='sudo docker run -it --network=host --device=/dev/kfd --device=/dev/dri --ipc=host --shm-size 16G --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -v $HOME/dockerx:/dockerx'

 # Start the ROCm 3.1 / TensorFlow 2.1 / Python 3 image using the alias above.
 drun rocm/tensorflow:rocm3.1-tf2.1-python3

 root@ubuntu:/root# cd benchmarks/scripts/tf_cnn_benchmarks/
 root@ubuntu:/root/benchmarks/scripts/tf_cnn_benchmarks# python3 tf_cnn_benchmarks.py \
 >  --data_format=NHWC --batch_size=64 --num_batches=100 \
 >  --model=resnet50 --optimizer=sgd --variable_update=replicated \
 >  --use_fp16=False --distortions=False --local_parameter_device=gpu \
 >  --num_gpus=1 --display_every=10
 WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/compat/v2_compat.py:88: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.
 Instructions for updating:
 non-resource variables are not supported in the long term
 2020-04-24 14:56:17.566798: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA
 2020-04-24 14:56:17.570801: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3593430000 Hz
 2020-04-24 14:56:17.570880: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x434cce0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
 2020-04-24 14:56:17.570893: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
 2020-04-24 14:56:17.571748: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libhip_hcc.so
 2020-04-24 14:56:17.598090: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1573] Found device 0 with properties:
 pciBusID: 0000:0a:00.0 name: Vega 20     ROCm AMD GPU ISA: gfx906
 coreClock: 1.801GHz coreCount: 60 deviceMemorySize: 15.98GiB deviceMemoryBandwidth: -1B/s
 2020-04-24 14:56:17.623219: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so
 2020-04-24 14:56:17.624137: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so
 2020-04-24 14:56:17.624872: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocfft.so
 2020-04-24 14:56:17.625008: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocrand.so
 2020-04-24 14:56:17.625081: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0
 2020-04-24 14:56:17.625151: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix:
 2020-04-24 14:56:17.625160: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102]      0
 2020-04-24 14:56:17.625165: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0:   N
 2020-04-24 14:56:17.625290: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 15306 MB memory) -> physical GPU (device: 0, name: Vega 20, pci bus id: 0000:0a:00.0)
 TensorFlow:  2.1
 Model:       resnet50
 Dataset:     imagenet (synthetic)
 Mode:        training
 SingleSess:  False
 Batch size:  64 global
             64 per device
 Num batches: 100
 Num epochs:  0.00
 Devices:     ['/gpu:0']
 NUMA bind:   False
 Data format: NHWC
 Optimizer:   sgd
 Variables:   replicated
 AllReduce:   None
 ==========
 Generating training model
 WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use `tf.keras.layers.Conv2D` instead.
 W0424 14:56:17.650676 139734056560448 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use `tf.keras.layers.Conv2D` instead.
 WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
 Instructions for updating:
 Please use `layer.__call__` method instead.
 W0424 14:56:17.652061 139734056560448 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
 Instructions for updating:
 Please use `layer.__call__` method instead.
 WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use keras.layers.MaxPooling2D instead.
 W0424 14:56:17.666535 139734056560448 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use keras.layers.MaxPooling2D instead.
 Initializing graph
 WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2267: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
 Instructions for updating:
 Please switch to tf.train.MonitoredTrainingSession
 W0424 14:56:18.737229 139734056560448 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2267: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
 Instructions for updating:
 Please switch to tf.train.MonitoredTrainingSession
 2020-04-24 14:56:18.924195: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1573] Found device 0 with properties:
 pciBusID: 0000:0a:00.0 name: Vega 20     ROCm AMD GPU ISA: gfx906
 coreClock: 1.801GHz coreCount: 60 deviceMemorySize: 15.98GiB deviceMemoryBandwidth: -1B/s
 2020-04-24 14:56:18.924246: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so
 2020-04-24 14:56:18.924254: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so
 2020-04-24 14:56:18.924260: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocfft.so
 2020-04-24 14:56:18.924266: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocrand.so
 2020-04-24 14:56:18.924307: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0
 2020-04-24 14:56:18.924317: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix:
 2020-04-24 14:56:18.924320: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102]      0
 2020-04-24 14:56:18.924322: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0:   N
 2020-04-24 14:56:18.924367: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 15306 MB memory) -> physical GPU (device: 0, name: Vega 20, pci bus id: 0000:0a:00.0)
 INFO:tensorflow:Running local_init_op.
 I0424 14:56:23.854503 139734056560448 session_manager.py:504] Running local_init_op.
 INFO:tensorflow:Done running local_init_op.
 I0424 14:56:23.888032 139734056560448 session_manager.py:507] Done running local_init_op.
 Running warm up
 2020-04-24 14:56:24.726787: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so
 2020-04-24 14:56:24.732073: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so
 MIOpen(HIP): Warning [GetValues] Perf db record is obsolete or corrupt: 2,32,2,32,1,1. Performance may degrade.
 MIOpen(HIP): Warning [GetValues] Perf db record is obsolete or corrupt: 1,16,2,4,4,1,8,4. Performance may degrade.
 Done warm up
 Step    Img/sec total_loss
 1       images/sec: 283.5 +/- 0.0 (jitter = 0.0)        7.695
 10      images/sec: 283.4 +/- 0.2 (jitter = 0.4)        8.122
 20      images/sec: 283.3 +/- 0.1 (jitter = 0.4)        8.039
 30      images/sec: 283.3 +/- 0.1 (jitter = 0.4)        7.974
 40      images/sec: 283.3 +/- 0.1 (jitter = 0.4)        7.808
 50      images/sec: 283.2 +/- 0.1 (jitter = 0.4)        7.640
 60      images/sec: 283.2 +/- 0.1 (jitter = 0.4)        7.610
 70      images/sec: 283.2 +/- 0.1 (jitter = 0.4)        8.212
 80      images/sec: 283.1 +/- 0.1 (jitter = 0.4)        7.701
 90      images/sec: 283.1 +/- 0.1 (jitter = 0.4)        7.823
 100     images/sec: 283.0 +/- 0.1 (jitter = 0.4)        7.737
 ----------------------------------------------------------------
 total images/sec: 282.96
 ----------------------------------------------------------------
 root@ubuntu:/root/benchmarks/scripts/tf_cnn_benchmarks# python3 tf_cnn_benchmarks.py \
 >  --data_format=NHWC --batch_size=128 --num_batches=100 \
 >  --model=resnet50 --optimizer=sgd --variable_update=replicated \
 >  --use_fp16=False --distortions=False --local_parameter_device=gpu \
 >  --num_gpus=1 --display_every=10
 WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/compat/v2_compat.py:88: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.
 Instructions for updating:
 non-resource variables are not supported in the long term
 2020-04-24 15:05:14.210578: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA
 2020-04-24 15:05:14.214322: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3593430000 Hz
 2020-04-24 15:05:14.214402: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x3754c40 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
 2020-04-24 15:05:14.214412: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
 2020-04-24 15:05:14.215307: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libhip_hcc.so
 2020-04-24 15:05:14.241858: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1573] Found device 0 with properties:
 pciBusID: 0000:0a:00.0 name: Vega 20     ROCm AMD GPU ISA: gfx906
 coreClock: 1.801GHz coreCount: 60 deviceMemorySize: 15.98GiB deviceMemoryBandwidth: -1B/s
 2020-04-24 15:05:14.267235: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so
 2020-04-24 15:05:14.268097: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so
 2020-04-24 15:05:14.268802: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocfft.so
 2020-04-24 15:05:14.268928: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocrand.so
 2020-04-24 15:05:14.269003: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0
 2020-04-24 15:05:14.269070: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix:
 2020-04-24 15:05:14.269076: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102]      0
 2020-04-24 15:05:14.269079: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0:   N
 2020-04-24 15:05:14.269180: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 15306 MB memory) -> physical GPU (device: 0, name: Vega 20, pci bus id: 0000:0a:00.0)
 TensorFlow:  2.1
 Model:       resnet50
 Dataset:     imagenet (synthetic)
 Mode:        training
 SingleSess:  False
 Batch size:  128 global
             128 per device
 Num batches: 100
 Num epochs:  0.01
 Devices:     ['/gpu:0']
 NUMA bind:   False
 Data format: NHWC
 Optimizer:   sgd
 Variables:   replicated
 AllReduce:   None
 ==========
 Generating training model
 WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use `tf.keras.layers.Conv2D` instead.
 W0424 15:05:14.293022 140533809186624 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use `tf.keras.layers.Conv2D` instead.
 WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
 Instructions for updating:
 Please use `layer.__call__` method instead.
 W0424 15:05:14.293999 140533809186624 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
 Instructions for updating:
 Please use `layer.__call__` method instead.
 WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use keras.layers.MaxPooling2D instead.
 W0424 15:05:14.308464 140533809186624 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use keras.layers.MaxPooling2D instead.
 Initializing graph
 WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2267: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
 Instructions for updating:
 Please switch to tf.train.MonitoredTrainingSession
 W0424 15:05:15.393247 140533809186624 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2267: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
 Instructions for updating:
 Please switch to tf.train.MonitoredTrainingSession
 2020-04-24 15:05:15.582519: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1573] Found device 0 with properties:
 pciBusID: 0000:0a:00.0 name: Vega 20     ROCm AMD GPU ISA: gfx906
 coreClock: 1.801GHz coreCount: 60 deviceMemorySize: 15.98GiB deviceMemoryBandwidth: -1B/s
 2020-04-24 15:05:15.582579: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so
 2020-04-24 15:05:15.582590: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so
 2020-04-24 15:05:15.582598: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocfft.so
 2020-04-24 15:05:15.582607: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocrand.so
 2020-04-24 15:05:15.582652: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0
 2020-04-24 15:05:15.582664: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix:
 2020-04-24 15:05:15.582668: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102]      0
 2020-04-24 15:05:15.582672: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0:   N
 2020-04-24 15:05:15.582721: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 15306 MB memory) -> physical GPU (device: 0, name: Vega 20, pci bus id: 0000:0a:00.0)
 INFO:tensorflow:Running local_init_op.
 I0424 15:05:20.376449 140533809186624 session_manager.py:504] Running local_init_op.
 INFO:tensorflow:Done running local_init_op.
 I0424 15:05:20.408860 140533809186624 session_manager.py:507] Done running local_init_op.
 Running warm up
 2020-04-24 15:05:21.253770: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so
 2020-04-24 15:05:21.264017: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so
 MIOpen(HIP): Warning [GetValues] Perf db record is obsolete or corrupt: 1,32,1,16,1,1. Performance may degrade.
 MIOpen(HIP): Warning [GetValues] Perf db record is obsolete or corrupt: 1,16,2,4,4,1,8,4. Performance may degrade.
 Done warm up
 Step    Img/sec total_loss
 1       images/sec: 293.3 +/- 0.0 (jitter = 0.0)        7.973
 10      images/sec: 293.2 +/- 0.1 (jitter = 0.2)        7.882
 20      images/sec: 293.0 +/- 0.1 (jitter = 0.3)        7.909
 30      images/sec: 292.9 +/- 0.1 (jitter = 0.3)        7.804
 40      images/sec: 292.9 +/- 0.1 (jitter = 0.3)        7.991
 50      images/sec: 292.8 +/- 0.1 (jitter = 0.3)        7.870
 60      images/sec: 292.7 +/- 0.1 (jitter = 0.3)        7.937
 70      images/sec: 292.7 +/- 0.0 (jitter = 0.3)        7.759
 80      images/sec: 292.7 +/- 0.0 (jitter = 0.4)        7.825
 90      images/sec: 292.7 +/- 0.0 (jitter = 0.4)        7.951
 100     images/sec: 292.6 +/- 0.0 (jitter = 0.4)        7.785
 ----------------------------------------------------------------
 total images/sec: 292.60
 ----------------------------------------------------------------
 root@ubuntu:/root/benchmarks/scripts/tf_cnn_benchmarks# python3 tf_cnn_benchmarks.py \
 >  --data_format=NHWC --batch_size=128 --num_batches=100 \
 >  --model=resnet50 --optimizer=sgd --variable_update=replicated \
 >  --use_fp16=True --distortions=False --local_parameter_device=gpu \
 >  --num_gpus=1 --display_every=10
 WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/compat/v2_compat.py:88: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.
 Instructions for updating:
 non-resource variables are not supported in the long term
 2020-04-24 15:12:40.404005: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA
 2020-04-24 15:12:40.407715: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3593430000 Hz
 2020-04-24 15:12:40.407788: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4339fd0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
 2020-04-24 15:12:40.407798: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
 2020-04-24 15:12:40.408653: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libhip_hcc.so
 2020-04-24 15:12:40.435867: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1573] Found device 0 with properties:
 pciBusID: 0000:0a:00.0 name: Vega 20     ROCm AMD GPU ISA: gfx906
 coreClock: 1.801GHz coreCount: 60 deviceMemorySize: 15.98GiB deviceMemoryBandwidth: -1B/s
 2020-04-24 15:12:40.461057: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so
 2020-04-24 15:12:40.461940: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so
 2020-04-24 15:12:40.462691: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocfft.so
 2020-04-24 15:12:40.462831: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocrand.so
 2020-04-24 15:12:40.462918: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0
 2020-04-24 15:12:40.462987: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix:
 2020-04-24 15:12:40.462994: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102]      0
 2020-04-24 15:12:40.462999: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0:   N
 2020-04-24 15:12:40.463119: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 15306 MB memory) -> physical GPU (device: 0, name: Vega 20, pci bus id: 0000:0a:00.0)
 TensorFlow:  2.1
 Model:       resnet50
 Dataset:     imagenet (synthetic)
 Mode:        training
 SingleSess:  False
 Batch size:  128 global
             128 per device
 Num batches: 100
 Num epochs:  0.01
 Devices:     ['/gpu:0']
 NUMA bind:   False
 Data format: NHWC
 Optimizer:   sgd
 Variables:   replicated
 AllReduce:   None
 ==========
 Generating training model
 WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use `tf.keras.layers.Conv2D` instead.
 W0424 15:12:40.486687 140033467434816 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use `tf.keras.layers.Conv2D` instead.
 WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
 Instructions for updating:
 Please use `layer.__call__` method instead.
 W0424 15:12:40.487761 140033467434816 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
 Instructions for updating:
 Please use `layer.__call__` method instead.
 WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use keras.layers.MaxPooling2D instead.
 W0424 15:12:40.501842 140033467434816 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use keras.layers.MaxPooling2D instead.
 Initializing graph
 WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2267: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
 Instructions for updating:
 Please switch to tf.train.MonitoredTrainingSession
 W0424 15:12:41.723154 140033467434816 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2267: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
 Instructions for updating:
 Please switch to tf.train.MonitoredTrainingSession
 2020-04-24 15:12:41.931076: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1573] Found device 0 with properties:
 pciBusID: 0000:0a:00.0 name: Vega 20     ROCm AMD GPU ISA: gfx906
 coreClock: 1.801GHz coreCount: 60 deviceMemorySize: 15.98GiB deviceMemoryBandwidth: -1B/s
 2020-04-24 15:12:41.931126: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so
 2020-04-24 15:12:41.931133: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so
 2020-04-24 15:12:41.931138: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocfft.so
 2020-04-24 15:12:41.931143: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocrand.so
 2020-04-24 15:12:41.931182: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0
 2020-04-24 15:12:41.931193: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix:
 2020-04-24 15:12:41.931196: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102]      0
 2020-04-24 15:12:41.931199: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0:   N
 2020-04-24 15:12:41.931243: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 15306 MB memory) -> physical GPU (device: 0, name: Vega 20, pci bus id: 0000:0a:00.0)
 INFO:tensorflow:Running local_init_op.
 I0424 15:12:46.866378 140033467434816 session_manager.py:504] Running local_init_op.
 INFO:tensorflow:Done running local_init_op.
 I0424 15:12:46.903136 140033467434816 session_manager.py:507] Done running local_init_op.
 Running warm up
 2020-04-24 15:12:47.946906: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so
 2020-04-24 15:12:47.964438: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so
 warning: <unknown>:0:0: loop not unrolled: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
 warning: <unknown>:0:0: loop not unrolled: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
 Done warm up
 Step    Img/sec total_loss
 1       images/sec: 424.6 +/- 0.0 (jitter = 0.0)        7.830
 10      images/sec: 424.2 +/- 0.3 (jitter = 0.9)        7.928
 20      images/sec: 424.0 +/- 0.2 (jitter = 1.0)        7.880
 30      images/sec: 423.9 +/- 0.1 (jitter = 0.8)        7.788
 40      images/sec: 423.7 +/- 0.1 (jitter = 0.8)        7.769
 50      images/sec: 423.7 +/- 0.1 (jitter = 0.8)        7.842
 60      images/sec: 423.6 +/- 0.1 (jitter = 0.7)        7.803
 70      images/sec: 423.4 +/- 0.1 (jitter = 0.9)        7.781
 80      images/sec: 423.4 +/- 0.1 (jitter = 0.9)        7.743
 90      images/sec: 423.3 +/- 0.1 (jitter = 0.9)        7.882
 100     images/sec: 423.2 +/- 0.1 (jitter = 1.0)        7.922
 ----------------------------------------------------------------
 total images/sec: 423.17
 ----------------------------------------------------------------
 root@ubuntu:/root/benchmarks/scripts/tf_cnn_benchmarks# python3 tf_cnn_benchmarks.py \
 >  --data_format=NHWC --batch_size=256 --num_batches=100 \
 >  --model=resnet50 --optimizer=sgd --variable_update=replicated \
 >  --use_fp16=True --distortions=False --local_parameter_device=gpu \
 >  --num_gpus=1 --display_every=10
 WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/compat/v2_compat.py:88: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.
 Instructions for updating:
 non-resource variables are not supported in the long term
 2020-04-24 15:18:27.043334: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA
 2020-04-24 15:18:27.047223: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3593430000 Hz
 2020-04-24 15:18:27.047298: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4bc35d0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
 2020-04-24 15:18:27.047310: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
 2020-04-24 15:18:27.048169: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libhip_hcc.so
 2020-04-24 15:18:27.075585: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1573] Found device 0 with properties:
 pciBusID: 0000:0a:00.0 name: Vega 20     ROCm AMD GPU ISA: gfx906
 coreClock: 1.801GHz coreCount: 60 deviceMemorySize: 15.98GiB deviceMemoryBandwidth: -1B/s
 2020-04-24 15:18:27.101432: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so
 2020-04-24 15:18:27.102312: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so
 2020-04-24 15:18:27.103067: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocfft.so
 2020-04-24 15:18:27.103203: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocrand.so
 2020-04-24 15:18:27.103287: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0
 2020-04-24 15:18:27.103364: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix:
 2020-04-24 15:18:27.103372: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102]      0
 2020-04-24 15:18:27.103376: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0:   N
 2020-04-24 15:18:27.103480: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 15306 MB memory) -> physical GPU (device: 0, name: Vega 20, pci bus id: 0000:0a:00.0)
 TensorFlow:  2.1
 Model:       resnet50
 Dataset:     imagenet (synthetic)
 Mode:        training
 SingleSess:  False
 Batch size:  256 global
             256 per device
 Num batches: 100
 Num epochs:  0.02
 Devices:     ['/gpu:0']
 NUMA bind:   False
 Data format: NHWC
 Optimizer:   sgd
 Variables:   replicated
 AllReduce:   None
 ==========
 Generating training model
 WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use `tf.keras.layers.Conv2D` instead.
 W0424 15:18:27.126988 139747064190784 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use `tf.keras.layers.Conv2D` instead.
 WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
 Instructions for updating:
 Please use `layer.__call__` method instead.
 W0424 15:18:27.128048 139747064190784 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
 Instructions for updating:
 Please use `layer.__call__` method instead.
 WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use keras.layers.MaxPooling2D instead.
 W0424 15:18:27.143269 139747064190784 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use keras.layers.MaxPooling2D instead.
 Initializing graph
 WARNING:tensorflow:From /root/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2267: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
 Instructions for updating:
 Please switch to tf.train.MonitoredTrainingSession
 W0424 15:18:28.423727 139747064190784 deprecation.py:323] From /root/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2267: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
 Instructions for updating:
 Please switch to tf.train.MonitoredTrainingSession
 2020-04-24 15:18:28.641239: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1573] Found device 0 with properties:
 pciBusID: 0000:0a:00.0 name: Vega 20     ROCm AMD GPU ISA: gfx906
 coreClock: 1.801GHz coreCount: 60 deviceMemorySize: 15.98GiB deviceMemoryBandwidth: -1B/s
 2020-04-24 15:18:28.641290: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so
 2020-04-24 15:18:28.641298: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so
 2020-04-24 15:18:28.641304: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocfft.so
 2020-04-24 15:18:28.641311: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocrand.so
 2020-04-24 15:18:28.641354: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0
 2020-04-24 15:18:28.641364: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix:
 2020-04-24 15:18:28.641368: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102]      0
 2020-04-24 15:18:28.641370: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0:   N
 2020-04-24 15:18:28.641417: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 15306 MB memory) -> physical GPU (device: 0, name: Vega 20, pci bus id: 0000:0a:00.0)
 INFO:tensorflow:Running local_init_op.
 I0424 15:18:33.533480 139747064190784 session_manager.py:504] Running local_init_op.
 INFO:tensorflow:Done running local_init_op.
 I0424 15:18:33.571991 139747064190784 session_manager.py:507] Done running local_init_op.
 Running warm up
 2020-04-24 15:18:34.626119: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library librocblas.so
 2020-04-24 15:18:34.640218: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so
 Done warm up
 Step    Img/sec total_loss
 1       images/sec: 444.6 +/- 0.0 (jitter = 0.0)        7.930
 10      images/sec: 445.0 +/- 0.2 (jitter = 0.5)        7.887
 20      images/sec: 444.8 +/- 0.1 (jitter = 0.6)        7.833
 30      images/sec: 444.6 +/- 0.1 (jitter = 0.5)        7.849
 40      images/sec: 444.4 +/- 0.1 (jitter = 0.6)        7.932
 50      images/sec: 444.2 +/- 0.1 (jitter = 0.7)        7.951
 60      images/sec: 444.1 +/- 0.1 (jitter = 0.7)        7.858
 70      images/sec: 444.1 +/- 0.1 (jitter = 0.6)        7.816
 80      images/sec: 444.0 +/- 0.1 (jitter = 0.5)        7.830
 90      images/sec: 443.9 +/- 0.1 (jitter = 0.5)        7.865
 100     images/sec: 443.8 +/- 0.1 (jitter = 0.5)        7.846
 ----------------------------------------------------------------
 total images/sec: 443.81
 ----------------------------------------------------------------
 root@ubuntu:/root/benchmarks/scripts/tf_cnn_benchmarks#