fo40225 · October 11, 2019 12:01
diff --git a/twcc-1gpu.txt b/twcc-1gpu.txt
 lscpu
 Architecture:        x86_64
 CPU op-mode(s):      32-bit, 64-bit
 Byte Order:          Little Endian
 CPU(s):              36
 On-line CPU(s) list: 0-35
 Thread(s) per core:  1
 Core(s) per socket:  18
 Socket(s):           2
 NUMA node(s):        2
 Vendor ID:           GenuineIntel
 CPU family:          6
 Model:               85
 Model name:          Intel(R) Xeon(R) Gold 6154 CPU @ 3.00GHz
 Stepping:            4
 CPU MHz:             3000.000
 BogoMIPS:            6000.00
 Virtualization:      VT-x
 L1d cache:           32K
 L1i cache:           32K
 L2 cache:            1024K
 L3 cache:            25344K
 NUMA node0 CPU(s):   0-17
 NUMA node1 CPU(s):   18-35
 Flags:               fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb cat_l3 cdp_l3 intel_ppin intel_pt mba tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm mpx rdt_a avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local ibpb ibrs stibp dtherm ida arat pln pts hwp_epp pku ospke spec_ctrl intel_stibp

 free -h
              total        used        free      shared  buff/cache   available
 Mem:           754G         43G        673G        1.8G         37G        707G
 Swap:          4.0G        7.0M        4.0G

 nvidia-smi
 Fri Oct 11 18:54:11 2019
 +-----------------------------------------------------------------------------+
 | NVIDIA-SMI 418.67       Driver Version: 418.67       CUDA Version: 10.1     |
 |-------------------------------+----------------------+----------------------+
 | GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
 | Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
 |===============================+======================+======================|
 |   0  Tesla V100-SXM2...  On   | 00000000:DC:00.0 Off |                    0 |
 | N/A   32C    P0    43W / 300W |      0MiB / 32480MiB |      0%      Default |
 +-------------------------------+----------------------+----------------------+

 +-----------------------------------------------------------------------------+
 | Processes:                                                       GPU Memory |
 |  GPU       PID   Type   Process name                             Usage      |
 |=============================================================================|
 |  No running processes found                                                 |
 +-----------------------------------------------------------------------------+

 cd Flops/version3/binaries-linux/

 chmod +x 2006-Core2
 ./2006-Core2
 Running Core 2 tuned binary with 1 thread...

 Single-Precision - 128-bit SSE - Add/Sub
    GFlops = 29.344
    Result = 3.71957e+06

 Double-Precision - 128-bit SSE2 - Add/Sub
    GFlops = 14.656
    Result = 1.85451e+06

 Single-Precision - 128-bit SSE - Multiply
    GFlops = 29.328
    Result = 3.72638e+06

 Double-Precision - 128-bit SSE2 - Multiply
    GFlops = 14.712
    Result = 1.87148e+06

 Single-Precision - 128-bit SSE - Multiply + Add
    GFlops = 28.752
    Result = 3.01666e+06

 Double-Precision - 128-bit SSE2 - Multiply + Add
    GFlops = 14.28
    Result = 1.51542e+06


 Running Core 2 tuned binary with 36 thread(s)...

 Single-Precision - 128-bit SSE - Add/Sub
    GFlops = 89.632
    Result = 1.132e+07

 Double-Precision - 128-bit SSE2 - Add/Sub
    GFlops = 45.408
    Result = 5.75012e+06

 Single-Precision - 128-bit SSE - Multiply
    GFlops = 88.368
    Result = 1.11822e+07

 Double-Precision - 128-bit SSE2 - Multiply
    GFlops = 45.552
    Result = 5.78486e+06

 Single-Precision - 128-bit SSE - Multiply + Add
    GFlops = 94.032
    Result = 9.9791e+06

 Double-Precision - 128-bit SSE2 - Multiply + Add
    GFlops = 42.24
    Result = 4.45278e+06



 chmod +x 2013-Haswell
 ./2013-Haswell
 Running Haswell tuned binary with 1 thread...

 Single-Precision - 128-bit AVX - Add/Sub
    GFlops = 29.344
    Result = 3.69438e+06

 Double-Precision - 128-bit AVX - Add/Sub
    GFlops = 14.704
    Result = 1.8698e+06

 Single-Precision - 128-bit AVX - Multiply
    GFlops = 29.376
    Result = 3.74266e+06

 Double-Precision - 128-bit AVX - Multiply
    GFlops = 14.64
    Result = 1.8557e+06

 Single-Precision - 128-bit AVX - Multiply + Add
    GFlops = 29.376
    Result = 3.11566e+06

 Double-Precision - 128-bit AVX - Multiply + Add
    GFlops = 14.688
    Result = 1.55971e+06

 Single-Precision - 128-bit FMA3 - Fused Multiply Add
    GFlops = 58.848
    Result = 3.72792e+06

 Double-Precision - 128-bit FMA3 - Fused Multiply Add
    GFlops = 29.376
    Result = 1.86401e+06

 Single-Precision - 256-bit AVX - Add/Sub
    GFlops = 52.864
    Result = 6.71211e+06

 Double-Precision - 256-bit AVX - Add/Sub
    GFlops = 26.528
    Result = 3.33561e+06

 Single-Precision - 256-bit AVX - Multiply
    GFlops = 52.8
    Result = 6.70557e+06

 Double-Precision - 256-bit AVX - Multiply
    GFlops = 26.496
    Result = 3.35622e+06

 Single-Precision - 256-bit AVX - Multiply + Add
    GFlops = 52.608
    Result = 5.56602e+06

 Double-Precision - 256-bit AVX - Multiply + Add
    GFlops = 26.496
    Result = 2.80959e+06

 Single-Precision - 256-bit FMA3 - Fused Multiply Add
    GFlops = 106.176
    Result = 6.7291e+06

 Double-Precision - 256-bit FMA3 - Fused Multiply Add
    GFlops = 52.992
    Result = 3.35764e+06


 Running Haswell tuned binary with 36 thread(s)...

 Single-Precision - 128-bit AVX - Add/Sub
    GFlops = 92.096
    Result = 1.1681e+07

 Double-Precision - 128-bit AVX - Add/Sub
    GFlops = 45.904
    Result = 5.79656e+06

 Single-Precision - 128-bit AVX - Multiply
    GFlops = 85.92
    Result = 1.09319e+07

 Double-Precision - 128-bit AVX - Multiply
    GFlops = 47.976
    Result = 6.07363e+06

 Single-Precision - 128-bit AVX - Multiply + Add
    GFlops = 87.312
    Result = 9.20247e+06

 Double-Precision - 128-bit AVX - Multiply + Add
    GFlops = 47.544
    Result = 5.04776e+06

 Single-Precision - 128-bit FMA3 - Fused Multiply Add
    GFlops = 179.328
    Result = 1.13838e+07

 Double-Precision - 128-bit FMA3 - Fused Multiply Add
    GFlops = 93.456
    Result = 5.9269e+06

 Single-Precision - 256-bit AVX - Add/Sub
    GFlops = 163.2
    Result = 2.07379e+07

 Double-Precision - 256-bit AVX - Add/Sub
    GFlops = 80.224
    Result = 1.01561e+07

 Single-Precision - 256-bit AVX - Multiply
    GFlops = 174.336
    Result = 2.21584e+07

 Double-Precision - 256-bit AVX - Multiply
    GFlops = 80.832
    Result = 1.02321e+07

 Single-Precision - 256-bit AVX - Multiply + Add
    GFlops = 173.088
    Result = 1.8295e+07

 Double-Precision - 256-bit AVX - Multiply + Add
    GFlops = 80.928
    Result = 8.54397e+06

 Single-Precision - 256-bit FMA3 - Fused Multiply Add
    GFlops = 344.256
    Result = 2.18061e+07

 Double-Precision - 256-bit FMA3 - Fused Multiply Add
    GFlops = 163.104
    Result = 1.03515e+07



 chmod +x 2017-SkylakePurley
 ./2017-SkylakePurley
 Running Skylake Purley tuned binary with 1 thread...

 Single-Precision - 128-bit AVX - Add/Sub
    GFlops = 25.152
    Result = 3.17194e+06

 Double-Precision - 128-bit AVX - Add/Sub
    GFlops = 12.688
    Result = 1.61561e+06

 Single-Precision - 128-bit AVX - Multiply
    GFlops = 25.2
    Result = 3.19821e+06

 Double-Precision - 128-bit AVX - Multiply
    GFlops = 12.792
    Result = 1.62096e+06

 Single-Precision - 128-bit AVX - Multiply + Add
    GFlops = 24.864
    Result = 2.61679e+06

 Double-Precision - 128-bit AVX - Multiply + Add
    GFlops = 12.672
    Result = 1.34854e+06

 Single-Precision - 128-bit FMA3 - Fused Multiply Add
    GFlops = 50.112
    Result = 3.19578e+06

 Double-Precision - 128-bit FMA3 - Fused Multiply Add
    GFlops = 25.344
    Result = 1.6004e+06

 Single-Precision - 256-bit AVX - Add/Sub
    GFlops = 50.88
    Result = 6.47318e+06

 Double-Precision - 256-bit AVX - Add/Sub
    GFlops = 24.96
    Result = 3.14443e+06

 Single-Precision - 256-bit AVX - Multiply
    GFlops = 49.92
    Result = 6.33637e+06

 Double-Precision - 256-bit AVX - Multiply
    GFlops = 24.288
    Result = 3.07061e+06

 Single-Precision - 256-bit AVX - Multiply + Add
    GFlops = 49.152
    Result = 5.22759e+06

 Double-Precision - 256-bit AVX - Multiply + Add
    GFlops = 24.144
    Result = 2.55807e+06

 Single-Precision - 256-bit FMA3 - Fused Multiply Add
    GFlops = 98.88
    Result = 6.25693e+06

 Double-Precision - 256-bit FMA3 - Fused Multiply Add
    GFlops = 48.384
    Result = 3.07167e+06

 Single-Precision - 512-bit AVX512 - Add/Sub
    GFlops = 96.768
    Result = 1.23208e+07

 Double-Precision - 512-bit AVX512 - Add/Sub
    GFlops = 48.512
    Result = 6.19886e+06

 Single-Precision - 512-bit AVX512 - Multiply
    GFlops = 95.232
    Result = 1.20551e+07

 Double-Precision - 512-bit AVX512 - Multiply
    GFlops = 49.344
    Result = 6.2305e+06

 Single-Precision - 512-bit AVX512 - Multiply + Add
    GFlops = 96
    Result = 1.00897e+07

 Double-Precision - 512-bit AVX512 - Multiply + Add
    GFlops = 49.536
    Result = 5.23386e+06

 Single-Precision - 512-bit AVX512 - Fused Multiply Add
    GFlops = 194.304
    Result = 1.244e+07

 Double-Precision - 512-bit AVX512 - Fused Multiply Add
    GFlops = 99.072
    Result = 6.31979e+06


 Running Skylake Purley tuned binary with 36 thread(s)...

 Single-Precision - 128-bit AVX - Add/Sub
    GFlops = 71.296
    Result = 9.09718e+06

 Double-Precision - 128-bit AVX - Add/Sub
    GFlops = 37.088
    Result = 4.69809e+06

 Single-Precision - 128-bit AVX - Multiply
    GFlops = 69.888
    Result = 8.88799e+06

 Double-Precision - 128-bit AVX - Multiply
    GFlops = 39.408
    Result = 5.01496e+06

 Single-Precision - 128-bit AVX - Multiply + Add
    GFlops = 71.904
    Result = 7.62634e+06

 Double-Precision - 128-bit AVX - Multiply + Add
    GFlops = 38.736
    Result = 4.0948e+06

 Single-Precision - 128-bit FMA3 - Fused Multiply Add
    GFlops = 147.936
    Result = 9.4313e+06

 Double-Precision - 128-bit FMA3 - Fused Multiply Add
    GFlops = 74.16
    Result = 4.70099e+06

 Single-Precision - 256-bit AVX - Add/Sub
    GFlops = 152
    Result = 1.92423e+07

 Double-Precision - 256-bit AVX - Add/Sub
    GFlops = 72.032
    Result = 9.17055e+06

 Single-Precision - 256-bit AVX - Multiply
    GFlops = 154.752
    Result = 1.96629e+07

 Double-Precision - 256-bit AVX - Multiply
    GFlops = 72.048
    Result = 9.12874e+06

 Single-Precision - 256-bit AVX - Multiply + Add
    GFlops = 153.6
    Result = 1.62654e+07

 Double-Precision - 256-bit AVX - Multiply + Add
    GFlops = 72.048
    Result = 7.58305e+06

 Single-Precision - 256-bit FMA3 - Fused Multiply Add
    GFlops = 297.984
    Result = 1.89411e+07

 Double-Precision - 256-bit FMA3 - Fused Multiply Add
    GFlops = 141.984
    Result = 9.04786e+06

 Single-Precision - 512-bit AVX512 - Add/Sub
    GFlops = 288.512
    Result = 3.65999e+07

 Double-Precision - 512-bit AVX512 - Add/Sub
    GFlops = 151.808
    Result = 1.93254e+07

 Single-Precision - 512-bit AVX512 - Multiply
    GFlops = 289.152
    Result = 3.67569e+07

 Double-Precision - 512-bit AVX512 - Multiply
    GFlops = 155.136
    Result = 1.97997e+07

 Single-Precision - 512-bit AVX512 - Multiply + Add
    GFlops = 291.84
    Result = 3.08638e+07

 Double-Precision - 512-bit AVX512 - Multiply + Add
    GFlops = 153.024
    Result = 1.61504e+07

 Single-Precision - 512-bit AVX512 - Fused Multiply Add
    GFlops = 593.664
    Result = 3.7629e+07

 Double-Precision - 512-bit AVX512 - Fused Multiply Add
    GFlops = 304.128
    Result = 1.92865e+07



 cd ~
 cd ibench
 python -m ibench run -b all --size small --runs 3 --file result.json
 Cholesky:
 Cholesky:   N = 10000
 Cholesky:   elapsed 7.803895 gflops 42.713712
 Cholesky:   elapsed 8.711130 gflops 38.265221
 Cholesky:   elapsed 8.520189 gflops 39.122765
 Cholesky:   gflops 39.122765
 Det:
 Det:   N = 15000
 Det:   elapsed 43.932513 gflops 51.214917
 Det:   elapsed 43.093639 gflops 52.211882
 Det:   elapsed 44.605141 gflops 50.442616
 Det:   gflops 51.214917
 Dot:
 Dot:   N = 5000
 Dot:   elapsed 4.646020 gflops 53.809501
 Dot:   elapsed 4.200154 gflops 59.521626
 Dot:   elapsed 4.407191 gflops 56.725471
 Dot:   gflops 56.725471
 Fft:
 Fft:   N = 520000
 Fft:   elapsed 15.660179 gflops 3.152531
 Fft:   elapsed 15.475255 gflops 3.190202
 Fft:   elapsed 15.518962 gflops 3.181218
 Fft:   gflops 3.181218
 Inv:
 Inv:   N = 10000
 Inv:   elapsed 93.488099 gflops 21.393097
 Inv:   elapsed 91.996843 gflops 21.739876
 Inv:   elapsed 92.393857 gflops 21.646461
 Inv:   gflops 21.646461
 Lu:
 Lu:   N = 20000
 Lu:   elapsed 79.697402 gflops 66.919789
 Lu:   elapsed 77.417065 gflops 68.890926
 Lu:   elapsed 77.712383 gflops 68.629131
 Lu:   gflops 68.629131
 Qr:
 Qr:   N = 5000
 Qr:   elapsed 18.434642 gflops 9.040949
 Qr:   elapsed 20.498234 gflops 8.130782
 Qr:   elapsed 19.997191 gflops 8.334504
 Qr:   gflops 8.334504
 Svd:
 Svd:   N = 5000
 Svd:   elapsed 277.560287 gflops 0.600470
 Svd:   elapsed 259.796209 gflops 0.641528
 Svd:   elapsed 286.507553 gflops 0.581718
 Svd:   gflops 0.600470

 cd ~
 cd fio/
 ./configure
 make -j 4
 ./fio --loops=5 --size=1g --runtime=10 --stonewall --direct=1 --group_reporting \
      --name=SeqQ32T1read --bs=128k --iodepth=32 --rw=read \
      --name=SeqQ32T1write --bs=128k --iodepth=32 --rw=write \
      --name=4kQ8T8read --bs=4k --iodepth=8 --numjobs=8 --rw=randread \
      --name=4kQ8T8write --bs=4k --iodepth=8 --numjobs=8 --rw=randwrite \
      --name=4kQ32T1read --bs=4k --iodepth=32 --rw=randread \
      --name=4kQ32T1write --bs=4k --iodepth=32 --rw=randwrite \
      --name=4kQ1T1read --bs=4k --iodepth=1 --rw=randread \
      --name=4kQ1T1write --bs=4k --iodepth=1 --rw=randwrite
 SeqQ32T1read: (g=0): rw=read, bs=(R) 128KiB-128KiB, (W) 128KiB-128KiB, (T) 128KiB-128KiB, ioengine=psync, iodepth=32
 SeqQ32T1write: (g=1): rw=write, bs=(R) 128KiB-128KiB, (W) 128KiB-128KiB, (T) 128KiB-128KiB, ioengine=psync, iodepth=32
 4kQ8T8read: (g=2): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=psync, iodepth=8
 ...
 4kQ8T8write: (g=3): rw=randwrite, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=psync, iodepth=8
 ...
 4kQ32T1read: (g=4): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=psync, iodepth=32
 4kQ32T1write: (g=5): rw=randwrite, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=psync, iodepth=32
 4kQ1T1read: (g=6): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=psync, iodepth=1
 4kQ1T1write: (g=7): rw=randwrite, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=psync, iodepth=1
 fio-3.16
 Starting 22 processes
 SeqQ32T1read: Laying out IO file (1 file / 1024MiB)
 SeqQ32T1write: Laying out IO file (1 file / 1024MiB)
 4kQ8T8read: Laying out IO file (1 file / 1024MiB)
 4kQ8T8read: Laying out IO file (1 file / 1024MiB)
 4kQ8T8read: Laying out IO file (1 file / 1024MiB)
 4kQ8T8read: Laying out IO file (1 file / 1024MiB)
 4kQ8T8read: Laying out IO file (1 file / 1024MiB)
 4kQ8T8read: Laying out IO file (1 file / 1024MiB)
 4kQ8T8read: Laying out IO file (1 file / 1024MiB)
 4kQ8T8read: Laying out IO file (1 file / 1024MiB)
 4kQ8T8write: Laying out IO file (1 file / 1024MiB)
 4kQ8T8write: Laying out IO file (1 file / 1024MiB)
 4kQ8T8write: Laying out IO file (1 file / 1024MiB)
 4kQ8T8write: Laying out IO file (1 file / 1024MiB)
 4kQ8T8write: Laying out IO file (1 file / 1024MiB)
 4kQ8T8write: Laying out IO file (1 file / 1024MiB)
 4kQ8T8write: Laying out IO file (1 file / 1024MiB)
 4kQ8T8write: Laying out IO file (1 file / 1024MiB)
 4kQ32T1read: Laying out IO file (1 file / 1024MiB)
 4kQ32T1write: Laying out IO file (1 file / 1024MiB)
 4kQ1T1read: Laying out IO file (1 file / 1024MiB)
 4kQ1T1write: Laying out IO file (1 file / 1024MiB)
 Jobs: 1 (f=1): [_(21),w(1)][27.6%][w=13.2MiB/s][w=3372 IOPS][eta 03m:27s]
 SeqQ32T1read: (groupid=0, jobs=1): err= 0: pid=5000: Fri Oct 11 19:40:24 2019
  read: IOPS=5919, BW=740MiB/s (776MB/s)(5120MiB/6919msec)
    clat (usec): min=112, max=32979, avg=168.33, stdev=359.98
     lat (usec): min=112, max=32979, avg=168.40, stdev=359.99
    clat percentiles (usec):
     |  1.00th=[  124],  5.00th=[  130], 10.00th=[  133], 20.00th=[  139],
     | 30.00th=[  145], 40.00th=[  147], 50.00th=[  151], 60.00th=[  157],
     | 70.00th=[  161], 80.00th=[  169], 90.00th=[  184], 95.00th=[  204],
     | 99.00th=[  334], 99.50th=[  685], 99.90th=[ 2212], 99.95th=[ 3294],
     | 99.99th=[21627]
   bw (  KiB/s): min=428819, max=761335, per=78.25%, avg=592905.92, stdev=83166.16, samples=13
   iops        : min= 3350, max= 5947, avg=4631.54, stdev=649.66, samples=13
  lat (usec)   : 250=98.08%, 500=1.24%, 750=0.30%, 1000=0.14%
  lat (msec)   : 2=0.13%, 4=0.08%, 10=0.02%, 20=0.01%, 50=0.01%
  cpu          : usr=0.56%, sys=9.45%, ctx=40991, majf=0, minf=56
  IO depths    : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0%
     submit    : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     complete  : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     issued rwts: total=40960,0,0,0 short=0,0,0,0 dropped=0,0,0,0
     latency   : target=0, window=0, percentile=100.00%, depth=32
 SeqQ32T1write: (groupid=1, jobs=1): err= 0: pid=5001: Fri Oct 11 19:40:24 2019
  write: IOPS=807, BW=101MiB/s (106MB/s)(1010MiB/10001msec); 0 zone resets
    clat (usec): min=503, max=121205, avg=1232.19, stdev=6340.71
     lat (usec): min=506, max=121208, avg=1236.82, stdev=6340.70
    clat percentiles (usec):
     |  1.00th=[   537],  5.00th=[   553], 10.00th=[   562], 20.00th=[   578],
     | 30.00th=[   603], 40.00th=[   627], 50.00th=[   644], 60.00th=[   660],
     | 70.00th=[   676], 80.00th=[   709], 90.00th=[   750], 95.00th=[   807],
     | 99.00th=[  4293], 99.50th=[ 65274], 99.90th=[ 82314], 99.95th=[ 86508],
     | 99.99th=[121111]
   bw (  KiB/s): min=67162, max=108093, per=73.96%, avg=76461.47, stdev=9147.45, samples=19
   iops        : min=  524, max=  844, avg=596.89, stdev=71.40, samples=19
  lat (usec)   : 750=90.00%, 1000=7.38%
  lat (msec)   : 2=1.11%, 4=0.48%, 10=0.22%, 20=0.01%, 50=0.02%
  lat (msec)   : 100=0.76%, 250=0.01%
  cpu          : usr=0.59%, sys=2.30%, ctx=8345, majf=0, minf=23
  IO depths    : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0%
     submit    : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     complete  : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     issued rwts: total=0,8078,0,0 short=0,0,0,0 dropped=0,0,0,0
     latency   : target=0, window=0, percentile=100.00%, depth=32
 4kQ8T8read: (groupid=2, jobs=8): err= 0: pid=5002: Fri Oct 11 19:40:24 2019
  read: IOPS=72.6k, BW=284MiB/s (297MB/s)(2837MiB/10001msec)
    clat (usec): min=58, max=164850, avg=109.57, stdev=641.81
     lat (usec): min=58, max=164850, avg=109.63, stdev=641.82
    clat percentiles (usec):
     |  1.00th=[   67],  5.00th=[   70], 10.00th=[   72], 20.00th=[   74],
     | 30.00th=[   76], 40.00th=[   78], 50.00th=[   79], 60.00th=[   81],
     | 70.00th=[   83], 80.00th=[   86], 90.00th=[   92], 95.00th=[  102],
     | 99.00th=[  355], 99.50th=[ 1004], 99.90th=[ 6915], 99.95th=[16057],
     | 99.99th=[27919]
   bw (  KiB/s): min=123635, max=280439, per=71.46%, avg=207555.74, stdev=5690.35, samples=152
   iops        : min=30907, max=70106, avg=51885.95, stdev=1422.55, samples=152
  lat (usec)   : 100=94.61%, 250=4.04%, 500=0.58%, 750=0.18%, 1000=0.09%
  lat (msec)   : 2=0.25%, 4=0.11%, 10=0.07%, 20=0.04%, 50=0.03%
  lat (msec)   : 100=0.01%, 250=0.01%
  cpu          : usr=1.14%, sys=10.69%, ctx=730017, majf=0, minf=314
  IO depths    : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0%
     submit    : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     complete  : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     issued rwts: total=726222,0,0,0 short=0,0,0,0 dropped=0,0,0,0
     latency   : target=0, window=0, percentile=100.00%, depth=8
 4kQ8T8write: (groupid=3, jobs=8): err= 0: pid=5010: Fri Oct 11 19:40:24 2019
  write: IOPS=11.6k, BW=45.2MiB/s (47.4MB/s)(452MiB/10003msec); 0 zone resets
    clat (usec): min=218, max=162185, avg=681.80, stdev=4813.04
     lat (usec): min=218, max=162186, avg=681.93, stdev=4813.07
    clat percentiles (usec):
     |  1.00th=[   241],  5.00th=[   251], 10.00th=[   258], 20.00th=[   265],
     | 30.00th=[   273], 40.00th=[   281], 50.00th=[   293], 60.00th=[   302],
     | 70.00th=[   314], 80.00th=[   334], 90.00th=[   371], 95.00th=[   453],
     | 99.00th=[  4178], 99.50th=[  8979], 99.90th=[ 79168], 99.95th=[ 84411],
     | 99.99th=[101188]
   bw (  KiB/s): min=  307, max=69405, per=69.09%, avg=31950.32, stdev=3719.93, samples=152
   iops        : min=   73, max=17348, avg=7984.47, stdev=930.00, samples=152
  lat (usec)   : 250=4.93%, 500=90.76%, 750=1.08%, 1000=0.49%
  lat (msec)   : 2=1.06%, 4=0.66%, 10=0.55%, 20=0.03%, 50=0.01%
  lat (msec)   : 100=0.42%, 250=0.01%
  cpu          : usr=0.26%, sys=2.33%, ctx=118168, majf=0, minf=163
  IO depths    : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0%
     submit    : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     complete  : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     issued rwts: total=0,115640,0,0 short=0,0,0,0 dropped=0,0,0,0
     latency   : target=0, window=0, percentile=100.00%, depth=8
 4kQ32T1read: (groupid=4, jobs=1): err= 0: pid=5018: Fri Oct 11 19:40:24 2019
  read: IOPS=482, BW=1931KiB/s (1978kB/s)(18.9MiB/10004msec)
    clat (usec): min=66, max=64042, avg=2069.20, stdev=6258.29
     lat (usec): min=66, max=64042, avg=2069.38, stdev=6258.31
    clat percentiles (usec):
     |  1.00th=[   77],  5.00th=[   84], 10.00th=[   90], 20.00th=[   98],
     | 30.00th=[  108], 40.00th=[  116], 50.00th=[  124], 60.00th=[  135],
     | 70.00th=[  169], 80.00th=[  383], 90.00th=[ 3720], 95.00th=[19006],
     | 99.00th=[29492], 99.50th=[32113], 99.90th=[43779], 99.95th=[49546],
     | 99.99th=[64226]
   bw (  KiB/s): min=  586, max= 3413, per=93.59%, avg=1807.21, stdev=848.27, samples=19
   iops        : min=  146, max=  853, avg=451.47, stdev=212.08, samples=19
  lat (usec)   : 100=22.32%, 250=55.20%, 500=5.38%, 750=5.13%, 1000=0.39%
  lat (msec)   : 2=0.68%, 4=1.12%, 10=1.51%, 20=3.93%, 50=4.29%
  lat (msec)   : 100=0.04%
  cpu          : usr=0.20%, sys=1.31%, ctx=4843, majf=0, minf=21
  IO depths    : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0%
     submit    : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     complete  : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     issued rwts: total=4830,0,0,0 short=0,0,0,0 dropped=0,0,0,0
     latency   : target=0, window=0, percentile=100.00%, depth=32
 4kQ32T1write: (groupid=5, jobs=1): err= 0: pid=5019: Fri Oct 11 19:40:24 2019
  write: IOPS=1713, BW=6855KiB/s (7019kB/s)(66.9MiB/10001msec); 0 zone resets
    clat (usec): min=223, max=115235, avg=582.50, stdev=4655.18
     lat (usec): min=223, max=115237, avg=582.64, stdev=4655.21
    clat percentiles (usec):
     |  1.00th=[   237],  5.00th=[   245], 10.00th=[   251], 20.00th=[   258],
     | 30.00th=[   265], 40.00th=[   269], 50.00th=[   277], 60.00th=[   281],
     | 70.00th=[   289], 80.00th=[   306], 90.00th=[   338], 95.00th=[   379],
     | 99.00th=[   996], 99.50th=[  3326], 99.90th=[ 81265], 99.95th=[ 86508],
     | 99.99th=[102237]
   bw (  KiB/s): min=   43, max=12808, per=87.37%, avg=5988.05, stdev=6025.43, samples=19
   iops        : min=   10, max= 3202, avg=1496.63, stdev=1506.53, samples=19
  lat (usec)   : 250=9.62%, 500=88.41%, 750=0.57%, 1000=0.41%
  lat (msec)   : 2=0.34%, 4=0.20%, 10=0.08%, 50=0.01%, 100=0.36%
  lat (msec)   : 250=0.01%
  cpu          : usr=0.42%, sys=2.60%, ctx=17404, majf=0, minf=21
  IO depths    : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0%
     submit    : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     complete  : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     issued rwts: total=0,17138,0,0 short=0,0,0,0 dropped=0,0,0,0
     latency   : target=0, window=0, percentile=100.00%, depth=32
 4kQ1T1read: (groupid=6, jobs=1): err= 0: pid=5020: Fri Oct 11 19:40:24 2019
  read: IOPS=271, BW=1085KiB/s (1111kB/s)(10.6MiB/10033msec)
    clat (usec): min=70, max=67543, avg=3683.18, stdev=8132.88
     lat (usec): min=70, max=67543, avg=3683.46, stdev=8132.87
    clat percentiles (usec):
     |  1.00th=[   83],  5.00th=[   93], 10.00th=[  102], 20.00th=[  116],
     | 30.00th=[  124], 40.00th=[  133], 50.00th=[  147], 60.00th=[  188],
     | 70.00th=[  392], 80.00th=[ 1336], 90.00th=[18482], 95.00th=[23987],
     | 99.00th=[31065], 99.50th=[33817], 99.90th=[44303], 99.95th=[55313],
     | 99.99th=[67634]
   bw (  KiB/s): min=  355, max= 1275, per=67.96%, avg=737.37, stdev=227.64, samples=19
   iops        : min=   88, max=  318, avg=184.00, stdev=56.84, samples=19
  lat (usec)   : 100=8.78%, 250=58.89%, 500=5.14%, 750=6.47%, 1000=0.18%
  lat (msec)   : 2=1.10%, 4=1.65%, 10=2.31%, 20=6.72%, 50=8.67%
  lat (msec)   : 100=0.07%
  cpu          : usr=0.14%, sys=0.97%, ctx=2731, majf=0, minf=21
  IO depths    : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0%
     submit    : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     complete  : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     issued rwts: total=2722,0,0,0 short=0,0,0,0 dropped=0,0,0,0
     latency   : target=0, window=0, percentile=100.00%, depth=1
 4kQ1T1write: (groupid=7, jobs=1): err= 0: pid=5021: Fri Oct 11 19:40:24 2019
  write: IOPS=1750, BW=7003KiB/s (7171kB/s)(68.4MiB/10001msec); 0 zone resets
    clat (usec): min=223, max=128570, avg=570.20, stdev=4567.29
     lat (usec): min=224, max=128570, avg=570.33, stdev=4567.33
    clat percentiles (usec):
     |  1.00th=[   241],  5.00th=[   249], 10.00th=[   253], 20.00th=[   260],
     | 30.00th=[   265], 40.00th=[   273], 50.00th=[   277], 60.00th=[   285],
     | 70.00th=[   293], 80.00th=[   306], 90.00th=[   330], 95.00th=[   363],
     | 99.00th=[   799], 99.50th=[  3458], 99.90th=[ 80217], 99.95th=[ 85459],
     | 99.99th=[106431]
   bw (  KiB/s): min=   56, max=13980, per=95.05%, avg=6655.42, stdev=6677.64, samples=19
   iops        : min=   14, max= 3495, avg=1663.84, stdev=1669.40, samples=19
  lat (usec)   : 250=6.37%, 500=92.18%, 750=0.37%, 1000=0.26%
  lat (msec)   : 2=0.22%, 4=0.11%, 10=0.12%, 100=0.34%, 250=0.02%
  cpu          : usr=0.36%, sys=2.63%, ctx=17777, majf=0, minf=21
  IO depths    : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0%
     submit    : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     complete  : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     issued rwts: total=0,17509,0,0 short=0,0,0,0 dropped=0,0,0,0
     latency   : target=0, window=0, percentile=100.00%, depth=1

 Run status group 0 (all jobs):
   READ: bw=740MiB/s (776MB/s), 740MiB/s-740MiB/s (776MB/s-776MB/s), io=5120MiB (5369MB), run=6919-6919msec

 Run status group 1 (all jobs):
  WRITE: bw=101MiB/s (106MB/s), 101MiB/s-101MiB/s (106MB/s-106MB/s), io=1010MiB (1059MB), run=10001-10001msec

 Run status group 2 (all jobs):
   READ: bw=284MiB/s (297MB/s), 284MiB/s-284MiB/s (297MB/s-297MB/s), io=2837MiB (2975MB), run=10001-10001msec

 Run status group 3 (all jobs):
  WRITE: bw=45.2MiB/s (47.4MB/s), 45.2MiB/s-45.2MiB/s (47.4MB/s-47.4MB/s), io=452MiB (474MB), run=10003-10003msec

 Run status group 4 (all jobs):
   READ: bw=1931KiB/s (1978kB/s), 1931KiB/s-1931KiB/s (1978kB/s-1978kB/s), io=18.9MiB (19.8MB), run=10004-10004msec

 Run status group 5 (all jobs):
  WRITE: bw=6855KiB/s (7019kB/s), 6855KiB/s-6855KiB/s (7019kB/s-7019kB/s), io=66.9MiB (70.2MB), run=10001-10001msec

 Run status group 6 (all jobs):
   READ: bw=1085KiB/s (1111kB/s), 1085KiB/s-1085KiB/s (1111kB/s-1111kB/s), io=10.6MiB (11.1MB), run=10033-10033msec

 Run status group 7 (all jobs):
  WRITE: bw=7003KiB/s (7171kB/s), 7003KiB/s-7003KiB/s (7171kB/s-7171kB/s), io=68.4MiB (71.7MB), run=10001-10001msec

 cd ~
 cd benchmarks/scripts/tf_cnn_benchmarks/
 python tf_cnn_benchmarks.py --data_format=NHWC --batch_size=64 --num_batches=100 --model=resnet50 --optimizer=sgd --variable_update=replicated --use_fp16=False --distortions=False --local_parameter_device=gpu --num_gpus=1 --display_every=10
 python tf_cnn_benchmarks.py --data_format=NHWC --batch_size=64 --num_batches=100 --model=resnet50 --optimizer=sgd --variable_update=replicated --use_fp16=True --distortions=False --local_parameter_device=gpu --num_gpus=1 --display_every=10
 python tf_cnn_benchmarks.py --data_format=NHWC --batch_size=128 --num_batches=100 --model=resnet50 --optimizer=sgd --variable_update=replicated --use_fp16=False --distortions=False --local_parameter_device=gpu --num_gpus=1 --display_every=10
 python tf_cnn_benchmarks.py --data_format=NHWC --batch_size=128 --num_batches=100 --model=resnet50 --optimizer=sgd --variable_update=replicated --use_fp16=True --distortions=False --local_parameter_device=gpu --num_gpus=1 --display_every=10
 python tf_cnn_benchmarks.py --data_format=NHWC --batch_size=256 --num_batches=100 --model=resnet50 --optimizer=sgd --variable_update=replicated --use_fp16=False --distortions=False --local_parameter_device=gpu --num_gpus=1 --display_every=10
 python tf_cnn_benchmarks.py --data_format=NHWC --batch_size=256 --num_batches=100 --model=resnet50 --optimizer=sgd --variable_update=replicated --use_fp16=True --distortions=False --local_parameter_device=gpu --num_gpus=1 --display_every=10
 python tf_cnn_benchmarks.py --data_format=NHWC --batch_size=512 --num_batches=100 --model=resnet50 --optimizer=sgd --variable_update=replicated --use_fp16=True --distortions=False --local_parameter_device=gpu --num_gpus=1 --display_every=10
 2019-10-11 19:49:43.848636: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 WARNING: Logging before flag parsing goes to stderr.
 W1011 19:49:45.524016 140294974826304 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/mobilenet.py:388: The name tf.nn.avg_pool is deprecated. Please use tf.nn.avg_pool2d instead.

 W1011 19:49:45.527195 140294974826304 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/experimental/deepspeech.py:131: The name tf.nn.rnn_cell.RNNCell is deprecated. Please use tf.compat.v1.nn.rnn_cell.RNNCell instead.

 W1011 19:49:45.621507 140294974826304 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:722: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

 W1011 19:49:45.621778 140294974826304 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:3503: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

 2019-10-11 19:49:45.627571: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3000000000 Hz
 2019-10-11 19:49:45.631243: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x363a690 executing computations on platform Host. Devices:
 2019-10-11 19:49:45.631281: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): <undefined>, <undefined>
 2019-10-11 19:49:45.633521: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcuda.so.1
 2019-10-11 19:49:45.850238: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x3637b30 executing computations on platform CUDA. Devices:
 2019-10-11 19:49:45.850309: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): Tesla V100-SXM2-32GB, Compute Capability 7.0
 2019-10-11 19:49:45.854013: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1640] Found device 0 with properties: 
 name: Tesla V100-SXM2-32GB major: 7 minor: 0 memoryClockRate(GHz): 1.53
 pciBusID: 0000:dc:00.0
 2019-10-11 19:49:45.854069: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 2019-10-11 19:49:45.856851: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcublas.so.10
 2019-10-11 19:49:45.859094: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcufft.so.10
 2019-10-11 19:49:45.859749: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcurand.so.10
 2019-10-11 19:49:45.861964: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusolver.so.10
 2019-10-11 19:49:45.863060: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusparse.so.10
 2019-10-11 19:49:45.867195: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudnn.so.7
 2019-10-11 19:49:45.871267: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1763] Adding visible gpu devices: 0
 2019-10-11 19:49:45.871306: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 2019-10-11 19:49:46.352103: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1181] Device interconnect StreamExecutor with strength 1 edge matrix:
 2019-10-11 19:49:46.352155: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1187]      0 
 2019-10-11 19:49:46.352165: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1200] 0:   N 
 2019-10-11 19:49:46.356543: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1326] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30466 MB memory) -> physical GPU (device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:dc:00.0, compute capability: 7.0)
 W1011 19:49:46.368288 140294974826304 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2762: The name tf.set_random_seed is deprecated. Please use tf.compat.v1.set_random_seed instead.

 W1011 19:49:46.368674 140294974826304 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2782: The name tf.train.get_or_create_global_step is deprecated. Please use tf.compat.v1.train.get_or_create_global_step instead.

 W1011 19:49:46.372409 140294974826304 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/variable_mgr.py:316: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.

 W1011 19:49:46.372658 140294974826304 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/model.py:245: The name tf.truncated_normal is deprecated. Please use tf.random.truncated_normal instead.

 W1011 19:49:46.381343 140294974826304 deprecation.py:323] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:129: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use `tf.keras.layers.Conv2D` instead.
 W1011 19:49:46.564811 140294974826304 deprecation.py:323] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:261: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use keras.layers.MaxPooling2D instead.
 W1011 19:49:48.371326 140294974826304 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:334: The name tf.nn.xw_plus_b is deprecated. Please use tf.compat.v1.nn.xw_plus_b instead.

 W1011 19:49:48.372861 140294974826304 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/model.py:319: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.

 W1011 19:49:48.380411 140294974826304 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/losses/losses_impl.py:121: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use tf.where in 2.0, which has the same broadcast rule as np.where
 W1011 19:49:48.824538 140294974826304 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/resnet_model.py:333: The name tf.train.piecewise_constant is deprecated. Please use tf.compat.v1.train.piecewise_constant instead.

 W1011 19:49:48.879001 140294974826304 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:1209: The name tf.train.GradientDescentOptimizer is deprecated. Please use tf.compat.v1.train.GradientDescentOptimizer instead.

 W1011 19:49:48.974564 140294974826304 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2090: The name tf.train.get_global_step is deprecated. Please use tf.compat.v1.train.get_global_step instead.

 W1011 19:49:48.977684 140294974826304 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2126: The name tf.summary.merge_all is deprecated. Please use tf.compat.v1.summary.merge_all instead.

 W1011 19:49:48.977875 140294974826304 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2183: The name tf.train.Saver is deprecated. Please use tf.compat.v1.train.Saver instead.

 W1011 19:49:49.133491 140294974826304 deprecation.py:323] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2238: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
 Instructions for updating:
 Please switch to tf.train.MonitoredTrainingSession
 2019-10-11 19:49:49.437887: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1640] Found device 0 with properties: 
 name: Tesla V100-SXM2-32GB major: 7 minor: 0 memoryClockRate(GHz): 1.53
 pciBusID: 0000:dc:00.0
 2019-10-11 19:49:49.437952: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 2019-10-11 19:49:49.438015: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcublas.so.10
 2019-10-11 19:49:49.438042: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcufft.so.10
 2019-10-11 19:49:49.438063: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcurand.so.10
 2019-10-11 19:49:49.438086: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusolver.so.10
 2019-10-11 19:49:49.438109: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusparse.so.10
 2019-10-11 19:49:49.438131: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudnn.so.7
 2019-10-11 19:49:49.442111: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1763] Adding visible gpu devices: 0
 2019-10-11 19:49:49.442189: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1181] Device interconnect StreamExecutor with strength 1 edge matrix:
 2019-10-11 19:49:49.442198: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1187]      0 
 2019-10-11 19:49:49.442205: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1200] 0:   N 
 2019-10-11 19:49:49.446339: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1326] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30466 MB memory) -> physical GPU (device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:dc:00.0, compute capability: 7.0)
 2019-10-11 19:49:49.954342: W tensorflow/compiler/jit/mark_for_compilation_pass.cc:1412] (One-time warning): Not using XLA:CPU for cluster because envvar TF_XLA_FLAGS=--tf_xla_cpu_global_jit was not set.  If you want XLA:CPU, either set that envvar, or use experimental_jit_scope to enable XLA:CPU.  To confirm that XLA is active, pass --vmodule=xla_compilation_cache=1 (as a proper command-line flag, not via TF_XLA_FLAGS) or set the envvar XLA_FLAGS=--xla_hlo_profile.
 I1011 19:49:50.317705 140294974826304 session_manager.py:500] Running local_init_op.
 I1011 19:49:50.384543 140294974826304 session_manager.py:502] Done running local_init_op.
 2019-10-11 19:49:51.728059: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcublas.so.10
 2019-10-11 19:49:52.018030: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudnn.so.7
 TensorFlow:  1.14
 Model:       resnet50
 Dataset:     imagenet (synthetic)
 Mode:        training
 SingleSess:  False
 Batch size:  64 global
             64 per device
 Num batches: 100
 Num epochs:  0.00
 Devices:     ['/gpu:0']
 NUMA bind:   False
 Data format: NHWC
 Optimizer:   sgd
 Variables:   replicated
 AllReduce:   None
 ==========
 Generating training model
 Initializing graph
 Running warm up
 Done warm up
 Step	Img/sec	total_loss
 1	images/sec: 373.4 +/- 0.0 (jitter = 0.0)	7.695
 10	images/sec: 377.0 +/- 0.4 (jitter = 0.7)	8.122
 20	images/sec: 375.9 +/- 0.5 (jitter = 1.4)	8.039
 30	images/sec: 375.4 +/- 0.4 (jitter = 2.7)	7.974
 40	images/sec: 375.2 +/- 0.4 (jitter = 3.1)	7.808
 50	images/sec: 375.1 +/- 0.3 (jitter = 3.0)	7.640
 60	images/sec: 375.0 +/- 0.4 (jitter = 2.8)	7.611
 70	images/sec: 375.1 +/- 0.4 (jitter = 3.0)	8.210
 80	images/sec: 375.2 +/- 0.3 (jitter = 3.0)	7.701
 90	images/sec: 375.3 +/- 0.3 (jitter = 3.0)	7.815
 100	images/sec: 375.4 +/- 0.3 (jitter = 2.8)	7.731
 ----------------------------------------------------------------
 total images/sec: 375.13
 ----------------------------------------------------------------
 2019-10-11 19:50:15.933781: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 WARNING: Logging before flag parsing goes to stderr.
 W1011 19:50:17.500197 139892125337408 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/mobilenet.py:388: The name tf.nn.avg_pool is deprecated. Please use tf.nn.avg_pool2d instead.

 W1011 19:50:17.503320 139892125337408 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/experimental/deepspeech.py:131: The name tf.nn.rnn_cell.RNNCell is deprecated. Please use tf.compat.v1.nn.rnn_cell.RNNCell instead.

 W1011 19:50:17.593432 139892125337408 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:722: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

 W1011 19:50:17.593695 139892125337408 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:3503: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

 2019-10-11 19:50:17.600024: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3000000000 Hz
 2019-10-11 19:50:17.604063: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x3e5d3a0 executing computations on platform Host. Devices:
 2019-10-11 19:50:17.604106: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): <undefined>, <undefined>
 2019-10-11 19:50:17.606471: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcuda.so.1
 2019-10-11 19:50:17.818224: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x3e5d700 executing computations on platform CUDA. Devices:
 2019-10-11 19:50:17.818291: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): Tesla V100-SXM2-32GB, Compute Capability 7.0
 2019-10-11 19:50:17.821605: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1640] Found device 0 with properties: 
 name: Tesla V100-SXM2-32GB major: 7 minor: 0 memoryClockRate(GHz): 1.53
 pciBusID: 0000:dc:00.0
 2019-10-11 19:50:17.821668: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 2019-10-11 19:50:17.824729: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcublas.so.10
 2019-10-11 19:50:17.827794: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcufft.so.10
 2019-10-11 19:50:17.828082: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcurand.so.10
 2019-10-11 19:50:17.830275: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusolver.so.10
 2019-10-11 19:50:17.831347: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusparse.so.10
 2019-10-11 19:50:17.835488: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudnn.so.7
 2019-10-11 19:50:17.839513: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1763] Adding visible gpu devices: 0
 2019-10-11 19:50:17.839547: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 2019-10-11 19:50:18.324283: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1181] Device interconnect StreamExecutor with strength 1 edge matrix:
 2019-10-11 19:50:18.324341: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1187]      0 
 2019-10-11 19:50:18.324353: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1200] 0:   N 
 2019-10-11 19:50:18.328653: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1326] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30466 MB memory) -> physical GPU (device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:dc:00.0, compute capability: 7.0)
 W1011 19:50:18.334842 139892125337408 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2762: The name tf.set_random_seed is deprecated. Please use tf.compat.v1.set_random_seed instead.

 W1011 19:50:18.335142 139892125337408 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2782: The name tf.train.get_or_create_global_step is deprecated. Please use tf.compat.v1.train.get_or_create_global_step instead.

 W1011 19:50:18.339023 139892125337408 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2739: The name tf.get_variable is deprecated. Please use tf.compat.v1.get_variable instead.

 W1011 19:50:18.341532 139892125337408 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/variable_mgr.py:316: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.

 W1011 19:50:18.350395 139892125337408 deprecation.py:323] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:129: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use `tf.keras.layers.Conv2D` instead.
 W1011 19:50:18.529763 139892125337408 deprecation.py:323] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:261: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use keras.layers.MaxPooling2D instead.
 W1011 19:50:20.228176 139892125337408 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:334: The name tf.nn.xw_plus_b is deprecated. Please use tf.compat.v1.nn.xw_plus_b instead.

 W1011 19:50:20.230057 139892125337408 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/model.py:319: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.

 W1011 19:50:20.237237 139892125337408 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/losses/losses_impl.py:121: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use tf.where in 2.0, which has the same broadcast rule as np.where
 W1011 19:50:21.029615 139892125337408 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/resnet_model.py:333: The name tf.train.piecewise_constant is deprecated. Please use tf.compat.v1.train.piecewise_constant instead.

 W1011 19:50:21.082252 139892125337408 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:1209: The name tf.train.GradientDescentOptimizer is deprecated. Please use tf.compat.v1.train.GradientDescentOptimizer instead.

 W1011 19:50:21.177614 139892125337408 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2090: The name tf.train.get_global_step is deprecated. Please use tf.compat.v1.train.get_global_step instead.

 W1011 19:50:21.180676 139892125337408 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2126: The name tf.summary.merge_all is deprecated. Please use tf.compat.v1.summary.merge_all instead.

 W1011 19:50:21.180865 139892125337408 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2183: The name tf.train.Saver is deprecated. Please use tf.compat.v1.train.Saver instead.

 W1011 19:50:21.437868 139892125337408 deprecation.py:323] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2238: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
 Instructions for updating:
 Please switch to tf.train.MonitoredTrainingSession
 2019-10-11 19:50:21.763496: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1640] Found device 0 with properties: 
 name: Tesla V100-SXM2-32GB major: 7 minor: 0 memoryClockRate(GHz): 1.53
 pciBusID: 0000:dc:00.0
 2019-10-11 19:50:21.763571: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 2019-10-11 19:50:21.763621: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcublas.so.10
 2019-10-11 19:50:21.763644: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcufft.so.10
 2019-10-11 19:50:21.763658: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcurand.so.10
 2019-10-11 19:50:21.763672: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusolver.so.10
 2019-10-11 19:50:21.763686: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusparse.so.10
 2019-10-11 19:50:21.763701: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudnn.so.7
 2019-10-11 19:50:21.767615: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1763] Adding visible gpu devices: 0
 2019-10-11 19:50:21.767676: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1181] Device interconnect StreamExecutor with strength 1 edge matrix:
 2019-10-11 19:50:21.767686: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1187]      0 
 2019-10-11 19:50:21.767694: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1200] 0:   N 
 2019-10-11 19:50:21.771770: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1326] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30466 MB memory) -> physical GPU (device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:dc:00.0, compute capability: 7.0)
 2019-10-11 19:50:22.300271: W tensorflow/compiler/jit/mark_for_compilation_pass.cc:1412] (One-time warning): Not using XLA:CPU for cluster because envvar TF_XLA_FLAGS=--tf_xla_cpu_global_jit was not set.  If you want XLA:CPU, either set that envvar, or use experimental_jit_scope to enable XLA:CPU.  To confirm that XLA is active, pass --vmodule=xla_compilation_cache=1 (as a proper command-line flag, not via TF_XLA_FLAGS) or set the envvar XLA_FLAGS=--xla_hlo_profile.
 I1011 19:50:22.645198 139892125337408 session_manager.py:500] Running local_init_op.
 I1011 19:50:22.700330 139892125337408 session_manager.py:502] Done running local_init_op.
 2019-10-11 19:50:24.303836: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcublas.so.10
 2019-10-11 19:50:24.636131: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudnn.so.7
 TensorFlow:  1.14
 Model:       resnet50
 Dataset:     imagenet (synthetic)
 Mode:        training
 SingleSess:  False
 Batch size:  64 global
             64 per device
 Num batches: 100
 Num epochs:  0.00
 Devices:     ['/gpu:0']
 NUMA bind:   False
 Data format: NHWC
 Optimizer:   sgd
 Variables:   replicated
 AllReduce:   None
 ==========
 Generating training model
 Initializing graph
 Running warm up
 Done warm up
 Step	Img/sec	total_loss
 1	images/sec: 680.3 +/- 0.0 (jitter = 0.0)	7.912
 10	images/sec: 686.3 +/- 1.6 (jitter = 3.5)	7.809
 20	images/sec: 684.9 +/- 1.2 (jitter = 7.5)	7.955
 30	images/sec: 683.4 +/- 1.3 (jitter = 6.9)	7.767
 40	images/sec: 683.7 +/- 1.1 (jitter = 6.4)	7.957
 50	images/sec: 682.7 +/- 1.0 (jitter = 6.7)	7.890
 60	images/sec: 683.1 +/- 1.0 (jitter = 7.3)	8.004
 70	images/sec: 683.0 +/- 0.9 (jitter = 7.1)	7.811
 80	images/sec: 682.9 +/- 0.9 (jitter = 7.1)	7.828
 90	images/sec: 683.3 +/- 0.8 (jitter = 6.8)	7.846
 100	images/sec: 683.3 +/- 0.7 (jitter = 7.1)	7.838
 ----------------------------------------------------------------
 total images/sec: 682.52
 ----------------------------------------------------------------
 2019-10-11 19:50:40.135005: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 WARNING: Logging before flag parsing goes to stderr.
 W1011 19:50:41.811615 139865938323264 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/mobilenet.py:388: The name tf.nn.avg_pool is deprecated. Please use tf.nn.avg_pool2d instead.

 W1011 19:50:41.814767 139865938323264 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/experimental/deepspeech.py:131: The name tf.nn.rnn_cell.RNNCell is deprecated. Please use tf.compat.v1.nn.rnn_cell.RNNCell instead.

 W1011 19:50:41.912522 139865938323264 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:722: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

 W1011 19:50:41.912767 139865938323264 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:3503: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

 2019-10-11 19:50:41.918841: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3000000000 Hz
 2019-10-11 19:50:41.922506: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x360e320 executing computations on platform Host. Devices:
 2019-10-11 19:50:41.922542: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): <undefined>, <undefined>
 2019-10-11 19:50:41.924905: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcuda.so.1
 2019-10-11 19:50:42.135179: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x360e830 executing computations on platform CUDA. Devices:
 2019-10-11 19:50:42.135266: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): Tesla V100-SXM2-32GB, Compute Capability 7.0
 2019-10-11 19:50:42.139565: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1640] Found device 0 with properties: 
 name: Tesla V100-SXM2-32GB major: 7 minor: 0 memoryClockRate(GHz): 1.53
 pciBusID: 0000:dc:00.0
 2019-10-11 19:50:42.139632: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 2019-10-11 19:50:42.142439: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcublas.so.10
 2019-10-11 19:50:42.144786: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcufft.so.10
 2019-10-11 19:50:42.145476: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcurand.so.10
 2019-10-11 19:50:42.147768: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusolver.so.10
 2019-10-11 19:50:42.148909: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusparse.so.10
 2019-10-11 19:50:42.153302: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudnn.so.7
 2019-10-11 19:50:42.157366: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1763] Adding visible gpu devices: 0
 2019-10-11 19:50:42.157401: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 2019-10-11 19:50:42.653258: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1181] Device interconnect StreamExecutor with strength 1 edge matrix:
 2019-10-11 19:50:42.653321: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1187]      0 
 2019-10-11 19:50:42.653332: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1200] 0:   N 
 2019-10-11 19:50:42.657710: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1326] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30466 MB memory) -> physical GPU (device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:dc:00.0, compute capability: 7.0)
 W1011 19:50:42.664459 139865938323264 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2762: The name tf.set_random_seed is deprecated. Please use tf.compat.v1.set_random_seed instead.

 W1011 19:50:42.664810 139865938323264 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2782: The name tf.train.get_or_create_global_step is deprecated. Please use tf.compat.v1.train.get_or_create_global_step instead.

 W1011 19:50:42.668953 139865938323264 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/variable_mgr.py:316: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.

 W1011 19:50:42.669234 139865938323264 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/model.py:245: The name tf.truncated_normal is deprecated. Please use tf.random.truncated_normal instead.

 W1011 19:50:42.678226 139865938323264 deprecation.py:323] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:129: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use `tf.keras.layers.Conv2D` instead.
 W1011 19:50:42.860884 139865938323264 deprecation.py:323] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:261: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use keras.layers.MaxPooling2D instead.
 W1011 19:50:44.635977 139865938323264 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:334: The name tf.nn.xw_plus_b is deprecated. Please use tf.compat.v1.nn.xw_plus_b instead.

 W1011 19:50:44.637528 139865938323264 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/model.py:319: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.

 W1011 19:50:44.645317 139865938323264 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/losses/losses_impl.py:121: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use tf.where in 2.0, which has the same broadcast rule as np.where
 W1011 19:50:45.105153 139865938323264 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/resnet_model.py:333: The name tf.train.piecewise_constant is deprecated. Please use tf.compat.v1.train.piecewise_constant instead.

 W1011 19:50:45.159668 139865938323264 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:1209: The name tf.train.GradientDescentOptimizer is deprecated. Please use tf.compat.v1.train.GradientDescentOptimizer instead.

 W1011 19:50:45.256085 139865938323264 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2090: The name tf.train.get_global_step is deprecated. Please use tf.compat.v1.train.get_global_step instead.

 W1011 19:50:45.259293 139865938323264 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2126: The name tf.summary.merge_all is deprecated. Please use tf.compat.v1.summary.merge_all instead.

 W1011 19:50:45.259496 139865938323264 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2183: The name tf.train.Saver is deprecated. Please use tf.compat.v1.train.Saver instead.

 W1011 19:50:45.415032 139865938323264 deprecation.py:323] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2238: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
 Instructions for updating:
 Please switch to tf.train.MonitoredTrainingSession
 2019-10-11 19:50:45.721839: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1640] Found device 0 with properties: 
 name: Tesla V100-SXM2-32GB major: 7 minor: 0 memoryClockRate(GHz): 1.53
 pciBusID: 0000:dc:00.0
 2019-10-11 19:50:45.721914: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 2019-10-11 19:50:45.721965: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcublas.so.10
 2019-10-11 19:50:45.721990: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcufft.so.10
 2019-10-11 19:50:45.722007: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcurand.so.10
 2019-10-11 19:50:45.722031: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusolver.so.10
 2019-10-11 19:50:45.722052: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusparse.so.10
 2019-10-11 19:50:45.722074: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudnn.so.7
 2019-10-11 19:50:45.726030: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1763] Adding visible gpu devices: 0
 2019-10-11 19:50:45.726107: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1181] Device interconnect StreamExecutor with strength 1 edge matrix:
 2019-10-11 19:50:45.726117: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1187]      0 
 2019-10-11 19:50:45.726124: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1200] 0:   N 
 2019-10-11 19:50:45.730228: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1326] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30466 MB memory) -> physical GPU (device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:dc:00.0, compute capability: 7.0)
 2019-10-11 19:50:46.263908: W tensorflow/compiler/jit/mark_for_compilation_pass.cc:1412] (One-time warning): Not using XLA:CPU for cluster because envvar TF_XLA_FLAGS=--tf_xla_cpu_global_jit was not set.  If you want XLA:CPU, either set that envvar, or use experimental_jit_scope to enable XLA:CPU.  To confirm that XLA is active, pass --vmodule=xla_compilation_cache=1 (as a proper command-line flag, not via TF_XLA_FLAGS) or set the envvar XLA_FLAGS=--xla_hlo_profile.
 I1011 19:50:46.624619 139865938323264 session_manager.py:500] Running local_init_op.
 I1011 19:50:46.690861 139865938323264 session_manager.py:502] Done running local_init_op.
 2019-10-11 19:50:48.106532: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcublas.so.10
 2019-10-11 19:50:48.400962: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudnn.so.7
 TensorFlow:  1.14
 Model:       resnet50
 Dataset:     imagenet (synthetic)
 Mode:        training
 SingleSess:  False
 Batch size:  128 global
             128 per device
 Num batches: 100
 Num epochs:  0.01
 Devices:     ['/gpu:0']
 NUMA bind:   False
 Data format: NHWC
 Optimizer:   sgd
 Variables:   replicated
 AllReduce:   None
 ==========
 Generating training model
 Initializing graph
 Running warm up
 Done warm up
 Step	Img/sec	total_loss
 1	images/sec: 400.3 +/- 0.0 (jitter = 0.0)	7.973
 10	images/sec: 400.1 +/- 0.7 (jitter = 1.8)	7.882
 20	images/sec: 400.4 +/- 0.5 (jitter = 1.7)	7.909
 30	images/sec: 400.6 +/- 0.4 (jitter = 1.5)	7.805
 40	images/sec: 400.4 +/- 0.3 (jitter = 1.2)	7.989
 50	images/sec: 400.4 +/- 0.3 (jitter = 1.5)	7.874
 60	images/sec: 400.5 +/- 0.2 (jitter = 1.4)	7.937
 70	images/sec: 400.5 +/- 0.2 (jitter = 1.4)	7.757
 80	images/sec: 400.5 +/- 0.2 (jitter = 1.4)	7.829
 90	images/sec: 400.6 +/- 0.2 (jitter = 1.4)	7.938
 100	images/sec: 400.5 +/- 0.2 (jitter = 1.5)	7.789
 ----------------------------------------------------------------
 total images/sec: 400.34
 ----------------------------------------------------------------
 2019-10-11 19:51:30.859302: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 WARNING: Logging before flag parsing goes to stderr.
 W1011 19:51:32.543324 140495441659712 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/mobilenet.py:388: The name tf.nn.avg_pool is deprecated. Please use tf.nn.avg_pool2d instead.

 W1011 19:51:32.546574 140495441659712 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/experimental/deepspeech.py:131: The name tf.nn.rnn_cell.RNNCell is deprecated. Please use tf.compat.v1.nn.rnn_cell.RNNCell instead.

 W1011 19:51:32.650613 140495441659712 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:722: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

 W1011 19:51:32.650888 140495441659712 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:3503: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

 2019-10-11 19:51:32.656843: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3000000000 Hz
 2019-10-11 19:51:32.661078: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4501190 executing computations on platform Host. Devices:
 2019-10-11 19:51:32.661124: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): <undefined>, <undefined>
 2019-10-11 19:51:32.663499: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcuda.so.1
 2019-10-11 19:51:32.869696: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x45014f0 executing computations on platform CUDA. Devices:
 2019-10-11 19:51:32.869786: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): Tesla V100-SXM2-32GB, Compute Capability 7.0
 2019-10-11 19:51:32.874744: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1640] Found device 0 with properties: 
 name: Tesla V100-SXM2-32GB major: 7 minor: 0 memoryClockRate(GHz): 1.53
 pciBusID: 0000:dc:00.0
 2019-10-11 19:51:32.874822: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 2019-10-11 19:51:32.877324: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcublas.so.10
 2019-10-11 19:51:32.879658: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcufft.so.10
 2019-10-11 19:51:32.879980: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcurand.so.10
 2019-10-11 19:51:32.882249: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusolver.so.10
 2019-10-11 19:51:32.883361: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusparse.so.10
 2019-10-11 19:51:32.887621: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudnn.so.7
 2019-10-11 19:51:32.895440: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1763] Adding visible gpu devices: 0
 2019-10-11 19:51:32.895483: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 2019-10-11 19:51:33.353284: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1181] Device interconnect StreamExecutor with strength 1 edge matrix:
 2019-10-11 19:51:33.353336: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1187]      0 
 2019-10-11 19:51:33.353346: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1200] 0:   N 
 2019-10-11 19:51:33.361340: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1326] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30466 MB memory) -> physical GPU (device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:dc:00.0, compute capability: 7.0)
 W1011 19:51:33.367627 140495441659712 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2762: The name tf.set_random_seed is deprecated. Please use tf.compat.v1.set_random_seed instead.

 W1011 19:51:33.367976 140495441659712 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2782: The name tf.train.get_or_create_global_step is deprecated. Please use tf.compat.v1.train.get_or_create_global_step instead.

 W1011 19:51:33.371899 140495441659712 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2739: The name tf.get_variable is deprecated. Please use tf.compat.v1.get_variable instead.

 W1011 19:51:33.374491 140495441659712 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/variable_mgr.py:316: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.

 W1011 19:51:33.383539 140495441659712 deprecation.py:323] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:129: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use `tf.keras.layers.Conv2D` instead.
 W1011 19:51:33.566757 140495441659712 deprecation.py:323] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:261: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use keras.layers.MaxPooling2D instead.
 W1011 19:51:35.320673 140495441659712 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:334: The name tf.nn.xw_plus_b is deprecated. Please use tf.compat.v1.nn.xw_plus_b instead.

 W1011 19:51:35.322623 140495441659712 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/model.py:319: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.

 W1011 19:51:35.330125 140495441659712 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/losses/losses_impl.py:121: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use tf.where in 2.0, which has the same broadcast rule as np.where
 W1011 19:51:36.139456 140495441659712 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/resnet_model.py:333: The name tf.train.piecewise_constant is deprecated. Please use tf.compat.v1.train.piecewise_constant instead.

 W1011 19:51:36.193551 140495441659712 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:1209: The name tf.train.GradientDescentOptimizer is deprecated. Please use tf.compat.v1.train.GradientDescentOptimizer instead.

 W1011 19:51:36.290264 140495441659712 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2090: The name tf.train.get_global_step is deprecated. Please use tf.compat.v1.train.get_global_step instead.

 W1011 19:51:36.293388 140495441659712 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2126: The name tf.summary.merge_all is deprecated. Please use tf.compat.v1.summary.merge_all instead.

 W1011 19:51:36.293581 140495441659712 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2183: The name tf.train.Saver is deprecated. Please use tf.compat.v1.train.Saver instead.

 W1011 19:51:36.555901 140495441659712 deprecation.py:323] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2238: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
 Instructions for updating:
 Please switch to tf.train.MonitoredTrainingSession
 2019-10-11 19:51:36.889117: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1640] Found device 0 with properties: 
 name: Tesla V100-SXM2-32GB major: 7 minor: 0 memoryClockRate(GHz): 1.53
 pciBusID: 0000:dc:00.0
 2019-10-11 19:51:36.889188: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 2019-10-11 19:51:36.889234: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcublas.so.10
 2019-10-11 19:51:36.889253: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcufft.so.10
 2019-10-11 19:51:36.889270: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcurand.so.10
 2019-10-11 19:51:36.889284: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusolver.so.10
 2019-10-11 19:51:36.889298: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusparse.so.10
 2019-10-11 19:51:36.889313: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudnn.so.7
 2019-10-11 19:51:36.893213: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1763] Adding visible gpu devices: 0
 2019-10-11 19:51:36.893261: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1181] Device interconnect StreamExecutor with strength 1 edge matrix:
 2019-10-11 19:51:36.893271: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1187]      0 
 2019-10-11 19:51:36.893278: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1200] 0:   N 
 2019-10-11 19:51:36.897329: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1326] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30466 MB memory) -> physical GPU (device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:dc:00.0, compute capability: 7.0)
 2019-10-11 19:51:37.410710: W tensorflow/compiler/jit/mark_for_compilation_pass.cc:1412] (One-time warning): Not using XLA:CPU for cluster because envvar TF_XLA_FLAGS=--tf_xla_cpu_global_jit was not set.  If you want XLA:CPU, either set that envvar, or use experimental_jit_scope to enable XLA:CPU.  To confirm that XLA is active, pass --vmodule=xla_compilation_cache=1 (as a proper command-line flag, not via TF_XLA_FLAGS) or set the envvar XLA_FLAGS=--xla_hlo_profile.
 I1011 19:51:37.757435 140495441659712 session_manager.py:500] Running local_init_op.
 I1011 19:51:37.817575 140495441659712 session_manager.py:502] Done running local_init_op.
 2019-10-11 19:51:39.355631: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcublas.so.10
 2019-10-11 19:51:39.640898: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudnn.so.7
 TensorFlow:  1.14
 Model:       resnet50
 Dataset:     imagenet (synthetic)
 Mode:        training
 SingleSess:  False
 Batch size:  128 global
             128 per device
 Num batches: 100
 Num epochs:  0.01
 Devices:     ['/gpu:0']
 NUMA bind:   False
 Data format: NHWC
 Optimizer:   sgd
 Variables:   replicated
 AllReduce:   None
 ==========
 Generating training model
 Initializing graph
 Running warm up
 Done warm up
 Step	Img/sec	total_loss
 1	images/sec: 773.1 +/- 0.0 (jitter = 0.0)	7.837
 10	images/sec: 769.3 +/- 2.2 (jitter = 3.1)	7.924
 20	images/sec: 772.6 +/- 1.5 (jitter = 4.1)	7.878
 30	images/sec: 772.3 +/- 1.6 (jitter = 5.7)	7.794
 40	images/sec: 773.1 +/- 1.2 (jitter = 4.0)	7.772
 50	images/sec: 772.7 +/- 1.0 (jitter = 4.1)	7.846
 60	images/sec: 772.2 +/- 1.0 (jitter = 4.1)	7.795
 70	images/sec: 772.3 +/- 0.8 (jitter = 3.8)	7.782
 80	images/sec: 772.5 +/- 0.8 (jitter = 4.0)	7.749
 90	images/sec: 773.0 +/- 0.7 (jitter = 4.5)	7.889
 100	images/sec: 773.1 +/- 0.7 (jitter = 4.5)	7.926
 ----------------------------------------------------------------
 total images/sec: 772.52
 ----------------------------------------------------------------
 2019-10-11 19:52:04.654122: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 WARNING: Logging before flag parsing goes to stderr.
 W1011 19:52:06.333396 140718795937600 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/mobilenet.py:388: The name tf.nn.avg_pool is deprecated. Please use tf.nn.avg_pool2d instead.

 W1011 19:52:06.336587 140718795937600 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/experimental/deepspeech.py:131: The name tf.nn.rnn_cell.RNNCell is deprecated. Please use tf.compat.v1.nn.rnn_cell.RNNCell instead.

 W1011 19:52:06.429082 140718795937600 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:722: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

 W1011 19:52:06.429342 140718795937600 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:3503: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

 2019-10-11 19:52:06.435348: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3000000000 Hz
 2019-10-11 19:52:06.439783: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x3671ec0 executing computations on platform Host. Devices:
 2019-10-11 19:52:06.439824: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): <undefined>, <undefined>
 2019-10-11 19:52:06.442079: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcuda.so.1
 2019-10-11 19:52:06.660194: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x367ba40 executing computations on platform CUDA. Devices:
 2019-10-11 19:52:06.660273: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): Tesla V100-SXM2-32GB, Compute Capability 7.0
 2019-10-11 19:52:06.664340: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1640] Found device 0 with properties: 
 name: Tesla V100-SXM2-32GB major: 7 minor: 0 memoryClockRate(GHz): 1.53
 pciBusID: 0000:dc:00.0
 2019-10-11 19:52:06.664380: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 2019-10-11 19:52:06.666339: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcublas.so.10
 2019-10-11 19:52:06.668613: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcufft.so.10
 2019-10-11 19:52:06.669269: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcurand.so.10
 2019-10-11 19:52:06.671495: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusolver.so.10
 2019-10-11 19:52:06.672577: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusparse.so.10
 2019-10-11 19:52:06.676786: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudnn.so.7
 2019-10-11 19:52:06.680793: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1763] Adding visible gpu devices: 0
 2019-10-11 19:52:06.680826: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 2019-10-11 19:52:07.126664: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1181] Device interconnect StreamExecutor with strength 1 edge matrix:
 2019-10-11 19:52:07.126720: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1187]      0 
 2019-10-11 19:52:07.126732: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1200] 0:   N 
 2019-10-11 19:52:07.131026: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1326] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30466 MB memory) -> physical GPU (device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:dc:00.0, compute capability: 7.0)
 W1011 19:52:07.137109 140718795937600 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2762: The name tf.set_random_seed is deprecated. Please use tf.compat.v1.set_random_seed instead.

 W1011 19:52:07.137446 140718795937600 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2782: The name tf.train.get_or_create_global_step is deprecated. Please use tf.compat.v1.train.get_or_create_global_step instead.

 W1011 19:52:07.141323 140718795937600 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/variable_mgr.py:316: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.

 W1011 19:52:07.141582 140718795937600 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/model.py:245: The name tf.truncated_normal is deprecated. Please use tf.random.truncated_normal instead.

 W1011 19:52:07.150378 140718795937600 deprecation.py:323] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:129: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use `tf.keras.layers.Conv2D` instead.
 W1011 19:52:07.330860 140718795937600 deprecation.py:323] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:261: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use keras.layers.MaxPooling2D instead.
 W1011 19:52:09.096513 140718795937600 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:334: The name tf.nn.xw_plus_b is deprecated. Please use tf.compat.v1.nn.xw_plus_b instead.

 W1011 19:52:09.098062 140718795937600 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/model.py:319: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.

 W1011 19:52:09.105939 140718795937600 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/losses/losses_impl.py:121: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use tf.where in 2.0, which has the same broadcast rule as np.where
 W1011 19:52:09.564369 140718795937600 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/resnet_model.py:333: The name tf.train.piecewise_constant is deprecated. Please use tf.compat.v1.train.piecewise_constant instead.

 W1011 19:52:09.619965 140718795937600 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:1209: The name tf.train.GradientDescentOptimizer is deprecated. Please use tf.compat.v1.train.GradientDescentOptimizer instead.

 W1011 19:52:09.717300 140718795937600 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2090: The name tf.train.get_global_step is deprecated. Please use tf.compat.v1.train.get_global_step instead.

 W1011 19:52:09.720579 140718795937600 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2126: The name tf.summary.merge_all is deprecated. Please use tf.compat.v1.summary.merge_all instead.

 W1011 19:52:09.720799 140718795937600 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2183: The name tf.train.Saver is deprecated. Please use tf.compat.v1.train.Saver instead.

 W1011 19:52:09.878681 140718795937600 deprecation.py:323] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2238: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
 Instructions for updating:
 Please switch to tf.train.MonitoredTrainingSession
 2019-10-11 19:52:10.198021: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1640] Found device 0 with properties: 
 name: Tesla V100-SXM2-32GB major: 7 minor: 0 memoryClockRate(GHz): 1.53
 pciBusID: 0000:dc:00.0
 2019-10-11 19:52:10.198101: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 2019-10-11 19:52:10.198145: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcublas.so.10
 2019-10-11 19:52:10.198161: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcufft.so.10
 2019-10-11 19:52:10.198174: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcurand.so.10
 2019-10-11 19:52:10.198188: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusolver.so.10
 2019-10-11 19:52:10.198202: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusparse.so.10
 2019-10-11 19:52:10.198216: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudnn.so.7
 2019-10-11 19:52:10.202111: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1763] Adding visible gpu devices: 0
 2019-10-11 19:52:10.202164: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1181] Device interconnect StreamExecutor with strength 1 edge matrix:
 2019-10-11 19:52:10.202174: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1187]      0 
 2019-10-11 19:52:10.202181: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1200] 0:   N 
 2019-10-11 19:52:10.206229: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1326] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30466 MB memory) -> physical GPU (device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:dc:00.0, compute capability: 7.0)
 2019-10-11 19:52:10.735885: W tensorflow/compiler/jit/mark_for_compilation_pass.cc:1412] (One-time warning): Not using XLA:CPU for cluster because envvar TF_XLA_FLAGS=--tf_xla_cpu_global_jit was not set.  If you want XLA:CPU, either set that envvar, or use experimental_jit_scope to enable XLA:CPU.  To confirm that XLA is active, pass --vmodule=xla_compilation_cache=1 (as a proper command-line flag, not via TF_XLA_FLAGS) or set the envvar XLA_FLAGS=--xla_hlo_profile.
 I1011 19:52:11.090065 140718795937600 session_manager.py:500] Running local_init_op.
 I1011 19:52:11.142480 140718795937600 session_manager.py:502] Done running local_init_op.
 2019-10-11 19:52:12.508197: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcublas.so.10
 2019-10-11 19:52:12.824113: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudnn.so.7
 TensorFlow:  1.14
 Model:       resnet50
 Dataset:     imagenet (synthetic)
 Mode:        training
 SingleSess:  False
 Batch size:  256 global
             256 per device
 Num batches: 100
 Num epochs:  0.02
 Devices:     ['/gpu:0']
 NUMA bind:   False
 Data format: NHWC
 Optimizer:   sgd
 Variables:   replicated
 AllReduce:   None
 ==========
 Generating training model
 Initializing graph
 Running warm up
 Done warm up
 Step	Img/sec	total_loss
 1	images/sec: 414.3 +/- 0.0 (jitter = 0.0)	8.000
 10	images/sec: 413.6 +/- 0.4 (jitter = 1.2)	7.877
 20	images/sec: 413.5 +/- 0.2 (jitter = 0.8)	7.967
 30	images/sec: 413.6 +/- 0.2 (jitter = 0.8)	7.932
 40	images/sec: 413.6 +/- 0.1 (jitter = 0.8)	7.733
 50	images/sec: 413.7 +/- 0.1 (jitter = 1.0)	7.790
 60	images/sec: 413.7 +/- 0.1 (jitter = 0.9)	7.822
 70	images/sec: 413.8 +/- 0.1 (jitter = 1.0)	7.745
 80	images/sec: 413.9 +/- 0.1 (jitter = 1.1)	7.903
 90	images/sec: 413.9 +/- 0.1 (jitter = 1.0)	7.850
 100	images/sec: 413.9 +/- 0.1 (jitter = 1.0)	7.815
 ----------------------------------------------------------------
 total images/sec: 413.79
 ----------------------------------------------------------------
 2019-10-11 19:53:31.528217: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 WARNING: Logging before flag parsing goes to stderr.
 W1011 19:53:33.206959 140182372259648 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/mobilenet.py:388: The name tf.nn.avg_pool is deprecated. Please use tf.nn.avg_pool2d instead.

 W1011 19:53:33.210127 140182372259648 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/experimental/deepspeech.py:131: The name tf.nn.rnn_cell.RNNCell is deprecated. Please use tf.compat.v1.nn.rnn_cell.RNNCell instead.

 W1011 19:53:33.299404 140182372259648 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:722: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

 W1011 19:53:33.299689 140182372259648 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:3503: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

 2019-10-11 19:53:33.305922: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3000000000 Hz
 2019-10-11 19:53:33.309227: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4866400 executing computations on platform Host. Devices:
 2019-10-11 19:53:33.309255: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): <undefined>, <undefined>
 2019-10-11 19:53:33.311588: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcuda.so.1
 2019-10-11 19:53:33.576603: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4866760 executing computations on platform CUDA. Devices:
 2019-10-11 19:53:33.576705: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): Tesla V100-SXM2-32GB, Compute Capability 7.0
 2019-10-11 19:53:33.581594: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1640] Found device 0 with properties: 
 name: Tesla V100-SXM2-32GB major: 7 minor: 0 memoryClockRate(GHz): 1.53
 pciBusID: 0000:dc:00.0
 2019-10-11 19:53:33.581674: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 2019-10-11 19:53:33.583976: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcublas.so.10
 2019-10-11 19:53:33.586100: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcufft.so.10
 2019-10-11 19:53:33.586858: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcurand.so.10
 2019-10-11 19:53:33.589274: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusolver.so.10
 2019-10-11 19:53:33.590361: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusparse.so.10
 2019-10-11 19:53:33.594953: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudnn.so.7
 2019-10-11 19:53:33.599140: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1763] Adding visible gpu devices: 0
 2019-10-11 19:53:33.599175: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 2019-10-11 19:53:34.061743: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1181] Device interconnect StreamExecutor with strength 1 edge matrix:
 2019-10-11 19:53:34.061807: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1187]      0 
 2019-10-11 19:53:34.061819: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1200] 0:   N 
 2019-10-11 19:53:34.066129: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1326] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30466 MB memory) -> physical GPU (device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:dc:00.0, compute capability: 7.0)
 W1011 19:53:34.072288 140182372259648 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2762: The name tf.set_random_seed is deprecated. Please use tf.compat.v1.set_random_seed instead.

 W1011 19:53:34.072629 140182372259648 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2782: The name tf.train.get_or_create_global_step is deprecated. Please use tf.compat.v1.train.get_or_create_global_step instead.

 W1011 19:53:34.076697 140182372259648 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2739: The name tf.get_variable is deprecated. Please use tf.compat.v1.get_variable instead.

 W1011 19:53:34.079267 140182372259648 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/variable_mgr.py:316: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.

 W1011 19:53:34.088296 140182372259648 deprecation.py:323] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:129: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use `tf.keras.layers.Conv2D` instead.
 W1011 19:53:34.270347 140182372259648 deprecation.py:323] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:261: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use keras.layers.MaxPooling2D instead.
 W1011 19:53:36.013594 140182372259648 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:334: The name tf.nn.xw_plus_b is deprecated. Please use tf.compat.v1.nn.xw_plus_b instead.

 W1011 19:53:36.015531 140182372259648 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/model.py:319: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.

 W1011 19:53:36.022940 140182372259648 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/losses/losses_impl.py:121: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use tf.where in 2.0, which has the same broadcast rule as np.where
 W1011 19:53:36.827465 140182372259648 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/resnet_model.py:333: The name tf.train.piecewise_constant is deprecated. Please use tf.compat.v1.train.piecewise_constant instead.

 W1011 19:53:36.880496 140182372259648 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:1209: The name tf.train.GradientDescentOptimizer is deprecated. Please use tf.compat.v1.train.GradientDescentOptimizer instead.

 W1011 19:53:36.977261 140182372259648 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2090: The name tf.train.get_global_step is deprecated. Please use tf.compat.v1.train.get_global_step instead.

 W1011 19:53:36.980291 140182372259648 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2126: The name tf.summary.merge_all is deprecated. Please use tf.compat.v1.summary.merge_all instead.

 W1011 19:53:36.980471 140182372259648 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2183: The name tf.train.Saver is deprecated. Please use tf.compat.v1.train.Saver instead.

 W1011 19:53:37.235051 140182372259648 deprecation.py:323] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2238: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
 Instructions for updating:
 Please switch to tf.train.MonitoredTrainingSession
 2019-10-11 19:53:37.569010: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1640] Found device 0 with properties: 
 name: Tesla V100-SXM2-32GB major: 7 minor: 0 memoryClockRate(GHz): 1.53
 pciBusID: 0000:dc:00.0
 2019-10-11 19:53:37.569077: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 2019-10-11 19:53:37.569124: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcublas.so.10
 2019-10-11 19:53:37.569162: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcufft.so.10
 2019-10-11 19:53:37.569185: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcurand.so.10
 2019-10-11 19:53:37.569202: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusolver.so.10
 2019-10-11 19:53:37.569219: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusparse.so.10
 2019-10-11 19:53:37.569251: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudnn.so.7
 2019-10-11 19:53:37.573196: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1763] Adding visible gpu devices: 0
 2019-10-11 19:53:37.573265: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1181] Device interconnect StreamExecutor with strength 1 edge matrix:
 2019-10-11 19:53:37.573275: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1187]      0 
 2019-10-11 19:53:37.573282: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1200] 0:   N 
 2019-10-11 19:53:37.577375: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1326] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30466 MB memory) -> physical GPU (device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:dc:00.0, compute capability: 7.0)
 2019-10-11 19:53:38.110152: W tensorflow/compiler/jit/mark_for_compilation_pass.cc:1412] (One-time warning): Not using XLA:CPU for cluster because envvar TF_XLA_FLAGS=--tf_xla_cpu_global_jit was not set.  If you want XLA:CPU, either set that envvar, or use experimental_jit_scope to enable XLA:CPU.  To confirm that XLA is active, pass --vmodule=xla_compilation_cache=1 (as a proper command-line flag, not via TF_XLA_FLAGS) or set the envvar XLA_FLAGS=--xla_hlo_profile.
 I1011 19:53:38.465950 140182372259648 session_manager.py:500] Running local_init_op.
 I1011 19:53:38.516538 140182372259648 session_manager.py:502] Done running local_init_op.
 2019-10-11 19:53:40.065770: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcublas.so.10
 2019-10-11 19:53:40.376101: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudnn.so.7
 TensorFlow:  1.14
 Model:       resnet50
 Dataset:     imagenet (synthetic)
 Mode:        training
 SingleSess:  False
 Batch size:  256 global
             256 per device
 Num batches: 100
 Num epochs:  0.02
 Devices:     ['/gpu:0']
 NUMA bind:   False
 Data format: NHWC
 Optimizer:   sgd
 Variables:   replicated
 AllReduce:   None
 ==========
 Generating training model
 Initializing graph
 Running warm up
 Done warm up
 Step	Img/sec	total_loss
 1	images/sec: 843.5 +/- 0.0 (jitter = 0.0)	7.929
 10	images/sec: 842.0 +/- 1.0 (jitter = 2.2)	7.881
 20	images/sec: 840.1 +/- 0.8 (jitter = 3.8)	7.831
 30	images/sec: 840.6 +/- 0.6 (jitter = 3.9)	7.839
 40	images/sec: 841.3 +/- 0.5 (jitter = 4.4)	7.929
 50	images/sec: 840.8 +/- 0.5 (jitter = 4.4)	7.949
 60	images/sec: 841.0 +/- 0.4 (jitter = 4.0)	7.862
 70	images/sec: 840.8 +/- 0.4 (jitter = 3.8)	7.817
 80	images/sec: 840.9 +/- 0.4 (jitter = 4.0)	7.827
 90	images/sec: 841.0 +/- 0.3 (jitter = 3.7)	7.859
 100	images/sec: 841.1 +/- 0.3 (jitter = 3.5)	7.846
 ----------------------------------------------------------------
 total images/sec: 840.71
 ----------------------------------------------------------------
 2019-10-11 19:54:24.033340: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 WARNING: Logging before flag parsing goes to stderr.
 W1011 19:54:25.640588 140144639596352 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/mobilenet.py:388: The name tf.nn.avg_pool is deprecated. Please use tf.nn.avg_pool2d instead.

 W1011 19:54:25.643784 140144639596352 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/experimental/deepspeech.py:131: The name tf.nn.rnn_cell.RNNCell is deprecated. Please use tf.compat.v1.nn.rnn_cell.RNNCell instead.

 W1011 19:54:25.738508 140144639596352 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:722: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

 W1011 19:54:25.738786 140144639596352 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:3503: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

 2019-10-11 19:54:25.745103: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3000000000 Hz
 2019-10-11 19:54:25.748452: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x31d1e80 executing computations on platform Host. Devices:
 2019-10-11 19:54:25.748484: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): <undefined>, <undefined>
 2019-10-11 19:54:25.750896: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcuda.so.1
 2019-10-11 19:54:26.039829: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x31dba00 executing computations on platform CUDA. Devices:
 2019-10-11 19:54:26.039902: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): Tesla V100-SXM2-32GB, Compute Capability 7.0
 2019-10-11 19:54:26.044040: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1640] Found device 0 with properties: 
 name: Tesla V100-SXM2-32GB major: 7 minor: 0 memoryClockRate(GHz): 1.53
 pciBusID: 0000:dc:00.0
 2019-10-11 19:54:26.044112: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 2019-10-11 19:54:26.047811: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcublas.so.10
 2019-10-11 19:54:26.050114: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcufft.so.10
 2019-10-11 19:54:26.050810: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcurand.so.10
 2019-10-11 19:54:26.053132: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusolver.so.10
 2019-10-11 19:54:26.054246: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusparse.so.10
 2019-10-11 19:54:26.058622: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudnn.so.7
 2019-10-11 19:54:26.066869: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1763] Adding visible gpu devices: 0
 2019-10-11 19:54:26.066948: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 2019-10-11 19:54:26.537968: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1181] Device interconnect StreamExecutor with strength 1 edge matrix:
 2019-10-11 19:54:26.538028: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1187]      0 
 2019-10-11 19:54:26.538042: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1200] 0:   N 
 2019-10-11 19:54:26.542456: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1326] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30466 MB memory) -> physical GPU (device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:dc:00.0, compute capability: 7.0)
 W1011 19:54:26.548414 140144639596352 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2762: The name tf.set_random_seed is deprecated. Please use tf.compat.v1.set_random_seed instead.

 W1011 19:54:26.548786 140144639596352 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2782: The name tf.train.get_or_create_global_step is deprecated. Please use tf.compat.v1.train.get_or_create_global_step instead.

 W1011 19:54:26.553111 140144639596352 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2739: The name tf.get_variable is deprecated. Please use tf.compat.v1.get_variable instead.

 W1011 19:54:26.555955 140144639596352 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/variable_mgr.py:316: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.

 W1011 19:54:26.565997 140144639596352 deprecation.py:323] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:129: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use `tf.keras.layers.Conv2D` instead.
 W1011 19:54:26.754782 140144639596352 deprecation.py:323] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:261: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use keras.layers.MaxPooling2D instead.
 W1011 19:54:28.625625 140144639596352 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:334: The name tf.nn.xw_plus_b is deprecated. Please use tf.compat.v1.nn.xw_plus_b instead.

 W1011 19:54:28.627624 140144639596352 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/model.py:319: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.

 W1011 19:54:28.635330 140144639596352 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/losses/losses_impl.py:121: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
 Instructions for updating:
 Use tf.where in 2.0, which has the same broadcast rule as np.where
 W1011 19:54:29.455733 140144639596352 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/models/resnet_model.py:333: The name tf.train.piecewise_constant is deprecated. Please use tf.compat.v1.train.piecewise_constant instead.

 W1011 19:54:29.510030 140144639596352 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:1209: The name tf.train.GradientDescentOptimizer is deprecated. Please use tf.compat.v1.train.GradientDescentOptimizer instead.

 W1011 19:54:29.607766 140144639596352 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2090: The name tf.train.get_global_step is deprecated. Please use tf.compat.v1.train.get_global_step instead.

 W1011 19:54:29.611020 140144639596352 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2126: The name tf.summary.merge_all is deprecated. Please use tf.compat.v1.summary.merge_all instead.

 W1011 19:54:29.611258 140144639596352 deprecation_wrapper.py:119] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2183: The name tf.train.Saver is deprecated. Please use tf.compat.v1.train.Saver instead.

 W1011 19:54:29.873010 140144639596352 deprecation.py:323] From /home/r05945040/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2238: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
 Instructions for updating:
 Please switch to tf.train.MonitoredTrainingSession
 2019-10-11 19:54:30.208334: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1640] Found device 0 with properties: 
 name: Tesla V100-SXM2-32GB major: 7 minor: 0 memoryClockRate(GHz): 1.53
 pciBusID: 0000:dc:00.0
 2019-10-11 19:54:30.208402: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.1
 2019-10-11 19:54:30.208442: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcublas.so.10
 2019-10-11 19:54:30.208460: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcufft.so.10
 2019-10-11 19:54:30.208474: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcurand.so.10
 2019-10-11 19:54:30.208486: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusolver.so.10
 2019-10-11 19:54:30.208499: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusparse.so.10
 2019-10-11 19:54:30.208514: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudnn.so.7
 2019-10-11 19:54:30.212442: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1763] Adding visible gpu devices: 0
 2019-10-11 19:54:30.212497: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1181] Device interconnect StreamExecutor with strength 1 edge matrix:
 2019-10-11 19:54:30.212507: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1187]      0 
 2019-10-11 19:54:30.212514: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1200] 0:   N 
 2019-10-11 19:54:30.216575: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1326] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30466 MB memory) -> physical GPU (device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:dc:00.0, compute capability: 7.0)
 2019-10-11 19:54:30.756186: W tensorflow/compiler/jit/mark_for_compilation_pass.cc:1412] (One-time warning): Not using XLA:CPU for cluster because envvar TF_XLA_FLAGS=--tf_xla_cpu_global_jit was not set.  If you want XLA:CPU, either set that envvar, or use experimental_jit_scope to enable XLA:CPU.  To confirm that XLA is active, pass --vmodule=xla_compilation_cache=1 (as a proper command-line flag, not via TF_XLA_FLAGS) or set the envvar XLA_FLAGS=--xla_hlo_profile.
 I1011 19:54:31.108315 140144639596352 session_manager.py:500] Running local_init_op.
 I1011 19:54:31.170161 140144639596352 session_manager.py:502] Done running local_init_op.
 2019-10-11 19:54:32.842672: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcublas.so.10
 2019-10-11 19:54:33.156314: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudnn.so.7
 TensorFlow:  1.14
 Model:       resnet50
 Dataset:     imagenet (synthetic)
 Mode:        training
 SingleSess:  False
 Batch size:  512 global
             512 per device
 Num batches: 100
 Num epochs:  0.04
 Devices:     ['/gpu:0']
 NUMA bind:   False
 Data format: NHWC
 Optimizer:   sgd
 Variables:   replicated
 AllReduce:   None
 ==========
 Generating training model
 Initializing graph
 Running warm up
 Done warm up
 Step	Img/sec	total_loss
 1	images/sec: 855.2 +/- 0.0 (jitter = 0.0)	7.887
 10	images/sec: 856.2 +/- 0.3 (jitter = 0.4)	7.905
 20	images/sec: 856.0 +/- 0.4 (jitter = 0.5)	7.876
 30	images/sec: 855.9 +/- 0.3 (jitter = 0.6)	7.918
 40	images/sec: 856.1 +/- 0.3 (jitter = 0.7)	7.861
 50	images/sec: 856.1 +/- 0.2 (jitter = 0.9)	7.757
 60	images/sec: 856.0 +/- 0.2 (jitter = 1.0)	7.757
 70	images/sec: 856.1 +/- 0.2 (jitter = 0.9)	7.740
 80	images/sec: 856.1 +/- 0.2 (jitter = 1.0)	7.747
 90	images/sec: 856.1 +/- 0.2 (jitter = 1.1)	7.727
 100	images/sec: 854.3 +/- 1.5 (jitter = 1.1)	7.706
 ----------------------------------------------------------------
 total images/sec: 854.12
 ----------------------------------------------------------------