Linux Ubuntu 2016.
- 1080 GTX
- SDK 8.0
- CuDNN 5.1
ulimit -c unlimited
import numpy as np | |
def sigmoid(x):
    """Return the logistic sigmoid of *x*, computed element-wise.

    Evaluates ``1 / (1 + exp(-x))`` using ``np.exp``, so *x* may be a
    scalar or anything NumPy can broadcast over (e.g. an ``ndarray``).

    Args:
        x: Scalar or array-like input.

    Returns:
        The sigmoid of *x*, as produced by NumPy for the given input.
    """
    return 1.0 / (1.0 + np.exp(-x))
def train_log_reg(): | |
bsz = 100 | |
fsz = 5 | |
mbsz = 10 | |
lr = 0.00001 |
rpn_outputs = [rpn_head('rpn', pi, cfg.FPN.NUM_CHANNEL, len(cfg.RPN.ANCHOR_RATIOS), fp16=self.fp16) | |
for pi in features] | |
multilevel_label_logits = [k[0] for k in rpn_outputs] | |
multilevel_box_logits = [k[1] for k in rpn_outputs] | |
#multilevel_pred_boxes = [anchor.decode_logits(logits) | |
#for anchor, logits in zip(multilevel_anchors, multilevel_box_logits)] | |
#proposal_boxes, proposal_scores = generate_fpn_proposals( | |
# multilevel_pred_boxes, multilevel_label_logits, image_shape2d) |
+ VENV=tensorflow_p36_13rc1 | |
+ git branch | |
+ grep '*' | |
+ awk '{print $2}' | |
+ git log | |
+ head -1 | |
++ basename ./no_batch_train_1node_16xl_convergence.sh | |
+ cp no_batch_train_1node_16xl_convergence.sh /home/ubuntu/logs/train_log_20190308_185758 | |
+ env | |
+ pip freeze |
[0308 18:43:36 @base.py:274] Start Epoch 1 ... | |
0%| |0/15000[00:00<?,?it/s]2019-03-08 18:43:57.515207: E tensorflow/stream_executor/cuda/cuda_blas.cc:694] failed to run cuBLAS routine cublasSgemmEx: CUBLAS_STATUS_EXECUTION_FAILED | |
2019-03-08 18:43:57.522957: F tensorflow/stream_executor/gpu/gpu_timer.cc:65] Check failed: start_event_ != nullptr && stop_event_ != nullptr | |
[ip-172-31-14-112:17513] *** Process received signal *** | |
[ip-172-31-14-112:17513] Signal: Aborted (6) | |
[ip-172-31-14-112:17513] Signal code: (-6) | |
[ip-172-31-14-112:17513] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7fc9d8af2390] | |
[ip-172-31-14-112:17513] [ 1] /lib/x86_64-linux-gnu/libc.so.6(gsignal+0x38)[0x7fc9d874c428] | |
[ip-172-31-14-112:17513] [ 2] /lib/x86_64-linux-gnu/libc.so.6(abort+0x16a)[0x7fc9d874e02a] | |
[ip-172-31-14-112:17513] [ 3] /home/ubuntu/anaconda3/ |
diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py | |
index fc2e8aa..e3081c0 100644 | |
--- a/tensorflow/python/keras/backend.py | |
+++ b/tensorflow/python/keras/backend.py | |
@@ -594,7 +594,8 @@ def _has_nchw_support(): | |
bool: if the current scope device placement would support nchw | |
""" | |
explicitly_on_cpu = _is_current_explicit_device('CPU') | |
- gpus_available = bool(_get_available_gpus()) | |
+ #gpus_available = bool(_get_available_gpus()) |
Traceback (most recent call last): | |
File "/home/ubuntu/anaconda3/envs/tensorflow_p36_13rc1/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1335, in _do_call | |
return fn(*args) | |
File "/home/ubuntu/anaconda3/envs/tensorflow_p36_13rc1/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1320, in _run_fn | |
options, feed_dict, fetch_list, target_list, run_metadata) | |
File "/home/ubuntu/anaconda3/envs/tensorflow_p36_13rc1/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1408, in _call_tf_sessionrun | |
run_metadata) | |
tensorflow.python.framework.errors_impl.InvalidArgumentError: Shapes of all inputs must match: values[0].shape = [4,200,304,3] != values[1].shape = [4,100,152,3] | |
[[{{node Sum_5/input}}]] | |
[[gradients/group1/block0/conv1/Conv2D_grad/Conv2DBackpropFilter/_3987]] |
Type Time(%) Time Calls Avg Min Max Name | |
GPU activities: 14.63% 49.9120s 212547 234.83us 4.8640us 3.9977ms volta_gcgemm_32x32_nt | |
9.34% 31.8694s 408982 77.923us 1.3750us 27.761ms [CUDA memcpy HtoD] | |
5.33% 18.1885s 263039 69.147us 1.4710us 6.0376ms [CUDA memcpy DtoH] | |
4.66% 15.8805s 22869 694.41us 54.015us 3.0393ms volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | |
2.70% 9.19615s 477147 19.273us 1.6960us 157.98us void nchwToNhwcKernel<__half, __half, float, bool=1, bool=0>(int, int, int, int, __half const *, __half*, float, float) | |
2.22% 7.55613s 403149 18.742us 960ns 160.35us void Eigen::internal::EigenMetaKernel<Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, int=1, int=1, long>, int=16, Eigen::MakePointer>, Eigen::TensorConversionOp<float, Eigen::TensorMap<Eigen::Tensor<Eigen |
[0213 03:07:48 @base.py:274] Start Epoch 1 ... | |
[0213 04:06:00 @base.py:284] Epoch 1 (global_step 15000) finished, time:58 minutes 11 seconds. | |
[0213 04:06:00 @performance.py:71] [ThroughputTracker] Over last epoch, MeanEpochThroughput: 34.37 | |
[0213 04:06:00 @performance.py:72] [ThroughputTracker] Over last epoch, EpochWallClockDuration: 3,491.31 | |
[0213 04:06:00 @monitor.py:467] Throughput/EpochWallClockDuration: 3491.3 | |
[0213 04:06:00 @monitor.py:467] Throughput/MeanEpochThroughput: 34.371 | |
[0213 04:06:00 @base.py:274] Start Epoch 2 ... | |
[0213 04:55:57 @base.py:284] Epoch 2 (global_step 30000) finished, time:49 minutes 56 seconds. | |
[0213 04:55:57 @performance.py:71] [ThroughputTracker] Over last epoch, MeanEpochThroughput: 40.04 | |
[0213 04:55:57 @performance.py:72] [ThroughputTracker] Over last epoch, EpochWallClockDuration: 2,996.91 |
def load_graph(graph_filename):
    """Load a serialized ``GraphDef`` from disk into a new ``tf.Graph``.

    Args:
        graph_filename: Path to a file containing a binary-serialized
            ``GraphDef`` protocol buffer; it is opened in ``'rb'`` mode.

    Returns:
        A freshly created ``tf.Graph`` into which the graph definition
        has been imported.
    """
    # Read and parse the protobuf first so the file handle is released
    # before any graph construction happens.
    with open(graph_filename, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # Import into a dedicated graph rather than the process-wide default.
    # No `name` is passed to import_graph_def, so TensorFlow's default
    # name prefix is applied to the imported nodes.
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def)
    return graph