Created
September 23, 2019 17:09
-
-
Save sergei-mironov/d7ed90c07347c8bc5d5da0a5d509fbfc to your computer and use it in GitHub Desktop.
nl2bash-OOM
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
attention input keep probability = 0.6 | |
AttentionCellWrapper added! | |
W0922 22:44:48.935649 140431404037952 deprecation_wrapper.py:119] From /workspace/mironov/nl2bash/encoder_decoder/framework.py:200: The name tf.train.AdamOptimizer is deprecated. Please use tf.compat.v1.train.AdamOptimizer instead. | |
W0922 22:44:58.016724 140431404037952 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/clip_ops.py:286: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version. | |
Instructions for updating: | |
Use tf.where in 2.0, which has the same broadcast rule as np.where | |
W0922 22:45:20.083132 140431404037952 deprecation_wrapper.py:119] From /workspace/mironov/nl2bash/encoder_decoder/framework.py:223: The name tf.train.Saver is deprecated. Please use tf.compat.v1.train.Saver instead. | |
Making model_dir... | |
Initialize the graph with random parameters. | |
bucket 0: (13, 57) (3184) | |
bucket 1: (18, 57) (2343) | |
bucket 2: (42, 57) (2433) | |
Epoch 1 | |
0%| | 3/4000 [00:35<15:45:52, 14.20s/it] | |
Traceback (most recent call last): | |
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1356, in _do_call | |
return fn(*args) | |
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1341, in _run_fn | |
options, feed_dict, fetch_list, target_list, run_metadata) | |
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1429, in _call_tf_sessionrun | |
run_metadata) | |
tensorflow.python.framework.errors_impl.ResourceExhaustedError: 2 root error(s) found. | |
(0) Resource exhausted: OOM when allocating tensor with shape[128,42,400] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc | |
[[{{node gradients_2/token_decoder_decoder_rnn_2/Attention_0_14/mul_2_grad/Mul}}]] | |
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. | |
[[global_norm_2/global_norm/_5353]] | |
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. | |
(1) Resource exhausted: OOM when allocating tensor with shape[128,42,400] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc | |
[[{{node gradients_2/token_decoder_decoder_rnn_2/Attention_0_14/mul_2_grad/Mul}}]] | |
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. | |
0 successful operations. | |
0 derived errors ignored. | |
During handling of the above exception, another exception occurred: | |
Traceback (most recent call last): | |
File "/usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main | |
"__main__", mod_spec) | |
File "/usr/lib/python3.6/runpy.py", line 85, in _run_code | |
exec(code, run_globals) | |
File "/workspace/mironov/nl2bash/encoder_decoder/translate.py", line 373, in <module> | |
tf.app.run() | |
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/platform/app.py", line 40, in run | |
_run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef) | |
File "/usr/local/lib/python3.6/dist-packages/absl/app.py", line 300, in run | |
_run_main(main, args) | |
File "/usr/local/lib/python3.6/dist-packages/absl/app.py", line 251, in _run_main | |
sys.exit(main(argv)) | |
File "/workspace/mironov/nl2bash/encoder_decoder/translate.py", line 353, in main | |
train(train_set, dataset) | |
File "/workspace/mironov/nl2bash/encoder_decoder/translate.py", line 94, in train | |
sess, formatted_example, bucket_id, forward_only=False) | |
File "/workspace/mironov/nl2bash/encoder_decoder/framework.py", line 631, in step | |
outputs = session.run(output_feed, input_feed) | |
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 950, in run | |
run_metadata_ptr) | |
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1173, in _run | |
feed_dict_tensor, options, run_metadata) | |
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1350, in _do_run | |
run_metadata) | |
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1370, in _do_call | |
raise type(e)(node_def, op, message) | |
tensorflow.python.framework.errors_impl.ResourceExhaustedError: 2 root error(s) found. | |
(0) Resource exhausted: OOM when allocating tensor with shape[128,42,400] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc | |
[[node gradients_2/token_decoder_decoder_rnn_2/Attention_0_14/mul_2_grad/Mul (defined at workspace/mironov/nl2bash/encoder_decoder/framework.py:210) ]] | |
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. | |
[[global_norm_2/global_norm/_5353]] | |
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. | |
(1) Resource exhausted: OOM when allocating tensor with shape[128,42,400] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc | |
[[node gradients_2/token_decoder_decoder_rnn_2/Attention_0_14/mul_2_grad/Mul (defined at workspace/mironov/nl2bash/encoder_decoder/framework.py:210) ]] | |
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. | |
0 successful operations. | |
0 derived errors ignored. | |
Errors may have originated from an input operation. | |
Input Source operations connected to node gradients_2/token_decoder_decoder_rnn_2/Attention_0_14/mul_2_grad/Mul: | |
token_decoder_decoder_rnn_2/dropout/mul_1 (defined at workspace/mironov/nl2bash/encoder_decoder/decoder.py:160) | |
Input Source operations connected to node gradients_2/token_decoder_decoder_rnn_2/Attention_0_14/mul_2_grad/Mul: | |
token_decoder_decoder_rnn_2/dropout/mul_1 (defined at workspace/mironov/nl2bash/encoder_decoder/decoder.py:160) | |
Original stack trace for 'gradients_2/token_decoder_decoder_rnn_2/Attention_0_14/mul_2_grad/Mul': | |
File "usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main | |
"__main__", mod_spec) | |
File "usr/lib/python3.6/runpy.py", line 85, in _run_code | |
exec(code, run_globals) | |
File "workspace/mironov/nl2bash/encoder_decoder/translate.py", line 373, in <module> | |
tf.app.run() | |
File "usr/local/lib/python3.6/dist-packages/tensorflow/python/platform/app.py", line 40, in run | |
_run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef) | |
File "usr/local/lib/python3.6/dist-packages/absl/app.py", line 300, in run | |
_run_main(main, args) | |
File "usr/local/lib/python3.6/dist-packages/absl/app.py", line 251, in _run_main | |
sys.exit(main(argv)) | |
File "workspace/mironov/nl2bash/encoder_decoder/translate.py", line 353, in main | |
train(train_set, dataset) | |
File "workspace/mironov/nl2bash/encoder_decoder/translate.py", line 63, in train | |
model = define_model(sess, forward_only=False, buckets=train_set.buckets) | |
File "workspace/mironov/nl2bash/encoder_decoder/translate.py", line 52, in define_model | |
FLAGS, session, Seq2SeqModel, buckets, forward_only) | |
File "workspace/mironov/nl2bash/encoder_decoder/graph_utils.py", line 143, in define_model | |
model = model_constructor(params, buckets) | |
File "workspace/mironov/nl2bash/encoder_decoder/seq2seq/seq2seq_model.py", line 28, in __init__ | |
super(Seq2SeqModel, self).__init__(hyperparams, buckets) | |
File "workspace/mironov/nl2bash/encoder_decoder/framework.py", line 71, in __init__ | |
self.define_graph() | |
File "workspace/mironov/nl2bash/encoder_decoder/framework.py", line 210, in define_graph | |
gradients = tf.gradients(self.losses[bucket_id], params) | |
File "usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gradients_impl.py", line 158, in gradients | |
unconnected_gradients) | |
File "usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gradients_util.py", line 731, in _GradientsHelper | |
lambda: grad_fn(op, *out_grads)) | |
File "usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gradients_util.py", line 403, in _MaybeCompile | |
return grad_fn() # Exit early | |
File "usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gradients_util.py", line 731, in <lambda> | |
lambda: grad_fn(op, *out_grads)) | |
File "usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py", line 1048, in _MulGrad | |
math_ops.reduce_sum(gen_math_ops.mul(grad, y), rx), sx), | |
File "usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_math_ops.py", line 6490, in mul | |
"Mul", x=x, y=y, name=name) | |
File "usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper | |
op_def=op_def) | |
File "usr/local/lib/python3.6/dist-packages/tensorflow/python/util/deprecation.py", line 507, in new_func | |
return func(*args, **kwargs) | |
File "usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 3616, in create_op | |
op_def=op_def) | |
File "usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 2005, in __init__ | |
self._traceback = tf_stack.extract_stack() | |
...which was originally created as op 'token_decoder_decoder_rnn_2/Attention_0_14/mul_2', defined at: | |
File "usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main | |
"__main__", mod_spec) | |
[elided 10 identical lines from previous traceback] | |
File "workspace/mironov/nl2bash/encoder_decoder/framework.py", line 71, in __init__ | |
self.define_graph() | |
File "workspace/mironov/nl2bash/encoder_decoder/framework.py", line 140, in define_graph | |
encoder_copy_inputs=self.encoder_copy_inputs[:bucket[0]] | |
File "workspace/mironov/nl2bash/encoder_decoder/framework.py", line 256, in encode_decode | |
encoder_copy_inputs=encoder_copy_inputs) | |
File "workspace/mironov/nl2bash/encoder_decoder/seq2seq/rnn_decoder.py", line 199, in define_graph | |
decoder_cell(input_embedding, state) | |
File "workspace/mironov/nl2bash/encoder_decoder/decoder.py", line 240, in __call__ | |
attns, alignments = self.attention(cell_output) | |
File "workspace/mironov/nl2bash/encoder_decoder/decoder.py", line 226, in attention | |
* self.hidden_features[a], [1]) | |
File "usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py", line 884, in binary_op_wrapper | |
return func(x, y, name=name) | |
File "usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py", line 1180, in _mul_dispatch | |
return gen_math_ops.mul(x, y, name=name) | |
File "usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_math_ops.py", line 6490, in mul | |
"Mul", x=x, y=y, name=name) | |
File "usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper | |
op_def=op_def) | |
File "usr/local/lib/python3.6/dist-packages/tensorflow/python/util/deprecation.py", line 507, in new_func | |
return func(*args, **kwargs) | |
File "usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 3616, in create_op | |
op_def=op_def) | |
0%| | 3/4000 [10:30<233:18:28, 210.13s/it] | |
Makefile:41: recipe for target 'train' failed | |
make: *** [train] Error 1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment