Created
October 5, 2016 08:44
-
-
Save floriandotpy/03c6754b942a6c46a333f9b9801703c9 to your computer and use it in GitHub Desktop.
Using hyperopt with TensorFlow eats up my GPU memory (VRAM). Why?
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[stat] LOSS: 1.00405 | |
Trying model: C1: 3, filters: 16, C2: 3, filters: 4 | |
I tensorflow/core/common_runtime/gpu/gpu_device.cc:867] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 1060 6GB, pci bus id: 0000:01:00.0) | |
[stat] LOSS: 0.23471 | |
Trying model: C1: 3, filters: 4, C2: 3, filters: 8 | |
I tensorflow/core/common_runtime/gpu/gpu_device.cc:867] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 1060 6GB, pci bus id: 0000:01:00.0) | |
[stat] LOSS: 0.17076 | |
Trying model: C1: 3, filters: 4, C2: 5, filters: 32 | |
I tensorflow/core/common_runtime/gpu/gpu_device.cc:867] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 1060 6GB, pci bus id: 0000:01:00.0) | |
[stat] LOSS: 0.38854 | |
Trying model: C1: 5, filters: 16, C2: 5, filters: 16 | |
I tensorflow/core/common_runtime/gpu/gpu_device.cc:867] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 1060 6GB, pci bus id: 0000:01:00.0) | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (256): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (512): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (1024): Total Chunks: 1, Chunks in use: 0 1.8KiB allocated for chunks. 4B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (2048): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (4096): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (8192): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
// ... | |
// ... many more lines like this | |
// ... | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (8388608): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (16777216): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (33554432): Total Chunks: 1, Chunks in use: 0 32.81MiB allocated for chunks. 23.44MiB client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (67108864): Total Chunks: 1, Chunks in use: 0 112.50MiB allocated for chunks. 23.44MiB client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (134217728): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (268435456): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin. | |
I tensorflow/core/common_runtime/bfc_allocator.cc:656] Bin for 175.78MiB was 128.00MiB, Chunk State: | |
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x10208000000 of size 1280 | |
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x10208000500 of size 1280 | |
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x10208000a00 of size 1280 | |
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x10208000f00 of size 1280 | |
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x10208001400 of size 1280 | |
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x10208001900 of size 256 | |
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x10208001a00 of size 256 | |
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x10208001b00 of size 256 | |
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x10208001c00 of size 256 | |
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x10208001d00 of size 3072 | |
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x10208002900 of size 256 | |
// ... | |
// ... many more lines like this | |
// ... | |
I tensorflow/core/common_runtime/bfc_allocator.cc:692] 5 Chunks of size 235929600 totalling 1.10GiB | |
I tensorflow/core/common_runtime/bfc_allocator.cc:692] 1 Chunks of size 236640256 totalling 225.68MiB | |
I tensorflow/core/common_runtime/bfc_allocator.cc:692] 5 Chunks of size 471859200 totalling 2.20GiB | |
I tensorflow/core/common_runtime/bfc_allocator.cc:692] 1 Chunks of size 487964928 totalling 465.36MiB | |
I tensorflow/core/common_runtime/bfc_allocator.cc:692] 1 Chunks of size 707788800 totalling 675.00MiB | |
I tensorflow/core/common_runtime/bfc_allocator.cc:696] Sum Total of in-use chunks: 5.37GiB | |
I tensorflow/core/common_runtime/bfc_allocator.cc:698] Stats: | |
Limit: 5918818304 | |
InUse: 5765303296 | |
MaxInUse: 5783643904 | |
NumAllocs: 242839 | |
MaxAllocSize: 707788800 | |
W tensorflow/core/common_runtime/bfc_allocator.cc:270] ***************************xxx******************************************_*************************** | |
W tensorflow/core/common_runtime/bfc_allocator.cc:271] Ran out of memory trying to allocate 175.78MiB. See logs for memory state. | |
W tensorflow/core/framework/op_kernel.cc:968] Resource exhausted: OOM when allocating tensor with shape[10,16,120,160,15] | |
Traceback (most recent call last): | |
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 965, in _do_call | |
return fn(*args) | |
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 947, in _run_fn | |
status, run_metadata) | |
File "/usr/lib/python3.5/contextlib.py", line 66, in __exit__ | |
next(self.gen) | |
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/errors.py", line 450, in raise_exception_on_not_ok_status | |
pywrap_tensorflow.TF_GetCode(status)) | |
tensorflow.python.framework.errors.ResourceExhaustedError: OOM when allocating tensor with shape[10,16,120,160,15] | |
[[Node: gradients_4/MaxPool3D_8_grad/MaxPool3DGrad = MaxPool3DGrad[T=DT_FLOAT, ksize=[1, 3, 3, 3, 1], padding="SAME", strides=[1, 2, 2, 2, 1], _device="/job:localhost/replica:0/task:0/gpu:0"](Relu_8, MaxPool3D_8, gradients_4/Conv3D_9_grad/tuple/control_dependency)]] | |
During handling of the above exception, another exception occurred: | |
Traceback (most recent call last): | |
File "optimize.py", line 42, in <module> | |
optimize() | |
File "optimize.py", line 35, in optimize | |
best_model = hyperopt.fmin(objective, space, algo=hyperopt.tpe.suggest, max_evals=150) | |
File "/usr/local/lib/python3.5/dist-packages/hyperopt/fmin.py", line 319, in fmin | |
rval.exhaust() | |
File "/usr/local/lib/python3.5/dist-packages/hyperopt/fmin.py", line 198, in exhaust | |
self.run(self.max_evals - n_done, block_until_done=self.async) | |
File "/usr/local/lib/python3.5/dist-packages/hyperopt/fmin.py", line 172, in run | |
self.serial_evaluate() | |
File "/usr/local/lib/python3.5/dist-packages/hyperopt/fmin.py", line 89, in serial_evaluate | |
result = self.domain.evaluate(spec, ctrl) | |
File "/usr/local/lib/python3.5/dist-packages/hyperopt/base.py", line 838, in evaluate | |
rval = self.fn(pyll_rval) | |
File "optimize.py", line 23, in objective | |
return cnn.main(config, params) | |
File "/home/flo/projects/master-thesis/code/cnn/tiny_cnn.py", line 265, in main | |
sess.run(train, feed_dict={images: sequences, pl_teachers: teachers, keep_prob: 0.5}) | |
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 710, in run | |
run_metadata_ptr) | |
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 908, in _run | |
feed_dict_string, options, run_metadata) | |
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 958, in _do_run | |
target_list, options, run_metadata) | |
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 978, in _do_call | |
raise type(e)(node_def, op, message) | |
tensorflow.python.framework.errors.ResourceExhaustedError: OOM when allocating tensor with shape[10,16,120,160,15] | |
[[Node: gradients_4/MaxPool3D_8_grad/MaxPool3DGrad = MaxPool3DGrad[T=DT_FLOAT, ksize=[1, 3, 3, 3, 1], padding="SAME", strides=[1, 2, 2, 2, 1], _device="/job:localhost/replica:0/task:0/gpu:0"](Relu_8, MaxPool3D_8, gradients_4/Conv3D_9_grad/tuple/control_dependency)]] | |
Caused by op 'gradients_4/MaxPool3D_8_grad/MaxPool3DGrad', defined at: | |
File "optimize.py", line 42, in <module> | |
optimize() | |
File "optimize.py", line 35, in optimize | |
best_model = hyperopt.fmin(objective, space, algo=hyperopt.tpe.suggest, max_evals=150) | |
File "/usr/local/lib/python3.5/dist-packages/hyperopt/fmin.py", line 319, in fmin | |
rval.exhaust() | |
File "/usr/local/lib/python3.5/dist-packages/hyperopt/fmin.py", line 198, in exhaust | |
self.run(self.max_evals - n_done, block_until_done=self.async) | |
File "/usr/local/lib/python3.5/dist-packages/hyperopt/fmin.py", line 172, in run | |
self.serial_evaluate() | |
File "/usr/local/lib/python3.5/dist-packages/hyperopt/fmin.py", line 89, in serial_evaluate | |
result = self.domain.evaluate(spec, ctrl) | |
File "/usr/local/lib/python3.5/dist-packages/hyperopt/base.py", line 838, in evaluate | |
rval = self.fn(pyll_rval) | |
File "optimize.py", line 23, in objective | |
return cnn.main(config, params) | |
File "/home/flo/projects/master-thesis/code/cnn/tiny_cnn.py", line 238, in main | |
train = optimizer.minimize(cost) | |
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/optimizer.py", line 196, in minimize | |
grad_loss=grad_loss) | |
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/optimizer.py", line 253, in compute_gradients | |
colocate_gradients_with_ops=colocate_gradients_with_ops) | |
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gradients.py", line 476, in gradients | |
in_grads = _AsList(grad_fn(op, *out_grads)) | |
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_grad.py", line 130, in _MaxPool3DGrad | |
padding=op.get_attr("padding")) | |
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_nn_ops.py", line 1182, in max_pool3d_grad | |
name=name) | |
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 703, in apply_op | |
op_def=op_def) | |
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 2333, in create_op | |
original_op=self._default_original_op, op_def=op_def) | |
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 1252, in __init__ | |
self._traceback = _extract_stack() | |
File "optimize.py", line 42, in <module> | |
optimize() | |
File "optimize.py", line 35, in optimize | |
best_model = hyperopt.fmin(objective, space, algo=hyperopt.tpe.suggest, max_evals=150) | |
File "/usr/local/lib/python3.5/dist-packages/hyperopt/fmin.py", line 319, in fmin | |
rval.exhaust() | |
File "/usr/local/lib/python3.5/dist-packages/hyperopt/fmin.py", line 198, in exhaust | |
self.run(self.max_evals - n_done, block_until_done=self.async) | |
File "/usr/local/lib/python3.5/dist-packages/hyperopt/fmin.py", line 172, in run | |
self.serial_evaluate() | |
File "/usr/local/lib/python3.5/dist-packages/hyperopt/fmin.py", line 89, in serial_evaluate | |
result = self.domain.evaluate(spec, ctrl) | |
File "/usr/local/lib/python3.5/dist-packages/hyperopt/base.py", line 838, in evaluate | |
rval = self.fn(pyll_rval) | |
File "optimize.py", line 23, in objective | |
return cnn.main(config, params) | |
File "/home/flo/projects/master-thesis/code/cnn/tiny_cnn.py", line 238, in main | |
train = optimizer.minimize(cost) | |
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/optimizer.py", line 196, in minimize | |
grad_loss=grad_loss) | |
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/optimizer.py", line 253, in compute_gradients | |
colocate_gradients_with_ops=colocate_gradients_with_ops) | |
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gradients.py", line 476, in gradients | |
in_grads = _AsList(grad_fn(op, *out_grads)) | |
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_grad.py", line 130, in _MaxPool3DGrad | |
padding=op.get_attr("padding")) | |
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_nn_ops.py", line 1182, in max_pool3d_grad | |
name=name) | |
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 703, in apply_op | |
op_def=op_def) | |
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 2333, in create_op | |
original_op=self._default_original_op, op_def=op_def) | |
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 1252, in __init__ | |
self._traceback = _extract_stack() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I am having the same problem. Did you ever find a solution?