Last active
March 19, 2019 16:39
-
-
Save heiner/e4b3c6ef92ed3c6b15005798f7b8da91 to your computer and use it in GitHub Desktop.
libtorch Tensor + std::future bug example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ OMP_NUM_THREADS=1 gdb python3 | |
GNU gdb (Ubuntu 8.1-0ubuntu3) 8.1.0.20180409-git | |
Copyright (C) 2018 Free Software Foundation, Inc. | |
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html> | |
This is free software: you are free to change and redistribute it. | |
There is NO WARRANTY, to the extent permitted by law. Type "show copying" | |
and "show warranty" for details. | |
This GDB was configured as "x86_64-linux-gnu". | |
Type "show configuration" for configuration details. | |
For bug reporting instructions, please see: | |
<http://www.gnu.org/software/gdb/bugs/>. | |
Find the GDB manual and other documentation resources online at: | |
<http://www.gnu.org/software/gdb/documentation/>. | |
For help, type "help". | |
Type "apropos word" to search for commands related to "word"... | |
Reading symbols from python3...done. | |
(gdb) run run.py 2 | |
Starting program: /private/home/hnr/.conda/envs/extdev/bin/python3 run.py 2 | |
[Thread debugging using libthread_db enabled] | |
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". | |
[New Thread 0x7fffa9c32700 (LWP 17667)] | |
consume(): Future is ready. Should get it quickly now. | |
consume(): Got future value. All good. | |
consume(): Future is ready. Should get it quickly now. | |
consume(): Got future value. All good. | |
consume(): Future is ready. Should get it quickly now. | |
consume(): Got future value. All good. | |
consume(): Future is ready. Should get it quickly now. | |
consume(): Got future value. All good. | |
consume(): Future is ready. Should get it quickly now. | |
consume(): Got future value. All good. | |
consume(): Future is ready. Should get it quickly now. | |
consume(): Got future value. All good. | |
consume(): Future is ready. Should get it quickly now. | |
consume(): Got future value. All good. | |
consume(): Future is ready. Should get it quickly now. | |
consume(): Got future value. All good. | |
consume(): Future is ready. Should get it quickly now. | |
^C | |
Thread 1 "python3" received signal SIGINT, Interrupt. | |
0x00007ffff7bc39f3 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x555556c16028) | |
at ../sysdeps/unix/sysv/linux/futex-internal.h:88 | |
88 ../sysdeps/unix/sysv/linux/futex-internal.h: No such file or directory. | |
(gdb) info threads | |
Id Target Id Frame | |
* 1 Thread 0x7ffff7fc3740 (LWP 17643) "python3" 0x00007ffff7bc39f3 in futex_wait_cancelable (private=<optimized out>, | |
expected=0, futex_word=0x555556c16028) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 | |
2 Thread 0x7fffa9c32700 (LWP 17667) "python3" 0x00007ffff7bc3449 in futex_wait (private=<optimized out>, | |
expected=32767, futex_word=0x555556c1476c) at ../sysdeps/unix/sysv/linux/futex-internal.h:61 | |
(gdb) thread 2 | |
[Switching to thread 2 (Thread 0x7fffa9c32700 (LWP 17667))] | |
#0 0x00007ffff7bc3449 in futex_wait (private=<optimized out>, expected=32767, futex_word=0x555556c1476c) | |
at ../sysdeps/unix/sysv/linux/futex-internal.h:61 | |
61 ../sysdeps/unix/sysv/linux/futex-internal.h: No such file or directory. | |
(gdb) bt | |
#0 0x00007ffff7bc3449 in futex_wait (private=<optimized out>, expected=32767, futex_word=0x555556c1476c) | |
at ../sysdeps/unix/sysv/linux/futex-internal.h:61 | |
#1 futex_wait_simple (private=<optimized out>, expected=32767, futex_word=0x555556c1476c) | |
at ../sysdeps/nptl/futex-internal.h:135 | |
#2 __pthread_cond_destroy (cond=0x555556c14748) at pthread_cond_destroy.c:54 | |
#3 0x00007fffc6938bbc in std::__future_base::_State_baseV2::~_State_baseV2() () | |
from /private/home/hnr/.conda/envs/extdev/lib/python3.7/site-packages/torch/lib/libcaffe2.so | |
#4 0x00007fffe869ec75 in std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () | |
from /private/home/hnr/.conda/envs/extdev/lib/python3.7/site-packages/torch/lib/libtorch_python.so | |
#5 0x00007fffa9c41918 in std::__shared_count<(__gnu_cxx::_Lock_policy)2>::~__shared_count (this=<optimized out>, | |
__in_chrg=<optimized out>) at /usr/include/c++/7/bits/shared_ptr_base.h:684 | |
#6 std::__shared_ptr<std::__future_base::_State_baseV2, (__gnu_cxx::_Lock_policy)2>::~__shared_ptr ( | |
this=<optimized out>, __in_chrg=<optimized out>) at /usr/include/c++/7/bits/shared_ptr_base.h:1123 | |
#7 std::__shared_ptr<std::__future_base::_State_baseV2, (__gnu_cxx::_Lock_policy)2>::reset (this=<synthetic pointer>) | |
at /usr/include/c++/7/bits/shared_ptr_base.h:1235 | |
#8 std::__basic_future<std::vector<at::Tensor, std::allocator<at::Tensor> > >::_Reset::~_Reset ( | |
this=<synthetic pointer>, __in_chrg=<optimized out>) at /usr/include/c++/7/future:753 | |
#9 std::future<std::vector<at::Tensor, std::allocator<at::Tensor> > >::get (this=<synthetic pointer>) | |
at /usr/include/c++/7/future:795 | |
#10 Runner::consume (this=0x555556c16000) at bug.cc:38 | |
#11 0x00007fffa9c4fc77 in pybind11::cpp_function::cpp_function<void, Runner, , pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(void (Runner::*)(), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(Runner*)#1}::operator()(Runner*) const (c=<optimized out>, __closure=<optimized out>) | |
at /private/home/hnr/.conda/envs/extdev/lib/python3.7/site-packages/torch/include/pybind11/pybind11.h:74 | |
#12 pybind11::detail::argument_loader<Runner*>::call_impl<void, pybind11::cpp_function::cpp_function<void, Runner, , pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(void (Runner::*)(), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(Runner*)#1}&, 0ul, pybind11::gil_scoped_release>(pybind11::cpp_function::cpp_function<void, Runner, , pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(void (Runner::*)(), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(Runner*)#1}&, std::integer_sequence<unsigned long, 0ul>, pybind11::gil_scoped_release&&) ( | |
f=..., this=<optimized out>) | |
at /private/home/hnr/.conda/envs/extdev/lib/python3.7/site-packages/torch/include/pybind11/cast.h:1931 | |
#13 pybind11::detail::argument_loader<Runner*>::call<void, pybind11::gil_scoped_release, pybind11::cpp_function::cpp_function<void, Runner, , pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(void (Runner::*)(), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(Runner*)#1}&>(pybind11::cpp_function::cpp_function<void, Runner, , pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(void (Runner::*)(), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(Runner*)#1}&) && (f=..., this=0x7fffa9c315f0) | |
at /private/home/hnr/.conda/envs/extdev/lib/python3.7/site-packages/torch/include/pybind11/cast.h:1913 | |
#14 void pybind11::cpp_function::initialize<pybind11::cpp_function::initialize<void, Runner, , pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(void (Runner::*)(), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(Runner*)#1}, void, Runner*, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scop---Type <return> to continue, or q <return> to quit---q | |
Quit | |
(gdb) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Build:  CXX=c++ python3 setup.py build develop
 * Run:    python run.py
 */
#include <chrono>
#include <condition_variable>
#include <cstdint>
#include <deque>
#include <future>
#include <iostream>
#include <memory>
#include <mutex>

#include <torch/extension.h>
class Runner { | |
public: | |
void consume(int loops = 10) { | |
for (int i = 0; i < loops; ++i) { | |
std::promise<torch::Tensor> promise; | |
std::future<torch::Tensor> future = promise.get_future(); | |
{ | |
std::unique_lock<std::mutex> lock(mu_); | |
promises_.push_back(std::move(promise)); | |
} | |
can_produce_.notify_one(); | |
std::future_status status = future.wait_for(std::chrono::seconds(2)); | |
AT_ASSERTM(status == std::future_status::ready, | |
"Future timeout reached."); | |
std::cout << "consume(): Future is ready. Should get it quickly now." | |
<< std::endl; | |
torch::Tensor tensors = future.get(); | |
std::cout << "consume(): Got future value. All good." << std::endl; | |
} | |
} | |
void produce(torch::Tensor tensor) { | |
std::unique_lock<std::mutex> lock(mu_); | |
while (promises_.empty()) { | |
can_produce_.wait(lock); | |
} | |
const int batch_size = promises_.size(); | |
for (int b = 0; b < batch_size; ++b) { | |
promises_[b].set_value(tensor.select(0, b)); | |
} | |
promises_.erase(promises_.begin(), promises_.begin() + batch_size); | |
} | |
private: | |
std::condition_variable can_produce_; | |
std::mutex mu_; | |
std::deque<std::promise<torch::Tensor>> promises_; | |
}; | |
// Python bindings for the `tensorbug` module, exposing Runner with its
// consume() and produce() methods. Both methods are bound with a
// gil_scoped_release call guard, so the GIL is released while they run and
// one Python thread can block in consume() while another calls produce().
PYBIND11_MODULE(tensorbug, m) {
  py::class_<Runner>(m, "Runner")
      .def(py::init<>())
      // Mirror the C++ default (loops = 10) so that `runner.consume()` with
      // no argument also works from Python.
      .def("consume", &Runner::consume,
           py::call_guard<py::gil_scoped_release>(), py::arg("loops") = 10)
      .def("produce", &Runner::produce,
           py::call_guard<py::gil_scoped_release>(), py::arg("tensors"));
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import threading | |
import torch | |
import tensorbug | |
# Size of dimension 0 of every produced tensor, and the number of
# produce/consume rounds to run.
batch_size = 1
loops = 10

runner = tensorbug.Runner()

# consume() runs on a background thread; the extension binds it with a
# GIL-releasing call guard, so this thread can keep calling produce().
consume_thread = threading.Thread(
    target=runner.consume,
    kwargs={"loops": loops},
)
consume_thread.start()

# One produce() call per round; each call blocks until the consumer has
# queued a request.
for _ in range(loops):
    batch = torch.zeros((batch_size, 1))
    runner.produce(batch)

consume_thread.join()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build with | |
# CXX=c++ python3 setup.py build develop | |
import setuptools | |
import sys | |
from torch.utils import cpp_extension | |
# Platform-specific flags: on macOS build and link against libc++ and set the
# minimum deployment target; other platforms need nothing extra.
if sys.platform == 'darwin':
    extra_compile_args = ['-stdlib=libc++', '-mmacosx-version-min=10.12']
    extra_link_args = ['-stdlib=libc++']
else:
    extra_compile_args = []
    extra_link_args = []

# The extension is built from the single source file bug.cc as C++17.
tensorbug = cpp_extension.CppExtension(
    name='tensorbug',
    sources=['bug.cc'],
    language='c++',
    extra_compile_args=['-std=c++17', *extra_compile_args],
    extra_link_args=extra_link_args,
)

setuptools.setup(
    name='tensorbug',
    ext_modules=[tensorbug],
    cmdclass={'build_ext': cpp_extension.BuildExtension},
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment