Created
November 20, 2018 21:27
-
-
Save bearpelican/24930f0e9974102631a5c69ff8e9f222 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Attaching to program: /home/ubuntu/anaconda3/envs/pytorch_source/bin/python, process 3936 | |
| [New LWP 3963] | |
| [New LWP 3966] | |
| [New LWP 3989] | |
| [Thread debugging using libthread_db enabled] | |
| Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". | |
| 0x00007ffd67f80b39 in clock_gettime () | |
| (gdb) bt | |
| #0 0x00007ffd67f80b39 in clock_gettime () | |
| #1 0x00007f1b4d5f3876 in __GI___clock_gettime (clock_id=4, tp=0x7ffd67ef2780) at ../sysdeps/unix/clock_gettime.c:115 | |
| #2 0x00007f1b3dc65c4e in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1 | |
| #3 0x00007f1b3dcf48d3 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1 | |
| #4 0x00007f1b3dc1133c in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1 | |
| #5 0x00007f1b3dc114f8 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1 | |
| #6 0x00007f1b3dc3aaef in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1 | |
| #7 0x00007f1b3dd71536 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1 | |
| #8 0x00007f1b3db4f1c1 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1 | |
| #9 0x00007f1b3db4f458 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1 | |
| #10 0x00007f1b3db4f49e in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1 | |
| #11 0x00007f1b3dca6680 in cuLaunchKernel () from /usr/lib/x86_64-linux-gnu/libcuda.so.1 | |
| #12 0x00007f1b2296bf52 in cudart::cudaApiLaunchKernelCommon(void const*, dim3, dim3, void**, unsigned long, CUstream_st*, bool) () from /usr/local/cuda/lib64/libnccl.so.2 | |
| #13 0x00007f1b2296c147 in cudart::cudaApiLaunchKernel(void const*, dim3, dim3, void**, unsigned long, CUstream_st*) () from /usr/local/cuda/lib64/libnccl.so.2 | |
| #14 0x00007f1b229a05ab in cudaLaunchKernel () from /usr/local/cuda/lib64/libnccl.so.2 | |
| #15 0x00007f1b229111a5 in ncclBarrierEnqueueWait (comm=0x7f1a9801e510) at misc/enqueue.cu:188 | |
| #16 0x00007f1b229049ef in ncclGroupEnd () at misc/group.cu:148 | |
| #17 0x00007f1b3bd978d4 in c10d::ProcessGroupNCCL::allreduce(std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&) () from /home/ubuntu/anaconda3/envs/pytorch_source/lib/python3.7/site-packages/torch/_C.cpython-37m-x86_64-linux-gnu.so | |
| #18 0x00007f1b3bbac939 in pybind11::cpp_function::cpp_function<std::shared_ptr<c10d::ProcessGroup::Work>, c10d::ProcessGroup, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&, pybind11::name, pybind11::is_method, pybind11::sibling, pybi | |
| nd11::call_guard<pybind11::gil_scoped_release> >(std::shared_ptr<c10d::ProcessGroup::Work> (c10d::ProcessGroup::*)(std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling c | |
| onst&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&)#1}::operator()(c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&, | |
| c10d::AllreduceOptions const&) const (args#1=..., args#0=..., c=<optimized out>, __closure=<optimized out>) at /home/ubuntu/pytorch/third_party/pybind11/include/pybind11/pybind11.h:73 | |
| #19 pybind11::detail::argument_loader<c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&>::call_impl<std::shared_ptr<c10d::ProcessGroup::Work>, pybind11::cpp_function::cpp_function<std::shared_ptr<c10d::ProcessGroup:: | |
| Work>, c10d::ProcessGroup, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(std::shared_ptr<c10d::ProcessGroup::Work> (c10d::ProcessGroup::*)(std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&)#1}&, 0ul, 1ul, 2ul, pybind11::gil_scoped_release>(pybind11::cpp_function::cpp_function<std::shared_ptr<c10d::ProcessGroup::Work>, c10d::ProcessGroup, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(std::shared_ptr<c10d::ProcessGroup::Work> (c10d::ProcessGroup::*)(std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&)#1}&, pybind11::detail::index_sequence<0ul, 1ul, 2ul>, pybind11::gil_scoped_release&&) (f=..., this=0x7ffd67ef3ff0) at /home/ubuntu/pytorch/third_party/pybind11/include/pybind11/cast.h:1919 | |
| #20 pybind11::detail::argument_loader<c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&>::call<std::shared_ptr<c10d::ProcessGroup::Work>, pybind11::gil_scoped_release, pybind11::cpp_function::cpp_function<std::shared---Type <return> to continue, or q <return> to quit--- | |
| _ptr<c10d::ProcessGroup::Work>, c10d::ProcessGroup, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(std::shared_ptr<c10d::Pro | |
| cessGroup::Work> (c10d::ProcessGroup::*)(std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&)#1}&>(pybind11::cpp_function::cpp_function<std::shared_ptr<c10d::ProcessGroup::Work>, c10d::ProcessGroup, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(std::shared_ptr<c10d::ProcessGroup::Work> (c10d::ProcessGroup::*)(std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&)#1}&) && (f=..., this=0x7ffd67ef3ff0) at /home/ubuntu/pytorch/third_party/pybind11/include/pybind11/cast.h:1896 | |
| #21 void pybind11::cpp_function::initialize<pybind11::cpp_function::initialize<std::shared_ptr<c10d::ProcessGroup::Work>, c10d::ProcessGroup, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(std::shared_ptr<c10d::ProcessGroup::Work> (c10d::ProcessGroup::*)(std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&)#1}, std::shared_ptr<c10d::ProcessGroup::Work>, c10d::ProcessGroup*, std::v | |
| ector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(pybind11::cpp_function::initialize<std::shared_ptr<c10d::ProcessGroup::Work>, c10d: | |
| :ProcessGroup, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(std::shared_ptr<c10d::ProcessGroup::Work> (c10d::ProcessGroup: | |
| :*)(std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(c10d::ProcessGroup*, std::vector<at: | |
| :Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&)#1}&&, std::shared_ptr<c10d::ProcessGroup::Work> (*)(c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&), pybind11::name const&, pybind11::is_metho | |
| d const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(pybind11::detail::function_call&)#3}::operator()(pybind11::detail::function_call) const (call=..., __closure=0x0) | |
| at /home/ubuntu/pytorch/third_party/pybind11/include/pybind11/pybind11.h:154 | |
| #22 void pybind11::cpp_function::initialize<pybind11::cpp_function::initialize<std::shared_ptr<c10d::ProcessGroup::Work>, c10d::ProcessGroup, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&, pybind11::name, pybind11::is_method, pybind1 | |
| 1::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(std::shared_ptr<c10d::ProcessGroup::Work> (c10d::ProcessGroup::*)(std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&), pybind11::name const&, pybind11::is_method const&, pyb | |
| ind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&)#1}, std::shared_ptr<c10d::ProcessGroup::Work>, c10d::ProcessGroup*, std::v | |
| ector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(pybind11::cpp_function::initialize<std::shared_ptr<c10d::ProcessGroup::Work>, c10d: | |
| :ProcessGroup, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(std::shared_ptr<c10d::ProcessGroup::Work> (c10d::ProcessGroup: | |
| :*)(std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(c10d::ProcessGroup*, std::vector<at: | |
| :Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&)#1}&&, std::shared_ptr<c10d::ProcessGroup::Work> (*)(c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&), pybind11::name const&, pybind11::is_metho | |
| d const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(pybind11::detail::function_call&)#3}::_FUN(pybind11::detail::function_call) () at /home/ubuntu/pytorch/third_party/pybind11/include/pybind11/pybind11.h:132 | |
| #23 0x00007f1b3b7ee34c in pybind11::cpp_function::dispatcher (self=<optimized out>, args_in=0x7f1aed043ea0, kwargs_in=0x0) at /home/ubuntu/pytorch/third_party/pybind11/include/pybind11/pybind11.h:619 | |
| #24 0x0000555c47249fc4 in _PyMethodDef_RawFastCallKeywords () | |
| #25 0x0000555c4724a0e1 in _PyCFunction_FastCallKeywords () | |
| #26 0x0000555c472a66b2 in _PyEval_EvalFrameDefault () | |
| #27 0x0000555c471e7059 in _PyEval_EvalCodeWithName () | |
| #28 0x0000555c47249307 in _PyFunction_FastCallKeywords () | |
| #29 0x0000555c472a2841 in _PyEval_EvalFrameDefault () | |
| #30 0x0000555c4724907b in _PyFunction_FastCallKeywords () | |
| #31 0x0000555c472a1a66 in _PyEval_EvalFrameDefault () | |
| #32 0x0000555c4724907b in _PyFunction_FastCallKeywords () | |
| #33 0x0000555c472a1a66 in _PyEval_EvalFrameDefault () | |
| #34 0x0000555c471e7059 in _PyEval_EvalCodeWithName () | |
| #35 0x0000555c471e7f24 in PyEval_EvalCodeEx () | |
| #36 0x0000555c471e7f4c in PyEval_EvalCode () | |
| #37 0x0000555c47300a14 in run_mod () | |
| #38 0x0000555c47309f11 in PyRun_FileExFlags () | |
| #39 0x0000555c4730a104 in PyRun_SimpleFileExFlags () | |
| #40 0x0000555c4730bbbd in pymain_main.constprop () | |
| #41 0x0000555c4730be30 in _Py_UnixMain () | |
| #42 0x00007f1b4d4fe830 in __libc_start_main (main=0x555c471c7d20 <main>, argc=4, argv=0x7ffd67ef50e8, init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>, stack_end=0x7ffd67ef50d8) at ../csu/libc-start.c:291 | |
| #43 0x0000555c472b1052 in _start () at ../sysdeps/x86_64/elf/start.S:103 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment