Skip to content

Instantly share code, notes, and snippets.

@bearpelican
Created November 20, 2018 21:27
Show Gist options
  • Select an option

  • Save bearpelican/24930f0e9974102631a5c69ff8e9f222 to your computer and use it in GitHub Desktop.

Select an option

Save bearpelican/24930f0e9974102631a5c69ff8e9f222 to your computer and use it in GitHub Desktop.
Attaching to program: /home/ubuntu/anaconda3/envs/pytorch_source/bin/python, process 3936
[New LWP 3963]
[New LWP 3966]
[New LWP 3989]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
0x00007ffd67f80b39 in clock_gettime ()
(gdb) bt
#0 0x00007ffd67f80b39 in clock_gettime ()
#1 0x00007f1b4d5f3876 in __GI___clock_gettime (clock_id=4, tp=0x7ffd67ef2780) at ../sysdeps/unix/clock_gettime.c:115
#2 0x00007f1b3dc65c4e in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#3 0x00007f1b3dcf48d3 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#4 0x00007f1b3dc1133c in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#5 0x00007f1b3dc114f8 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#6 0x00007f1b3dc3aaef in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#7 0x00007f1b3dd71536 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#8 0x00007f1b3db4f1c1 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#9 0x00007f1b3db4f458 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#10 0x00007f1b3db4f49e in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#11 0x00007f1b3dca6680 in cuLaunchKernel () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#12 0x00007f1b2296bf52 in cudart::cudaApiLaunchKernelCommon(void const*, dim3, dim3, void**, unsigned long, CUstream_st*, bool) () from /usr/local/cuda/lib64/libnccl.so.2
#13 0x00007f1b2296c147 in cudart::cudaApiLaunchKernel(void const*, dim3, dim3, void**, unsigned long, CUstream_st*) () from /usr/local/cuda/lib64/libnccl.so.2
#14 0x00007f1b229a05ab in cudaLaunchKernel () from /usr/local/cuda/lib64/libnccl.so.2
#15 0x00007f1b229111a5 in ncclBarrierEnqueueWait (comm=0x7f1a9801e510) at misc/enqueue.cu:188
#16 0x00007f1b229049ef in ncclGroupEnd () at misc/group.cu:148
#17 0x00007f1b3bd978d4 in c10d::ProcessGroupNCCL::allreduce(std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&) () from /home/ubuntu/anaconda3/envs/pytorch_source/lib/python3.7/site-packages/torch/_C.cpython-37m-x86_64-linux-gnu.so
#18 0x00007f1b3bbac939 in pybind11::cpp_function::cpp_function<std::shared_ptr<c10d::ProcessGroup::Work>, c10d::ProcessGroup, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&, pybind11::name, pybind11::is_method, pybind11::sibling, pybi
nd11::call_guard<pybind11::gil_scoped_release> >(std::shared_ptr<c10d::ProcessGroup::Work> (c10d::ProcessGroup::*)(std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling c
onst&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&)#1}::operator()(c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&,
c10d::AllreduceOptions const&) const (args#1=..., args#0=..., c=<optimized out>, __closure=<optimized out>) at /home/ubuntu/pytorch/third_party/pybind11/include/pybind11/pybind11.h:73
#19 pybind11::detail::argument_loader<c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&>::call_impl<std::shared_ptr<c10d::ProcessGroup::Work>, pybind11::cpp_function::cpp_function<std::shared_ptr<c10d::ProcessGroup::
Work>, c10d::ProcessGroup, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(std::shared_ptr<c10d::ProcessGroup::Work> (c10d::ProcessGroup::*)(std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&)#1}&, 0ul, 1ul, 2ul, pybind11::gil_scoped_release>(pybind11::cpp_function::cpp_function<std::shared_ptr<c10d::ProcessGroup::Work>, c10d::ProcessGroup, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(std::shared_ptr<c10d::ProcessGroup::Work> (c10d::ProcessGroup::*)(std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&)#1}&, pybind11::detail::index_sequence<0ul, 1ul, 2ul>, pybind11::gil_scoped_release&&) (f=..., this=0x7ffd67ef3ff0) at /home/ubuntu/pytorch/third_party/pybind11/include/pybind11/cast.h:1919
#20 pybind11::detail::argument_loader<c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&>::call<std::shared_ptr<c10d::ProcessGroup::Work>, pybind11::gil_scoped_release, pybind11::cpp_function::cpp_function<std::shared
_ptr<c10d::ProcessGroup::Work>, c10d::ProcessGroup, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(std::shared_ptr<c10d::Pro
cessGroup::Work> (c10d::ProcessGroup::*)(std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&)#1}&>(pybind11::cpp_function::cpp_function<std::shared_ptr<c10d::ProcessGroup::Work>, c10d::ProcessGroup, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(std::shared_ptr<c10d::ProcessGroup::Work> (c10d::ProcessGroup::*)(std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&)#1}&) && (f=..., this=0x7ffd67ef3ff0) at /home/ubuntu/pytorch/third_party/pybind11/include/pybind11/cast.h:1896
#21 void pybind11::cpp_function::initialize<pybind11::cpp_function::initialize<std::shared_ptr<c10d::ProcessGroup::Work>, c10d::ProcessGroup, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(std::shared_ptr<c10d::ProcessGroup::Work> (c10d::ProcessGroup::*)(std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&)#1}, std::shared_ptr<c10d::ProcessGroup::Work>, c10d::ProcessGroup*, std::v
ector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(pybind11::cpp_function::initialize<std::shared_ptr<c10d::ProcessGroup::Work>, c10d:
:ProcessGroup, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(std::shared_ptr<c10d::ProcessGroup::Work> (c10d::ProcessGroup:
:*)(std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(c10d::ProcessGroup*, std::vector<at:
:Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&)#1}&&, std::shared_ptr<c10d::ProcessGroup::Work> (*)(c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&), pybind11::name const&, pybind11::is_metho
d const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(pybind11::detail::function_call&)#3}::operator()(pybind11::detail::function_call) const (call=..., __closure=0x0)
at /home/ubuntu/pytorch/third_party/pybind11/include/pybind11/pybind11.h:154
#22 void pybind11::cpp_function::initialize<pybind11::cpp_function::initialize<std::shared_ptr<c10d::ProcessGroup::Work>, c10d::ProcessGroup, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&, pybind11::name, pybind11::is_method, pybind1
1::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(std::shared_ptr<c10d::ProcessGroup::Work> (c10d::ProcessGroup::*)(std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&), pybind11::name const&, pybind11::is_method const&, pyb
ind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&)#1}, std::shared_ptr<c10d::ProcessGroup::Work>, c10d::ProcessGroup*, std::v
ector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(pybind11::cpp_function::initialize<std::shared_ptr<c10d::ProcessGroup::Work>, c10d:
:ProcessGroup, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&, pybind11::name, pybind11::is_method, pybind11::sibling, pybind11::call_guard<pybind11::gil_scoped_release> >(std::shared_ptr<c10d::ProcessGroup::Work> (c10d::ProcessGroup:
:*)(std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(c10d::ProcessGroup*, std::vector<at:
:Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&)#1}&&, std::shared_ptr<c10d::ProcessGroup::Work> (*)(c10d::ProcessGroup*, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllreduceOptions const&), pybind11::name const&, pybind11::is_metho
d const&, pybind11::sibling const&, pybind11::call_guard<pybind11::gil_scoped_release> const&)::{lambda(pybind11::detail::function_call&)#3}::_FUN(pybind11::detail::function_call) () at /home/ubuntu/pytorch/third_party/pybind11/include/pybind11/pybind11.h:132
#23 0x00007f1b3b7ee34c in pybind11::cpp_function::dispatcher (self=<optimized out>, args_in=0x7f1aed043ea0, kwargs_in=0x0) at /home/ubuntu/pytorch/third_party/pybind11/include/pybind11/pybind11.h:619
#24 0x0000555c47249fc4 in _PyMethodDef_RawFastCallKeywords ()
#25 0x0000555c4724a0e1 in _PyCFunction_FastCallKeywords ()
#26 0x0000555c472a66b2 in _PyEval_EvalFrameDefault ()
#27 0x0000555c471e7059 in _PyEval_EvalCodeWithName ()
#28 0x0000555c47249307 in _PyFunction_FastCallKeywords ()
#29 0x0000555c472a2841 in _PyEval_EvalFrameDefault ()
#30 0x0000555c4724907b in _PyFunction_FastCallKeywords ()
#31 0x0000555c472a1a66 in _PyEval_EvalFrameDefault ()
#32 0x0000555c4724907b in _PyFunction_FastCallKeywords ()
#33 0x0000555c472a1a66 in _PyEval_EvalFrameDefault ()
#34 0x0000555c471e7059 in _PyEval_EvalCodeWithName ()
#35 0x0000555c471e7f24 in PyEval_EvalCodeEx ()
#36 0x0000555c471e7f4c in PyEval_EvalCode ()
#37 0x0000555c47300a14 in run_mod ()
#38 0x0000555c47309f11 in PyRun_FileExFlags ()
#39 0x0000555c4730a104 in PyRun_SimpleFileExFlags ()
#40 0x0000555c4730bbbd in pymain_main.constprop ()
#41 0x0000555c4730be30 in _Py_UnixMain ()
#42 0x00007f1b4d4fe830 in __libc_start_main (main=0x555c471c7d20 <main>, argc=4, argv=0x7ffd67ef50e8, init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>, stack_end=0x7ffd67ef50d8) at ../csu/libc-start.c:291
#43 0x0000555c472b1052 in _start () at ../sysdeps/x86_64/elf/start.S:103
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment