Created
December 11, 2020 23:17
-
-
Save mfitton/244f118d3aeead26496b23f30f86763c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Iteration 1023: | |
- Iteration time: 9.326589822769165. | |
- Absolute time: 1607723877.3213854. | |
- Total elapsed time: 7786.904662370682. | |
2020-12-11 21:58:03,954 WARNING services.py:1640 -- WARNING: The object store is using /tmp instead of /dev/shm because /dev/shm has only 67108864 bytes available. This may slow down performance! You may be able to free up space by deleting files in /dev/shm or terminating any running plasma_store_server processes. If you are inside a Docker container, you may need to pass an argument with the flag '--shm-size' to 'docker run'. | |
(pid=raylet) E1211 21:58:05.133839404 73164 server_chttp2.cc:40] {"created":"@1607723885.133752839","description":"No address added out of total 1 resolved","file":"external/com_github_grpc_grpc/src/core/ext/transport/chttp2/server/chttp2_server.cc","file_line":394,"referenced_errors":[{"created":"@1607723885.133750775","description":"Failed to add any wildcard listeners","file":"external/com_github_grpc_grpc/src/core/lib/iomgr/tcp_server_posix.cc","file_line":340,"referenced_errors":[{"created":"@1607723885.133733031","description":"Unable to configure socket","fd":58,"file":"external/com_github_grpc_grpc/src/core/lib/iomgr/tcp_server_utils_posix_common.cc","file_line":207,"referenced_errors":[{"created":"@1607723885.133724833","description":"Address already in use","errno":98,"file":"external/com_github_grpc_grpc/src/core/lib/iomgr/tcp_server_utils_posix_common.cc","file_line":181,"os_error":"Address already in use","syscall":"bind"}]},{"created":"@1607723885.133750314","description":"Unable to configure socket","fd":58,"file":"external/com_github_grpc_grpc/src/core/lib/iomgr/tcp_server_utils_posix_common.cc","file_line":207,"referenced_errors":[{"created":"@1607723885.133746424","description":"Address already in use","errno":98,"file":"external/com_github_grpc_grpc/src/core/lib/iomgr/tcp_server_utils_posix_common.cc","file_line":181,"os_error":"Address already in use","syscall":"bind"}]}]}]} | |
(pid=raylet) [2020-12-11 21:58:05,133 E 73164 73164] logging.cc:414: *** Aborted at 1607723885 (unix time) try "date -d @1607723885" if you are using GNU date *** | |
(pid=raylet) [2020-12-11 21:58:05,134 E 73164 73164] logging.cc:414: PC: @ 0x0 (unknown) | |
(pid=raylet) [2020-12-11 21:58:05,134 E 73164 73164] logging.cc:414: *** SIGSEGV (@0x58) received by PID 73164 (TID 0x7f64441bc800) from PID 88; stack trace: *** | |
(pid=raylet) [2020-12-11 21:58:05,134 E 73164 73164] logging.cc:414: @ 0x5571527938bf google::(anonymous namespace)::FailureSignalHandler() | |
(pid=raylet) [2020-12-11 21:58:05,134 E 73164 73164] logging.cc:414: @ 0x7f644471e3c0 (unknown) | |
(pid=raylet) [2020-12-11 21:58:05,135 E 73164 73164] logging.cc:414: @ 0x557152821569 grpc::ServerInterface::RegisteredAsyncRequest::IssueRequest() | |
(pid=raylet) [2020-12-11 21:58:05,135 E 73164 73164] logging.cc:414: @ 0x5571522cad5a ray::rpc::NodeManagerService::WithAsyncMethod_RequestWorkerLease<>::RequestRequestWorkerLease() | |
(pid=raylet) [2020-12-11 21:58:05,136 E 73164 73164] logging.cc:414: @ 0x5571522ee6d5 ray::rpc::ServerCallFactoryImpl<>::CreateCall() | |
(pid=raylet) [2020-12-11 21:58:05,136 E 73164 73164] logging.cc:414: @ 0x5571526d7649 ray::rpc::GrpcServer::Run() | |
(pid=raylet) [2020-12-11 21:58:05,137 E 73164 73164] logging.cc:414: @ 0x55715236103e ray::raylet::NodeManager::NodeManager() | |
(pid=raylet) [2020-12-11 21:58:05,137 E 73164 73164] logging.cc:414: @ 0x5571522f1f73 ray::raylet::Raylet::Raylet() | |
(pid=raylet) [2020-12-11 21:58:05,137 E 73164 73164] logging.cc:414: @ 0x5571522c64cd _ZZ4mainENKUlN3ray6StatusEN5boost8optionalISt13unordered_mapISsSsSt4hashISsESt8equal_toISsESaISt4pairIKSsSsEEEEEE_clES0_SD_ | |
(pid=raylet) [2020-12-11 21:58:05,137 E 73164 73164] logging.cc:414: @ 0x5571522c7516 _ZNSt17_Function_handlerIFvN3ray6StatusERKN5boost8optionalISt13unordered_mapISsSsSt4hashISsESt8equal_toISsESaISt4pairIKSsSsEEEEEEZ4mainEUlS1_SE_E_E9_M_invokeERKSt9_Any_dataOS1_SG_ | |
(pid=raylet) [2020-12-11 21:58:05,137 E 73164 73164] logging.cc:414: @ 0x5571524751df _ZZN3ray3gcs28ServiceBasedNodeInfoAccessor22AsyncGetInternalConfigERKSt8functionIFvNS_6StatusERKN5boost8optionalISt13unordered_mapISsSsSt4hashISsESt8equal_toISsESaISt4pairIKSsSsEEEEEEEENKUlRKS3_RKNS_3rpc22GetInternalConfigReplyEE_clESO_SS_ | |
(pid=raylet) [2020-12-11 21:58:05,138 E 73164 73164] logging.cc:414: @ 0x557152430e41 _ZNSt17_Function_handlerIFvRKN3ray6StatusERKNS0_3rpc22GetInternalConfigReplyEEZNS4_12GcsRpcClient17GetInternalConfigERKNS4_24GetInternalConfigRequestERKSt8functionIS8_EEUlS3_S7_E_E9_M_invokeERKSt9_Any_dataS3_S7_ | |
(pid=raylet) [2020-12-11 21:58:05,139 E 73164 73164] logging.cc:414: @ 0x557152434b8f ray::rpc::ClientCallImpl<>::OnReplyReceived() | |
(pid=raylet) [2020-12-11 21:58:05,139 E 73164 73164] logging.cc:414: @ 0x557152342502 _ZN5boost4asio6detail18completion_handlerIZN3ray3rpc17ClientCallManager29PollEventsFromCompletionQueueEiEUlvE_E11do_completeEPvPNS1_19scheduler_operationERKNS_6system10error_codeEm | |
(pid=raylet) [2020-12-11 21:58:05,140 E 73164 73164] logging.cc:414: @ 0x557152ac7f61 boost::asio::detail::scheduler::do_run_one() | |
(pid=raylet) [2020-12-11 21:58:05,142 E 73164 73164] logging.cc:414: @ 0x557152ac9609 boost::asio::detail::scheduler::run() | |
(pid=raylet) [2020-12-11 21:58:05,142 E 73164 73164] logging.cc:414: @ 0x557152acbaf7 boost::asio::io_context::run() | |
(pid=raylet) [2020-12-11 21:58:05,143 E 73164 73164] logging.cc:414: @ 0x5571522a1757 main | |
(pid=raylet) [2020-12-11 21:58:05,143 E 73164 73164] logging.cc:414: @ 0x7f64441e80b3 __libc_start_main | |
(pid=raylet) [2020-12-11 21:58:05,144 E 73164 73164] logging.cc:414: @ 0x5571522b7a55 (unknown) | |
Traceback (most recent call last): | |
File "./workloads/node_failures.py", line 58, in <module> | |
cluster.add_node() | |
File "/root/anaconda3/lib/python3.7/site-packages/ray/cluster_utils.py", line 116, in add_node | |
self._wait_for_node(node) | |
File "/root/anaconda3/lib/python3.7/site-packages/ray/cluster_utils.py", line 172, in _wait_for_node | |
raise TimeoutError("Timed out while waiting for nodes to join.") | |
TimeoutError: Timed out while waiting for nodes to join. | |
2020-12-11 21:58:35,454 ERROR import_thread.py:89 -- ImportThread: Connection closed by server. | |
2020-12-11 21:58:35,455 ERROR worker.py:940 -- print_logs: Connection closed by server. | |
2020-12-11 21:58:35,455 ERROR worker.py:1034 -- listen_error_messages_raylet: Connection closed by server. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment