Created
March 20, 2017 17:40
-
-
Save loren/8402f82159d4b446c1f3031f3ca5eee8 to your computer and use it in GitHub Desktop.
Problem with master re-election under ZenDiscovery
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[2017-03-20T17:16:35,559][TRACE][o.e.d.z.MasterFaultDetection] [master-0-node] [master] [{master-2-node}{3jmeBAXdQlG9McfMB3J3Xg}{Nnyc66TvQCKwyEHQahpwbg}{10.0.0.89}{10.0.0.89:9300}] transport disconnected | |
[2017-03-20T17:16:35,560][DEBUG][o.e.d.z.MasterFaultDetection] [master-0-node] [master] stopping fault detection against master [{master-2-node}{3jmeBAXdQlG9McfMB3J3Xg}{Nnyc66TvQCKwyEHQahpwbg}{10.0.0.89}{10.0.0.89:9300}], reason [master failure, transport disconnected] | |
[2017-03-20T17:16:35,562][INFO ][o.e.d.z.ZenDiscovery ] [master-0-node] master_left [{master-2-node}{3jmeBAXdQlG9McfMB3J3Xg}{Nnyc66TvQCKwyEHQahpwbg}{10.0.0.89}{10.0.0.89:9300}], reason [transport disconnected] | |
[2017-03-20T17:16:35,563][WARN ][o.e.d.z.ZenDiscovery ] [master-0-node] master left (reason = transport disconnected), current nodes: nodes: | |
{master-2-node}{3jmeBAXdQlG9McfMB3J3Xg}{Nnyc66TvQCKwyEHQahpwbg}{10.0.0.89}{10.0.0.89:9300}, master | |
{ingest-0-node}{KuOUQ9bIRDquPo9ksxCOsQ}{zcrLSPVFTF2Xt9_v5IK_1w}{10.0.1.11}{10.0.1.11:1026} | |
{data-1-node}{CJ55YqftQvaUrOKPG07NyQ}{VyNooUSMRsmeRijh_Er_5Q}{10.0.3.84}{10.0.3.84:1027} | |
{coordinator-0-node}{quAH0aNTTSGvBVw-62ghBQ}{mXse27vOQCm5b--n5PEJ7w}{10.0.1.55}{10.0.1.55:1026} | |
{master-0-node}{j4AxqF1PSlOPUYgNCHL1rA}{4JNSDZLlSn2cqzQOgJit0g}{10.0.3.84}{10.0.3.84:9300}, local | |
{master-1-node}{XMiNEGkpTq6Jw7tO81j3Sw}{HX0y__0ATY2SutYvg_r7VA}{10.0.1.23}{10.0.1.23:9300} | |
{data-0-node}{G-nQsIU8TZe4lK3Po0KGBQ}{4U4yPhpvSuCMMoDOirvgbQ}{10.0.1.23}{10.0.1.23:1027} | |
[2017-03-20T17:16:35,564][TRACE][o.e.d.z.NodeJoinController] [master-0-node] starting an election context, will accumulate joins | |
[2017-03-20T17:16:35,565][TRACE][o.e.d.z.ZenDiscovery ] [master-0-node] starting to ping | |
[2017-03-20T17:16:35,571][TRACE][o.e.d.z.UnicastZenPing ] [master-0-node] resolved host [master-0-node.elastic.mesos:9300] to [10.0.3.84:9300] | |
[2017-03-20T17:16:35,571][TRACE][o.e.d.z.UnicastZenPing ] [master-0-node] resolved host [master-1-node.elastic.mesos:9300] to [10.0.1.23:9300] | |
[2017-03-20T17:16:35,571][TRACE][o.e.d.z.UnicastZenPing ] [master-0-node] resolved host [master-2-node.elastic.mesos:9300] to [10.0.0.89:9300] | |
[2017-03-20T17:16:35,572][TRACE][o.e.d.z.UnicastZenPing ] [master-0-node] [2] sending to {master-0-node}{j4AxqF1PSlOPUYgNCHL1rA}{4JNSDZLlSn2cqzQOgJit0g}{10.0.3.84}{10.0.3.84:9300} | |
[2017-03-20T17:16:35,572][TRACE][o.e.d.z.UnicastZenPing ] [master-0-node] [2] sending to {coordinator-0-node}{quAH0aNTTSGvBVw-62ghBQ}{mXse27vOQCm5b--n5PEJ7w}{10.0.1.55}{10.0.1.55:1026} | |
[2017-03-20T17:16:35,572][TRACE][o.e.d.z.UnicastZenPing ] [master-0-node] [2] sending to {data-0-node}{G-nQsIU8TZe4lK3Po0KGBQ}{4U4yPhpvSuCMMoDOirvgbQ}{10.0.1.23}{10.0.1.23:1027} | |
[2017-03-20T17:16:35,573][TRACE][o.e.d.z.UnicastZenPing ] [master-0-node] [2] received response from {master-0-node}{j4AxqF1PSlOPUYgNCHL1rA}{4JNSDZLlSn2cqzQOgJit0g}{10.0.3.84}{10.0.3.84:9300}: [ping_response{node [{data-1-node}{CJ55YqftQvaUrOKPG07NyQ}{VyNooUSMRsmeRijh_Er_5Q}{10.0.3.84}{10.0.3.84:1027}], id[7], master [{master-2-node}{3jmeBAXdQlG9McfMB3J3Xg}{Nnyc66TvQCKwyEHQahpwbg}{10.0.0.89}{10.0.0.89:9300}],cluster_state_version [37], cluster_name[elastic]}, ping_response{node [{data-0-node}{G-nQsIU8TZe4lK3Po0KGBQ}{4U4yPhpvSuCMMoDOirvgbQ}{10.0.1.23}{10.0.1.23:1027}], id[5], master [{master-2-node}{3jmeBAXdQlG9McfMB3J3Xg}{Nnyc66TvQCKwyEHQahpwbg}{10.0.0.89}{10.0.0.89:9300}],cluster_state_version [37], cluster_name[elastic]}, ping_response{node [{coordinator-0-node}{quAH0aNTTSGvBVw-62ghBQ}{mXse27vOQCm5b--n5PEJ7w}{10.0.1.55}{10.0.1.55:1026}], id[7], master [{master-2-node}{3jmeBAXdQlG9McfMB3J3Xg}{Nnyc66TvQCKwyEHQahpwbg}{10.0.0.89}{10.0.0.89:9300}],cluster_state_version [37], cluster_name[elastic]}, ping_response{node [{master-0-node}{j4AxqF1PSlOPUYgNCHL1rA}{4JNSDZLlSn2cqzQOgJit0g}{10.0.3.84}{10.0.3.84:9300}], id[19], master [{master-2-node}{3jmeBAXdQlG9McfMB3J3Xg}{Nnyc66TvQCKwyEHQahpwbg}{10.0.0.89}{10.0.0.89:9300}],cluster_state_version [37], cluster_name[elastic]}, ping_response{node [{master-0-node}{j4AxqF1PSlOPUYgNCHL1rA}{4JNSDZLlSn2cqzQOgJit0g}{10.0.3.84}{10.0.3.84:9300}], id[20], master [{master-2-node}{3jmeBAXdQlG9McfMB3J3Xg}{Nnyc66TvQCKwyEHQahpwbg}{10.0.0.89}{10.0.0.89:9300}],cluster_state_version [37], cluster_name[elastic]}] | |
[2017-03-20T17:16:35,573][TRACE][o.e.d.z.UnicastZenPing ] [master-0-node] [2] opening connection to [{master-2-node}{3jmeBAXdQlG9McfMB3J3Xg}{Nnyc66TvQCKwyEHQahpwbg}{10.0.0.89}{10.0.0.89:9300}] | |
[2017-03-20T17:16:35,570][WARN ][o.e.c.NodeConnectionsService] [master-0-node] failed to connect to node {master-2-node}{3jmeBAXdQlG9McfMB3J3Xg}{Nnyc66TvQCKwyEHQahpwbg}{10.0.0.89}{10.0.0.89:9300} (tried [1] times) | |
org.elasticsearch.transport.ConnectTransportException: [master-2-node][10.0.0.89:9300] connect_timeout[30s] | |
at org.elasticsearch.transport.netty4.Netty4Transport.connectToChannels(Netty4Transport.java:370) ~[?:?] | |
at org.elasticsearch.transport.TcpTransport.openConnection(TcpTransport.java:495) ~[elasticsearch-5.2.2.jar:5.2.2] | |
at org.elasticsearch.transport.TcpTransport.connectToNode(TcpTransport.java:460) ~[elasticsearch-5.2.2.jar:5.2.2] | |
at org.elasticsearch.transport.TransportService.connectToNode(TransportService.java:318) ~[elasticsearch-5.2.2.jar:5.2.2] | |
at org.elasticsearch.transport.TransportService.connectToNode(TransportService.java:305) ~[elasticsearch-5.2.2.jar:5.2.2] | |
at org.elasticsearch.cluster.NodeConnectionsService.validateNodeConnected(NodeConnectionsService.java:121) [elasticsearch-5.2.2.jar:5.2.2] | |
at org.elasticsearch.cluster.NodeConnectionsService.connectToNodes(NodeConnectionsService.java:87) [elasticsearch-5.2.2.jar:5.2.2] | |
at org.elasticsearch.cluster.service.ClusterService.publishAndApplyChanges(ClusterService.java:775) [elasticsearch-5.2.2.jar:5.2.2] | |
at org.elasticsearch.cluster.service.ClusterService.runTasks(ClusterService.java:628) [elasticsearch-5.2.2.jar:5.2.2] | |
at org.elasticsearch.cluster.service.ClusterService$UpdateTask.run(ClusterService.java:1112) [elasticsearch-5.2.2.jar:5.2.2] | |
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:527) [elasticsearch-5.2.2.jar:5.2.2] | |
at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.runAndClean(PrioritizedEsThreadPoolExecutor.java:238) [elasticsearch-5.2.2.jar:5.2.2] | |
at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.run(PrioritizedEsThreadPoolExecutor.java:201) [elasticsearch-5.2.2.jar:5.2.2] | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [?:1.8.0_112] | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [?:1.8.0_112] | |
at java.lang.Thread.run(Thread.java:745) [?:1.8.0_112] | |
Caused by: io.netty.channel.AbstractChannel$AnnotatedConnectException: Connection refused: 10.0.0.89/10.0.0.89:9300 | |
at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method) ~[?:?] | |
at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717) ~[?:?] | |
at io.netty.channel.socket.nio.NioSocketChannel.doFinishConnect(NioSocketChannel.java:346) ~[?:?] | |
at io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:340) ~[?:?] | |
at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:630) ~[?:?] | |
at io.netty.channel.nio.NioEventLoop.processSelectedKeysPlain(NioEventLoop.java:527) ~[?:?] | |
at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:481) ~[?:?] | |
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:441) ~[?:?] | |
at io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:858) ~[?:?] | |
... 1 more | |
[2017-03-20T17:16:35,575][TRACE][o.e.d.z.UnicastZenPing ] [master-0-node] [2] sending to {master-1-node}{XMiNEGkpTq6Jw7tO81j3Sw}{HX0y__0ATY2SutYvg_r7VA}{10.0.1.23}{10.0.1.23:9300} | |
[2017-03-20T17:16:35,575][TRACE][o.e.d.z.UnicastZenPing ] [master-0-node] [2] received response from {coordinator-0-node}{quAH0aNTTSGvBVw-62ghBQ}{mXse27vOQCm5b--n5PEJ7w}{10.0.1.55}{10.0.1.55:1026}: [ping_response{node [{master-0-node}{j4AxqF1PSlOPUYgNCHL1rA}{4JNSDZLlSn2cqzQOgJit0g}{10.0.3.84}{10.0.3.84:9300}], id[19], master [{master-2-node}{3jmeBAXdQlG9McfMB3J3Xg}{Nnyc66TvQCKwyEHQahpwbg}{10.0.0.89}{10.0.0.89:9300}],cluster_state_version [37], cluster_name[elastic]}, ping_response{node [{coordinator-0-node}{quAH0aNTTSGvBVw-62ghBQ}{mXse27vOQCm5b--n5PEJ7w}{10.0.1.55}{10.0.1.55:1026}], id[8], master [null],cluster_state_version [37], cluster_name[elastic]}] | |
[2017-03-20T17:16:35,576][TRACE][o.e.d.z.UnicastZenPing ] [master-0-node] [2] sending to {data-1-node}{CJ55YqftQvaUrOKPG07NyQ}{VyNooUSMRsmeRijh_Er_5Q}{10.0.3.84}{10.0.3.84:1027} | |
[2017-03-20T17:16:35,577][TRACE][o.e.d.z.UnicastZenPing ] [master-0-node] [2] received response from {data-0-node}{G-nQsIU8TZe4lK3Po0KGBQ}{4U4yPhpvSuCMMoDOirvgbQ}{10.0.1.23}{10.0.1.23:1027}: [ping_response{node [{master-0-node}{j4AxqF1PSlOPUYgNCHL1rA}{4JNSDZLlSn2cqzQOgJit0g}{10.0.3.84}{10.0.3.84:9300}], id[19], master [{master-2-node}{3jmeBAXdQlG9McfMB3J3Xg}{Nnyc66TvQCKwyEHQahpwbg}{10.0.0.89}{10.0.0.89:9300}],cluster_state_version [37], cluster_name[elastic]}, ping_response{node [{data-0-node}{G-nQsIU8TZe4lK3Po0KGBQ}{4U4yPhpvSuCMMoDOirvgbQ}{10.0.1.23}{10.0.1.23:1027}], id[6], master [null],cluster_state_version [37], cluster_name[elastic]}] | |
[2017-03-20T17:16:35,577][TRACE][o.e.d.z.UnicastZenPing ] [master-0-node] [2] failed to ping {master-2-node}{3jmeBAXdQlG9McfMB3J3Xg}{Nnyc66TvQCKwyEHQahpwbg}{10.0.0.89}{10.0.0.89:9300} | |
org.elasticsearch.transport.ConnectTransportException: [master-2-node][10.0.0.89:9300] connect_timeout[3s] | |
at org.elasticsearch.transport.netty4.Netty4Transport.connectToChannels(Netty4Transport.java:370) ~[?:?] | |
at org.elasticsearch.transport.TcpTransport.openConnection(TcpTransport.java:495) ~[elasticsearch-5.2.2.jar:5.2.2] | |
at org.elasticsearch.transport.TcpTransport.openConnection(TcpTransport.java:113) ~[elasticsearch-5.2.2.jar:5.2.2] | |
at org.elasticsearch.transport.TransportService.openConnection(TransportService.java:331) ~[elasticsearch-5.2.2.jar:5.2.2] | |
at org.elasticsearch.discovery.zen.UnicastZenPing$PingingRound.getOrConnect(UnicastZenPing.java:391) ~[elasticsearch-5.2.2.jar:5.2.2] | |
at org.elasticsearch.discovery.zen.UnicastZenPing$3.doRun(UnicastZenPing.java:500) [elasticsearch-5.2.2.jar:5.2.2] | |
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:596) [elasticsearch-5.2.2.jar:5.2.2] | |
at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) [elasticsearch-5.2.2.jar:5.2.2] | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [?:1.8.0_112] | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [?:1.8.0_112] | |
at java.lang.Thread.run(Thread.java:745) [?:1.8.0_112] | |
Caused by: io.netty.channel.AbstractChannel$AnnotatedConnectException: Connection refused: 10.0.0.89/10.0.0.89:9300 | |
at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method) ~[?:?] | |
at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717) ~[?:?] | |
at io.netty.channel.socket.nio.NioSocketChannel.doFinishConnect(NioSocketChannel.java:346) ~[?:?] | |
at io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:340) ~[?:?] | |
at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:630) ~[?:?] | |
at io.netty.channel.nio.NioEventLoop.processSelectedKeysPlain(NioEventLoop.java:527) ~[?:?] | |
at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:481) ~[?:?] | |
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:441) ~[?:?] | |
at io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:858) ~[?:?] | |
... 1 more | |
[2017-03-20T17:16:35,582][TRACE][o.e.d.z.UnicastZenPing ] [master-0-node] [2] received response from {master-1-node}{XMiNEGkpTq6Jw7tO81j3Sw}{HX0y__0ATY2SutYvg_r7VA}{10.0.1.23}{10.0.1.23:9300}: [ping_response{node [{data-0-node}{G-nQsIU8TZe4lK3Po0KGBQ}{4U4yPhpvSuCMMoDOirvgbQ}{10.0.1.23}{10.0.1.23:1027}], id[5], master [{master-2-node}{3jmeBAXdQlG9McfMB3J3Xg}{Nnyc66TvQCKwyEHQahpwbg}{10.0.0.89}{10.0.0.89:9300}],cluster_state_version [37], cluster_name[elastic]}, ping_response{node [{coordinator-0-node}{quAH0aNTTSGvBVw-62ghBQ}{mXse27vOQCm5b--n5PEJ7w}{10.0.1.55}{10.0.1.55:1026}], id[7], master [{master-2-node}{3jmeBAXdQlG9McfMB3J3Xg}{Nnyc66TvQCKwyEHQahpwbg}{10.0.0.89}{10.0.0.89:9300}],cluster_state_version [37], cluster_name[elastic]}, ping_response{node [{data-1-node}{CJ55YqftQvaUrOKPG07NyQ}{VyNooUSMRsmeRijh_Er_5Q}{10.0.3.84}{10.0.3.84:1027}], id[7], master [{master-2-node}{3jmeBAXdQlG9McfMB3J3Xg}{Nnyc66TvQCKwyEHQahpwbg}{10.0.0.89}{10.0.0.89:9300}],cluster_state_version [37], cluster_name[elastic]}, ping_response{node [{master-0-node}{j4AxqF1PSlOPUYgNCHL1rA}{4JNSDZLlSn2cqzQOgJit0g}{10.0.3.84}{10.0.3.84:9300}], id[19], master [{master-2-node}{3jmeBAXdQlG9McfMB3J3Xg}{Nnyc66TvQCKwyEHQahpwbg}{10.0.0.89}{10.0.0.89:9300}],cluster_state_version [37], cluster_name[elastic]}, ping_response{node [{master-1-node}{XMiNEGkpTq6Jw7tO81j3Sw}{HX0y__0ATY2SutYvg_r7VA}{10.0.1.23}{10.0.1.23:9300}], id[27], master [{master-2-node}{3jmeBAXdQlG9McfMB3J3Xg}{Nnyc66TvQCKwyEHQahpwbg}{10.0.0.89}{10.0.0.89:9300}],cluster_state_version [37], cluster_name[elastic]}] | |
[2017-03-20T17:16:35,583][TRACE][o.e.d.z.UnicastZenPing ] [master-0-node] [2] received response from {data-1-node}{CJ55YqftQvaUrOKPG07NyQ}{VyNooUSMRsmeRijh_Er_5Q}{10.0.3.84}{10.0.3.84:1027}: [ping_response{node [{master-0-node}{j4AxqF1PSlOPUYgNCHL1rA}{4JNSDZLlSn2cqzQOgJit0g}{10.0.3.84}{10.0.3.84:9300}], id[19], master [{master-2-node}{3jmeBAXdQlG9McfMB3J3Xg}{Nnyc66TvQCKwyEHQahpwbg}{10.0.0.89}{10.0.0.89:9300}],cluster_state_version [37], cluster_name[elastic]}, ping_response{node [{data-1-node}{CJ55YqftQvaUrOKPG07NyQ}{VyNooUSMRsmeRijh_Er_5Q}{10.0.3.84}{10.0.3.84:1027}], id[8], master [null],cluster_state_version [37], cluster_name[elastic]}] | |
[2017-03-20T17:16:36,262][INFO ][o.e.n.Node ] [master-0-node] stopping ... | |
[2017-03-20T17:16:36,267][TRACE][o.e.d.z.ZenDiscovery ] [master-0-node] pingAndWait interrupted | |
[2017-03-20T17:16:36,267][TRACE][o.e.d.z.ZenDiscovery ] [master-0-node] full ping responses: {none} | |
[2017-03-20T17:16:36,267][DEBUG][o.e.d.z.ZenDiscovery ] [master-0-node] filtered ping responses: (ignore_non_masters [false]) | |
--> ping_response{node [{master-0-node}{j4AxqF1PSlOPUYgNCHL1rA}{4JNSDZLlSn2cqzQOgJit0g}{10.0.3.84}{10.0.3.84:9300}], id[23], master [null],cluster_state_version [37], cluster_name[elastic]} | |
[2017-03-20T17:16:36,267][TRACE][o.e.d.z.ZenDiscovery ] [master-0-node] not enough master nodes [[Candidate{node={master-0-node}{j4AxqF1PSlOPUYgNCHL1rA}{4JNSDZLlSn2cqzQOgJit0g}{10.0.3.84}{10.0.3.84:9300}, clusterStateVersion=37}]] | |
[2017-03-20T17:16:36,268][TRACE][o.e.d.z.ZenDiscovery ] [master-0-node] thread is no longer in currentJoinThread. Stopping. | |
[2017-03-20T17:16:36,280][INFO ][c.a.e.s.StatsdService ] [master-0-node] StatsD reporter stopped | |
[2017-03-20T17:16:36,280][ERROR][c.a.e.s.StatsdService ] [master-0-node] Exiting StatsdReporterThread | |
[2017-03-20T17:16:36,281][INFO ][o.e.n.Node ] [master-0-node] stopped | |
[2017-03-20T17:16:36,282][INFO ][o.e.n.Node ] [master-0-node] closing ... | |
[2017-03-20T17:16:36,358][INFO ][o.e.n.Node ] [master-0-node] closed |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment