Hadoop NameNode failure
2015-01-19 06:53:50,446 WARN org.apache.hadoop.ipc.Server: IPC Server handler 0 on 8020, call org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol.blockReport from 10.51.28.157:37644 Call#8104114 Retry#0: error: java.lang.OutOfMemoryError: Java heap space
java.lang.OutOfMemoryError: Java heap space
    at org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolServerSideTranslatorPB.blockReport(DatanodeProtocolServerSideTranslatorPB.java:144)
    at org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos$DatanodeProtocolService$2.callBlockingMethod(DatanodeProtocolProtos.java:28061)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:585)
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1026)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1986)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1982)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548)
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1980)
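The OutOfMemoryError during blockReport handling means the NameNode heap could no longer hold its block map plus an incoming full block report from a DataNode. A common first response is to grow the NameNode heap in hadoop-env.sh and enable GC logging to confirm the pressure; the sizing below is only an illustrative sketch, not this cluster's actual value:

    # hadoop-env.sh -- illustrative heap bump; size -Xmx to the cluster's block count
    export HADOOP_NAMENODE_OPTS="-Xms8g -Xmx8g \
      -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Xloggc:/var/log/hadoop/nn-gc.log \
      ${HADOOP_NAMENODE_OPTS}"

With GC logging on, back-to-back full collections immediately before warnings like the one above would confirm that the heap, not the DataNodes, is the problem.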
2015-01-19 07:11:32,742 WARN org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager: Remote journal 10.51.28.143:8485 failed to write txns 49831378-49831380. Will try to write to this JN again after the next log roll.
org.apache.hadoop.ipc.RemoteException(java.io.IOException): IPC's epoch 6 is less than the last promised epoch 7
    at org.apache.hadoop.hdfs.qjournal.server.Journal.checkRequest(Journal.java:412)
    at org.apache.hadoop.hdfs.qjournal.server.Journal.checkWriteRequest(Journal.java:440)
    at org.apache.hadoop.hdfs.qjournal.server.Journal.journal(Journal.java:340)
    at org.apache.hadoop.hdfs.qjournal.server.JournalNodeRpcServer.journal(JournalNodeRpcServer.java:148)
    at org.apache.hadoop.hdfs.qjournal.protocolPB.QJournalProtocolServerSideTranslatorPB.journal(QJournalProtocolServerSideTranslatorPB.java:157)
    at org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos$QJournalProtocolService$2.callBlockingMethod(QJournalProtocolProtos.java:25299)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:585)
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1026)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1986)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1982)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548)
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1980)
    at org.apache.hadoop.ipc.Client.call(Client.java:1409)
    at org.apache.hadoop.ipc.Client.call(Client.java:1362)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:206)
    at com.sun.proxy.$Proxy8.journal(Unknown Source)
    at org.apache.hadoop.hdfs.qjournal.protocolPB.QJournalProtocolTranslatorPB.journal(QJournalProtocolTranslatorPB.java:167)
    at org.apache.hadoop.hdfs.qjournal.client.IPCLoggerChannel$7.call(IPCLoggerChannel.java:358)
    at org.apache.hadoop.hdfs.qjournal.client.IPCLoggerChannel$7.call(IPCLoggerChannel.java:351)
    at java.util.concurrent.FutureTask.run(FutureTask.java:262)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:745)
2015-01-19 07:11:32,745 WARN org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager: Remote journal 10.51.28.142:8485 failed to write txns 49831378-49831380. Will try to write to this JN again after the next log roll.
org.apache.hadoop.ipc.RemoteException(java.io.IOException): IPC's epoch 6 is less than the last promised epoch 7
    at org.apache.hadoop.hdfs.qjournal.server.Journal.checkRequest(Journal.java:412)
    at org.apache.hadoop.hdfs.qjournal.server.Journal.checkWriteRequest(Journal.java:440)
    at org.apache.hadoop.hdfs.qjournal.server.Journal.journal(Journal.java:340)
    at org.apache.hadoop.hdfs.qjournal.server.JournalNodeRpcServer.journal(JournalNodeRpcServer.java:148)
    at org.apache.hadoop.hdfs.qjournal.protocolPB.QJournalProtocolServerSideTranslatorPB.journal(QJournalProtocolServerSideTranslatorPB.java:157)
    at org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos$QJournalProtocolService$2.callBlockingMethod(QJournalProtocolProtos.java:25299)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:585)
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1026)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1986)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1982)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548)
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1980)
    at org.apache.hadoop.ipc.Client.call(Client.java:1409)
    at org.apache.hadoop.ipc.Client.call(Client.java:1362)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:206)
    at com.sun.proxy.$Proxy8.journal(Unknown Source)
    at org.apache.hadoop.hdfs.qjournal.protocolPB.QJournalProtocolTranslatorPB.journal(QJournalProtocolTranslatorPB.java:167)
    at org.apache.hadoop.hdfs.qjournal.client.IPCLoggerChannel$7.call(IPCLoggerChannel.java:358)
    at org.apache.hadoop.hdfs.qjournal.client.IPCLoggerChannel$7.call(IPCLoggerChannel.java:351)
    at java.util.concurrent.FutureTask.run(FutureTask.java:262)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:745)
2015-01-19 07:11:32,745 WARN org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager: Remote journal 10.51.28.144:8485 failed to write txns 49831378-49831380. Will try to write to this JN again after the next log roll.
org.apache.hadoop.ipc.RemoteException(java.io.IOException): IPC's epoch 6 is less than the last promised epoch 7
    at org.apache.hadoop.hdfs.qjournal.server.Journal.checkRequest(Journal.java:412)
    at org.apache.hadoop.hdfs.qjournal.server.Journal.checkWriteRequest(Journal.java:440)
    at org.apache.hadoop.hdfs.qjournal.server.Journal.journal(Journal.java:340)
    at org.apache.hadoop.hdfs.qjournal.server.JournalNodeRpcServer.journal(JournalNodeRpcServer.java:148)
    at org.apache.hadoop.hdfs.qjournal.protocolPB.QJournalProtocolServerSideTranslatorPB.journal(QJournalProtocolServerSideTranslatorPB.java:157)
    at org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos$QJournalProtocolService$2.callBlockingMethod(QJournalProtocolProtos.java:25299)
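All three JournalNodes (10.51.28.142 through .144) reject this writer with "IPC's epoch 6 is less than the last promised epoch 7". That is Quorum Journal Manager fencing working as designed: some NameNode has since been granted epoch 7 by the JournalNode quorum, so this node's epoch-6 writer is permanently locked out and will abort rather than split-brain the edit log. Which node now holds the active role can be checked with haadmin; nn1 and nn2 below are placeholders for this cluster's dfs.ha.namenodes.* service IDs:

    hdfs haadmin -getServiceState nn1   # prints "active" or "standby"
    hdfs haadmin -getServiceState nn2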
2015-01-19 07:56:27,476 WARN org.apache.hadoop.ha.HealthMonitor: Transport-level exception trying to monitor health of NameNode at us3sm2nn010r07.comp.prod.local/10.51.28.140:8020: Call From us3sm2nn010r07.comp.prod.local/10.51.28.140 to us3sm2nn010r07.comp.prod.local:8020 failed on socket timeout exception: java.net.SocketTimeoutException: 45000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/10.51.28.140:43635 remote=us3sm2nn010r07.comp.prod.local/10.51.28.140:8020]; For more details see: http://wiki.apache.org/hadoop/SocketTimeout
2015-01-19 07:56:27,476 INFO org.apache.hadoop.ha.HealthMonitor: Entering state SERVICE_NOT_RESPONDING
2015-01-19 07:56:27,477 INFO org.apache.hadoop.ha.ZKFailoverController: Local service NameNode at us3sm2nn010r07.comp.prod.local/10.51.28.140:8020 entered state: SERVICE_NOT_RESPONDING
2015-01-19 07:56:27,477 INFO org.apache.hadoop.ha.ZKFailoverController: Quitting master election for NameNode at us3sm2nn010r07.comp.prod.local/10.51.28.140:8020 and marking that fencing is necessary
2015-01-19 07:56:27,477 INFO org.apache.hadoop.ha.ActiveStandbyElector: Yielding from election
2015-01-19 07:56:27,487 INFO org.apache.zookeeper.ZooKeeper: Session: 0x347fa4cd4270022 closed
2015-01-19 07:56:27,487 INFO org.apache.zookeeper.ClientCnxn: EventThread shut down
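Here the local ZKFC's health-check RPC to its NameNode timed out after 45 seconds, so it declared SERVICE_NOT_RESPONDING, gave up its ZooKeeper election lock, and flagged the node for fencing; the peer ZKFC can then fence this node and promote the standby, consistent with the epoch bump seen above. The elector state lives in ZooKeeper and can be inspected directly; "nameservice1" below is a placeholder for the real dfs.nameservices value:

    # inside zkCli.sh
    get /hadoop-ha/nameservice1/ActiveStandbyElectorLock
    get /hadoop-ha/nameservice1/ActiveBreadCrumb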
2015-01-19 07:57:13,538 WARN org.apache.hadoop.ha.HealthMonitor: Transport-level exception trying to monitor health of NameNode at us3sm2nn010r07.comp.prod.local/10.51.28.140:8020: Call From us3sm2nn010r07.comp.prod.local/10.51.28.140 to us3sm2nn010r07.comp.prod.local:8020 failed on socket timeout exception: java.net.SocketTimeoutException: 45000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/10.51.28.140:43639 remote=us3sm2nn010r07.comp.prod.local/10.51.28.140:8020]; For more details see: http://wiki.apache.org/hadoop/SocketTimeout
2015-01-19 07:57:59,589 WARN org.apache.hadoop.ha.HealthMonitor: Transport-level exception trying to monitor health of NameNode at us3sm2nn010r07.comp.prod.local/10.51.28.140:8020: Call From us3sm2nn010r07.comp.prod.local/10.51.28.140 to us3sm2nn010r07.comp.prod.local:8020 failed on socket timeout exception: java.net.SocketTimeoutException: 45000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/10.51.28.140:43643 remote=us3sm2nn010r07.comp.prod.local/10.51.28.140:8020]; For more details see: http://wiki.apache.org/hadoop/SocketTimeout
2015-01-19 07:58:45,625 WARN org.apache.hadoop.ha.HealthMonitor: Transport-level exception trying to monitor health of NameNode at us3sm2nn010r07.comp.prod.local/10.51.28.140:8020: Call From us3sm2nn010r07.comp.prod.local/10.51.28.140 to us3sm2nn010r07.comp.prod.local:8020 failed on socket timeout exception: java.net.SocketTimeoutException: 45000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/10.51.28.140:43664 remote=us3sm2nn010r07.comp.prod.local/10.51.28.140:8020]; For more details see: http://wiki.apache.org/hadoop/SocketTimeout
2015-01-19 07:59:31,675 WARN org.apache.hadoop.ha.HealthMonitor: Transport-level exception trying to monitor health of NameNode at us3sm2nn010r07.comp.prod.local/10.51.28.140:8020: Call From us3sm2nn010r07.comp.prod.local/10.51.28.140 to us3sm2nn010r07.comp.prod.local:8020 failed on socket timeout exception: java.net.SocketTimeoutException: 45000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/10.51.28.140:43669 remote=us3sm2nn010r07.comp.prod.local/10.51.28.140:8020]; For more details see: http://wiki.apache.org/hadoop/SocketTimeout
2015-01-19 08:00:17,726 WARN org.apache.hadoop.ha.HealthMonitor: Transport-level exception trying to monitor health of NameNode at us3sm2nn010r07.comp.prod.local/10.51.28.140:8020: Call From us3sm2nn010r07.comp.prod.local/10.51.28.140 to us3sm2nn010r07.comp.prod.local:8020 failed on socket timeout exception: java.net.SocketTimeoutException: 45000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/10.51.28.140:43674 remote=us3sm2nn010r07.comp.prod.local/10.51.28.140:8020]; For more details see: http://wiki.apache.org/hadoop/SocketTimeout
2015-01-19 08:01:03,779 WARN org.apache.hadoop.ha.HealthMonitor: Transport-level exception trying to monitor health of NameNode at us3sm2nn010r07.comp.prod.local/10.51.28.140:8020: Call From us3sm2nn010r07.comp.prod.local/10.51.28.140 to us3sm2nn010r07.comp.prod.local:8020 failed on socket timeout exception: java.net.SocketTimeoutException: 45000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/10.51.28.140:43680 remote=us3sm2nn010r07.comp.prod.local/10.51.28.140:8020]; For more details see: http://wiki.apache.org/hadoop/SocketTimeout
2015-01-19 08:01:49,795 WARN org.apache.hadoop.ha.HealthMonitor: Transport-level exception trying to monitor health of NameNode at us3sm2nn010r07.comp.prod.local/10.51.28.140:8020: Call From us3sm2nn010r07.comp.prod.local/10.51.28.140 to us3sm2nn010r07.comp.prod.local:8020 failed on socket timeout exception: java.net.SocketTimeoutException: 45000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/10.51.28.140:43684 remote=us3sm2nn010r07.comp.prod.local/10.51.28.140:8020]; For more details see: http://wiki.apache.org/hadoop/SocketTimeout
2015-01-19 08:02:35,841 WARN org.apache.hadoop.ha.HealthMonitor: Transport-level exception trying to monitor health of NameNode at us3sm2nn010r07.comp.prod.local/10.51.28.140:8020: Call From us3sm2nn010r07.comp.prod.local/10.51.28.140 to us3sm2nn010r07.comp.prod.local:8020 failed on socket timeout exception: java.net.SocketTimeoutException: 45000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/10.51.28.140:43692 remote=us3sm2nn010r07.comp.prod.local/10.51.28.140:8020]; For more details see: http://wiki.apache.org/hadoop/SocketTimeout
2015-01-19 08:03:21,891 WARN org.apache.hadoop.ha.HealthMonitor: Transport-level exception trying to monitor health of NameNode at us3sm2nn010r07.comp.prod.local/10.51.28.140:8020: Call From us3sm2nn010r07.comp.prod.local/10.51.28.140 to us3sm2nn010r07.comp.prod.local:8020 failed on socket timeout exception: java.net.SocketTimeoutException: 45000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/10.51.28.140:43704 remote=us3sm2nn010r07.comp.prod.local/10.51.28.140:8020]; For more details see: http://wiki.apache.org/hadoop/SocketTimeout
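The health checks keep failing at exactly the 45000 ms limit, which matches the default of ha.health-monitor.rpc-timeout.ms. A NameNode stuck in back-to-back full GCs after heap exhaustion stalls exactly like this: the process stays up, but its RPC handlers never respond. The effective timeout can be confirmed with getconf, and raised in core-site.xml if long GC pauses rather than a true hang are expected, though the root cause in this log is the heap exhaustion shown at the top:

    hdfs getconf -confKey ha.health-monitor.rpc-timeout.ms   # default is 45000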