@nsivabalan
Created June 17, 2021 23:02
21/06/17 22:02:31 WARN scheduler.TaskSetManager: Lost task 13.1 in stage 7.2 (TID 660, ip-172-31-32-128.us-east-2.compute.internal, executor 16): FetchFailed(null, shuffleId=2, mapIndex=-1, mapId=-1, reduceId=13, message=
org.apache.spark.shuffle.MetadataFetchFailedException: Missing an output location for shuffle 2
at org.apache.spark.MapOutputTracker$.$anonfun$convertMapStatuses$2(MapOutputTracker.scala:1010)
at org.apache.spark.MapOutputTracker$.$anonfun$convertMapStatuses$2$adapted(MapOutputTracker.scala:1006)
at scala.collection.Iterator.foreach(Iterator.scala:941)
at scala.collection.Iterator.foreach$(Iterator.scala:941)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
at org.apache.spark.MapOutputTracker$.convertMapStatuses(MapOutputTracker.scala:1006)
at org.apache.spark.MapOutputTrackerWorker.getMapSizesByExecutorId(MapOutputTracker.scala:811)
at org.apache.spark.shuffle.sort.SortShuffleManager.getReader(SortShuffleManager.scala:128)
at org.apache.spark.sql.execution.ShuffledRowRDD.compute(ShuffledRowRDD.scala:185)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:446)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:449)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
)
21/06/17 22:02:31 WARN scheduler.TaskSetManager: Lost task 55.1 in stage 7.2 (TID 665, ip-172-31-32-128.us-east-2.compute.internal, executor 16): FetchFailed(null, shuffleId=2, mapIndex=-1, mapId=-1, reduceId=55, message=
org.apache.spark.shuffle.MetadataFetchFailedException: Missing an output location for shuffle 2
at org.apache.spark.MapOutputTracker$.$anonfun$convertMapStatuses$2(MapOutputTracker.scala:1010)
at org.apache.spark.MapOutputTracker$.$anonfun$convertMapStatuses$2$adapted(MapOutputTracker.scala:1006)
at scala.collection.Iterator.foreach(Iterator.scala:941)
at scala.collection.Iterator.foreach$(Iterator.scala:941)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
at org.apache.spark.MapOutputTracker$.convertMapStatuses(MapOutputTracker.scala:1006)
at org.apache.spark.MapOutputTrackerWorker.getMapSizesByExecutorId(MapOutputTracker.scala:811)
at org.apache.spark.shuffle.sort.SortShuffleManager.getReader(SortShuffleManager.scala:128)
at org.apache.spark.sql.execution.ShuffledRowRDD.compute(ShuffledRowRDD.scala:185)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:446)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:449)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
)
21/06/17 22:02:31 WARN scheduler.TaskSetManager: Lost task 31.1 in stage 7.2 (TID 661, ip-172-31-32-128.us-east-2.compute.internal, executor 16): FetchFailed(null, shuffleId=2, mapIndex=-1, mapId=-1, reduceId=31, message=
org.apache.spark.shuffle.MetadataFetchFailedException: Missing an output location for shuffle 2
at org.apache.spark.MapOutputTracker$.$anonfun$convertMapStatuses$2(MapOutputTracker.scala:1010)
at org.apache.spark.MapOutputTracker$.$anonfun$convertMapStatuses$2$adapted(MapOutputTracker.scala:1006)
at scala.collection.Iterator.foreach(Iterator.scala:941)
at scala.collection.Iterator.foreach$(Iterator.scala:941)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
at org.apache.spark.MapOutputTracker$.convertMapStatuses(MapOutputTracker.scala:1006)
at org.apache.spark.MapOutputTrackerWorker.getMapSizesByExecutorId(MapOutputTracker.scala:811)
at org.apache.spark.shuffle.sort.SortShuffleManager.getReader(SortShuffleManager.scala:128)
at org.apache.spark.sql.execution.ShuffledRowRDD.compute(ShuffledRowRDD.scala:185)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:446)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:449)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
)
21/06/17 22:02:31 WARN scheduler.TaskSetManager: Lost task 56.0 in stage 7.2 (TID 667, ip-172-31-32-128.us-east-2.compute.internal, executor 16): FetchFailed(null, shuffleId=2, mapIndex=-1, mapId=-1, reduceId=56, message=
org.apache.spark.shuffle.MetadataFetchFailedException: Missing an output location for shuffle 2
at org.apache.spark.MapOutputTracker$.$anonfun$convertMapStatuses$2(MapOutputTracker.scala:1010)
at org.apache.spark.MapOutputTracker$.$anonfun$convertMapStatuses$2$adapted(MapOutputTracker.scala:1006)
at scala.collection.Iterator.foreach(Iterator.scala:941)
at scala.collection.Iterator.foreach$(Iterator.scala:941)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
at org.apache.spark.MapOutputTracker$.convertMapStatuses(MapOutputTracker.scala:1006)
at org.apache.spark.MapOutputTrackerWorker.getMapSizesByExecutorId(MapOutputTracker.scala:811)
at org.apache.spark.shuffle.sort.SortShuffleManager.getReader(SortShuffleManager.scala:128)
at org.apache.spark.sql.execution.ShuffledRowRDD.compute(ShuffledRowRDD.scala:185)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:446)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:449)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
)
21/06/17 22:02:32 WARN server.TransportChannelHandler: Exception in connection from /172.31.36.248:49178
java.io.IOException: Connection reset by peer
at sun.nio.ch.FileDispatcherImpl.read0(Native Method)
at sun.nio.ch.SocketDispatcher.read(SocketDispatcher.java:39)
at sun.nio.ch.IOUtil.readIntoNativeBuffer(IOUtil.java:223)
at sun.nio.ch.IOUtil.read(IOUtil.java:192)
at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:379)
at io.netty.buffer.PooledByteBuf.setBytes(PooledByteBuf.java:253)
at io.netty.buffer.AbstractByteBuf.writeBytes(AbstractByteBuf.java:1133)
at io.netty.channel.socket.nio.NioSocketChannel.doReadBytes(NioSocketChannel.java:350)
at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:148)
at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:714)
at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:650)
at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:576)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:493)
at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
at java.lang.Thread.run(Thread.java:748)
21/06/17 22:02:32 WARN storage.BlockManagerMasterEndpoint: No more replicas available for rdd_13_80 !
21/06/17 22:02:32 WARN storage.BlockManagerMasterEndpoint: No more replicas available for rdd_13_23 !
21/06/17 22:02:32 WARN storage.BlockManagerMasterEndpoint: No more replicas available for rdd_13_57 !
21/06/17 22:02:32 WARN storage.BlockManagerMasterEndpoint: No more replicas available for rdd_13_45 !
21/06/17 22:02:32 WARN storage.BlockManagerMasterEndpoint: No more replicas available for rdd_13_70 !
21/06/17 22:02:32 WARN storage.BlockManagerMasterEndpoint: No more replicas available for rdd_13_55 !
21/06/17 22:02:32 WARN storage.BlockManagerMasterEndpoint: No more replicas available for rdd_13_39 !
21/06/17 22:02:32 WARN storage.BlockManagerMasterEndpoint: No more replicas available for rdd_13_49 !
21/06/17 22:02:32 WARN storage.BlockManagerMasterEndpoint: No more replicas available for rdd_13_84 !
21/06/17 22:02:32 WARN storage.BlockManagerMasterEndpoint: No more replicas available for rdd_13_75 !
21/06/17 22:02:32 WARN storage.BlockManagerMasterEndpoint: No more replicas available for rdd_13_31 !
21/06/17 22:02:32 WARN storage.BlockManagerMasterEndpoint: No more replicas available for rdd_13_61 !
21/06/17 22:02:32 WARN storage.BlockManagerMasterEndpoint: No more replicas available for rdd_13_6 !
21/06/17 22:02:32 WARN storage.BlockManagerMasterEndpoint: No more replicas available for rdd_13_12 !
21/06/17 22:02:32 WARN storage.BlockManagerMasterEndpoint: No more replicas available for rdd_13_28 !
21/06/17 22:02:32 WARN storage.BlockManagerMasterEndpoint: No more replicas available for rdd_13_42 !
21/06/17 22:02:32 WARN storage.BlockManagerMasterEndpoint: No more replicas available for rdd_13_38 !
21/06/17 22:02:32 WARN storage.BlockManagerMasterEndpoint: No more replicas available for rdd_13_52 !
21/06/17 22:02:32 WARN storage.BlockManagerMasterEndpoint: No more replicas available for rdd_13_35 !
21/06/17 22:02:32 WARN storage.BlockManagerMasterEndpoint: No more replicas available for rdd_13_36 !
21/06/17 22:02:32 ERROR cluster.YarnScheduler: Lost executor 9 on ip-172-31-36-248.us-east-2.compute.internal: Container killed by YARN for exceeding physical memory limits. 9.9 GB of 9.9 GB physical memory used. Consider boosting spark.executor.memoryOverhead.
21/06/17 22:02:32 WARN cluster.YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 9 for reason Container killed by YARN for exceeding physical memory limits. 9.9 GB of 9.9 GB physical memory used. Consider boosting spark.executor.memoryOverhead.
21/06/17 22:02:32 WARN scheduler.TaskSetManager: Lost task 6.0 in stage 7.2 (TID 609, ip-172-31-36-248.us-east-2.compute.internal, executor 9): ExecutorLostFailure (executor 9 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 9.9 GB of 9.9 GB physical memory used. Consider boosting spark.executor.memoryOverhead.
21/06/17 22:02:32 WARN scheduler.TaskSetManager: Lost task 0.0 in stage 7.2 (TID 603, ip-172-31-36-248.us-east-2.compute.internal, executor 9): ExecutorLostFailure (executor 9 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 9.9 GB of 9.9 GB physical memory used. Consider boosting spark.executor.memoryOverhead.
21/06/17 22:02:32 WARN scheduler.TaskSetManager: Lost task 3.0 in stage 7.2 (TID 606, ip-172-31-36-248.us-east-2.compute.internal, executor 9): ExecutorLostFailure (executor 9 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 9.9 GB of 9.9 GB physical memory used. Consider boosting spark.executor.memoryOverhead.
21/06/17 22:02:32 WARN scheduler.TaskSetManager: Lost task 5.0 in stage 7.2 (TID 608, ip-172-31-36-248.us-east-2.compute.internal, executor 9): ExecutorLostFailure (executor 9 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 9.9 GB of 9.9 GB physical memory used. Consider boosting spark.executor.memoryOverhead.
21/06/17 22:02:32 WARN scheduler.TaskSetManager: Lost task 2.0 in stage 7.2 (TID 605, ip-172-31-36-248.us-east-2.compute.internal, executor 9): ExecutorLostFailure (executor 9 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 9.9 GB of 9.9 GB physical memory used. Consider boosting spark.executor.memoryOverhead.
21/06/17 22:02:32 WARN scheduler.TaskSetManager: Lost task 1.0 in stage 7.2 (TID 604, ip-172-31-36-248.us-east-2.compute.internal, executor 9): ExecutorLostFailure (executor 9 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 9.9 GB of 9.9 GB physical memory used. Consider boosting spark.executor.memoryOverhead.
21/06/17 22:02:32 WARN scheduler.TaskSetManager: Lost task 4.0 in stage 7.2 (TID 607, ip-172-31-36-248.us-east-2.compute.internal, executor 9): ExecutorLostFailure (executor 9 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 9.9 GB of 9.9 GB physical memory used. Consider boosting spark.executor.memoryOverhead.
21/06/17 22:02:32 WARN scheduler.TaskSetManager: Lost task 7.0 in stage 7.2 (TID 610, ip-172-31-36-248.us-east-2.compute.internal, executor 9): ExecutorLostFailure (executor 9 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 9.9 GB of 9.9 GB physical memory used. Consider boosting spark.executor.memoryOverhead.
21/06/17 22:02:34 WARN scheduler.TaskSetManager: Lost task 48.0 in stage 7.2 (TID 651, ip-172-31-33-237.us-east-2.compute.internal, executor 11): FetchFailed(BlockManagerId(10, ip-172-31-32-128.us-east-2.compute.internal, 42697, None), shuffleId=2, mapIndex=43, mapId=498, reduceId=48, message=
org.apache.spark.shuffle.FetchFailedException: The relative remote executor(Id: 10), which maintains the block data to fetch is dead.
at org.apache.spark.storage.ShuffleBlockFetcherIterator.throwFetchFailedException(ShuffleBlockFetcherIterator.scala:748)
at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:663)
at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:70)
at org.apache.spark.util.CompletionIterator.next(CompletionIterator.scala:29)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:484)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:490)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
at org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:31)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeTask$1(FileFormatWriter.scala:272)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1411)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:281)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$15(FileFormatWriter.scala:205)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:446)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:449)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.ExecutorDeadException: The relative remote executor(Id: 10), which maintains the block data to fetch is dead.
at org.apache.spark.network.netty.NettyBlockTransferService$$anon$2.createAndStart(NettyBlockTransferService.scala:132)
at org.apache.spark.network.shuffle.RetryingBlockFetcher.fetchAllOutstanding(RetryingBlockFetcher.java:141)
at org.apache.spark.network.shuffle.RetryingBlockFetcher.start(RetryingBlockFetcher.java:121)
at org.apache.spark.network.netty.NettyBlockTransferService.fetchBlocks(NettyBlockTransferService.scala:143)
at org.apache.spark.storage.ShuffleBlockFetcherIterator.sendRequest(ShuffleBlockFetcherIterator.scala:278)
at org.apache.spark.storage.ShuffleBlockFetcherIterator.send$1(ShuffleBlockFetcherIterator.scala:721)
at org.apache.spark.storage.ShuffleBlockFetcherIterator.fetchUpToMaxBytes(ShuffleBlockFetcherIterator.scala:716)
at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:667)
... 20 more
)
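
The YARN scheduler messages above point to the root cause: executor 9 was killed for exceeding its physical memory limit (9.9 GB of 9.9 GB used), which made its shuffle output unavailable and triggered the MetadataFetchFailedException / FetchFailedException retries in stage 7.2. Following the log's own suggestion to boost spark.executor.memoryOverhead, below is a minimal hypothetical Scala sketch of how that setting could be raised; the app name and memory sizes are illustrative assumptions, not values taken from this job.

// Minimal sketch, assuming Spark 2.3+ on YARN. This is one possible way to apply the
// "Consider boosting spark.executor.memoryOverhead" hint from the log, not the
// configuration actually used by this job.
import org.apache.spark.sql.SparkSession

object BoostMemoryOverheadExample {
  def main(args: Array[String]): Unit = {
    // Executor-sizing configs must be in place before the SparkContext is created;
    // in cluster deployments they are usually passed as spark-submit --conf flags instead.
    val spark = SparkSession.builder()
      .appName("boost-memory-overhead-example")        // hypothetical app name
      .config("spark.executor.memory", "8g")           // executor heap (illustrative)
      .config("spark.executor.memoryOverhead", "3g")   // off-heap overhead; default is max(384 MiB, 10% of executor memory)
      .getOrCreate()

    // ... job logic would go here ...

    spark.stop()
  }
}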