create table gh_17Gb_updates using parquet
location 's3a://siva-test-bucket-june-16/hudi_testing/gh_arch_dump/part_1_updates/'
as select public, (repo_id + 1000) as repo_id, repo_name, repo_url, payload, created_at, id, other, randomId, date_col, type
from gh_17Gb_date_col
ORDER BY RAND() limit 1000000;
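-- Note on the failure below (not part of the original session): the CTAS above rewrites
-- ~1M randomly ordered rows from gh_17Gb_date_col into a new parquet table. ORDER BY RAND()
-- with a global LIMIT forces a wide sort shuffle followed by a single-task limit stage.
-- Exit code 143 (SIGTERM) with "Killed by external signal" in the log below means YARN
-- terminated the containers, which is most commonly seen when an executor exceeds its
-- memory allotment, though a decommissioned/"bad" node can produce the same diagnostics.
-- The sketch below lists runtime-settable knobs that could be tried in the same spark-sql
-- session before re-issuing the statement; the values are illustrative assumptions, not
-- settings taken from this run.
SET spark.sql.adaptive.enabled=true;    -- let AQE coalesce and skew-split shuffle reads (Spark 3.x)
SET spark.sql.shuffle.partitions=2000;  -- spread the ORDER BY RAND() sort across more, smaller tasks
-- Memory-side limits such as spark.executor.memory / spark.executor.memoryOverhead cannot be
-- changed mid-session; they would have to be passed when the session is launched, e.g.
-- spark-sql --conf spark.executor.memoryOverhead=4g (value is an assumption).
-- The later MetadataFetchFailedException ("Missing an output location for shuffle 41") appears
-- to be a downstream symptom: once the executor holding the shuffle output is lost, retries of
-- stage 257 can no longer fetch its map output.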
21/07/29 03:31:17 ERROR TransportResponseHandler: Still have 1 requests outstanding when connection from /172.31.33.172:44406 is closed
21/07/29 03:31:17 WARN BlockManagerMasterEndpoint: Error trying to remove broadcast 154 from block manager BlockManagerId(14, ip-172-31-33-172.us-east-2.compute.internal, 44051, None)
java.io.IOException: Connection from /172.31.33.172:44406 closed
at org.apache.spark.network.client.TransportResponseHandler.channelInactive(TransportResponseHandler.java:146)
at org.apache.spark.network.server.TransportChannelHandler.channelInactive(TransportChannelHandler.java:117)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:262)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:248)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelInactive(AbstractChannelHandlerContext.java:241)
at io.netty.channel.ChannelInboundHandlerAdapter.channelInactive(ChannelInboundHandlerAdapter.java:81)
at io.netty.handler.timeout.IdleStateHandler.channelInactive(IdleStateHandler.java:277)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:262)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:248)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelInactive(AbstractChannelHandlerContext.java:241)
at io.netty.channel.ChannelInboundHandlerAdapter.channelInactive(ChannelInboundHandlerAdapter.java:81)
at org.apache.spark.network.util.TransportFrameDecoder.channelInactive(TransportFrameDecoder.java:225)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:262)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:248)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelInactive(AbstractChannelHandlerContext.java:241)
at io.netty.channel.DefaultChannelPipeline$HeadContext.channelInactive(DefaultChannelPipeline.java:1405)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:262)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:248)
at io.netty.channel.DefaultChannelPipeline.fireChannelInactive(DefaultChannelPipeline.java:901)
at io.netty.channel.AbstractChannel$AbstractUnsafe$8.run(AbstractChannel.java:818)
at io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)
at io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:497)
at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
at java.lang.Thread.run(Thread.java:748)
21/07/29 03:31:17 ERROR TransportClient: Failed to send RPC RPC 7173114253580272302 to /172.31.33.172:44406: java.nio.channels.ClosedChannelException
java.nio.channels.ClosedChannelException
at io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)
at io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)
at io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)
at io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)
at io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)
at io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1104)
at io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)
at io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)
at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
at java.lang.Thread.run(Thread.java:748)
21/07/29 03:31:17 WARN BlockManagerMasterEndpoint: No more replicas available for broadcast_154_piece0 !
21/07/29 03:31:17 WARN BlockManagerMasterEndpoint: Error trying to remove broadcast 155 from block manager BlockManagerId(14, ip-172-31-33-172.us-east-2.compute.internal, 44051, None)
java.io.IOException: Failed to send RPC RPC 7173114253580272302 to /172.31.33.172:44406: java.nio.channels.ClosedChannelException
at org.apache.spark.network.client.TransportClient$RpcChannelListener.handleFailure(TransportClient.java:363)
at org.apache.spark.network.client.TransportClient$StdChannelListener.operationComplete(TransportClient.java:340)
at io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:577)
at io.netty.util.concurrent.DefaultPromise.notifyListenersNow(DefaultPromise.java:551)
at io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:490)
at io.netty.util.concurrent.DefaultPromise.setValue0(DefaultPromise.java:615)
at io.netty.util.concurrent.DefaultPromise.setFailure0(DefaultPromise.java:608)
at io.netty.util.concurrent.DefaultPromise.tryFailure(DefaultPromise.java:117)
at io.netty.channel.AbstractChannel$AbstractUnsafe.safeSetFailure(AbstractChannel.java:993)
at io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)
at io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)
at io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)
at io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)
at io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1104)
at io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)
at io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)
at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.nio.channels.ClosedChannelException
at io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)
... 12 more
21/07/29 03:31:18 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 14 for reason Container from a bad node: container_1627525035661_0002_01_000015 on host: ip-172-31-33-172.us-east-2.compute.internal. Exit status: 143. Diagnostics: [2021-07-29 03:31:18.243]Container killed on request. Exit code is 143
[2021-07-29 03:31:18.244]Container exited with a non-zero exit code 143.
[2021-07-29 03:31:18.244]Killed by external signal
.
21/07/29 03:31:18 ERROR YarnScheduler: Lost executor 14 on ip-172-31-33-172.us-east-2.compute.internal: Container from a bad node: container_1627525035661_0002_01_000015 on host: ip-172-31-33-172.us-east-2.compute.internal. Exit status: 143. Diagnostics: [2021-07-29 03:31:18.243]Container killed on request. Exit code is 143
[2021-07-29 03:31:18.244]Container exited with a non-zero exit code 143.
[2021-07-29 03:31:18.244]Killed by external signal
.
21/07/29 03:31:18 WARN TaskSetManager: Lost task 0.0 in stage 257.0 (TID 10142, ip-172-31-33-172.us-east-2.compute.internal, executor 14): ExecutorLostFailure (executor 14 exited caused by one of the running tasks) Reason: Container from a bad node: container_1627525035661_0002_01_000015 on host: ip-172-31-33-172.us-east-2.compute.internal. Exit status: 143. Diagnostics: [2021-07-29 03:31:18.243]Container killed on request. Exit code is 143
[2021-07-29 03:31:18.244]Container exited with a non-zero exit code 143.
[2021-07-29 03:31:18.244]Killed by external signal
.
21/07/29 03:31:19 WARN TaskSetManager: Lost task 0.1 in stage 257.0 (TID 10143, ip-172-31-38-165.us-east-2.compute.internal, executor 11): FetchFailed(null, shuffleId=41, mapIndex=-1, mapId=-1, reduceId=0, message=
org.apache.spark.shuffle.MetadataFetchFailedException: Missing an output location for shuffle 41
at org.apache.spark.MapOutputTracker$.$anonfun$convertMapStatuses$2(MapOutputTracker.scala:1010)
at org.apache.spark.MapOutputTracker$.$anonfun$convertMapStatuses$2$adapted(MapOutputTracker.scala:1006)
at scala.collection.Iterator.foreach(Iterator.scala:941)
at scala.collection.Iterator.foreach$(Iterator.scala:941)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
at org.apache.spark.MapOutputTracker$.convertMapStatuses(MapOutputTracker.scala:1006)
at org.apache.spark.MapOutputTrackerWorker.getMapSizesByExecutorId(MapOutputTracker.scala:811)
at org.apache.spark.shuffle.sort.SortShuffleManager.getReader(SortShuffleManager.scala:128)
at org.apache.spark.sql.execution.ShuffledRowRDD.compute(ShuffledRowRDD.scala:185)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:446)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:449)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
)
21/07/29 03:32:08 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 7 for reason Container from a bad node: container_1627525035661_0002_01_000008 on host: ip-172-31-33-8.us-east-2.compute.internal. Exit status: 143. Diagnostics: [2021-07-29 03:32:08.563]Container killed on request. Exit code is 143
[2021-07-29 03:32:08.564]Container exited with a non-zero exit code 143.
[2021-07-29 03:32:08.564]Killed by external signal
.
21/07/29 03:32:08 ERROR YarnScheduler: Lost executor 7 on ip-172-31-33-8.us-east-2.compute.internal: Container from a bad node: container_1627525035661_0002_01_000008 on host: ip-172-31-33-8.us-east-2.compute.internal. Exit status: 143. Diagnostics: [2021-07-29 03:32:08.563]Container killed on request. Exit code is 143
[2021-07-29 03:32:08.564]Container exited with a non-zero exit code 143.
[2021-07-29 03:32:08.564]Killed by external signal
.
21/07/29 03:32:08 WARN TaskSetManager: Lost task 0.0 in stage 257.1 (TID 10157, ip-172-31-33-8.us-east-2.compute.internal, executor 7): ExecutorLostFailure (executor 7 exited caused by one of the running tasks) Reason: Container from a bad node: container_1627525035661_0002_01_000008 on host: ip-172-31-33-8.us-east-2.compute.internal. Exit status: 143. Diagnostics: [2021-07-29 03:32:08.563]Container killed on request. Exit code is 143
[2021-07-29 03:32:08.564]Container exited with a non-zero exit code 143.
[2021-07-29 03:32:08.564]Killed by external signal
.
21/07/29 03:32:09 WARN TaskSetManager: Lost task 0.1 in stage 257.1 (TID 10158, ip-172-31-38-165.us-east-2.compute.internal, executor 12): FetchFailed(null, shuffleId=41, mapIndex=-1, mapId=-1, reduceId=0, message=
org.apache.spark.shuffle.MetadataFetchFailedException: Missing an output location for shuffle 41
at org.apache.spark.MapOutputTracker$.$anonfun$convertMapStatuses$2(MapOutputTracker.scala:1010)
at org.apache.spark.MapOutputTracker$.$anonfun$convertMapStatuses$2$adapted(MapOutputTracker.scala:1006)
at scala.collection.Iterator.foreach(Iterator.scala:941)
at scala.collection.Iterator.foreach$(Iterator.scala:941)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
at org.apache.spark.MapOutputTracker$.convertMapStatuses(MapOutputTracker.scala:1006)
at org.apache.spark.MapOutputTrackerWorker.getMapSizesByExecutorId(MapOutputTracker.scala:811)
at org.apache.spark.shuffle.sort.SortShuffleManager.getReader(SortShuffleManager.scala:128)
at org.apache.spark.sql.execution.ShuffledRowRDD.compute(ShuffledRowRDD.scala:185)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:446)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:449)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
)
21/07/29 03:32:39 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 17 for reason Container from a bad node: container_1627525035661_0002_01_000032 on host: ip-172-31-45-254.us-east-2.compute.internal. Exit status: 143. Diagnostics: [2021-07-29 03:32:39.431]Container killed on request. Exit code is 143
[2021-07-29 03:32:39.431]Container exited with a non-zero exit code 143.
[2021-07-29 03:32:39.432]Killed by external signal
.
21/07/29 03:32:39 ERROR YarnScheduler: Lost executor 17 on ip-172-31-45-254.us-east-2.compute.internal: Container from a bad node: container_1627525035661_0002_01_000032 on host: ip-172-31-45-254.us-east-2.compute.internal. Exit status: 143. Diagnostics: [2021-07-29 03:32:39.431]Container killed on request. Exit code is 143
[2021-07-29 03:32:39.431]Container exited with a non-zero exit code 143.
[2021-07-29 03:32:39.432]Killed by external signal
.
21/07/29 03:32:39 WARN TaskSetManager: Lost task 0.0 in stage 257.2 (TID 10172, ip-172-31-45-254.us-east-2.compute.internal, executor 17): ExecutorLostFailure (executor 17 exited caused by one of the running tasks) Reason: Container from a bad node: container_1627525035661_0002_01_000032 on host: ip-172-31-45-254.us-east-2.compute.internal. Exit status: 143. Diagnostics: [2021-07-29 03:32:39.431]Container killed on request. Exit code is 143
[2021-07-29 03:32:39.431]Container exited with a non-zero exit code 143.
[2021-07-29 03:32:39.432]Killed by external signal
.
21/07/29 03:32:59 ERROR YarnScheduler: Lost executor 5 on ip-172-31-47-158.us-east-2.compute.internal: Container from a bad node: container_1627525035661_0002_01_000006 on host: ip-172-31-47-158.us-east-2.compute.internal. Exit status: 143. Diagnostics: [2021-07-29 03:32:59.234]Container killed on request. Exit code is 143
[2021-07-29 03:32:59.235]Container exited with a non-zero exit code 143.
[2021-07-29 03:32:59.235]Killed by external signal
.
21/07/29 03:32:59 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 5 for reason Container from a bad node: container_1627525035661_0002_01_000006 on host: ip-172-31-47-158.us-east-2.compute.internal. Exit status: 143. Diagnostics: [2021-07-29 03:32:59.234]Container killed on request. Exit code is 143
[2021-07-29 03:32:59.235]Container exited with a non-zero exit code 143.
[2021-07-29 03:32:59.235]Killed by external signal
.
21/07/29 03:32:59 WARN TaskSetManager: Lost task 0.1 in stage 257.2 (TID 10173, ip-172-31-47-158.us-east-2.compute.internal, executor 5): ExecutorLostFailure (executor 5 exited caused by one of the running tasks) Reason: Container from a bad node: container_1627525035661_0002_01_000006 on host: ip-172-31-47-158.us-east-2.compute.internal. Exit status: 143. Diagnostics: [2021-07-29 03:32:59.234]Container killed on request. Exit code is 143
[2021-07-29 03:32:59.235]Container exited with a non-zero exit code 143.
[2021-07-29 03:32:59.235]Killed by external signal
.
21/07/29 03:33:00 WARN TaskSetManager: Lost task 0.2 in stage 257.2 (TID 10174, ip-172-31-47-28.us-east-2.compute.internal, executor 4): FetchFailed(null, shuffleId=41, mapIndex=-1, mapId=-1, reduceId=0, message=
org.apache.spark.shuffle.MetadataFetchFailedException: Missing an output location for shuffle 41
at org.apache.spark.MapOutputTracker$.$anonfun$convertMapStatuses$2(MapOutputTracker.scala:1010)
at org.apache.spark.MapOutputTracker$.$anonfun$convertMapStatuses$2$adapted(MapOutputTracker.scala:1006)
at scala.collection.Iterator.foreach(Iterator.scala:941)
at scala.collection.Iterator.foreach$(Iterator.scala:941)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
at org.apache.spark.MapOutputTracker$.convertMapStatuses(MapOutputTracker.scala:1006)
at org.apache.spark.MapOutputTrackerWorker.getMapSizesByExecutorId(MapOutputTracker.scala:811)
at org.apache.spark.shuffle.sort.SortShuffleManager.getReader(SortShuffleManager.scala:128)
at org.apache.spark.sql.execution.ShuffledRowRDD.compute(ShuffledRowRDD.scala:185)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:446)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:449)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)