Created
August 23, 2021 06:23
-
-
Save nsivabalan/21b3069689753a5ff71c7cd3e1c42264 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Upsert `updatesDf` into the Hudi dataset at hudi_testing/hudi_4/, wrapped in spark.time to time the write.
// PRECOMBINE_FIELD, RECORDKEY_FIELD, PARTITIONPATH_FIELD and OPERATION are not defined in this snippet —
// presumably imported from Hudi's DataSourceWriteOptions; confirm in the shell session.
// NOTE(review): "hoodie.datasource.write.table.name" is "hudi_3" while "hoodie.table.name" and the
// target path both say "hudi_4" — confirm whether the mismatch is intentional.
spark.time {
  val hudiWriteOptions = Map(
    "hoodie.upsert.shuffle.parallelism" -> "500",
    PRECOMBINE_FIELD.key() -> "created_at",
    RECORDKEY_FIELD.key() -> "id",
    PARTITIONPATH_FIELD.key() -> "type",
    "hoodie.parquet.compression.codec" -> "SNAPPY",
    OPERATION.key() -> "upsert",
    "hoodie.datasource.write.table.name" -> "hudi_3",
    "hoodie.table.name" -> "hudi_4"
  )
  val targetPath = "s3a://siva-test-bucket-june-16/hudi_testing/hudi_4/"

  updatesDf.write
    .format("hudi")
    .options(hudiWriteOptions)
    .mode("Append")
    .save(targetPath)
}
21/08/23 06:20:56 WARN cluster.YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 14 for reason Container from a bad node: container_1629694615075_0009_01_000015 on host: ip-172-31-41-172.us-east-2.compute.internal. Exit status: 143. Diagnostics: [2021-08-23 06:20:56.504]Container killed on request. Exit code is 143
[2021-08-23 06:20:56.504]Container exited with a non-zero exit code 143.
[2021-08-23 06:20:56.505]Killed by external signal
.
21/08/23 06:20:56 ERROR cluster.YarnScheduler: Lost executor 14 on ip-172-31-41-172.us-east-2.compute.internal: Container from a bad node: container_1629694615075_0009_01_000015 on host: ip-172-31-41-172.us-east-2.compute.internal. Exit status: 143. Diagnostics: [2021-08-23 06:20:56.504]Container killed on request. Exit code is 143
[2021-08-23 06:20:56.504]Container exited with a non-zero exit code 143.
[2021-08-23 06:20:56.505]Killed by external signal
.
21/08/23 06:20:56 WARN scheduler.TaskSetManager: Lost task 0.0 in stage 11.0 (TID 3649, ip-172-31-41-172.us-east-2.compute.internal, executor 14): ExecutorLostFailure (executor 14 exited caused by one of the running tasks) Reason: Container from a bad node: container_1629694615075_0009_01_000015 on host: ip-172-31-41-172.us-east-2.compute.internal. Exit status: 143. Diagnostics: [2021-08-23 06:20:56.504]Container killed on request. Exit code is 143
[2021-08-23 06:20:56.504]Container exited with a non-zero exit code 143.
[2021-08-23 06:20:56.505]Killed by external signal
.
21/08/23 06:20:57 WARN scheduler.TaskSetManager: Lost task 0.1 in stage 11.0 (TID 3650, ip-172-31-41-129.us-east-2.compute.internal, executor 3): FetchFailed(null, shuffleId=2, mapIndex=-1, mapId=-1, reduceId=0, message=
org.apache.spark.shuffle.MetadataFetchFailedException: Missing an output location for shuffle 2
at org.apache.spark.MapOutputTracker$.$anonfun$convertMapStatuses$2(MapOutputTracker.scala:1010)
at org.apache.spark.MapOutputTracker$.$anonfun$convertMapStatuses$2$adapted(MapOutputTracker.scala:1006)
at scala.collection.Iterator.foreach(Iterator.scala:941)
at scala.collection.Iterator.foreach$(Iterator.scala:941)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
at org.apache.spark.MapOutputTracker$.convertMapStatuses(MapOutputTracker.scala:1006)
at org.apache.spark.MapOutputTrackerWorker.getMapSizesByExecutorId(MapOutputTracker.scala:811)
at org.apache.spark.shuffle.sort.SortShuffleManager.getReader(SortShuffleManager.scala:128)
at org.apache.spark.sql.execution.ShuffledRowRDD.compute(ShuffledRowRDD.scala:185)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:55)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:446)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:449)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment