Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save nsivabalan/30f8f0d6c0c51437219b5df8aece7979 to your computer and use it in GitHub Desktop.
Save nsivabalan/30f8f0d6c0c51437219b5df8aece7979 to your computer and use it in GitHub Desktop.
scala> spark.time(df1.write.format("hudi").option("hoodie.bulkinsert.shuffle.parallelism","170").option(PRECOMBINE_FIELD_OPT_KEY, "created_at").option(RECORDKEY_FIELD_OPT_KEY, "id").option(PARTITIONPATH_FIELD_OPT_KEY,"date_col").option("hoodie.parquet.compression.codec", "SNAPPY").option(OPERATION_OPT_KEY,"bulk_insert").option(TABLE_NAME, "hudi_1").mode(Overwrite).save("s3a://siva-test-bucket-june-16/hudi_testing/hudi_base_1/"))
21/06/23 18:34:57 WARN hudi.HoodieSparkSqlWriter$: hoodie table at s3a://siva-test-bucket-june-16/hudi_testing/hudi_base_1 already exists. Deleting existing data & overwriting with new data.
21/06/23 18:41:57 WARN scheduler.TaskSetManager: Lost task 0.0 in stage 6.0 (TID 597, ip-172-31-35-212.us-east-2.compute.internal, executor 4): java.lang.RuntimeException: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: java.util.concurrent.ExecutionException: org.apache.hudi.exception.HoodieRemoteException: Read timed out
at org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:121)
at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:44)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:484)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:490)
at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:221)
at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:349)
at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1388)
at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1298)
at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1362)
at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1186)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:360)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:311)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:446)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:449)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: java.util.concurrent.ExecutionException: org.apache.hudi.exception.HoodieRemoteException: Read timed out
at org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:81)
at org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:36)
at org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:119)
... 22 more
Caused by: org.apache.hudi.exception.HoodieException: java.util.concurrent.ExecutionException: org.apache.hudi.exception.HoodieRemoteException: Read timed out
at org.apache.hudi.common.util.queue.BoundedInMemoryExecutor.execute(BoundedInMemoryExecutor.java:143)
at org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:77)
... 24 more
Caused by: java.util.concurrent.ExecutionException: org.apache.hudi.exception.HoodieRemoteException: Read timed out
at java.util.concurrent.FutureTask.report(FutureTask.java:122)
at java.util.concurrent.FutureTask.get(FutureTask.java:192)
at org.apache.hudi.common.util.queue.BoundedInMemoryExecutor.execute(BoundedInMemoryExecutor.java:141)
... 25 more
Caused by: org.apache.hudi.exception.HoodieRemoteException: Read timed out
at org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView.createMarker(RemoteHoodieTableFileSystemView.java:485)
at org.apache.hudi.table.marker.TimelineBasedMarkerFiles.create(TimelineBasedMarkerFiles.java:76)
at org.apache.hudi.table.marker.MarkerFiles.create(MarkerFiles.java:53)
at org.apache.hudi.io.HoodieWriteHandle.createMarkerFile(HoodieWriteHandle.java:181)
at org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:83)
at org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:65)
at org.apache.hudi.io.CreateHandleFactory.create(CreateHandleFactory.java:34)
at org.apache.hudi.execution.CopyOnWriteInsertHandler.consumeOneRecord(CopyOnWriteInsertHandler.java:83)
at org.apache.hudi.execution.CopyOnWriteInsertHandler.consumeOneRecord(CopyOnWriteInsertHandler.java:40)
at org.apache.hudi.common.util.queue.BoundedInMemoryQueueConsumer.consume(BoundedInMemoryQueueConsumer.java:37)
at org.apache.hudi.common.util.queue.BoundedInMemoryExecutor.lambda$null$2(BoundedInMemoryExecutor.java:121)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
... 3 more
Caused by: java.net.SocketTimeoutException: Read timed out
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.socketRead(SocketInputStream.java:116)
at java.net.SocketInputStream.read(SocketInputStream.java:171)
at java.net.SocketInputStream.read(SocketInputStream.java:141)
at org.apache.http.impl.io.SessionInputBufferImpl.streamRead(SessionInputBufferImpl.java:139)
at org.apache.http.impl.io.SessionInputBufferImpl.fillBuffer(SessionInputBufferImpl.java:155)
at org.apache.http.impl.io.SessionInputBufferImpl.readLine(SessionInputBufferImpl.java:284)
at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:140)
at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:57)
at org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:261)
at org.apache.http.impl.DefaultBHttpClientConnection.receiveResponseHeader(DefaultBHttpClientConnection.java:165)
at org.apache.http.impl.conn.CPoolProxy.receiveResponseHeader(CPoolProxy.java:167)
at org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:272)
at org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:124)
at org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:271)
at org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:184)
at org.apache.http.impl.execchain.RetryExec.execute(RetryExec.java:88)
at org.apache.http.impl.execchain.RedirectExec.execute(RedirectExec.java:110)
at org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:184)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:107)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55)
at org.apache.http.client.fluent.Request.execute(Request.java:151)
at org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView.executeRequest(RemoteHoodieTableFileSystemView.java:184)
at org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView.createMarker(RemoteHoodieTableFileSystemView.java:483)
... 14 more
21/06/23 18:46:58 WARN scheduler.TaskSetManager: Lost task 0.1 in stage 6.0 (TID 598, ip-172-31-35-212.us-east-2.compute.internal, executor 4): java.lang.RuntimeException: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: java.util.concurrent.ExecutionException: org.apache.hudi.exception.HoodieRemoteException: Read timed out
at org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:121)
at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:44)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:484)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:490)
at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:221)
at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:349)
at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1388)
at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1298)
at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1362)
at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1186)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:360)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:311)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:446)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:449)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: java.util.concurrent.ExecutionException: org.apache.hudi.exception.HoodieRemoteException: Read timed out
at org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:81)
at org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:36)
at org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:119)
... 22 more
Caused by: org.apache.hudi.exception.HoodieException: java.util.concurrent.ExecutionException: org.apache.hudi.exception.HoodieRemoteException: Read timed out
at org.apache.hudi.common.util.queue.BoundedInMemoryExecutor.execute(BoundedInMemoryExecutor.java:143)
at org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:77)
... 24 more
Caused by: java.util.concurrent.ExecutionException: org.apache.hudi.exception.HoodieRemoteException: Read timed out
at java.util.concurrent.FutureTask.report(FutureTask.java:122)
at java.util.concurrent.FutureTask.get(FutureTask.java:192)
at org.apache.hudi.common.util.queue.BoundedInMemoryExecutor.execute(BoundedInMemoryExecutor.java:141)
... 25 more
Caused by: org.apache.hudi.exception.HoodieRemoteException: Read timed out
at org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView.createMarker(RemoteHoodieTableFileSystemView.java:485)
at org.apache.hudi.table.marker.TimelineBasedMarkerFiles.create(TimelineBasedMarkerFiles.java:76)
at org.apache.hudi.table.marker.MarkerFiles.create(MarkerFiles.java:53)
at org.apache.hudi.io.HoodieWriteHandle.createMarkerFile(HoodieWriteHandle.java:181)
at org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:83)
at org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:65)
at org.apache.hudi.io.CreateHandleFactory.create(CreateHandleFactory.java:34)
at org.apache.hudi.execution.CopyOnWriteInsertHandler.consumeOneRecord(CopyOnWriteInsertHandler.java:83)
at org.apache.hudi.execution.CopyOnWriteInsertHandler.consumeOneRecord(CopyOnWriteInsertHandler.java:40)
at org.apache.hudi.common.util.queue.BoundedInMemoryQueueConsumer.consume(BoundedInMemoryQueueConsumer.java:37)
at org.apache.hudi.common.util.queue.BoundedInMemoryExecutor.lambda$null$2(BoundedInMemoryExecutor.java:121)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
... 3 more
Caused by: java.net.SocketTimeoutException: Read timed out
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.socketRead(SocketInputStream.java:116)
at java.net.SocketInputStream.read(SocketInputStream.java:171)
at java.net.SocketInputStream.read(SocketInputStream.java:141)
at org.apache.http.impl.io.SessionInputBufferImpl.streamRead(SessionInputBufferImpl.java:139)
at org.apache.http.impl.io.SessionInputBufferImpl.fillBuffer(SessionInputBufferImpl.java:155)
at org.apache.http.impl.io.SessionInputBufferImpl.readLine(SessionInputBufferImpl.java:284)
at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:140)
at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:57)
at org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:261)
at org.apache.http.impl.DefaultBHttpClientConnection.receiveResponseHeader(DefaultBHttpClientConnection.java:165)
at org.apache.http.impl.conn.CPoolProxy.receiveResponseHeader(CPoolProxy.java:167)
at org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:272)
at org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:124)
at org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:271)
at org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:184)
at org.apache.http.impl.execchain.RetryExec.execute(RetryExec.java:88)
at org.apache.http.impl.execchain.RedirectExec.execute(RedirectExec.java:110)
at org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:184)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:107)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55)
at org.apache.http.client.fluent.Request.execute(Request.java:151)
at org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView.executeRequest(RemoteHoodieTableFileSystemView.java:184)
at org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView.createMarker(RemoteHoodieTableFileSystemView.java:483)
... 14 more
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment