21/08/18 07:00:38 INFO spark.SparkContext: Created broadcast 2 from broadcast at DAGScheduler.scala:1184
21/08/18 07:00:38 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 1 (MapPartitionsRDD[6] at map at AvroDFSSource.java:65) (first 15 tasks are for partitions Vector(0))
21/08/18 07:00:38 INFO scheduler.TaskSchedulerImpl: Adding task set 1.0 with 1 tasks
21/08/18 07:00:38 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 1.0 (TID 1, localhost, executor driver, partition 0, PROCESS_LOCAL, 8017 bytes)
21/08/18 07:00:38 INFO executor.Executor: Running task 0.0 in stage 1.0 (TID 1)
21/08/18 07:00:38 INFO rdd.NewHadoopRDD: Input split: s3a://siva-test-bucket-june-16/hudi_testing/hudi-integ-test-suite/input/1/d7a2ecaa-5acc-4b04-a2fb-87b88eea6908.avro:0+158531
21/08/18 07:00:38 WARN mapreduce.AvroKeyInputFormat: Reader schema was not set. Use AvroJob.setInputKeySchema() if desired.
21/08/18 07:00:38 INFO mapreduce.AvroKeyInputFormat: Using a reader schema equal to the writer schema.
21/08/18 07:00:38 ERROR executor.Executor: Exception in task 0.0 in stage 1.0 (TID 1)
org.apache.http.ConnectionClosedException: Premature end of Content-Length delimited message body (expected: 158531; received: 16384
at org.apache.http.impl.io.ContentLengthInputStream.read(ContentLengthInputStream.java:178)
at org.apache.http.impl.io.ContentLengthInputStream.read(ContentLengthInputStream.java:198)
at org.apache.http.impl.io.ContentLengthInputStream.close(ContentLengthInputStream.java:101)
at org.apache.http.conn.BasicManagedEntity.streamClosed(BasicManagedEntity.java:166)
at org.apache.http.conn.EofSensorInputStream.checkClose(EofSensorInputStream.java:228)
at org.apache.http.conn.EofSensorInputStream.close(EofSensorInputStream.java:172)
at java.io.FilterInputStream.close(FilterInputStream.java:181)
at java.io.FilterInputStream.close(FilterInputStream.java:181)
at java.io.FilterInputStream.close(FilterInputStream.java:181)
at java.io.FilterInputStream.close(FilterInputStream.java:181)
at com.amazonaws.services.s3.model.S3ObjectInputStream.abort(S3ObjectInputStream.java:90)
at org.apache.hadoop.fs.s3a.S3AInputStream.reopen(S3AInputStream.java:72)
at org.apache.hadoop.fs.s3a.S3AInputStream.seek(S3AInputStream.java:115)
at org.apache.hadoop.fs.FSDataInputStream.seek(FSDataInputStream.java:62)
at org.apache.avro.mapred.FsInput.seek(FsInput.java:59)
at org.apache.avro.file.DataFileReader$SeekableInputStream.seek(DataFileReader.java:190)
at org.apache.avro.file.DataFileReader.seek(DataFileReader.java:114)
at org.apache.avro.file.DataFileReader.sync(DataFileReader.java:127)
at org.apache.avro.mapreduce.AvroRecordReaderBase.initialize(AvroRecordReaderBase.java:102)
at org.apache.spark.rdd.NewHadoopRDD$$anon$1.liftedTree1$1(NewHadoopRDD.scala:199)
at org.apache.spark.rdd.NewHadoopRDD$$anon$1.<init>(NewHadoopRDD.scala:196)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:151)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:70)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
21/08/18 07:00:38 WARN scheduler.TaskSetManager: Lost task 0.0 in stage 1.0 (TID 1, localhost, executor driver): org.apache.http.ConnectionClosedException: Premature end of Content-Length delimited message body (expected: 158531; received: 16384
at org.apache.http.impl.io.ContentLengthInputStream.read(ContentLengthInputStream.java:178)
at org.apache.http.impl.io.ContentLengthInputStream.read(ContentLengthInputStream.java:198)
at org.apache.http.impl.io.ContentLengthInputStream.close(ContentLengthInputStream.java:101)
at org.apache.http.conn.BasicManagedEntity.streamClosed(BasicManagedEntity.java:166)
at org.apache.http.conn.EofSensorInputStream.checkClose(EofSensorInputStream.java:228)
at org.apache.http.conn.EofSensorInputStream.close(EofSensorInputStream.java:172)
at java.io.FilterInputStream.close(FilterInputStream.java:181)
at java.io.FilterInputStream.close(FilterInputStream.java:181)
at java.io.FilterInputStream.close(FilterInputStream.java:181)
at java.io.FilterInputStream.close(FilterInputStream.java:181)
at com.amazonaws.services.s3.model.S3ObjectInputStream.abort(S3ObjectInputStream.java:90)
at org.apache.hadoop.fs.s3a.S3AInputStream.reopen(S3AInputStream.java:72)
at org.apache.hadoop.fs.s3a.S3AInputStream.seek(S3AInputStream.java:115)
at org.apache.hadoop.fs.FSDataInputStream.seek(FSDataInputStream.java:62)
at org.apache.avro.mapred.FsInput.seek(FsInput.java:59)
at org.apache.avro.file.DataFileReader$SeekableInputStream.seek(DataFileReader.java:190)
at org.apache.avro.file.DataFileReader.seek(DataFileReader.java:114)
at org.apache.avro.file.DataFileReader.sync(DataFileReader.java:127)
at org.apache.avro.mapreduce.AvroRecordReaderBase.initialize(AvroRecordReaderBase.java:102)
at org.apache.spark.rdd.NewHadoopRDD$$anon$1.liftedTree1$1(NewHadoopRDD.scala:199)
at org.apache.spark.rdd.NewHadoopRDD$$anon$1.<init>(NewHadoopRDD.scala:196)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:151)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:70)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
21/08/18 07:00:38 ERROR scheduler.TaskSetManager: Task 0 in stage 1.0 failed 1 times; aborting job
21/08/18 07:00:38 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool
21/08/18 07:00:38 INFO scheduler.TaskSchedulerImpl: Cancelling stage 1
21/08/18 07:00:38 INFO scheduler.TaskSchedulerImpl: Killing all running tasks in stage 1: Stage cancelled
21/08/18 07:00:38 INFO scheduler.DAGScheduler: ResultStage 1 (isEmpty at DeltaSync.java:437) failed in 0.183 s due to Job aborted due to stage failure: Task 0 in stage 1.0 failed 1 times, most recent failure: Lost task 0.0 in stage 1.0 (TID 1, localhost, executor driver): org.apache.http.ConnectionClosedException: Premature end of Content-Length delimited message body (expected: 158531; received: 16384
at org.apache.http.impl.io.ContentLengthInputStream.read(ContentLengthInputStream.java:178)
at org.apache.http.impl.io.ContentLengthInputStream.read(ContentLengthInputStream.java:198)
at org.apache.http.impl.io.ContentLengthInputStream.close(ContentLengthInputStream.java:101)
at org.apache.http.conn.BasicManagedEntity.streamClosed(BasicManagedEntity.java:166)
at org.apache.http.conn.EofSensorInputStream.checkClose(EofSensorInputStream.java:228)
at org.apache.http.conn.EofSensorInputStream.close(EofSensorInputStream.java:172)
at java.io.FilterInputStream.close(FilterInputStream.java:181)
at java.io.FilterInputStream.close(FilterInputStream.java:181)
at java.io.FilterInputStream.close(FilterInputStream.java:181)
at java.io.FilterInputStream.close(FilterInputStream.java:181)
at com.amazonaws.services.s3.model.S3ObjectInputStream.abort(S3ObjectInputStream.java:90)
at org.apache.hadoop.fs.s3a.S3AInputStream.reopen(S3AInputStream.java:72)
at org.apache.hadoop.fs.s3a.S3AInputStream.seek(S3AInputStream.java:115)
at org.apache.hadoop.fs.FSDataInputStream.seek(FSDataInputStream.java:62)
at org.apache.avro.mapred.FsInput.seek(FsInput.java:59)
at org.apache.avro.file.DataFileReader$SeekableInputStream.seek(DataFileReader.java:190)
at org.apache.avro.file.DataFileReader.seek(DataFileReader.java:114)
at org.apache.avro.file.DataFileReader.sync(DataFileReader.java:127)
at org.apache.avro.mapreduce.AvroRecordReaderBase.initialize(AvroRecordReaderBase.java:102)
at org.apache.spark.rdd.NewHadoopRDD$$anon$1.liftedTree1$1(NewHadoopRDD.scala:199)
at org.apache.spark.rdd.NewHadoopRDD$$anon$1.<init>(NewHadoopRDD.scala:196)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:151)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:70)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
21/08/18 07:00:38 INFO scheduler.DAGScheduler: Job 1 failed: isEmpty at DeltaSync.java:437, took 0.188355 s
21/08/18 07:00:38 ERROR scheduler.DagScheduler: Exception executing node
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 1 times, most recent failure: Lost task 0.0 in stage 1.0 (TID 1, localhost, executor driver): org.apache.http.ConnectionClosedException: Premature end of Content-Length delimited message body (expected: 158531; received: 16384
at org.apache.http.impl.io.ContentLengthInputStream.read(ContentLengthInputStream.java:178)
at org.apache.http.impl.io.ContentLengthInputStream.read(ContentLengthInputStream.java:198)
at org.apache.http.impl.io.ContentLengthInputStream.close(ContentLengthInputStream.java:101)
at org.apache.http.conn.BasicManagedEntity.streamClosed(BasicManagedEntity.java:166)
at org.apache.http.conn.EofSensorInputStream.checkClose(EofSensorInputStream.java:228)
at org.apache.http.conn.EofSensorInputStream.close(EofSensorInputStream.java:172)
at java.io.FilterInputStream.close(FilterInputStream.java:181)
at java.io.FilterInputStream.close(FilterInputStream.java:181)
at java.io.FilterInputStream.close(FilterInputStream.java:181)
at java.io.FilterInputStream.close(FilterInputStream.java:181)
at com.amazonaws.services.s3.model.S3ObjectInputStream.abort(S3ObjectInputStream.java:90)
at org.apache.hadoop.fs.s3a.S3AInputStream.reopen(S3AInputStream.java:72)
at org.apache.hadoop.fs.s3a.S3AInputStream.seek(S3AInputStream.java:115)
at org.apache.hadoop.fs.FSDataInputStream.seek(FSDataInputStream.java:62)
at org.apache.avro.mapred.FsInput.seek(FsInput.java:59)
at org.apache.avro.file.DataFileReader$SeekableInputStream.seek(DataFileReader.java:190)
at org.apache.avro.file.DataFileReader.seek(DataFileReader.java:114)
at org.apache.avro.file.DataFileReader.sync(DataFileReader.java:127)
at org.apache.avro.mapreduce.AvroRecordReaderBase.initialize(AvroRecordReaderBase.java:102)
at org.apache.spark.rdd.NewHadoopRDD$$anon$1.liftedTree1$1(NewHadoopRDD.scala:199)
at org.apache.spark.rdd.NewHadoopRDD$$anon$1.<init>(NewHadoopRDD.scala:196)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:151)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:70)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1925)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1913)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1912)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1912)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:948)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:948)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:948)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2146)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2095)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2084)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:759)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2101)
at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1409)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
at org.apache.spark.rdd.RDD.take(RDD.scala:1382)
at org.apache.spark.rdd.RDD$$anonfun$isEmpty$1.apply$mcZ$sp(RDD.scala:1517)
at org.apache.spark.rdd.RDD$$anonfun$isEmpty$1.apply(RDD.scala:1517)
at org.apache.spark.rdd.RDD$$anonfun$isEmpty$1.apply(RDD.scala:1517)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
at org.apache.spark.rdd.RDD.isEmpty(RDD.scala:1516)
at org.apache.spark.api.java.JavaRDDLike$class.isEmpty(JavaRDDLike.scala:544)
at org.apache.spark.api.java.AbstractJavaRDDLike.isEmpty(JavaRDDLike.scala:45)
at org.apache.hudi.utilities.deltastreamer.DeltaSync.readFromSource(DeltaSync.java:437)
at org.apache.hudi.utilities.deltastreamer.DeltaSync.syncOnce(DeltaSync.java:280)
at org.apache.hudi.integ.testsuite.HoodieDeltaStreamerWrapper.upsert(HoodieDeltaStreamerWrapper.java:44)
at org.apache.hudi.integ.testsuite.HoodieDeltaStreamerWrapper.insert(HoodieDeltaStreamerWrapper.java:48)
at org.apache.hudi.integ.testsuite.HoodieTestSuiteWriter.insert(HoodieTestSuiteWriter.java:158)
at org.apache.hudi.integ.testsuite.dag.nodes.InsertNode.ingest(InsertNode.java:69)
at org.apache.hudi.integ.testsuite.dag.nodes.InsertNode.execute(InsertNode.java:53)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.executeNode(DagScheduler.java:139)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.lambda$execute$0(DagScheduler.java:105)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.http.ConnectionClosedException: Premature end of Content-Length delimited message body (expected: 158531; received: 16384
at org.apache.http.impl.io.ContentLengthInputStream.read(ContentLengthInputStream.java:178)
at org.apache.http.impl.io.ContentLengthInputStream.read(ContentLengthInputStream.java:198)
at org.apache.http.impl.io.ContentLengthInputStream.close(ContentLengthInputStream.java:101)
at org.apache.http.conn.BasicManagedEntity.streamClosed(BasicManagedEntity.java:166)
at org.apache.http.conn.EofSensorInputStream.checkClose(EofSensorInputStream.java:228)
at org.apache.http.conn.EofSensorInputStream.close(EofSensorInputStream.java:172)
at java.io.FilterInputStream.close(FilterInputStream.java:181)
at java.io.FilterInputStream.close(FilterInputStream.java:181)
at java.io.FilterInputStream.close(FilterInputStream.java:181)
at java.io.FilterInputStream.close(FilterInputStream.java:181)
at com.amazonaws.services.s3.model.S3ObjectInputStream.abort(S3ObjectInputStream.java:90)
at org.apache.hadoop.fs.s3a.S3AInputStream.reopen(S3AInputStream.java:72)
at org.apache.hadoop.fs.s3a.S3AInputStream.seek(S3AInputStream.java:115)
at org.apache.hadoop.fs.FSDataInputStream.seek(FSDataInputStream.java:62)
at org.apache.avro.mapred.FsInput.seek(FsInput.java:59)
at org.apache.avro.file.DataFileReader$SeekableInputStream.seek(DataFileReader.java:190)
at org.apache.avro.file.DataFileReader.seek(DataFileReader.java:114)
at org.apache.avro.file.DataFileReader.sync(DataFileReader.java:127)
at org.apache.avro.mapreduce.AvroRecordReaderBase.initialize(AvroRecordReaderBase.java:102)
at org.apache.spark.rdd.NewHadoopRDD$$anon$1.liftedTree1$1(NewHadoopRDD.scala:199)
at org.apache.spark.rdd.NewHadoopRDD$$anon$1.<init>(NewHadoopRDD.scala:196)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:151)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:70)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
... 3 more
21/08/18 07:00:38 INFO scheduler.DagScheduler: Forcing shutdown of executor service, this might kill running tasks
21/08/18 07:00:38 ERROR testsuite.HoodieTestSuiteJob: Failed to run Test Suite
java.util.concurrent.ExecutionException: org.apache.hudi.exception.HoodieException: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 1 times, most recent failure: Lost task 0.0 in stage 1.0 (TID 1, localhost, executor driver): org.apache.http.ConnectionClosedException: Premature end of Content-Length delimited message body (expected: 158531; received: 16384
at org.apache.http.impl.io.ContentLengthInputStream.read(ContentLengthInputStream.java:178)
at org.apache.http.impl.io.ContentLengthInputStream.read(ContentLengthInputStream.java:198)
at org.apache.http.impl.io.ContentLengthInputStream.close(ContentLengthInputStream.java:101)
at org.apache.http.conn.BasicManagedEntity.streamClosed(BasicManagedEntity.java:166)
at org.apache.http.conn.EofSensorInputStream.checkClose(EofSensorInputStream.java:228)
at org.apache.http.conn.EofSensorInputStream.close(EofSensorInputStream.java:172)
at java.io.FilterInputStream.close(FilterInputStream.java:181)