@nsivabalan
Created March 7, 2022 18:40
22/03/04 16:15:39 ERROR DagScheduler: Exception executing node
org.apache.hudi.exception.HoodieClusteringException: unable to transition clustering inflight to complete: 20220304161518565
at org.apache.hudi.client.SparkRDDWriteClient.completeClustering(SparkRDDWriteClient.java:394)
at org.apache.hudi.client.SparkRDDWriteClient.completeTableService(SparkRDDWriteClient.java:473)
at org.apache.hudi.client.SparkRDDWriteClient.cluster(SparkRDDWriteClient.java:360)
at org.apache.hudi.client.BaseHoodieWriteClient.lambda$inlineClustering$15(BaseHoodieWriteClient.java:1196)
at org.apache.hudi.common.util.Option.ifPresent(Option.java:96)
at org.apache.hudi.client.BaseHoodieWriteClient.inlineClustering(BaseHoodieWriteClient.java:1194)
at org.apache.hudi.client.BaseHoodieWriteClient.runTableServicesInline(BaseHoodieWriteClient.java:502)
at org.apache.hudi.client.BaseHoodieWriteClient.commitStats(BaseHoodieWriteClient.java:211)
at org.apache.hudi.client.SparkRDDWriteClient.commit(SparkRDDWriteClient.java:124)
at org.apache.hudi.client.SparkRDDWriteClient.commit(SparkRDDWriteClient.java:74)
at org.apache.hudi.client.BaseHoodieWriteClient.commit(BaseHoodieWriteClient.java:173)
at org.apache.hudi.integ.testsuite.HoodieTestSuiteWriter.commit(HoodieTestSuiteWriter.java:267)
at org.apache.hudi.integ.testsuite.dag.nodes.InsertNode.execute(InsertNode.java:54)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.executeNode(DagScheduler.java:139)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.lambda$execute$0(DagScheduler.java:105)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.sql.AnalysisException: Unable to infer schema for Parquet. It must be specified manually.;
at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$7.apply(DataSource.scala:185)
at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$7.apply(DataSource.scala:185)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.execution.datasources.DataSource.getOrInferFileFormatSchema(DataSource.scala:184)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:373)
at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:223)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:211)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:178)
at org.apache.hudi.index.columnstats.ColumnStatsIndexHelper.updateColumnStatsIndexFor(ColumnStatsIndexHelper.java:315)
at org.apache.hudi.table.HoodieSparkCopyOnWriteTable.updateColumnsStatsIndex(HoodieSparkCopyOnWriteTable.java:219)
at org.apache.hudi.table.HoodieSparkCopyOnWriteTable.updateMetadataIndexes(HoodieSparkCopyOnWriteTable.java:177)
at org.apache.hudi.client.SparkRDDWriteClient.completeClustering(SparkRDDWriteClient.java:386)
... 19 more
22/03/04 16:15:39 INFO DagScheduler: Forcing shutdown of executor service, this might kill running tasks
22/03/04 16:15:39 ERROR HoodieTestSuiteJob: Failed to run Test Suite
java.util.concurrent.ExecutionException: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieClusteringException: unable to transition clustering inflight to complete: 20220304161518565
at java.util.concurrent.FutureTask.report(FutureTask.java:122)
at java.util.concurrent.FutureTask.get(FutureTask.java:206)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.execute(DagScheduler.java:113)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.schedule(DagScheduler.java:68)
at org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.runTestSuite(HoodieTestSuiteJob.java:203)
at org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.main(HoodieTestSuiteJob.java:170)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieClusteringException: unable to transition clustering inflight to complete: 20220304161518565
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.executeNode(DagScheduler.java:146)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.lambda$execute$0(DagScheduler.java:105)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.hudi.exception.HoodieClusteringException: unable to transition clustering inflight to complete: 20220304161518565
at org.apache.hudi.client.SparkRDDWriteClient.completeClustering(SparkRDDWriteClient.java:394)
at org.apache.hudi.client.SparkRDDWriteClient.completeTableService(SparkRDDWriteClient.java:473)
at org.apache.hudi.client.SparkRDDWriteClient.cluster(SparkRDDWriteClient.java:360)
at org.apache.hudi.client.BaseHoodieWriteClient.lambda$inlineClustering$15(BaseHoodieWriteClient.java:1196)
at org.apache.hudi.common.util.Option.ifPresent(Option.java:96)
at org.apache.hudi.client.BaseHoodieWriteClient.inlineClustering(BaseHoodieWriteClient.java:1194)
at org.apache.hudi.client.BaseHoodieWriteClient.runTableServicesInline(BaseHoodieWriteClient.java:502)
at org.apache.hudi.client.BaseHoodieWriteClient.commitStats(BaseHoodieWriteClient.java:211)
at org.apache.hudi.client.SparkRDDWriteClient.commit(SparkRDDWriteClient.java:124)
at org.apache.hudi.client.SparkRDDWriteClient.commit(SparkRDDWriteClient.java:74)
at org.apache.hudi.client.BaseHoodieWriteClient.commit(BaseHoodieWriteClient.java:173)
at org.apache.hudi.integ.testsuite.HoodieTestSuiteWriter.commit(HoodieTestSuiteWriter.java:267)
at org.apache.hudi.integ.testsuite.dag.nodes.InsertNode.execute(InsertNode.java:54)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.executeNode(DagScheduler.java:139)
... 6 more
Caused by: org.apache.spark.sql.AnalysisException: Unable to infer schema for Parquet. It must be specified manually.;
at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$7.apply(DataSource.scala:185)
at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$7.apply(DataSource.scala:185)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.execution.datasources.DataSource.getOrInferFileFormatSchema(DataSource.scala:184)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:373)
at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:223)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:211)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:178)
at org.apache.hudi.index.columnstats.ColumnStatsIndexHelper.updateColumnStatsIndexFor(ColumnStatsIndexHelper.java:315)
at org.apache.hudi.table.HoodieSparkCopyOnWriteTable.updateColumnsStatsIndex(HoodieSparkCopyOnWriteTable.java:219)
at org.apache.hudi.table.HoodieSparkCopyOnWriteTable.updateMetadataIndexes(HoodieSparkCopyOnWriteTable.java:177)
at org.apache.hudi.client.SparkRDDWriteClient.completeClustering(SparkRDDWriteClient.java:386)
... 19 more
Exception in thread "main" org.apache.hudi.exception.HoodieException: Failed to run Test Suite
at org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.runTestSuite(HoodieTestSuiteJob.java:208)
at org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.main(HoodieTestSuiteJob.java:170)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.util.concurrent.ExecutionException: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieClusteringException: unable to transition clustering inflight to complete: 20220304161518565
at java.util.concurrent.FutureTask.report(FutureTask.java:122)
at java.util.concurrent.FutureTask.get(FutureTask.java:206)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.execute(DagScheduler.java:113)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.schedule(DagScheduler.java:68)
at org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.runTestSuite(HoodieTestSuiteJob.java:203)
... 13 more
Caused by: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieClusteringException: unable to transition clustering inflight to complete: 20220304161518565
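The failure chain above bottoms out in Spark's AnalysisException "Unable to infer schema for Parquet. It must be specified manually.", thrown while ColumnStatsIndexHelper.updateColumnStatsIndexFor tries to load the clustered parquet files through DataFrameReader.load; because that read fails, completeClustering cannot transition the inflight clustering instant 20220304161518565 and the test suite job aborts. Below is a minimal sketch (not part of the original gist) that reproduces the Spark-side root cause: schema inference needs at least one readable parquet footer, so a schemaless read over a directory with no parquet files fails with exactly this message, while an explicit schema skips inference. The path and schema are hypothetical; this illustrates the Spark behavior only and is not the Hudi fix.

import java.nio.file.{Files, Paths}
import org.apache.spark.sql.{AnalysisException, SparkSession}
import org.apache.spark.sql.types.{LongType, StringType, StructField, StructType}

object ParquetSchemaInferenceRepro {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("parquet-schema-inference-repro")
      .master("local[1]")
      .getOrCreate()

    // Hypothetical path: an existing directory that contains no readable
    // parquet footers (comparable to the file paths the column-stats index
    // helper tried to load in the trace above).
    val emptyDir = "/tmp/parquet_inference_repro"
    Files.createDirectories(Paths.get(emptyDir))

    // Without a schema, Spark must infer one from a parquet footer; with no
    // footer available the read fails with "Unable to infer schema for
    // Parquet. It must be specified manually." -- the root cause seen above.
    try {
      spark.read.parquet(emptyDir).show()
    } catch {
      case e: AnalysisException =>
        println(s"Inference failed as expected: ${e.getMessage}")
    }

    // Supplying the schema explicitly skips inference, so the same read
    // succeeds and simply yields an empty DataFrame.
    val schema = StructType(Seq(
      StructField("_row_key", StringType),
      StructField("timestamp", LongType)
    ))
    spark.read.schema(schema).parquet(emptyDir).show()

    spark.stop()
  }
}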