nsivabalan · September 9, 2021 20:29
diff --git a/spark sql nodes b/spark sql nodes
 21/09/09 20:25:13 WARN SparkSqlCreateTableNode: ----- Running the following Spark SQL query -----
 21/09/09 20:25:13 WARN SparkSqlCreateTableNode: create table table1 (timestamp bigint,
 _row_key string,
 rider string,
 driver string,
 begin_lat double,
 begin_lon double,
 end_lat double,
 end_lon double,
 fare double,
 _hoodie_is_deleted boolean,
 test_suite_source_ordering_field int
 ) using hudi
 options ( 
 type = 'cow',
 primaryKey = '_row_key',
 preCombineField = 'test_suite_source_ordering_field'
 )
 partitioned by (rider)
 21/09/09 20:25:13 WARN SparkSqlCreateTableNode: --------------------------------------------------
 21/09/09 20:25:14 ERROR DagScheduler: Exception executing node
 org.apache.hudi.exception.HoodieException: 'path' or 'Key: 'hoodie.datasource.read.paths' , default: null description: Comma separated list of file paths to read within a Hudi table. since version: version is not defined deprecated after: version is not defined)' or both must be specified.
 	at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:79)
 	at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:316)
 	at org.apache.spark.sql.execution.command.CreateDataSourceTableCommand.run(createDataSourceTables.scala:78)
 	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
 	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
 	at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
 	at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194)
 	at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194)
 	at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370)
 	at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
 	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
 	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
 	at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369)
 	at org.apache.spark.sql.Dataset.<init>(Dataset.scala:194)
 	at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:79)
 	at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642)
 	at org.apache.hudi.integ.testsuite.dag.nodes.spark.sql.SparkSqlCreateTableNode.execute(SparkSqlCreateTableNode.scala:73)
 	at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.executeNode(DagScheduler.java:139)
 	at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.lambda$execute$0(DagScheduler.java:105)
 	at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
 	at java.util.concurrent.FutureTask.run(FutureTask.java:266)
 	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
 	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
 	at java.lang.Thread.run(Thread.java:748)
 21/09/09 20:25:14 ERROR HoodieTestSuiteJob: Failed to run Test Suite 
 java.util.concurrent.ExecutionException: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: 'path' or 'Key: 'hoodie.datasource.read.paths' , default: null description: Comma separated list of file paths to read within a Hudi table. since version: version is not defined deprecated after: version is not defined)' or both must be specified.
 	at java.util.concurrent.FutureTask.report(FutureTask.java:122)
 	at java.util.concurrent.FutureTask.get(FutureTask.java:206)
 	at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.execute(DagScheduler.java:113)
 	at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.schedule(DagScheduler.java:68)
 	at org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.runTestSuite(HoodieTestSuiteJob.java:203)
 	at org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.main(HoodieTestSuiteJob.java:170)
 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
 	at java.lang.reflect.Method.invoke(Method.java:498)
 	at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
 	at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
 	at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
 	at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
 	at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
 	at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
 	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
 	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
 Caused by: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: 'path' or 'Key: 'hoodie.datasource.read.paths' , default: null description: Comma separated list of file paths to read within a Hudi table. since version: version is not defined deprecated after: version is not defined)' or both must be specified.
 	at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.executeNode(DagScheduler.java:146)
 	at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.lambda$execute$0(DagScheduler.java:105)
 	at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
 	at java.util.concurrent.FutureTask.run(FutureTask.java:266)
 	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
 	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
 	at java.lang.Thread.run(Thread.java:748)
 Caused by: org.apache.hudi.exception.HoodieException: 'path' or 'Key: 'hoodie.datasource.read.paths' , default: null description: Comma separated list of file paths to read within a Hudi table. since version: version is not defined deprecated after: version is not defined)' or both must be specified.
 	at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:79)
 	at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:316)
 	at org.apache.spark.sql.execution.command.CreateDataSourceTableCommand.run(createDataSourceTables.scala:78)
 	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
 	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
 	at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
 	at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194)
 	at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194)
 	at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370)
 	at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
 	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
 	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
 	at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369)
 	at org.apache.spark.sql.Dataset.<init>(Dataset.scala:194)
 	at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:79)
 	at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642)
 	at org.apache.hudi.integ.testsuite.dag.nodes.spark.sql.SparkSqlCreateTableNode.execute(SparkSqlCreateTableNode.scala:73)
 	at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.executeNode(DagScheduler.java:139)
 	... 6 more
 Exception in thread "main" org.apache.hudi.exception.HoodieException: Failed to run Test Suite 
 	at org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.runTestSuite(HoodieTestSuiteJob.java:208)
 	at org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.main(HoodieTestSuiteJob.java:170)
 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
 	at java.lang.reflect.Method.invoke(Method.java:498)
 	at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
 	at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
 	at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
 	at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
 	at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
 	at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
 	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
 	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
	21/09/09 20:25:13 WARN SparkSqlCreateTableNode: ----- Running the following Spark SQL query -----
	21/09/09 20:25:13 WARN SparkSqlCreateTableNode: create table table1 (timestamp bigint,
	_row_key string,
	rider string,
	driver string,
	begin_lat double,
	begin_lon double,
	end_lat double,
	end_lon double,
	fare double,
	_hoodie_is_deleted boolean,
	test_suite_source_ordering_field int
	) using hudi
	options (
	type = 'cow',
	primaryKey = '_row_key',
	preCombineField = 'test_suite_source_ordering_field'
	)
	partitioned by (rider)
	21/09/09 20:25:13 WARN SparkSqlCreateTableNode: --------------------------------------------------
	21/09/09 20:25:14 ERROR DagScheduler: Exception executing node
	org.apache.hudi.exception.HoodieException: 'path' or 'Key: 'hoodie.datasource.read.paths' , default: null description: Comma separated list of file paths to read within a Hudi table. since version: version is not defined deprecated after: version is not defined)' or both must be specified.
	at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:79)
	at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:316)
	at org.apache.spark.sql.execution.command.CreateDataSourceTableCommand.run(createDataSourceTables.scala:78)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
	at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194)
	at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194)
	at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370)
	at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
	at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369)
	at org.apache.spark.sql.Dataset.<init>(Dataset.scala:194)
	at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:79)
	at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642)
	at org.apache.hudi.integ.testsuite.dag.nodes.spark.sql.SparkSqlCreateTableNode.execute(SparkSqlCreateTableNode.scala:73)
	at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.executeNode(DagScheduler.java:139)
	at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.lambda$execute$0(DagScheduler.java:105)
	at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
	at java.util.concurrent.FutureTask.run(FutureTask.java:266)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)
	21/09/09 20:25:14 ERROR HoodieTestSuiteJob: Failed to run Test Suite
	java.util.concurrent.ExecutionException: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: 'path' or 'Key: 'hoodie.datasource.read.paths' , default: null description: Comma separated list of file paths to read within a Hudi table. since version: version is not defined deprecated after: version is not defined)' or both must be specified.
	at java.util.concurrent.FutureTask.report(FutureTask.java:122)
	at java.util.concurrent.FutureTask.get(FutureTask.java:206)
	at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.execute(DagScheduler.java:113)
	at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.schedule(DagScheduler.java:68)
	at org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.runTestSuite(HoodieTestSuiteJob.java:203)
	at org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.main(HoodieTestSuiteJob.java:170)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
	at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
	at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
	at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
	at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
	at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
	Caused by: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: 'path' or 'Key: 'hoodie.datasource.read.paths' , default: null description: Comma separated list of file paths to read within a Hudi table. since version: version is not defined deprecated after: version is not defined)' or both must be specified.
	at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.executeNode(DagScheduler.java:146)
	at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.lambda$execute$0(DagScheduler.java:105)
	at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
	at java.util.concurrent.FutureTask.run(FutureTask.java:266)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)
	Caused by: org.apache.hudi.exception.HoodieException: 'path' or 'Key: 'hoodie.datasource.read.paths' , default: null description: Comma separated list of file paths to read within a Hudi table. since version: version is not defined deprecated after: version is not defined)' or both must be specified.
	at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:79)
	at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:316)
	at org.apache.spark.sql.execution.command.CreateDataSourceTableCommand.run(createDataSourceTables.scala:78)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
	at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194)
	at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194)
	at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370)
	at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
	at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369)
	at org.apache.spark.sql.Dataset.<init>(Dataset.scala:194)
	at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:79)
	at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642)
	at org.apache.hudi.integ.testsuite.dag.nodes.spark.sql.SparkSqlCreateTableNode.execute(SparkSqlCreateTableNode.scala:73)
	at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.executeNode(DagScheduler.java:139)
	... 6 more
	Exception in thread "main" org.apache.hudi.exception.HoodieException: Failed to run Test Suite
	at org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.runTestSuite(HoodieTestSuiteJob.java:208)
	at org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.main(HoodieTestSuiteJob.java:170)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
	at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
	at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
	at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
	at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
	at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)