Skip to content

Instantly share code, notes, and snippets.

@nsivabalan
Last active September 3, 2020 13:13
Show Gist options
  • Save nsivabalan/e78b0fbd193b18f1b5b43209bd6bee08 to your computer and use it in GitHub Desktop.
Save nsivabalan/e78b0fbd193b18f1b5b43209bd6bee08 to your computer and use it in GitHub Desktop.
# Root of the DAG (deps: none): one insert round of 1000 records into a
# single partition.
first_insert:
  config:
    record_size: 10000
    num_insert_partitions: 1
    repeat_count: 1
    num_records_insert: 1000
  type: InsertNode
  deps: none
# Hive sync step that runs after first_insert.
first_hive_sync:
  config:
    queue_name: "adhoc"
    engine: "mr"
  type: HiveSyncNode
  deps: first_insert
# Validation after the first sync. Each queryN is paired with the resultN
# it is expected to return:
#   query1 - count of `_row_key` groups with more than one row, expected 0
#   query2 - total row count, expected 1000 (the size of first_insert)
first_hive_query:
  config:
    hive_props:
      prop2: "set spark.yarn.queue="
      prop3: "set hive.strict.checks.large.query=false"
      prop4: "set hive.stats.autogather=false"
    hive_queries:
      query1: "select count(*) from testdb.table1 group by `_row_key` having count(*) > 1"
      result1: 0
      query2: "select count(*) from testdb.table1"
      result2: 1000
  type: HiveQueryNode
  deps: first_hive_sync
# Second insert round: 10000 more records into a single partition, scheduled
# after first_hive_query.
second_insert:
  config:
    record_size: 10000
    num_insert_partitions: 1
    repeat_count: 1
    num_records_insert: 10000
  deps: first_hive_query
  type: InsertNode
# Hive sync step that runs after second_insert.
second_hive_sync:
  config:
    queue_name: "adhoc"
    engine: "mr"
  type: HiveSyncNode
  deps: second_insert
# Validation after the second sync:
#   query1 - count of `_row_key` groups with more than one row, expected 0
#   query2 - total row count, expected 11000 (1000 + 10000 inserted so far)
second_hive_query:
  config:
    hive_props:
      prop2: "set spark.yarn.queue="
      prop3: "set hive.strict.checks.large.query=false"
      prop4: "set hive.stats.autogather=false"
    hive_queries:
      query1: "select count(*) from testdb.table1 group by `_row_key` having count(*) > 1"
      result1: 0
      query2: "select count(*) from testdb.table1"
      result2: 11000
  type: HiveQueryNode
  deps: second_hive_sync
# Third insert round: 300 more records into a single partition, scheduled
# after second_hive_query.
third_insert:
  config:
    record_size: 10000
    num_insert_partitions: 1
    repeat_count: 1
    num_records_insert: 300
  deps: second_hive_query
  type: InsertNode
# Hive sync step that runs after third_insert.
third_hive_sync:
  config:
    queue_name: "adhoc"
    engine: "mr"
  type: HiveSyncNode
  deps: third_insert
# Validation after the third sync:
#   query1 - count of `_row_key` groups with more than one row, expected 0
#   query2 - total row count, expected 11300 (1000 + 10000 + 300)
third_hive_query:
  config:
    hive_props:
      prop2: "set spark.yarn.queue="
      prop3: "set hive.strict.checks.large.query=false"
      prop4: "set hive.stats.autogather=false"
    hive_queries:
      query1: "select count(*) from testdb.table1 group by `_row_key` having count(*) > 1"
      result1: 0
      query2: "select count(*) from testdb.table1"
      result2: 11300
  type: HiveQueryNode
  deps: third_hive_sync
# Rollback step scheduled after third_hive_query. No settings are supplied:
# `config` is left empty exactly as in the original definition.
# NOTE(review): presumably this undoes third_insert, since the next row-count
# check expects 11000 again - confirm against RollbackNode.
first_rollback:
  config:
  deps: third_hive_query
  type: RollbackNode
# Hive sync step that runs after first_rollback.
fourth_hive_sync:
  config:
    queue_name: "adhoc"
    engine: "mr"
  type: HiveSyncNode
  deps: first_rollback
# Validation after the rollback has been synced:
#   query1 - count of `_row_key` groups with more than one row, expected 0
#   query2 - total row count, expected 11000 (same as before third_insert)
fourth_hive_query:
  config:
    hive_props:
      prop2: "set spark.yarn.queue="
      prop3: "set hive.strict.checks.large.query=false"
      prop4: "set hive.stats.autogather=false"
    hive_queries:
      query1: "select count(*) from testdb.table1 group by `_row_key` having count(*) > 1"
      result1: 0
      query2: "select count(*) from testdb.table1"
      result2: 11000
  type: HiveQueryNode
  deps: fourth_hive_sync
# Upsert round: 100 records against a single partition, scheduled after
# fourth_hive_query.
first_upsert:
  config:
    record_size: 10000
    repeat_count: 1
    num_records_upsert: 100
    num_upsert_partitions: 1
  type: UpsertNode
  deps: fourth_hive_query
# Hive sync step that runs after first_upsert.
fifth_hive_sync:
  config:
    queue_name: "adhoc"
    engine: "mr"
  type: HiveSyncNode
  deps: first_upsert
# Duplicate-key check after the upsert has been synced:
#   query1 - count of `_row_key` groups with more than one row, expected 0
# The total row count is checked separately by the node that depends on
# this one.
fifth_hive_query:
  config:
    hive_props:
      prop2: "set spark.yarn.queue="
      prop3: "set hive.strict.checks.large.query=false"
      prop4: "set hive.stats.autogather=false"
    hive_queries:
      query1: "select count(*) from testdb.table1 group by `_row_key` having count(*) > 1"
      result1: 0
  type: HiveQueryNode
  deps: fifth_hive_sync
# Row-count check after the upsert, run once fifth_hive_query has finished:
#   query1 - total row count, expected 11000 (unchanged from the
#            post-rollback count checked by fourth_hive_query)
# The expected-value key is numbered to match its query (query1/result1),
# following the queryN/resultN pairing used by the other query nodes.
fifth_hive_query2:
  config:
    hive_props:
      prop2: "set spark.yarn.queue="
      prop3: "set hive.strict.checks.large.query=false"
      prop4: "set hive.stats.autogather=false"
    hive_queries:
      query1: "select count(*) from testdb.table1"
      result1: 11000
  type: HiveQueryNode
  deps: fifth_hive_query
20/09/03 12:58:07 WARN DagScheduler: executing node: 58edece7-f297-4721-a631-df35b6d92193 of type: class org.apache.hudi.integ.testsuite.dag.nodes.HiveQueryNode {"hive_props":["set spark.yarn.queue=","set hive.strict.checks.large.query=false","set hive.stats.autogather=false"],"name":"58edece7-f297-4721-a631-df35b6d92193","hive_queries":[{"select count(*) from testdb.table1 group by `_row_key` having count(*) > 1":0},{"select count(*) from testdb.table1":11000}]}
20/09/03 12:58:13 WARN DagScheduler: executing node: 0b495579-6ca0-41b7-ad9d-84bf36f953f2 of type: class org.apache.hudi.integ.testsuite.dag.nodes.UpsertNode {"num_upsert_partitions":1,"name":"0b495579-6ca0-41b7-ad9d-84bf36f953f2","record_size":10000,"repeat_count":1,"num_records_upsert":100}
20/09/03 12:58:23 WARN GenericRecordFullPayloadGenerator: The schema does not have any collections/complex fields. Cannot achieve minPayloadSize : 10000
20/09/03 12:58:23 WARN GenericRecordFullPayloadGenerator: The schema does not have any collections/complex fields. Cannot achieve minPayloadSize : 10000
20/09/03 12:58:23 WARN GenericRecordFullPayloadGenerator: The schema does not have any collections/complex fields. Cannot achieve minPayloadSize : 10000
20/09/03 12:58:23 WARN AvroKeyInputFormat: Reader schema was not set. Use AvroJob.setInputKeySchema() if desired.
20/09/03 12:58:23 WARN AvroKeyInputFormat: Reader schema was not set. Use AvroJob.setInputKeySchema() if desired.
20/09/03 12:58:24 WARN AvroKeyInputFormat: Reader schema was not set. Use AvroJob.setInputKeySchema() if desired.
20/09/03 12:58:24 WARN AvroKeyInputFormat: Reader schema was not set. Use AvroJob.setInputKeySchema() if desired.
20/09/03 12:58:24 WARN AvroKeyInputFormat: Reader schema was not set. Use AvroJob.setInputKeySchema() if desired.
20/09/03 12:58:24 WARN AvroKeyInputFormat: Reader schema was not set. Use AvroJob.setInputKeySchema() if desired.
20/09/03 12:58:24 WARN AvroKeyInputFormat: Reader schema was not set. Use AvroJob.setInputKeySchema() if desired.
20/09/03 12:58:24 WARN AvroKeyInputFormat: Reader schema was not set. Use AvroJob.setInputKeySchema() if desired.
20/09/03 12:58:24 WARN AvroKeyInputFormat: Reader schema was not set. Use AvroJob.setInputKeySchema() if desired.
20/09/03 12:58:24 WARN AvroKeyInputFormat: Reader schema was not set. Use AvroJob.setInputKeySchema() if desired.
20/09/03 13:01:39 WARN DagScheduler: executing node: 2c33c359-c0aa-4715-b921-11b78bfd03dd of type: class org.apache.hudi.integ.testsuite.dag.nodes.HiveSyncNode {"queue_name":"adhoc","engine":"mr","name":"2c33c359-c0aa-4715-b921-11b78bfd03dd"}
20/09/03 13:01:41 WARN DagScheduler: executing node: 27b0376b-3dca-4239-8e93-c08227d1f292 of type: class org.apache.hudi.integ.testsuite.dag.nodes.HiveQueryNode {"hive_props":["set spark.yarn.queue=","set hive.strict.checks.large.query=false","set hive.stats.autogather=false"],"name":"27b0376b-3dca-4239-8e93-c08227d1f292","hive_queries":[{"select count(*) from testdb.table1 group by `_row_key` having count(*) > 1":0}]}
20/09/03 13:01:45 WARN DagScheduler: executing node: 56c0c50e-e39e-4821-8333-af28a7936bef of type: class org.apache.hudi.integ.testsuite.dag.nodes.HiveQueryNode {"hive_props":["set spark.yarn.queue=","set hive.strict.checks.large.query=false","set hive.stats.autogather=false"],"name":"56c0c50e-e39e-4821-8333-af28a7936bef","hive_queries":[{"select count(*) from testdb.table1":11000}]}
20/09/03 13:01:50 ERROR HoodieTestSuiteJob: Failed to run Test Suite
java.util.concurrent.ExecutionException: java.lang.AssertionError: QUERY: select count(*) from testdb.table1 | EXPECTED RESULT = 11000 | ACTUAL RESULT = 11300
at java.util.concurrent.FutureTask.report(FutureTask.java:122)
at java.util.concurrent.FutureTask.get(FutureTask.java:206)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.execute(DagScheduler.java:97)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.schedule(DagScheduler.java:63)
at org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.runTestSuite(HoodieTestSuiteJob.java:141)
at org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.main(HoodieTestSuiteJob.java:124)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.AssertionError: QUERY: select count(*) from testdb.table1 | EXPECTED RESULT = 11000 | ACTUAL RESULT = 11300
at org.apache.hudi.integ.testsuite.dag.nodes.HiveQueryNode.execute(HiveQueryNode.java:75)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.executeNode(DagScheduler.java:114)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.lambda$execute$0(DagScheduler.java:89)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Exception in thread "main" org.apache.hudi.exception.HoodieException: Failed to run Test Suite
at org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.runTestSuite(HoodieTestSuiteJob.java:145)
at org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.main(HoodieTestSuiteJob.java:124)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.util.concurrent.ExecutionException: java.lang.AssertionError: QUERY: select count(*) from testdb.table1 | EXPECTED RESULT = 11000 | ACTUAL RESULT = 11300
at java.util.concurrent.FutureTask.report(FutureTask.java:122)
at java.util.concurrent.FutureTask.get(FutureTask.java:206)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.execute(DagScheduler.java:97)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.schedule(DagScheduler.java:63)
at org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.runTestSuite(HoodieTestSuiteJob.java:141)
... 13 more
Caused by: java.lang.AssertionError: QUERY: select count(*) from testdb.table1 | EXPECTED RESULT = 11000 | ACTUAL RESULT = 11300
at org.apache.hudi.integ.testsuite.dag.nodes.HiveQueryNode.execute(HiveQueryNode.java:75)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.executeNode(DagScheduler.java:114)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.lambda$execute$0(DagScheduler.java:89)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment