Skip to content

Instantly share code, notes, and snippets.

@nsivabalan
Created September 14, 2020 00:04
Show Gist options
  • Save nsivabalan/4c31e54ebf382ff5be187b81495c73a4 to your computer and use it in GitHub Desktop.
Save nsivabalan/4c31e54ebf382ff5be187b81495c73a4 to your computer and use it in GitHub Desktop.
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Entry node of the workflow: an InsertNode with no upstream dependency.
first_insert:
  config:
    record_size: 70000
    num_insert_partitions: 1
    repeat_count: 5
    num_records_insert: 100
  type: InsertNode
  # The literal string "none" is kept exactly as written in the original.
  deps: none
# Second InsertNode; same settings as first_insert, runs after it.
second_insert:
  config:
    record_size: 70000
    num_insert_partitions: 1
    repeat_count: 5
    num_records_insert: 100
  deps: first_insert
  type: InsertNode
# Third InsertNode; uses repeat_count 2 and num_records_insert 300,
# and runs after second_insert.
third_insert:
  config:
    record_size: 70000
    num_insert_partitions: 1
    repeat_count: 2
    num_records_insert: 300
  deps: second_insert
  type: InsertNode
# RollbackNode that runs after third_insert.
first_rollback:
  # Intentionally left without settings; a bare `config:` parses as null.
  config:
  deps: third_insert
  type: RollbackNode
# First UpsertNode; carries both insert and upsert settings and runs
# after first_rollback.
first_upsert:
  config:
    record_size: 70000
    num_insert_partitions: 1
    num_records_insert: 300
    repeat_count: 5
    num_records_upsert: 100
    num_upsert_partitions: 10
  type: UpsertNode
  deps: first_rollback
# HiveSyncNode that runs after first_upsert.
first_hive_sync:
  config:
    queue_name: "adhoc"
    engine: "mr"
  type: HiveSyncNode
  deps: first_upsert
# HiveQueryNode that runs after first_hive_sync. No hive_queries are
# listed for this node.
first_hive_query:
  config:
    queue_name: "adhoc"
    engine: "mr"
  type: HiveQueryNode
  deps: first_hive_sync
# Second UpsertNode; same settings as first_upsert, runs after
# first_hive_query.
second_upsert:
  config:
    record_size: 70000
    num_insert_partitions: 1
    num_records_insert: 300
    repeat_count: 5
    num_records_upsert: 100
    num_upsert_partitions: 10
  type: UpsertNode
  deps: first_hive_query
# HiveQueryNode that runs after second_upsert. Each queryN is paired
# with the resultN value that follows it.
second_hive_query:
  config:
    queue_name: "adhoc"
    engine: "mr"
    hive_queries:
      query1: "select count(*) from testdb.table1 group by `_row_key` having count(*) > 1"
      result1: 0
      query2: "select count(*) from testdb.table1"
      result2: 3100
      query3: "select count(*) from testdb.table1_rt group by `_row_key` having count(*) > 1"
      result3: 0
      query4: "select count(*) from testdb.table1_rt"
      result4: 3100
  type: HiveQueryNode
  deps: second_upsert
# ScheduleCompactNode that runs after second_hive_query.
first_schedule_compact:
  # Intentionally left without settings; a bare `config:` parses as null.
  config:
  type: ScheduleCompactNode
  deps: second_hive_query
# Third UpsertNode; same settings as the earlier upserts, runs after
# first_schedule_compact.
third_upsert:
  config:
    record_size: 70000
    num_insert_partitions: 1
    num_records_insert: 300
    repeat_count: 5
    num_records_upsert: 100
    num_upsert_partitions: 10
  type: UpsertNode
  deps: first_schedule_compact
# CompactNode that runs after first_schedule_compact.
first_compact:
  # Intentionally left without settings; a bare `config:` parses as null.
  config:
  type: CompactNode
  deps: first_schedule_compact
# HiveQueryNode that checks testdb.table1 for duplicate `_row_key`
# values and for a total row count of 2210.
third_hive_query:
  config:
    queue_name: "adhoc"
    engine: "mr"
    hive_queries:
      query1: "select count(*) from testdb.table1 group by `_row_key` having count(*) > 1"
      result1: 0
      query2: "select count(*) from testdb.table1"
      result2: 2210
  type: HiveQueryNode
  # NOTE(review): this is the same dependency as second_hive_query, so
  # the two query nodes are siblings and neither waits for the other.
  # If this check is meant to run after the later upsert/compaction
  # steps, the dependency needs to point at one of them - TODO confirm.
  deps: second_upsert
20/09/14 00:04:12 WARN DagScheduler: executing node: 3a02eb61-3806-42e2-847e-8cd2cae26200 of type: class org.apache.hudi.integ.testsuite.dag.nodes.HiveQueryNode {"queue_name":"adhoc","engine":"mr","name":"3a02eb61-3806-42e2-847e-8cd2cae26200","hive_queries":[{"select count(*) from testdb.table1 group by `_row_key` having count(*) > 1":0},{"select count(*) from testdb.table1":3100},{"select count(*) from testdb.table1_rt group by `_row_key` having count(*) > 1":0},{"select count(*) from testdb.table1_rt":3100}]}
20/09/14 00:04:12 WARN DagScheduler: executing node: ac33aeb5-9ea0-4482-834b-d5093398c13d of type: class org.apache.hudi.integ.testsuite.dag.nodes.HiveQueryNode {"queue_name":"adhoc","engine":"mr","name":"ac33aeb5-9ea0-4482-834b-d5093398c13d","hive_queries":[{"select count(*) from testdb.table1 group by `_row_key` having count(*) > 1":0},{"select count(*) from testdb.table1":2210}]}
20/09/14 00:04:15 ERROR DagScheduler: Exception executing node
20/09/14 00:04:17 ERROR HoodieTestSuiteJob: Failed to run Test Suite
java.util.concurrent.ExecutionException: java.lang.AssertionError: QUERY: select count(*) from testdb.table1 | EXPECTED RESULT = 3100 | ACTUAL RESULT = 1100
at java.util.concurrent.FutureTask.report(FutureTask.java:122)
at java.util.concurrent.FutureTask.get(FutureTask.java:206)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.execute(DagScheduler.java:96)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.schedule(DagScheduler.java:62)
at org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.runTestSuite(HoodieTestSuiteJob.java:137)
at org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.main(HoodieTestSuiteJob.java:117)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.AssertionError: QUERY: select count(*) from testdb.table1 | EXPECTED RESULT = 3100 | ACTUAL RESULT = 1100
at org.apache.hudi.integ.testsuite.dag.nodes.HiveQueryNode.execute(HiveQueryNode.java:75)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.executeNode(DagScheduler.java:113)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.lambda$execute$0(DagScheduler.java:88)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Exception in thread "main" org.apache.hudi.exception.HoodieException: Failed to run Test Suite
at org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.runTestSuite(HoodieTestSuiteJob.java:141)
at org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.main(HoodieTestSuiteJob.java:117)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.util.concurrent.ExecutionException: java.lang.AssertionError: QUERY: select count(*) from testdb.table1 | EXPECTED RESULT = 3100 | ACTUAL RESULT = 1100
at java.util.concurrent.FutureTask.report(FutureTask.java:122)
at java.util.concurrent.FutureTask.get(FutureTask.java:206)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.execute(DagScheduler.java:96)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.schedule(DagScheduler.java:62)
at org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.runTestSuite(HoodieTestSuiteJob.java:137)
... 13 more
Caused by: java.lang.AssertionError: QUERY: select count(*) from testdb.table1 | EXPECTED RESULT = 3100 | ACTUAL RESULT = 1100
at org.apache.hudi.integ.testsuite.dag.nodes.HiveQueryNode.execute(HiveQueryNode.java:75)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.executeNode(DagScheduler.java:113)
at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.lambda$execute$0(DagScheduler.java:88)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment