hudi-schema-compatibility-check-demo
// Write an initial table where column `a` is an int.
val existedDf = spark.sql("select 1 as a, '' as b, 1 as __row_key, 0 as __row_version")

val testTable = "foo"
val testConf = Map(
  "hoodie.table.name" -> testTable,
  "hoodie.avro.schema.validate" -> "true",
  "hoodie.datasource.write.recordkey.field" -> "__row_key",
  "hoodie.datasource.write.table.name" -> testTable,
  "hoodie.datasource.write.precombine.field" -> "__row_version",
  "hoodie.datasource.write.partitionpath.field" -> "",
  "hoodie.datasource.write.keygenerator.class" -> classOf[org.apache.hudi.keygen.NonpartitionedKeyGenerator].getName,
  "hoodie.datasource.write.hive_style_partitioning" -> "true",
  "hoodie.datasource.write.operation" -> "upsert"
)
existedDf.write.format("org.apache.hudi").options(testConf).mode("append").save("file:///jfs/cadl/hudi_data/schema/foo")

// Upsert with column `a` widened from int to long; this trips the schema
// compatibility check enabled by hoodie.avro.schema.validate above.
val newDf = spark.sql("select cast(1 as long) as a, '' as b, 1 as __row_key, 1 as __row_version")
newDf.write.format("org.apache.hudi").options(testConf).mode("append").save("file:///jfs/cadl/hudi_data/schema/foo")
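What makes the failure below noteworthy is that widening `a` from int to long is a legal Avro type promotion: a reader using the new (long) schema can decode records written with the old (int) schema, just not the other way around. A minimal sketch checking both directions with Avro's own compatibility API; the record schemas are simplified to the single field `a`, and the names are illustrative, not what Hudi uses internally:

import org.apache.avro.{Schema, SchemaCompatibility}

// Simplified stand-ins for the table schema (a: int) and the writer schema (a: long).
val intSchema = new Schema.Parser().parse(
  """{"type":"record","name":"foo_record","fields":[{"name":"a","type":"int"}]}""")
val longSchema = new Schema.Parser().parse(
  """{"type":"record","name":"foo_record","fields":[{"name":"a","type":"long"}]}""")

// Reading int-encoded data with a long reader schema is an allowed promotion.
println(SchemaCompatibility.checkReaderWriterCompatibility(longSchema, intSchema).getType) // COMPATIBLE

// The reverse direction is not: long values may not fit in an int.
println(SchemaCompatibility.checkReaderWriterCompatibility(intSchema, longSchema).getType) // INCOMPATIBLE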
stacktrace

org.apache.hudi.exception.HoodieUpsertException: Failed upsert schema compatibility check.
    at org.apache.hudi.table.HoodieTable.validateUpsertSchema(HoodieTable.java:572)
    at org.apache.hudi.client.HoodieWriteClient.upsert(HoodieWriteClient.java:190)
    at org.apache.hudi.DataSourceUtils.doWriteOperation(DataSourceUtils.java:260)
    at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:169)
    at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:125)
    at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
    at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:676)
    at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:676)
    at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
    at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:676)
    at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:285)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:271)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:229)
    ... 69 elided
Caused by: org.apache.hudi.exception.HoodieException: Failed schema compatibility check for writerSchema :{"type":"record","name":"foo_record","namespace":"hoodie.foo","fields":[{"name":"_hoodie_commit_time","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_commit_seqno","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_record_key","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_partition_path","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_file_name","type":["null","string"],"doc":"","default":null},{"name":"a","type":"long"},{"name":"b","type":"string"},{"name":"__row_key","type":"int"},{"name":"__row_version","type":"int"}]}, table schema :{"type":"record","name":"foo_record","namespace":"hoodie.foo","fields":[{"name":"_hoodie_commit_time","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_commit_seqno","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_record_key","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_partition_path","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_file_name","type":["null","string"],"doc":"","default":null},{"name":"a","type":"int"},{"name":"b","type":"string"},{"name":"__row_key","type":"int"},{"name":"__row_version","type":"int"}]}, base path :file:///jfs/cadl/hudi_data/schema/foo
    at org.apache.hudi.table.HoodieTable.validateSchema(HoodieTable.java:564)
    at org.apache.hudi.table.HoodieTable.validateUpsertSchema(HoodieTable.java:570)
    ... 94 more
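The mismatch is visible at the end of the exception message: the writer schema carries {"name":"a","type":"long"} while the table schema has {"name":"a","type":"int"}, and with hoodie.avro.schema.validate set to "true" the upsert is rejected before any data is written. Two possible ways to get the demo's second write through are sketched below; this assumes the incoming values still fit the original type, and whether relaxing the check is safe for a real table depends on how your Hudi version handles the promotion downstream:

// Option 1 (assumes the long values still fit in an int): cast the widened
// column back to the table's original type before writing.
val fixedDf = newDf.selectExpr("cast(a as int) as a", "b", "__row_key", "__row_version")
fixedDf.write.format("org.apache.hudi").options(testConf).mode("append")
  .save("file:///jfs/cadl/hudi_data/schema/foo")

// Option 2: skip the strict Avro schema check that this demo enables
// explicitly (hoodie.avro.schema.validate is off by default).
val relaxedConf = testConf + ("hoodie.avro.schema.validate" -> "false")
newDf.write.format("org.apache.hudi").options(relaxedConf).mode("append")
  .save("file:///jfs/cadl/hudi_data/schema/foo")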