Last active
February 9, 2022 23:04
-
-
Save nsivabalan/2bda3e9f3335cefd44c004936b446c3e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| select * from hudi_mor3_rt; | |
| +-----------------------------------+------------------------------------+----------------------------------+--------------------------------------+-------------------------------------------------------------------------------+--------------------+-----------------------+-----------------------+------------------+-----------------------------+--+ | |
| | hudi_mor3_rt._hoodie_commit_time | hudi_mor3_rt._hoodie_commit_seqno | hudi_mor3_rt._hoodie_record_key | hudi_mor3_rt._hoodie_partition_path | hudi_mor3_rt._hoodie_file_name | hudi_mor3_rt.uuid | hudi_mor3_rt.array_1 | hudi_mor3_rt.array_2 | hudi_mor3_rt.ts | hudi_mor3_rt.partitionpath | | |
| +-----------------------------------+------------------------------------+----------------------------------+--------------------------------------+-------------------------------------------------------------------------------+--------------------+-----------------------+-----------------------+------------------+-----------------------------+--+ | |
| | 20220209224904138 | 20220209224904138_0_1 | 82 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0 | 82 | [] | [] | 82 | partition | | |
| | 20220209224851153 | 20220209224851153_0_602 | 9 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0_0-235-14661_20220209224851153.parquet | 9 | [] | [] | 9 | partition | | |
| | 20220209224904138 | 20220209224904138_0_2 | 66 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0 | 66 | [] | [] | 66 | partition | | |
| | 20220209224851153 | 20220209224851153_0_604 | 21 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0_0-235-14661_20220209224851153.parquet | 21 | [] | [] | 21 | partition | | |
| | 20220209224904138 | 20220209224904138_0_3 | 71 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0 | 71 | [] | [] | 71 | partition | | |
| | 20220209224904138 | 20220209224904138_0_4 | 55 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0 | 55 | [] | [] | 55 | partition | | |
| | 20220209224851153 | 20220209224851153_0_607 | 10 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0_0-235-14661_20220209224851153.parquet | 10 | [] | [] | 10 | partition | | |
| | 20220209224851153 | 20220209224851153_0_608 | 39 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0_0-235-14661_20220209224851153.parquet | 39 | [] | [] | 39 | partition | | |
| | 20220209224851153 | 20220209224851153_0_609 | 3 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0_0-235-14661_20220209224851153.parquet | 3 | [] | [] | 3 | partition | | |
| | 20220209224904138 | 20220209224904138_0_5 | 60 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0 | 60 | [] | [] | 60 | partition | | |
| | 20220209224904138 | 20220209224904138_0_6 | 89 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0 | 89 | [] | [] | 89 | partition | | |
| | 20220209224851153 | 20220209224851153_0_612 | 44 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0_0-235-14661_20220209224851153.parquet | 44 | [] | [] | 44 | partition | | |
| | 20220209224851153 | 20220209224851153_0_613 | 28 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0_0-235-14661_20220209224851153.parquet | 28 | [] | [] | 28 | partition | | |
| | 20220209224904138 | 20220209224904138_0_7 | 94 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0 | 94 | [] | [] | 94 | partition | | |
| | 20220209224904138 | 20220209224904138_0_8 | 78 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0 | 78 | [] | [] | 78 | partition | | |
| | 20220209224851153 | 20220209224851153_0_616 | 33 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0_0-235-14661_20220209224851153.parquet | 33 | [] | [] | 33 | partition | | |
| | 20220209224851153 | 20220209224851153_0_617 | 17 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0_0-235-14661_20220209224851153.parquet | 17 | [] | [] | 17 | partition | | |
| | 20220209224904138 | 20220209224904138_0_9 | 83 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0 | 83 | [] | [] | 83 | partition | | |
| | 20220209224904138 | 20220209224904138_0_10 | 67 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0 | 67 | [] | [] | 67 | partition | | |
| | 20220209224851153 | 20220209224851153_0_620 | 22 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0_0-235-14661_20220209224851153.parquet | 22 | [] | [] | 22 | partition | | |
| | 20220209224904138 | 20220209224904138_0_11 | 72 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0 | 72 | [] | [] | 72 | partition | | |
| | 20220209224904138 | 20220209224904138_0_12 | 56 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0 | 56 | [] | [] | 56 | partition | | |
| | 20220209224851153 | 20220209224851153_0_623 | 11 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0_0-235-14661_20220209224851153.parquet | 11 | [] | [] | 11 | partition | | |
| | 20220209224851153 | 20220209224851153_0_624 | 4 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0_0-235-14661_20220209224851153.parquet | 4 | [] | [] | 4 | partition | | |
| | 20220209224904138 | 20220209224904138_0_13 | 61 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0 | 61 | [] | [] | 61 | partition | | |
| | 20220209224851153 | 20220209224851153_0_626 | 45 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0_0-235-14661_20220209224851153.parquet | 45 | [] | [] | 45 | partition | | |
| | 20220209224851153 | 20220209224851153_0_627 | 29 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0_0-235-14661_20220209224851153.parquet | 29 | [] | [] | 29 | partition | | |
| | 20220209224904138 | 20220209224904138_0_14 | 95 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0 | 95 | [] | [] | 95 | partition | | |
| | 20220209224904138 | 20220209224904138_0_15 | 50 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| select * from hudi_mor3_rt; | |
| +-----------------------------------+------------------------------------+----------------------------------+--------------------------------------+-------------------------------------------------------------------------------+--------------------+-----------------------+-----------------------+------------------+-----------------------------+--+ | |
| | hudi_mor3_rt._hoodie_commit_time | hudi_mor3_rt._hoodie_commit_seqno | hudi_mor3_rt._hoodie_record_key | hudi_mor3_rt._hoodie_partition_path | hudi_mor3_rt._hoodie_file_name | hudi_mor3_rt.uuid | hudi_mor3_rt.array_1 | hudi_mor3_rt.array_2 | hudi_mor3_rt.ts | hudi_mor3_rt.partitionpath | | |
| +-----------------------------------+------------------------------------+----------------------------------+--------------------------------------+-------------------------------------------------------------------------------+--------------------+-----------------------+-----------------------+------------------+-----------------------------+--+ | |
| | 20220209224904138 | 20220209224904138_0_1 | 82 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0 | 82 | [] | [] | 82 | partition | | |
| | 20220209224851153 | 20220209224851153_0_602 | 9 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0_0-235-14661_20220209224851153.parquet | 9 | [] | [] | 9 | partition | | |
| | 20220209224904138 | 20220209224904138_0_2 | 66 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0 | 66 | [] | [] | 66 | partition | | |
| | 20220209224851153 | 20220209224851153_0_604 | 21 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0_0-235-14661_20220209224851153.parquet | 21 | [] | [] | 21 | partition | | |
| | 20220209224904138 | 20220209224904138_0_3 | 71 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0 | 71 | [] | [] | 71 | partition | | |
| | 20220209224904138 | 20220209224904138_0_4 | 55 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0 | 55 | [] | [] | 55 | partition | | |
| | 20220209224851153 | 20220209224851153_0_607 | 10 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0_0-235-14661_20220209224851153.parquet | 10 | [] | [] | 10 | partition | | |
| | 20220209224851153 | 20220209224851153_0_608 | 39 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0_0-235-14661_20220209224851153.parquet | 39 | [] | [] | 39 | partition | | |
| | 20220209224851153 | 20220209224851153_0_609 | 3 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0_0-235-14661_20220209224851153.parquet | 3 | [] | [] | 3 | partition | | |
| | 20220209224904138 | 20220209224904138_0_5 | 60 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0 | 60 | [] | [] | 60 | partition | | |
| | 20220209224904138 | 20220209224904138_0_6 | 89 | partition | 19a65467-4945-444b-8ec1-4d168fa2a8a2-0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| select * from hudi_mor4_rt; | |
| Query 20220209_230102_00002_xn9dw, FAILED, 1 node | |
| Splits: 17 total, 0 done (0.00%) | |
| 0:07 [0 rows, 0B] [0 rows/s, 0B/s] | |
| Query 20220209_230102_00002_xn9dw failed: readDirect unsupported in RemoteBlockReader | |
| select * from hudi_mor4_ro; | |
| _hoodie_commit_time | _hoodie_commit_seqno | _hoodie_record_key | _hoodie_partition_path | _h | |
| ---------------------+-------------------------+--------------------+------------------------+------------------------------- | |
| 20220209225514907 | 20220209225514907_0_1 | 82 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_2 | 9 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_3 | 66 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_4 | 21 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_5 | 71 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_6 | 55 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_7 | 10 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_8 | 39 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_9 | 3 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_10 | 60 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_11 | 89 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_12 | 44 | partition | 3030dd9e-640d-4bc3-8d3c-612186 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def gen_data(start, stop):
    """Build the synthetic records used to reproduce the issue.

    One record is produced for every integer ``i`` in ``range(start, stop)``.

    Args:
        start: First id to generate (inclusive).
        stop: End of the id range (exclusive).

    Returns:
        A list of dicts. In each dict ``uuid`` and ``ts`` are both ``str(i)``,
        ``partitionpath`` is always the literal ``"partition"``, and both
        array columns are empty lists. The list is empty when
        ``start >= stop``.
    """
    return [
        {
            "uuid": str(i),
            "partitionpath": "partition",
            "array_1": [],  # array does not need to be populated to reproduce issue
            "array_2": [],  # two arrays need to be defined in the schema to reproduce
            "ts": str(i),
        }
        for i in range(start, stop)
    ]
import json

from pyspark.sql.types import StructType, StructField, StringType, ArrayType

# Explicit schema for the repro records: three nullable string columns plus
# two nullable array columns whose elements are structs of two nullable
# string fields each.
schema = StructType(
    [
        StructField("uuid", StringType(), True),
        StructField("partitionpath", StringType(), True),
        StructField("array_1", ArrayType(
            StructType(
                [
                    StructField("field_1", StringType(), True),
                    StructField("field_2", StringType(), True),
                ]
            )
        ), True),
        StructField("array_2", ArrayType(
            StructType(
                [
                    StructField("field_3", StringType(), True),
                    StructField("field_4", StringType(), True),
                ]
            )
        ), True),
        StructField("ts", StringType(), True)
    ]
)

# Base path the Hudi table is written to.
destination = "/user/hive/warehouse/hudi_mor3"

# Writer options: upsert into a MERGE_ON_READ table keyed on `uuid`,
# partitioned by `partitionpath`, with Hive sync enabled against the
# `default` database.
hudi_write_options = {
    "hoodie.table.name": "hudi_mor3",
    "hoodie.datasource.write.operation": "upsert",
    "hoodie.datasource.write.table.name": "hudi_mor3",
    "hoodie.datasource.write.table.type": "MERGE_ON_READ",
    "hoodie.datasource.write.partitionpath.field" : "partitionpath",
    "hoodie.datasource.write.recordkey.field" : "uuid",
    "hoodie.datasource.hive_sync.enable": True,
    "hoodie.datasource.hive_sync.jdbcurl": "jdbc:hive2://hiveserver:10000",
    "hoodie.datasource.hive_sync.database": "default",
    "hoodie.datasource.hive_sync.table": "hudi_mor3",
    "hoodie.datasource.hive_sync.partition_fields": "partitionpath",
    "hoodie.datasource.hive_sync.partition_extractor_class": "org.apache.hudi.hive.MultiPartKeysValueExtractor"
}

# NOTE(review): `spark` is not defined in this snippet -- presumably the
# SparkSession provided by the pyspark shell/notebook; confirm before running
# as a standalone script.

# Initial load: ids 0..99, replacing whatever is at `destination`.
# The records are serialized with json.dumps because spark.read.json expects
# an RDD of JSON *strings*; handing it raw dicts only works through PySpark's
# str() fallback combined with the parser's tolerance for single quotes.
df = spark.read.json(
    spark.sparkContext.parallelize([json.dumps(row) for row in gen_data(0, 100)]),
    schema,
)
df.write.format("hudi").options(**hudi_write_options).mode("overwrite").save(destination)

# Second write: upsert ids 50..99 again, appended to the same table.
df = spark.read.json(
    spark.sparkContext.parallelize([json.dumps(row) for row in gen_data(50, 100)]),
    schema,
)
df.write.format("hudi").options(**hudi_write_options).mode("append").save(destination)

# Read back through the `_rt` table in the `default` database.
# NOTE(review): presumably this is the real-time view registered by Hive sync
# for the MERGE_ON_READ table -- confirm against the Hudi version in use.
spark.sql("select * from default.hudi_mor3_rt").show()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| spark.sql("select * from default.hudi_mor3_rt").show() | |
| +-------------------+--------------------+------------------+----------------------+--------------------+----+-------------+-------+-------+---+ | |
| |_hoodie_commit_time|_hoodie_commit_seqno|_hoodie_record_key|_hoodie_partition_path| _hoodie_file_name|uuid|partitionpath|array_1|array_2| ts| | |
| +-------------------+--------------------+------------------+----------------------+--------------------+----+-------------+-------+-------+---+ | |
| | 20220209224904138|20220209224904138...| 82| partition|19a65467-4945-444...| 82| partition| []| []| 82| | |
| | 20220209224851153|20220209224851153...| 9| partition|19a65467-4945-444...| 9| partition| []| []| 9| | |
| | 20220209224904138|20220209224904138...| 66| partition|19a65467-4945-444...| 66| partition| []| []| 66| | |
| | 20220209224851153|20220209224851153...| 21| partition|19a65467-4945-444...| 21| partition| []| []| 21| | |
| | 20220209224904138|20220209224904138...| 71| partition|19a65467-4945-444...| 71| partition| []| []| 71| | |
| | 20220209224904138|20220209224904138...| 55| partition|19a65467-4945-444...| 55| partition| []| []| 55| | |
| | 20220209224851153|20220209224851153...| 10| partition|19a65467-4945-444...| 10| partition| []| []| 10| | |
| | 20220209224851153|20220209224851153...| 39| partition|19a65467-4945-444...| 39| partition| []| []| 39| | |
| | 20220209224851153|20220209224851153...| 3| partition|19a65467-4945-444...| 3| partition| []| []| 3| | |
| | 20220209224904138|20220209224904138...| 60| partition|19a65467-4945-444...| 60| partition| []| []| 60| | |
| | 20220209224904138|20220209224904138...| 89| partition|19a65467-4945-444...| 89| partition| []| []| 89| | |
| | 20220209224851153|20220209224851153...| 44| partition|19a65467-4945-444...| 44| partition| []| []| 44| | |
| | 20220209224851153|20220209224851153...| 28| partition|19a65467-4945-444...| 28| partition| []| []| 28| | |
| | 20220209224904138|20220209224904138...| 94| partition|19a65467-4945-444...| 94| partition| []| []| 94| | |
| | 20220209224904138|20220209224904138...| 78| partition|19a65467-4945-444...| 78| partition| []| []| 78| | |
| | 20220209224851153|20220209224851153...| 33| partition|19a65467-4945-444...| 33| partition| []| []| 33| | |
| | 20220209224851153|20220209224851153...| 17| partition|19a65467-4945-444...| 17| partition| []| []| 17| | |
| | 20220209224904138|20220209224904138...| 83| partition|19a65467-4945-444...| 83| partition| []| []| 83| | |
| | 20220209224904138|20220209224904138...| 67| partition|19a65467-4945-444...| 67| partition| []| []| 67| | |
| | 20220209224851153|20220209224851153...| 22| partition|19a65467-4945-444...| 22| partition| []| []| 22| | |
| +-------------------+--------------------+------------------+----------------------+--------------------+----+-------------+-------+-------+---+ | |
| only showing top 20 rows |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| trino:default> select * from hudi_mor4_ro; | |
| _hoodie_commit_time | _hoodie_commit_seqno | _hoodie_record_key | _hoodie_partition_path | _h | |
| ---------------------+-------------------------+--------------------+------------------------+------------------------------- | |
| 20220209225514907 | 20220209225514907_0_1 | 82 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_2 | 9 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_3 | 66 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_4 | 21 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_5 | 71 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_6 | 55 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_7 | 10 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_16 | 33 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_17 | 17 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_18 | 83 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_19 | 67 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_20 | 22 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_21 | 72 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| . | |
| . | |
| . | |
| trino:default> select * from hudi_mor4_rt; | |
| _hoodie_commit_time | _hoodie_commit_seqno | _hoodie_record_key | _hoodie_partition_path | _h | |
| ---------------------+-------------------------+--------------------+------------------------+------------------------------- | |
| 20220209225514907 | 20220209225514907_0_1 | 82 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_64 | 91 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_65 | 75 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_66 | 30 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_67 | 59 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_68 | 14 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_69 | 80 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_70 | 7 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_71 | 64 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_72 | 48 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_73 | 98 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_74 | 53 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_75 | 37 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
| 20220209225514907 | 20220209225514907_0_76 | 1 | partition | 3030dd9e-640d-4bc3-8d3c-612186 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment