Sivabalan Narayanan (nsivabalan): GitHub gists
22/03/04 16:15:39 ERROR DagScheduler: Exception executing node
org.apache.hudi.exception.HoodieClusteringException: unable to transition clustering inflight to complete: 20220304161518565
at org.apache.hudi.client.SparkRDDWriteClient.completeClustering(SparkRDDWriteClient.java:394)
at org.apache.hudi.client.SparkRDDWriteClient.completeTableService(SparkRDDWriteClient.java:473)
at org.apache.hudi.client.SparkRDDWriteClient.cluster(SparkRDDWriteClient.java:360)
at org.apache.hudi.client.BaseHoodieWriteClient.lambda$inlineClustering$15(BaseHoodieWriteClient.java:1196)
at org.apache.hudi.common.util.Option.ifPresent(Option.java:96)
at org.apache.hudi.client.BaseHoodieWriteClient.inlineClustering(BaseHoodieWriteClient.java:1194)
at org.apache.hudi.client.BaseHoodieWriteClient.runTableServicesInline(BaseHoodieWriteClient.java:502)
at org.apache.hudi.client.BaseHoodieWriteClient.commitStats(BaseHoodieWriteClient.java:211)
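For context, the stack above comes from inline clustering failing to transition the clustering instant from inflight to complete. A minimal sketch of the datasource write options that enable this inline clustering path, assuming Hudi 0.10.x config names; the table name, key fields, df, and basePath are illustrative:

df.write.format("hudi").
  option("hoodie.table.name", "hudi_tbl").  // illustrative table name
  option("hoodie.datasource.write.recordkey.field", "uuid").
  option("hoodie.datasource.write.precombine.field", "ts").
  option("hoodie.clustering.inline", "true").  // run clustering inline after commits
  option("hoodie.clustering.inline.max.commits", "4").  // schedule clustering every 4 commits
  mode("append").
  save(basePath)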
select * from hudi_mor3_rt;
+-----------------------------------+------------------------------------+----------------------------------+--------------------------------------+-------------------------------------------------------------------------------+--------------------+-----------------------+-----------------------+------------------+-----------------------------+--+
| hudi_mor3_rt._hoodie_commit_time | hudi_mor3_rt._hoodie_commit_seqno | hudi_mor3_rt._hoodie_record_key | hudi_mor3_rt._hoodie_partition_path | hudi_mor3_rt._hoodie_file_name | hudi_mor3_rt.uuid | hudi_mor3_rt.array_1 | hudi_mor3_rt.array_2 | hudi_mor3_rt.ts | hudi_mor3_rt.partitionpath |
+-----------------------------------+------------------------------------+----------------------------------+--------------------------------------+-------------------------------------------------------------------------------+--------------------+-----------------------+-----------------------+------------------+-----------------------------+--+
"schema" : "{\"type\":\"record\",\"name\":\"hoodie_source\",\"namespace\":\"hoodie.source\",\"fields\":[{\"name\":\"_airbyte_ab_id\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"_airbyte_emitted_at\",\"type\":[\"null\",{\"type\":\"long\",\"logicalType\":\"timestamp-micros\"}],\"default\":null},{\"name\":\"ts\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"name\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"team\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"text\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"type\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"user\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"icons\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"icons\",\"namespace\":\"hoodie.source.hoodie_source\",\"fields\":[{\"name\":\"emoji\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"_airbyte_additional_properties\",\"type\":[\"null\",{\"type\":\"map\",\"values\":[\"strin
root@adhoc-1:/opt# $SPARK_INSTALL/bin/pyspark --master local[2] --driver-class-path $HADOOP_CONF_DIR --conf spark.sql.hive.convertMetastoreParquet=false --deploy-mode client --driver-memory 1G --executor-memory 3G --num-executors 1 --packages org.apache.spark:spark-avro_2.11:2.4.4,org.apache.hudi:hudi-spark-bundle_2.11:0.10.0
Python 3.5.3 (default, Sep 27 2018, 17:25:39)
[GCC 6.3.0 20170516] on linux
Type "help", "copyright", "credits" or "license" for more information.
Ivy Default Cache set to: /root/.ivy2/cache
The jars for the packages stored in: /root/.ivy2/jars
:: loading settings :: url = jar:file:/opt/spark/jars/ivy-2.4.0.jar!/org/apache/ivy/core/settings/ivysettings.xml
org.apache.spark#spark-avro_2.11 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-2691e4f6-45cb-4e2a-b0e7-70adc6e80e3f;1.0
confs: [default]
Comprehensive multi-writer testing: Hudi write options enabling optimistic concurrency control with a Zookeeper-based lock provider (a complete write sketch follows the options below).
option("hoodie.cleaner.policy.failed.writes","LAZY").
option("hoodie.write.concurrency.mode","OPTIMISTIC_CONCURRENCY_CONTROL").
option("hoodie.write.lock.provider","org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider").
option("hoodie.write.lock.zookeeper.url","localhost").
option("hoodie.write.lock.zookeeper.port","2181").
option("hoodie.write.lock.zookeeper.lock_key","locks").
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
TestInsertTable
Test Different Type of Partition Column
Expected Array([1,a1,10,2021-05-20 01:00:00], [2,a2,10,2021-05-20 01:00:00]), but got Array([1,a1,10.0,2021-05-19 13:00:00], [2,a2,10.0,2021-05-19 13:00:00])
ScalaTestFailureLocation: org.apache.spark.sql.hudi.TestHoodieSqlBase at (TestHoodieSqlBase.scala:84)
[INFO] Running org.apache.hudi.common.functional.TestHoodieLogFormat
Formatting using clusterid: testClusterID
89190 [main] WARN org.apache.hadoop.metrics2.impl.MetricsConfig - Cannot locate configuration: tried hadoop-metrics2-namenode.properties,hadoop-metrics2.properties
104464 [LeaseRenewer:nsb@localhost:51228] WARN org.apache.hadoop.hdfs.LeaseRenewer - Failed to renew lease for [DFSClient_NONMAPREDUCE_357411051_1] for 30 seconds. Will retry shortly ...
java.net.ConnectException: Call From Sivabalans-MacBook-Pro.local/127.0.0.1 to localhost:51228 failed on connection exception: java.net.ConnectException: Connection refused; For more details see: http://wiki.apache.org/hadoop/ConnectionRefused
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.ne
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
import org.apache.hudi.QuickstartUtils._
import scala.collection.JavaConversions._
import org.apache.spark.sql.SaveMode._
import org.apache.hudi.DataSourceReadOptions._
import org.apache.hudi.DataSourceWriteOptions._
import org.apache.hudi.config.HoodieWriteConfig._
val tableName = "hudi_trips_cow"
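Continuing from these imports, a sketch of the standard quickstart flow (basePath and the generated trips data follow the Hudi quickstart guide; string config keys are used here to stay version-agnostic):

val basePath = "file:///tmp/hudi_trips_cow"
val dataGen = new DataGenerator  // quickstart helper that generates sample trip records
val inserts = convertToStringList(dataGen.generateInserts(10))
val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2))
df.write.format("hudi").
  options(getQuickstartWriteConfigs).
  option("hoodie.datasource.write.precombine.field", "ts").
  option("hoodie.datasource.write.recordkey.field", "uuid").
  option("hoodie.datasource.write.partitionpath.field", "partitionpath").
  option("hoodie.table.name", tableName).
  mode(Overwrite).  // Overwrite comes from the SaveMode._ import above
  save(basePath)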