hudi:hudi_trips_cow1->savepoint create --commit
Commit null not found in Commits org.apache.hudi.common.table.timeline.HoodieDefaultTimeline: [20210814151921__commit__COMPLETED],[20210814151951__commit__COMPLETED]
hudi:hudi_trips_cow1->savepoint create --commit 20210814151951
21/08/14 15:26:49 WARN Utils: Your hostname, Sivabalans-MacBook-Pro.local resolves to a loopback address: 127.0.0.1; using 10.0.0.202 instead (on interface en0)
21/08/14 15:26:49 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
21/08/14 15:26:49 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
log4j:WARN No appenders could be found for logger (org.apache.hudi.cli.commands.SparkMain).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
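The first attempt above fails with "Commit null not found" because --commit was passed no instant time; the retry with 20210814151951 picks an instant from the timeline. For completeness, a minimal Scala sketch of creating the same savepoint programmatically through the write client, assuming Hudi 0.9.x class and package names and a hypothetical base path:

import org.apache.hudi.client.SparkRDDWriteClient
import org.apache.hudi.client.common.HoodieSparkEngineContext
import org.apache.hudi.config.HoodieWriteConfig
import org.apache.spark.api.java.JavaSparkContext

val basePath = "file:///tmp/hudi_trips_cow1"  // hypothetical; point at the table's base path
val writeConfig = HoodieWriteConfig.newBuilder()
  .withPath(basePath)
  .forTable("hudi_trips_cow1")
  .build()

val engineContext = new HoodieSparkEngineContext(new JavaSparkContext(spark.sparkContext))
val client = new SparkRDDWriteClient(engineContext, writeConfig)
client.savepoint("20210814151951", "nsivabalan", "manual savepoint")  // instant time from the timeline above
client.close()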
import java.io.IOException;
import java.util.Map;
import org.apache.hudi.common.config.TypedProperties;

public class SparkKeyGenUtils {

  public static String getPartitionColumns(Map<String, String> parameters) throws IOException {
    TypedProperties props = new TypedProperties();
    props.putAll(parameters);
    return getPartitionColumns(props);
  }

  // The overload body was cut off in the original snippet; in Hudi it resolves the
  // configured KeyGenerator from the properties and returns its partition path field(s).
  public static String getPartitionColumns(TypedProperties props) throws IOException {
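A hedged usage sketch from the spark-shell; the import path for SparkKeyGenUtils is an assumption (the class has moved between modules across Hudi versions):

import scala.collection.JavaConverters._
import org.apache.hudi.util.SparkKeyGenUtils  // package is an assumption; adjust to your Hudi version

val opts = Map("hoodie.datasource.write.partitionpath.field" -> "date_col").asJava
val partitionCols = SparkKeyGenUtils.getPartitionColumns(opts)  // expected: "date_col" for the simple key generator case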
org.apache.spark.shuffle.FetchFailedException: The relative remote executor(Id: 1), which maintains the block data to fetch is dead.
at org.apache.spark.storage.ShuffleBlockFetcherIterator.throwFetchFailedException(ShuffleBlockFetcherIterator.scala:748)
at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:663)
at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:70)
at org.apache.spark.util.CompletionIterator.next(CompletionIterator.scala:29)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:484)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:490)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
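The fetch failure above means executor 1 died while holding shuffle blocks. A hedged mitigation sketch using standard Spark knobs; the values are illustrative, not tuned recommendations:

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .config("spark.network.timeout", "600s")        // tolerate slow heartbeats before marking executors dead
  .config("spark.shuffle.io.maxRetries", "10")    // retry block fetches instead of failing the stage immediately
  .config("spark.shuffle.io.retryWait", "30s")
  .config("spark.executor.memoryOverhead", "2g")  // executor deaths are frequently off-heap OOM kills
  .getOrCreate()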
cat /tmp/temp1.out
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
index 520a956a4..353104340 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
@@ -18,14 +18,6 @@
package org.apache.hudi.common.fs;
-import org.apache.hadoop.conf.Configuration;
scala> spark.time(df1.write.format("hudi").option("hoodie.bulkinsert.shuffle.parallelism","120").option(PRECOMBINE_FIELD.key(), "created_at").option(RECORDKEY_FIELD.key(), "id").option(PARTITIONPATH_FIELD.key(), "date_col").option("hoodie.parquet.compression.codec", "SNAPPY").option(OPERATION.key(),"bulk_insert").
| option("hoodie.datasource.write.row.writer.enable", "true").option(TABLE_NAME.key(), "hudi_3").mode(Overwrite).save("s3a://siva-test-bucket-june-16/hudi_testing/hudi_3/"))
21/08/06 02:43:37 WARN HoodieSparkSqlWriter$: hoodie table at s3a://siva-test-bucket-june-16/hudi_testing/hudi_3 already exists. Deleting existing data & overwriting with new data.
21/08/06 02:44:20 ERROR AppendDataExec: Data source write support org.apache.hudi.spark3.internal.HoodieDataSourceInternalBatchWrite@2237dc21 is aborting.
21/08/06 02:44:20 ERROR DataSourceInternalWriterHelper: Commit 20210806024337 aborted
21/08/06 02:44:25 ERROR AppendDataExec: Data source write support org.apache.hudi.spark3.internal.HoodieDa
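The same row-writer bulk_insert expressed with plain string config keys, which is easier to scan than the chained .option() calls; a sketch assuming the df1 DataFrame from this session, with keys as documented for the Hudi datasource:

val hudiOpts = Map(
  "hoodie.table.name" -> "hudi_3",
  "hoodie.datasource.write.operation" -> "bulk_insert",
  "hoodie.datasource.write.recordkey.field" -> "id",
  "hoodie.datasource.write.partitionpath.field" -> "date_col",
  "hoodie.datasource.write.precombine.field" -> "created_at",
  "hoodie.bulkinsert.shuffle.parallelism" -> "120",
  "hoodie.parquet.compression.codec" -> "SNAPPY",
  "hoodie.datasource.write.row.writer.enable" -> "true"
)

df1.write.format("hudi").options(hudiOpts).mode("overwrite")
  .save("s3a://siva-test-bucket-june-16/hudi_testing/hudi_3/")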
create table hudi_test4 (id int, name string, price double, ts long) using hudi options(primaryKey = 'id', precombineField = 'ts') partitioned by (name) location 'file:///tmp/hudi_testing/hudi_test4';
insert into hudi_test4 values(1, 100.0, 100000010, "abc");
insert into hudi_test4 values(2, 200.0, 200000010, "abc");
insert into hudi_test4 values(3, 300.0, 300000010, "abc");
-- why does this throw DuplicateKeyException, even though the Hudi write operation is "upsert"?
insert into hudi_test4 values(1, 600.0, 600000010, "abc");
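A quick way to inspect what actually landed. Hedged note: for tables declared with primaryKey, Hudi SQL's INSERT behavior is governed by hoodie.sql.insert.mode, and strict mode raises DuplicateKeyException on an existing key, which would explain the error above:

spark.sql("select id, price, ts, name from hudi_test4 order by id").show(false)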
create table hudi_test2 (id int, name string, price double, ts long) using hudi options(precombineField = 'ts') partitioned by (name) location 'file:///tmp/hudi_testing/hudi_test2';
insert into hudi_test2 values(1, 100.0, 100000010, "abc");
insert into hudi_test2 values(1, 200.0, 200000010, "abc");
insert into hudi_test2 values(1, 300.0, 300000010, "abc");
insert into hudi_test2 values(1, 100.0, 100000010, "def");
insert into hudi_test2 values(1, 200.0, 200000010, "def");
insert into hudi_test2 values(1, 300.0, 300000010, "def");
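And the same check for hudi_test2. Hedged expectation: with no primaryKey declared, Hudi falls back to a generated record key, so these six inserts should land as six rows rather than collapse by the precombine field:

spark.sql("select id, price, ts, name from hudi_test2 order by name, ts").show(false)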
[INFO] T E S T S
[INFO] -------------------------------------------------------
[INFO] Running org.apache.hudi.TestConvertFilterToCatalystExpression
[INFO] Tests run: 2, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 2.048 s - in org.apache.hudi.TestConvertFilterToCatalystExpression
[INFO] Running org.apache.hudi.TestDataSourceUtils
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/Users/nsb/.m2/repository/org/slf4j/slf4j-log4j12/1.7.16/slf4j-log4j12-1.7.16.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/Users/nsb/.m2/repository/org/apache/logging/log4j/log4j-slf4j-impl/2.6.2/log4j-slf4j-impl-2.6.2.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
scala> spark.time(df1.write.format("hudi").option("hoodie.bulkinsert.shuffle.parallelism","500").option(PRECOMBINE_FIELD_OPT_KEY.key(), "created_at").option(RECORDKEY_FIELD_OPT_KEY.key(), "id").option(PARTITIONPATH_FIELD_OPT_KEY.key(),"date_col").option("hoodie.parquet.compression.codec", "SNAPPY").option(OPERATION_OPT_KEY.key(),"bulk_insert").option("hoodie.datasource.write.row.writer.enable", "true").option("hoodie.bulkinsert.sort.mode","NONE").option(TABLE_NAME.key(), "hudi_2").mode(Overwrite).save("s3a://siva-test-bucket-june-16/hudi_testing/hudi_2/"))
21/08/02 04:16:40 WARN HoodieSparkSqlWriter$: hoodie table at s3a://siva-test-bucket-june-16/hudi_testing/hudi_2 already exists. Deleting existing data & overwriting with new data.
21/08/02 04:37:28 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_8_3615 !
21/08/02 04:37:28 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_8_517 !
21/08/02 04:37:28 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_8_127
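"No more replicas available for rdd_8_*" means cached RDD blocks were lost along with a dead executor. A hedged sketch of one mitigation, assuming the df1 input from this session: persist with a replicated, disk-backed storage level so lost blocks can be re-read rather than recomputed.

import org.apache.spark.storage.StorageLevel

df1.persist(StorageLevel.MEMORY_AND_DISK_2)  // 2x replication; spills to disk under memory pressure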
> CREATE OR REPLACE TABLE delta_ext (
> public BOOLEAN,
> repo_id BIGINT,
> repo_name STRING,
> repo_url STRING,
> payload STRING,
> created_at TIMESTAMP,
> id STRING,
> other STRING,
> randomId DOUBLE,