hudi:hudi_trips_cow1->savepoint create --commit
Commit null not found in Commits org.apache.hudi.common.table.timeline.HoodieDefaultTimeline: [20210814151921__commit__COMPLETED],[20210814151951__commit__COMPLETED]
hudi:hudi_trips_cow1->savepoint create --commit 20210814151951
21/08/14 15:26:49 WARN Utils: Your hostname, Sivabalans-MacBook-Pro.local resolves to a loopback address: 127.0.0.1; using 10.0.0.202 instead (on interface en0)
21/08/14 15:26:49 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
21/08/14 15:26:49 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
log4j:WARN No appenders could be found for logger (org.apache.hudi.cli.commands.SparkMain).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
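The first attempt above fails with "Commit null not found" because --commit was passed no instant time; the retry with 20210814151951 picks an instant from the timeline. For completeness, a minimal Scala sketch of creating the same savepoint programmatically through the write client, assuming Hudi 0.9.x class and package names and a hypothetical base path:

import org.apache.hudi.client.SparkRDDWriteClient
import org.apache.hudi.client.common.HoodieSparkEngineContext
import org.apache.hudi.config.HoodieWriteConfig
import org.apache.spark.api.java.JavaSparkContext

val basePath = "file:///tmp/hudi_trips_cow1"  // hypothetical; point at the table's base path
val writeConfig = HoodieWriteConfig.newBuilder()
  .withPath(basePath)
  .forTable("hudi_trips_cow1")
  .build()

val engineContext = new HoodieSparkEngineContext(new JavaSparkContext(spark.sparkContext))
val client = new SparkRDDWriteClient(engineContext, writeConfig)
client.savepoint("20210814151951", "nsivabalan", "manual savepoint")  // instant time from the timeline above
client.close()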
import java.io.IOException;
import java.util.Map;
import org.apache.hudi.common.config.TypedProperties;

public class SparkKeyGenUtils {

  public static String getPartitionColumns(Map<String, String> parameters) throws IOException {
    TypedProperties props = new TypedProperties();
    props.putAll(parameters);
    return getPartitionColumns(props);
  }

  // The overload body was cut off in the original snippet; in Hudi it resolves the
  // configured KeyGenerator from the properties and returns its partition path field(s).
  public static String getPartitionColumns(TypedProperties props) throws IOException {
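A hedged usage sketch from the spark-shell; the import path for SparkKeyGenUtils is an assumption (the class has moved between modules across Hudi versions):

import scala.collection.JavaConverters._
import org.apache.hudi.util.SparkKeyGenUtils  // package is an assumption; adjust to your Hudi version

val opts = Map("hoodie.datasource.write.partitionpath.field" -> "date_col").asJava
val partitionCols = SparkKeyGenUtils.getPartitionColumns(opts)  // expected: "date_col" for the simple key generator case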
org.apache.spark.shuffle.FetchFailedException: The relative remote executor(Id: 1), which maintains the block data to fetch is dead.
at org.apache.spark.storage.ShuffleBlockFetcherIterator.throwFetchFailedException(ShuffleBlockFetcherIterator.scala:748)
at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:663)
at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:70)
at org.apache.spark.util.CompletionIterator.next(CompletionIterator.scala:29)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:484)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:490)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
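The fetch failure above means executor 1 died while holding shuffle blocks. A hedged mitigation sketch using standard Spark knobs; the values are illustrative, not tuned recommendations:

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .config("spark.network.timeout", "600s")        // tolerate slow heartbeats before marking executors dead
  .config("spark.shuffle.io.maxRetries", "10")    // retry block fetches instead of failing the stage immediately
  .config("spark.shuffle.io.retryWait", "30s")
  .config("spark.executor.memoryOverhead", "2g")  // executor deaths are frequently off-heap OOM kills
  .getOrCreate()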
cat /tmp/temp1.out
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
index 520a956a4..353104340 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
@@ -18,14 +18,6 @@
package org.apache.hudi.common.fs;
-import org.apache.hadoop.conf.Configuration;
scala> spark.time(df1.write.format("hudi").option("hoodie.bulkinsert.shuffle.parallelism","120").option(PRECOMBINE_FIELD.key(), "created_at").option(RECORDKEY_FIELD.key(), "id").option(PARTITIONPATH_FIELD.key(), "date_col").option("hoodie.parquet.compression.codec", "SNAPPY").option(OPERATION.key(),"bulk_insert").
| option("hoodie.datasource.write.row.writer.enable", "true").option(TABLE_NAME.key(), "hudi_3").mode(Overwrite).save("s3a://siva-test-bucket-june-16/hudi_testing/hudi_3/"))
21/08/06 02:43:37 WARN HoodieSparkSqlWriter$: hoodie table at s3a://siva-test-bucket-june-16/hudi_testing/hudi_3 already exists. Deleting existing data & overwriting with new data.
21/08/06 02:44:20 ERROR AppendDataExec: Data source write support org.apache.hudi.spark3.internal.HoodieDataSourceInternalBatchWrite@2237dc21 is aborting.
21/08/06 02:44:20 ERROR DataSourceInternalWriterHelper: Commit 20210806024337 aborted
21/08/06 02:44:25 ERROR AppendDataExec: Data source write support org.apache.hudi.spark3.internal.HoodieDa
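The same row-writer bulk_insert expressed with plain string config keys, which is easier to scan than the chained .option() calls; a sketch assuming the df1 DataFrame from this session, with keys as documented for the Hudi datasource:

val hudiOpts = Map(
  "hoodie.table.name" -> "hudi_3",
  "hoodie.datasource.write.operation" -> "bulk_insert",
  "hoodie.datasource.write.recordkey.field" -> "id",
  "hoodie.datasource.write.partitionpath.field" -> "date_col",
  "hoodie.datasource.write.precombine.field" -> "created_at",
  "hoodie.bulkinsert.shuffle.parallelism" -> "120",
  "hoodie.parquet.compression.codec" -> "SNAPPY",
  "hoodie.datasource.write.row.writer.enable" -> "true"
)

df1.write.format("hudi").options(hudiOpts).mode("overwrite")
  .save("s3a://siva-test-bucket-june-16/hudi_testing/hudi_3/")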
create table hudi_test4 (id int, name string, price double, ts long) using hudi options(primaryKey = 'id', precombineField = 'ts') partitioned by (name) location 'file:///tmp/hudi_testing/hudi_test4';
insert into hudi_test4 values(1, 100.0, 100000010, "abc");
insert into hudi_test4 values(2, 200.0, 200000010, "abc");
insert into hudi_test4 values(3, 300.0, 300000010, "abc");
-- why does this throw DuplicateKeyException, even though the Hudi write operation is "upsert"?
insert into hudi_test4 values(1, 600.0, 600000010, "abc");
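A quick way to inspect what actually landed. Hedged note: for tables declared with primaryKey, Hudi SQL's INSERT behavior is governed by hoodie.sql.insert.mode, and strict mode raises DuplicateKeyException on an existing key, which would explain the error above:

spark.sql("select id, price, ts, name from hudi_test4 order by id").show(false)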
create table hudi_test2 (id int, name string, price double, ts long) using hudi options(precombineField = 'ts') partitioned by (name) location 'file:///tmp/hudi_testing/hudi_test2';
insert into hudi_test2 values(1, 100.0, 100000010, "abc");
insert into hudi_test2 values(1, 200.0, 200000010, "abc");
insert into hudi_test2 values(1, 300.0, 300000010, "abc");
insert into hudi_test2 values(1, 100.0, 100000010, "def");
insert into hudi_test2 values(1, 200.0, 200000010, "def");
insert into hudi_test2 values(1, 300.0, 300000010, "def");
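And the same check for hudi_test2. Hedged expectation: with no primaryKey declared, Hudi falls back to a generated record key, so these six inserts should land as six rows rather than collapse by the precombine field:

spark.sql("select id, price, ts, name from hudi_test2 order by name, ts").show(false)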
[INFO] T E S T S
[INFO] -------------------------------------------------------
[INFO] Running org.apache.hudi.TestConvertFilterToCatalystExpression
[INFO] Tests run: 2, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 2.048 s - in org.apache.hudi.TestConvertFilterToCatalystExpression
[INFO] Running org.apache.hudi.TestDataSourceUtils
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/Users/nsb/.m2/repository/org/slf4j/slf4j-log4j12/1.7.16/slf4j-log4j12-1.7.16.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/Users/nsb/.m2/repository/org/apache/logging/log4j/log4j-slf4j-impl/2.6.2/log4j-slf4j-impl-2.6.2.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
scala> spark.time(df1.write.format("hudi").option("hoodie.bulkinsert.shuffle.parallelism","500").option(PRECOMBINE_FIELD_OPT_KEY.key(), "created_at").option(RECORDKEY_FIELD_OPT_KEY.key(), "id").option(PARTITIONPATH_FIELD_OPT_KEY.key(),"date_col").option("hoodie.parquet.compression.codec", "SNAPPY").option(OPERATION_OPT_KEY.key(),"bulk_insert").option("hoodie.datasource.write.row.writer.enable", "true").option("hoodie.bulkinsert.sort.mode","NONE").option(TABLE_NAME.key(), "hudi_2").mode(Overwrite).save("s3a://siva-test-bucket-june-16/hudi_testing/hudi_2/"))
21/08/02 04:16:40 WARN HoodieSparkSqlWriter$: hoodie table at s3a://siva-test-bucket-june-16/hudi_testing/hudi_2 already exists. Deleting existing data & overwriting with new data.
21/08/02 04:37:28 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_8_3615 !
21/08/02 04:37:28 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_8_517 !
21/08/02 04:37:28 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_8_127
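"No more replicas available for rdd_8_*" means cached RDD blocks were lost along with a dead executor. A hedged sketch of one mitigation, assuming the df1 input from this session: persist with a replicated, disk-backed storage level so lost blocks can be re-read rather than recomputed.

import org.apache.spark.storage.StorageLevel

df1.persist(StorageLevel.MEMORY_AND_DISK_2)  // 2x replication; spills to disk under memory pressure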
> CREATE OR REPLACE TABLE delta_ext (
> public BOOLEAN,
> repo_id BIGINT,
> repo_name STRING,
> repo_url STRING,
> payload STRING,
> created_at TIMESTAMP,
> id STRING,
> other STRING,
> randomId DOUBLE,