root@adhoc-2:/opt# /var/hoodie/ws/hudi-sync/hudi-hive-sync/run_sync_tool.sh \
> --jdbc-url jdbc:hive2://hiveserver:10000 \
> --user hive \
> --pass hive \
> --partitioned-by ppath \
> --base-path /tmp/hudi_timestamp_tbl2 \
> --database testdb \
> --table timestamp_tbl3 \
> --partition-value-extractor org.apache.hudi.hive.MultiPartKeysValueExtractor \
> --spark-datasource
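For reference, the same Hive sync can also be triggered inline from a Spark datasource write by passing the hive_sync options. The sketch below is illustrative only: the toy DataFrame and the record key / precombine field names ("uuid", "ts") are assumptions, while the database, table, partition field, JDBC URL and base path follow the command above.

// Sketch: write a Hudi table and sync it to Hive inline, mirroring the run_sync_tool.sh call above.
// The sample DataFrame and the record key / precombine field names are assumptions.
import spark.implicits._
val df = Seq(("id1", 1L, "p1"), ("id2", 2L, "p2")).toDF("uuid", "ts", "ppath")
df.write.format("hudi").
option("hoodie.table.name", "timestamp_tbl3").
option("hoodie.datasource.write.recordkey.field", "uuid").
option("hoodie.datasource.write.precombine.field", "ts").
option("hoodie.datasource.write.partitionpath.field", "ppath").
option("hoodie.datasource.hive_sync.enable", "true").
option("hoodie.datasource.hive_sync.mode", "jdbc").
option("hoodie.datasource.hive_sync.jdbcurl", "jdbc:hive2://hiveserver:10000").
option("hoodie.datasource.hive_sync.username", "hive").
option("hoodie.datasource.hive_sync.password", "hive").
option("hoodie.datasource.hive_sync.database", "testdb").
option("hoodie.datasource.hive_sync.table", "timestamp_tbl3").
option("hoodie.datasource.hive_sync.partition_fields", "ppath").
option("hoodie.datasource.hive_sync.partition_extractor_class", "org.apache.hudi.hive.MultiPartKeysValueExtractor").
mode("append").
save("/tmp/hudi_timestamp_tbl2")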
./bin/spark-shell --packages org.apache.spark:spark-avro_2.11:2.4.4,org.apache.spark:spark-sql-kafka-0-10_2.11:2.4.4 \
--conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' --driver-memory 8g --executor-memory 9g --jars ~/Documents/personal/projects/nov26/hudi/packaging/hudi-spark-bundle/target/hudi-spark-bundle_2.11-0.10.0-SNAPSHOT.jar --conf spark.driver.extraJavaOptions="-Dlog4j.configuration=file:/Users/nsb/Documents/personal/tools/log4j/debug_hudi_log4j.properties" --conf spark.executor.extraJavaOptions="-Dlog4j.configuration=file:/Users/nsb/Documents/personal/tools/log4j/debug_hudi_log4j.properties"
// Define kafka flow (broker address, topic and offsets below are placeholder values)
val dataStreamReader = spark.
readStream.
format("kafka").
option("kafka.bootstrap.servers", "kafkabroker:9092").
option("subscribe", "impressions").
option("startingOffsets", "earliest")
import org.apache.hudi.QuickstartUtils._
import scala.collection.JavaConversions._
import org.apache.spark.sql.SaveMode._
import org.apache.hudi.DataSourceReadOptions._
import org.apache.hudi.DataSourceWriteOptions._
import org.apache.hudi.config.HoodieWriteConfig._
val tableName = "hudi_trips_cow"
val basePath = "file:///tmp/hudi_trips_cow"
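Following the standard Hudi quickstart pattern, a batch of generated trip records can then be written to that path. This is a sketch using the 0.10-era quickstart constants imported above; the insert count is arbitrary.

// Generate sample trip records with QuickstartUtils and write them as a COW table.
val dataGen = new DataGenerator
val inserts = convertToStringList(dataGen.generateInserts(10))
val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2))
df.write.format("hudi").
options(getQuickstartWriteConfigs).
option(PRECOMBINE_FIELD_OPT_KEY, "ts").
option(RECORDKEY_FIELD_OPT_KEY, "uuid").
option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
option(TABLE_NAME, tableName).
mode(Overwrite).
save(basePath)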
- @ParameterizedTest
- @EnumSource(value = HoodieTableType.class, names = {"COPY_ON_WRITE", "MERGE_ON_READ"})
- public void testHoodieClientBasicMultiWriter(HoodieTableType tableType) throws Exception {
+ //@ParameterizedTest
+ //@EnumSource(value = HoodieTableType.class, names = {"COPY_ON_WRITE", "MERGE_ON_READ"})
+ @RepeatedTest(20)
+ public void testHoodieClientBasicMultiWriter() throws Exception {
+ HoodieTableType tableType = HoodieTableType.MERGE_ON_READ;
if (tableType == HoodieTableType.MERGE_ON_READ) {
Run docker setup.
root@adhoc-1:/opt# cp hadoop-2.8.4/etc/hadoop/hive-site.xml spark/conf/
root@adhoc-1:/opt# cp hadoop-2.8.4/etc/hadoop/core-site.xml spark/conf/
root@adhoc-1:/opt# cp hadoop-2.8.4/etc/hadoop/hdfs-site.xml spark/conf/
$SPARK_INSTALL/bin/spark-shell --jars $HUDI_SPARK_BUNDLE --master local[2] --driver-class-path $HADOOP_CONF_DIR --conf spark.sql.hive.convertMetastoreParquet=false --conf spark.sql.catalogImplementation=hive --deploy-mode client --driver-memory 1G --executor-memory 3G --num-executors 1 --packages org.apache.spark:spark-avro_2.11:2.4.4
// spark-shell
import org.apache.hudi.QuickstartUtils._
import scala.collection.JavaConversions._
import org.apache.spark.sql.SaveMode._
import org.apache.hudi.DataSourceReadOptions._
import org.apache.hudi.DataSourceWriteOptions._
import org.apache.hudi.config.HoodieWriteConfig._
import org.apache.spark.sql.types._
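As an illustration of why org.apache.spark.sql.types._ is imported, a small DataFrame with an explicit schema can be built and written as a Hudi table. This is a sketch only; the field names, table name and base path are assumptions, not taken from the original gist.

// Sketch: build a DataFrame with an explicit schema and write it to a Hudi table.
// Field names, table name and base path are assumed values.
import org.apache.spark.sql.Row
val schema = StructType(Seq(
  StructField("uuid", StringType, nullable = false),
  StructField("ts", LongType, nullable = false),
  StructField("ppath", StringType, nullable = false)))
val rows = Seq(Row("id1", 1L, "p1"), Row("id2", 2L, "p2"))
val df = spark.createDataFrame(spark.sparkContext.parallelize(rows), schema)
df.write.format("hudi").
option(PRECOMBINE_FIELD_OPT_KEY, "ts").
option(RECORDKEY_FIELD_OPT_KEY, "uuid").
option(PARTITIONPATH_FIELD_OPT_KEY, "ppath").
option(TABLE_NAME, "schema_demo_tbl").
mode(Overwrite).
save("/tmp/hudi_schema_demo_tbl")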
Step 1: mvn package -DskipTests
Step 2: set up docker.
cd docker;
./setup_demo.sh
Step 3: copy jars and required files to docker.
cd ../
docker cp packaging/hudi-integ-test-bundle/target/hudi-integ-test-bundle-0.10.0-SNAPSHOT.jar adhoc-2:/opt/
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0