Skip to content

Instantly share code, notes, and snippets.

View nsivabalan's full-sized avatar

Sivabalan Narayanan nsivabalan

View GitHub Profile
@nsivabalan
nsivabalan / Hoodie Simple Index
Last active March 6, 2020 01:22
Simple Index using spark joins
// HoodieSparkJoinIndex.java
/**
* Find <HoodieKey, HoodieRecordLocation> for all incoming HoodieKeys
*/
@VisibleForTesting
JavaPairRDD<HoodieKey, HoodieRecordLocation> findMatchingFilesForRecordKeysLocal(JavaSparkContext jsc,
List<Tuple2<String, String>> partitionToFileIndexInfo,
JavaPairRDD<String, String> partitionRecordKeyPairRDD, HoodieTable hoodieTable) {
// Step 1: Create JavaPairRDD< Tuple2<PartitionPath, RecordKey>, Optional<HoodieRecordLocation> > from input with Optional<HoodieRecordLocation> as Empty.
public class UberCallOptions {
private boolean isRetryEnabled = false;
private RetryPolicy retryPolicy = null;
private long timeout = 30000;
public void setRetryEnabled() {
isRetryEnabled = true;
}
diff --git a/hudi-spark/src/main/java/org/apache/hudi/keygen/GlobalDeleteKeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/keygen/GlobalDeleteKeyGenerator.java
index 315c2659..a699e0c8 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/keygen/GlobalDeleteKeyGenerator.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/keygen/GlobalDeleteKeyGenerator.java
@@ -25,6 +25,7 @@ import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.exception.HoodieKeyException;
import org.apache.avro.generic.GenericRecord;
+import org.apache.spark.sql.Row;
public class UberCallOptions {
private boolean isRetryEnabled = false;
private int retryCount = 0;
private long timeout = 30000;
private String callType = "GRPC";
.
.
.
public interface UberCallHeaders {
String getHeaderValue(String key);
void setHeader(String key, String value);
Set<String> getAllHeaderKeys();
Map<String, String> getHeadersAsMap();
public interface UberNetworkRequestStatus {
public int getResponseCode();
public boolean isSuccess() ;
public Throwable getThrowable();
}
/**
* Base class for client-side UberInterceptor implementations at the mobile networking layer.
* @param <ReqT> request object of generic type.
* @param <RespT> response object of generic type.
*/
public abstract class UberInterceptor<ReqT, RespT> {
/**
* Fetch MethodInfo for the call in flux.
* @param <T> type of MethodInfo
* @return the instance of MethodInfo
public interface UberCallHeaders {
String getHeaderValue(String key);
void setHeader(String key, String value);
Set<String> getAllHeaderKeys();
Map<String, String> getHeadersAsMap();
16020 [main] WARN org.apache.spark.util.Utils - Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
16020 [main] WARN org.apache.spark.util.Utils - Service 'SparkUI' could not bind on port 4041. Attempting port 4042.
16834 [main] WARN org.apache.spark.sql.SparkSession$Builder - Using an existing SparkSession; some configuration may not take effect.
16841 [main] WARN org.apache.spark.sql.SparkSession$Builder - Using an existing SparkSession; some configuration may not take effect.
18296 [dispatcher-event-loop-0] WARN org.apache.spark.scheduler.TaskSetManager - Stage 0 contains a task of very large size (196 KB). The maximum recommended task size is 100 KB.
20747 [dispatcher-event-loop-1] WARN org.apache.spark.scheduler.TaskSetManager - Stage 1 contains a task of very large size (196 KB). The maximum recommended task size is 100 KB.
21805 [dispatcher-event-loop-0] WARN org.apache.spark.scheduler.TaskSetManager - Stage 2 contains a task of very large size (196 KB). The maximum recommended task size is 100 KB.
spark-submit \
> --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer $HUDI_UTILITIES_BUNDLE \
> --table-type COPY_ON_WRITE \
> --source-class org.apache.hudi.utilities.sources.JsonKafkaSource \
> --source-ordering-field ts \
> --target-base-path /user/hive/warehouse/stock_ticks_cow \
> --target-table stock_ticks_cow \
> --transformer-class org.apache.hudi.utilities.transform.DeleteTransformer \
> --props /var/demo/config/kafka-source.properties \
> --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider