Sivabalan Narayanan (nsivabalan)

> CREATE OR REPLACE TABLE delta_ext (
> public BOOLEAN,
> repo_id BIGINT,
> repo_name STRING,
> repo_url STRING,
> payload STRING,
> created_at TIMESTAMP,
> id STRING,
> other STRING,
> randomId DOUBLE,
create table gh_17Gb_updates using parquet
location 's3a://siva-test-bucket-june-16/hudi_testing/gh_arch_dump/part_1_updates/'
as select public, (repo_id + 1000) as repo_id, repo_name, repo_url, payload, created_at, id, other, randomId, date_col, type
from gh_17Gb_date_col
ORDER BY RAND() limit 1000000;
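The randomized table above supplies update records for Hudi upsert testing. As a rough sketch of how such updates could be replayed into a Hudi table (the target table name, base path, and key/partition fields below are assumptions for illustration, not taken from the gist):

// Hypothetical sketch: upsert the generated update records into a Hudi table.
// The table name, base path, and key/partition fields here are assumed.
val updatesDf = spark.table("gh_17Gb_updates")
updatesDf.write.format("hudi").
  option("hoodie.table.name", "gh_17Gb_hudi").                       // assumed table name
  option("hoodie.datasource.write.operation", "upsert").
  option("hoodie.datasource.write.recordkey.field", "id").
  option("hoodie.datasource.write.precombine.field", "created_at").
  option("hoodie.datasource.write.partitionpath.field", "date_col").
  mode("append").
  save("s3a://siva-test-bucket-june-16/hudi_testing/gh_17Gb_hudi/")  // assumed path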
21/07/29 03:31:17 ERROR TransportResponseHandler: Still have 1 requests outstanding when connection from /172.31.33.172:44406 is closed
21/07/29 03:31:17 WARN BlockManagerMasterEndpoint: Error trying to remove broadcast 154 from block manager BlockManagerId(14, ip-172-31-33-172.us-east-2.compute.internal, 44051, None)
java.io.IOException: Connection from /172.31.33.172:44406 closed
at org.apache.spark.network.client.TransportResponseHandler.channelInactive(TransportResponseHandler.java:146)
at org.apache.spark.network.server.TransportChannelHandler.channelInactive(TransportChannelHandler.java:117)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:262)
nsivabalan / gist:5147fde404e970fab66515af2eddcdcb
Created July 28, 2021 22:53
abstract log record scanner. createHoodieRecord
2021-07-28T22:49:24,833 INFO [LocalJobRunner Map Task Executor #0]: log.AbstractHoodieLogRecordScanner (AbstractHoodieLogRecordScanner.java:scan(151)) - Scanning log file HoodieLogFile{pathStr='hdfs://namenode:8020/user/hive/warehouse/stock_ticks_mor/2018/08/31/.3cca15e7-c88d-4ede-ae90-833b06ce52ae-0_20210728224800.log.1_0-16-211', fileLen=0}
2021-07-28T22:49:24,839 INFO [LocalJobRunner Map Task Executor #0]: log.AbstractHoodieLogRecordScanner (AbstractHoodieLogRecordScanner.java:scan(178)) - Reading a data block from file hdfs://namenode:8020/user/hive/warehouse/stock_ticks_mor/2018/08/31/.3cca15e7-c88d-4ede-ae90-833b06ce52ae-0_20210728224800.log.1_0-16-211 at instant 20210728224829
2021-07-28T22:49:24,839 INFO [LocalJobRunner Map Task Executor #0]: log.AbstractHoodieLogRecordScanner (AbstractHoodieLogRecordScanner.java:scan(267)) - Merging the final data blocks
2021-07-28T22:49:24,840 INFO [LocalJobRunner Map Task Executor #0]: log.AbstractHoodieLogRecordScanner (AbstractHoodieLogRecordScanner.java:proc
0: jdbc:hive2://hiveserver:10000> select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = 'GOOG';
WARNING: Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases.
Error: org.apache.hive.service.cli.HiveSQLException: Error while processing statement: FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask
at org.apache.hive.service.cli.operation.Operation.toSQLException(Operation.java:380)
at org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:257)
at org.apache.hive.service.cli.operation.SQLOperation.access$800(SQLOperation.java:91)
at org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:348)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1840)
diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala
index 6ce307027..2a9f0a8f7 100644
--- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala
+++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala
@@ -231,7 +231,7 @@ object InsertIntoHoodieTableCommand {
     } else {
       classOf[DefaultHoodieRecordPayload].getCanonicalName
     }
-    val enableHive = isEnableHive(sparkSession)
+    //val enableHive = isEnableHive(sparkSession)
21/07/22 11:23:32 ERROR SparkSQLDriver: Failed in [create table hudi_gh_ext using hudi location 'file:///tmp/hudi-gh1' as select type, public, payload, repo, actor, org, id, other from gh_raw]
java.lang.NoClassDefFoundError: org/apache/calcite/rel/type/RelDataTypeSystem
at org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory.get(SemanticAnalyzerFactory.java:318)
at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:484)
at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1317)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1457)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1237)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1227)
at org.apache.hudi.hive.HoodieHiveClient.updateHiveSQLs(HoodieHiveClient.java:458)
at org.apache.hudi.hive.HoodieHiveClient.updateHiveSQLUsingHiveDriver(HoodieHiveClient.java:448)
// spark-shell
import org.apache.hudi.QuickstartUtils._
import scala.collection.JavaConversions._
import org.apache.spark.sql.SaveMode._
import org.apache.hudi.DataSourceReadOptions._
import org.apache.hudi.DataSourceWriteOptions._
import org.apache.hudi.config.HoodieWriteConfig._
import org.apache.spark.sql.types._
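These imports are the setup for the Hudi spark-shell quickstart. A minimal sketch of what typically follows them, per that quickstart (assuming spark-shell was launched with the hudi-spark bundle, so spark is the shell's session):

// Generate a few sample trip records with QuickstartUtils and write them
// out as a copy-on-write Hudi table, mirroring the Hudi quickstart flow.
val tableName = "hudi_trips_cow"
val basePath = "file:///tmp/hudi_trips_cow"
val dataGen = new DataGenerator

val inserts = convertToStringList(dataGen.generateInserts(10))
val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2))
df.write.format("hudi").
  options(getQuickstartWriteConfigs).
  option(PRECOMBINE_FIELD_OPT_KEY, "ts").
  option(RECORDKEY_FIELD_OPT_KEY, "uuid").
  option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
  option(TABLE_NAME, tableName).
  mode(Overwrite).
  save(basePath)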
/Library/Java/JavaVirtualMachines/jdk1.8.0_192.jdk/Contents/Home/bin/java -agentlib:jdwp=transport=dt_socket,address=127.0.0.1:49636,suspend=y,server=n -javaagent:/Users/nsb/Library/Caches/JetBrains/IdeaIC2021.1/captureAgent/debugger-agent.jar -Dfile.encoding=UTF-8 -classpath "/Users/nsb/Library/Application Support/JetBrains/IdeaIC2021.1/plugins/Scala/lib/runners.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_192.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_192.jdk/Contents/Home/jre/lib/deploy.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_192.jdk/Contents/Home/jre/lib/ext/cldrdata.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_192.jdk/Contents/Home/jre/lib/ext/dnsns.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_192.jdk/Contents/Home/jre/lib/ext/jaccess.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_192.jdk/Contents/Home/jre/lib/ext/jfxrt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_192.jdk/Contents/Home/jre/lib/ext/localedata.jar:/Library/Java/JavaVirtualMachines/jdk1.
21/07/06 05:35:00 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
21/07/06 05:35:07 WARN Client: Neither spark.yarn.jars nor spark.yarn.archive is set, falling back to uploading libraries under SPARK_HOME.
21/07/06 08:08:30 WARN TaskSetManager: Lost task 222.0 in stage 14.0 (TID 10306, ip-172-31-36-52.us-east-2.compute.internal, executor 10): java.lang.RuntimeException: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: java.util.concurrent.ExecutionException: org.apache.hudi.exception.HoodieRemoteException: Server Error
at org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:121)
at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:44)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:484)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:490)
at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:221)