stacktrace
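The two traces below come from saving a Hopsworks (hsfs) feature group from a Databricks notebook. Only lines 5-7 of the triggering cell survive in the traceback, so the following is a hypothetical reconstruction of that cell, not the author's code: the feature group name and version are guessed from the Hive table name add_to_cart_last_7days_v3_1 in the second trace, and the connection arguments are omitted.

    import hsfs

    # Hypothetical reconstruction -- only the save() call and the primary_key
    # argument are visible in the traceback; the rest assumes typical hsfs 2.x usage.
    connection = hsfs.connection()  # connection arguments omitted
    fs = connection.get_feature_store()

    add_to_cart_last_7days_fg = fs.create_feature_group(
        name="add_to_cart_last_7days_v3",  # guessed from the Hive table name below
        version=1,
        primary_key=["customer_id"],
    )

    add_to_cart_last_7days_fg.save(empty_add_to_cart_last_7days)  # line 7 in the trace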
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
<command-2690650308556142> in <module>
5 primary_key=["customer_id"])
6
----> 7 add_to_cart_last_7days_fg.save(empty_add_to_cart_last_7days)
/databricks/python/lib/python3.8/site-packages/hsfs/feature_group.py in save(self, features, write_options)
642
643 # fg_job is used only if the hive engine is used
--> 644 fg_job = self._feature_group_engine.save(self, feature_dataframe, write_options)
645 if self.statistics_config.enabled and engine.get_type() == "spark":
646 # Only compute statistics if the engine is Spark.
/databricks/python/lib/python3.8/site-packages/hsfs/core/feature_group_engine.py in save(self, feature_group, feature_dataframe, write_options)
58 online_write_options = self.get_kafka_config(write_options)
59
---> 60 return engine.get_instance().save_dataframe(
61 feature_group,
62 feature_dataframe,
/databricks/python/lib/python3.8/site-packages/hsfs/engine/spark.py in save_dataframe(self, feature_group, dataframe, operation, online_enabled, storage, offline_write_options, online_write_options, validation_id)
212 self._save_online_dataframe(feature_group, dataframe, online_write_options)
213 elif online_enabled and storage is None:
--> 214 self._save_offline_dataframe(
215 feature_group,
216 dataframe,
/databricks/python/lib/python3.8/site-packages/hsfs/engine/spark.py in _save_offline_dataframe(self, feature_group, dataframe, operation, write_options, validation_id)
283 self._spark_context,
284 )
--> 285 hudi_engine_instance.save_hudi_fg(
286 dataframe, self.APPEND, operation, write_options, validation_id
287 )
/databricks/python/lib/python3.8/site-packages/hsfs/core/hudi_engine.py in save_hudi_fg(self, dataset, save_mode, operation, write_options, validation_id)
99 self, dataset, save_mode, operation, write_options, validation_id=None
100 ):
--> 101 fg_commit = self._write_hudi_dataset(
102 dataset, save_mode, operation, write_options
103 )
/databricks/python/lib/python3.8/site-packages/hsfs/core/hudi_engine.py in _write_hudi_dataset(self, dataset, save_mode, operation, write_options)
125 def _write_hudi_dataset(self, dataset, save_mode, operation, write_options):
126 hudi_options = self._setup_hudi_write_opts(operation, write_options)
--> 127 dataset.write.format(HudiEngine.HUDI_SPARK_FORMAT).options(**hudi_options).mode(
128 save_mode
129 ).save(self._base_path)
/databricks/spark/python/pyspark/sql/readwriter.py in save(self, path, format, mode, partitionBy, **options)
1134 self._jwrite.save()
1135 else:
-> 1136 self._jwrite.save(path)
1137
1138 @since(1.4)
/databricks/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py in __call__(self, *args)
1302
1303 answer = self.gateway_client.send_command(command)
-> 1304 return_value = get_return_value(
1305 answer, self.gateway_client, self.target_id, self.name)
1306
/databricks/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
115 def deco(*a, **kw):
116 try:
--> 117 return f(*a, **kw)
118 except py4j.protocol.Py4JJavaError as e:
119 converted = convert_exception(e.java_exception)
/databricks/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
324 value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
325 if answer[1] == REFERENCE_TYPE:
--> 326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
328 format(target_id, ".", name), value)
Py4JJavaError: An error occurred while calling o398.save.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 4 times, most recent failure: Lost task 0.3 in stage 1.0 (TID 3) (10.38.32.8 executor 0): java.io.InvalidClassException: org.apache.hudi.common.config.HoodieMetadataConfig; local class incompatible: stream classdesc serialVersionUID = -2456222726147592986, local class serialVersionUID = 7673976252736450116
at java.io.ObjectStreamClass.initNonProxy(ObjectStreamClass.java:699)
at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:2003)
at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1850)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2160)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.readArray(ObjectInputStream.java:2093)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1655)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.readArray(ObjectInputStream.java:2093)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1655)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
at scala.collection.immutable.List$SerializationProxy.readObject(List.scala:488)
at sun.reflect.GeneratedMethodAccessor6.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1184)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2296)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
at scala.collection.immutable.List$SerializationProxy.readObject(List.scala:488)
at sun.reflect.GeneratedMethodAccessor6.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1184)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2296)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:76)
at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:115)
at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$2(ResultTask.scala:65)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$1(ResultTask.scala:65)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:55)
at org.apache.spark.scheduler.Task.doRunTask(Task.scala:150)
at org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:119)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.Task.run(Task.scala:91)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$13(Executor.scala:789)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1643)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:792)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:648)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2778)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2725)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2719)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2719)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1255)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1255)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1255)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2986)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2927)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2915)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:1028)
at org.apache.spark.SparkContext.runJobInternal(SparkContext.scala:2459)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2442)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2480)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2499)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2524)
at org.apache.spark.rdd.RDD.count(RDD.scala:1263)
at org.apache.hudi.HoodieSparkSqlWriter$.commitAndPerformPostOperations(HoodieSparkSqlWriter.scala:470)
at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:223)
at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:145)
at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:48)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:75)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:73)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:96)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:213)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:257)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:165)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:253)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:209)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:165)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:164)
at org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:1079)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$5(SQLExecution.scala:126)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:267)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$1(SQLExecution.scala:104)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:852)
at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:77)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:217)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:1079)
at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:468)
at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:438)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:303)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)
at py4j.Gateway.invoke(Gateway.java:295)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:251)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.io.InvalidClassException: org.apache.hudi.common.config.HoodieMetadataConfig; local class incompatible: stream classdesc serialVersionUID = -2456222726147592986, local class serialVersionUID = 7673976252736450116
at java.io.ObjectStreamClass.initNonProxy(ObjectStreamClass.java:699)
at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:2003)
at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1850)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2160)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.readArray(ObjectInputStream.java:2093)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1655)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.readArray(ObjectInputStream.java:2093)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1655)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
at scala.collection.immutable.List$SerializationProxy.readObject(List.scala:488)
at sun.reflect.GeneratedMethodAccessor6.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1184)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2296)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
at scala.collection.immutable.List$SerializationProxy.readObject(List.scala:488)
at sun.reflect.GeneratedMethodAccessor6.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1184)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2296)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:76)
at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:115)
at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$2(ResultTask.scala:65)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$1(ResultTask.scala:65)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:55)
at org.apache.spark.scheduler.Task.doRunTask(Task.scala:150)
at org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:119)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.Task.run(Task.scala:91)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$13(Executor.scala:789)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1643)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:792)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:648)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
... 1 more
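The root cause in this first trace is the java.io.InvalidClassException: the driver serialized org.apache.hudi.common.config.HoodieMetadataConfig with one Hudi build (serialVersionUID -2456222726147592986) and an executor deserialized it with another (7673976252736450116), i.e. two incompatible Hudi jars sit on the cluster's classpaths. A minimal diagnostic sketch, assuming a Databricks notebook (where `spark` is predefined) and that jars land under /databricks/jars (path and glob pattern are assumptions), is to compare the Hudi jars visible to the driver and to the executors:

    import glob

    sc = spark.sparkContext  # `spark` is provided by the Databricks notebook

    # Hudi jars visible to the driver.
    print("driver:", sorted(glob.glob("/databricks/jars/*hudi*")))

    # Hudi jars visible to the executors, collected via a throwaway job.
    def hudi_jars(_):
        import glob
        return glob.glob("/databricks/jars/*hudi*")

    executor_jars = (sc.parallelize(range(sc.defaultParallelism))
                       .flatMap(hudi_jars)
                       .distinct()
                       .collect())
    print("executors:", sorted(executor_jars))

If the two lists differ (for example, a Hudi jar installed as a cluster library on top of a runtime that already bundles one), aligning everything to a single Hudi version should clear the InvalidClassException. The second trace below, apparently from a later run once the class mismatch was resolved, gets past the write itself and instead fails during Hive metadata sync.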
Py4JJavaError Traceback (most recent call last)
in <module>
5 primary_key=["customer_id"])
6
----> 7 add_to_cart_last_7days_fg.save(empty_add_to_cart_last_7days)
/databricks/python/lib/python3.8/site-packages/hsfs/feature_group.py in save(self, features, write_options)
642
643 # fg_job is used only if the hive engine is used
--> 644 fg_job = self._feature_group_engine.save(self, feature_dataframe, write_options)
645 if self.statistics_config.enabled and engine.get_type() == "spark":
646 # Only compute statistics if the engine is Spark.
/databricks/python/lib/python3.8/site-packages/hsfs/core/feature_group_engine.py in save(self, feature_group, feature_dataframe, write_options)
58 online_write_options = self.get_kafka_config(write_options)
59
---> 60 return engine.get_instance().save_dataframe(
61 feature_group,
62 feature_dataframe,
/databricks/python/lib/python3.8/site-packages/hsfs/engine/spark.py in save_dataframe(self, feature_group, dataframe, operation, online_enabled, storage, offline_write_options, online_write_options, validation_id)
212 self._save_online_dataframe(feature_group, dataframe, online_write_options)
213 elif online_enabled and storage is None:
--> 214 self._save_offline_dataframe(
215 feature_group,
216 dataframe,
/databricks/python/lib/python3.8/site-packages/hsfs/engine/spark.py in _save_offline_dataframe(self, feature_group, dataframe, operation, write_options, validation_id)
283 self._spark_context,
284 )
--> 285 hudi_engine_instance.save_hudi_fg(
286 dataframe, self.APPEND, operation, write_options, validation_id
287 )
/databricks/python/lib/python3.8/site-packages/hsfs/core/hudi_engine.py in save_hudi_fg(self, dataset, save_mode, operation, write_options, validation_id)
99 self, dataset, save_mode, operation, write_options, validation_id=None
100 ):
--> 101 fg_commit = self._write_hudi_dataset(
102 dataset, save_mode, operation, write_options
103 )
/databricks/python/lib/python3.8/site-packages/hsfs/core/hudi_engine.py in _write_hudi_dataset(self, dataset, save_mode, operation, write_options)
125 def _write_hudi_dataset(self, dataset, save_mode, operation, write_options):
126 hudi_options = self._setup_hudi_write_opts(operation, write_options)
--> 127 dataset.write.format(HudiEngine.HUDI_SPARK_FORMAT).options(**hudi_options).mode(
128 save_mode
129 ).save(self._base_path)
/databricks/spark/python/pyspark/sql/readwriter.py in save(self, path, format, mode, partitionBy, **options)
1134 self._jwrite.save()
1135 else:
-> 1136 self._jwrite.save(path)
1137
1138 @since(1.4)
/databricks/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py in __call__(self, *args)
1302
1303 answer = self.gateway_client.send_command(command)
-> 1304 return_value = get_return_value(
1305 answer, self.gateway_client, self.target_id, self.name)
1306
/databricks/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
115 def deco(*a, **kw):
116 try:
--> 117 return f(*a, **kw)
118 except py4j.protocol.Py4JJavaError as e:
119 converted = convert_exception(e.java_exception)
/databricks/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
324 value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
325 if answer[1] == REFERENCE_TYPE:
--> 326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
328 format(target_id, ".", name), value)
Py4JJavaError: An error occurred while calling o401.save.
: org.apache.hudi.exception.HoodieException: Got runtime exception when hive syncing add_to_cart_last_7days_v3_1
at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:122)
at org.apache.hudi.HoodieSparkSqlWriter$.syncHive(HoodieSparkSqlWriter.scala:391)
at org.apache.hudi.HoodieSparkSqlWriter$.$anonfun$metaSync$4(HoodieSparkSqlWriter.scala:440)
at org.apache.hudi.HoodieSparkSqlWriter$.$anonfun$metaSync$4$adapted(HoodieSparkSqlWriter.scala:436)
at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
at org.apache.hudi.HoodieSparkSqlWriter$.metaSync(HoodieSparkSqlWriter.scala:436)
at org.apache.hudi.HoodieSparkSqlWriter$.commitAndPerformPostOperations(HoodieSparkSqlWriter.scala:497)
at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:223)
at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:145)
at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:48)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:75)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:73)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:96)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:213)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:257)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:165)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:253)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:209)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:165)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:164)
at org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:1079)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$5(SQLExecution.scala:126)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:267)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$1(SQLExecution.scala:104)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:852)
at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:77)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:217)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:1079)
at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:468)
at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:438)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:303)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)
at py4j.Gateway.invoke(Gateway.java:295)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:251)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.hudi.hive.HoodieHiveSyncException: Could not convert field Type from TIMESTAMP to bigint for field max_event_timestamp
at org.apache.hudi.hive.util.HiveSchemaUtil.getSchemaDifference(HiveSchemaUtil.java:98)
at org.apache.hudi.hive.HiveSyncTool.syncSchema(HiveSyncTool.java:203)
at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:154)
at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:108)
... 41 more
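Here the Hudi write succeeds and the failure moves to Hive sync: the table add_to_cart_last_7days_v3_1 and the incoming Hudi schema disagree about max_event_timestamp (TIMESTAMP on one side, bigint on the other), and HiveSchemaUtil refuses that type change. Two possible ways out, sketched below; neither is confirmed by the trace, and both reuse `fs` and the feature-group names from the hypothetical reconstruction at the top. Option (b) leans on the fact that write_options flows into _setup_hudi_write_opts in the trace above, so a Hudi hive-sync option plausibly reaches the writer; whether hsfs actually forwards it is an assumption.

    # (a) Recreate the feature group under a new version, so Hive sync creates a
    #     fresh table instead of altering the existing, incompatible one.
    #     (Assumption: bumping the version is acceptable here.)
    fg_v2 = fs.create_feature_group(
        name="add_to_cart_last_7days_v3",
        version=2,
        primary_key=["customer_id"],
    )
    fg_v2.save(empty_add_to_cart_last_7days)

    # (b) Ask Hudi's Hive sync to map Spark timestamps to Hive TIMESTAMP rather
    #     than bigint. The option is a real Hudi write option; its forwarding
    #     through hsfs write_options is an assumption.
    add_to_cart_last_7days_fg.save(
        empty_add_to_cart_last_7days,
        write_options={"hoodie.datasource.hive_sync.support_timestamp": "true"},
    )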