| Writing DataFrame to Parquet file: /data/users/nicholsenpm/airflow_extractions/BTRIS_CB_Color_Coding_09152025_115611/output/PRES_subjects_processed/PRES_subjects_processed.parquet | |
| 25/09/15 13:24:06 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory | |
| Scaling row group sizes to 95.00% for 8 writers | |
| 25/09/15 13:24:06 ERROR Utils: Aborting task (0 + 8) / 8] | |
| org.apache.spark.SparkUpgradeException: [INCONSISTENT_BEHAVIOR_CROSS_VERSION.WRITE_ANCIENT_DATETIME] You may get a different result due to the upgrading to Spark >= 3.0: | |
| writing dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z | |
| into Parquet files can be dangerous, as the files may be read by Spark 2.x | |
| or legacy versions of Hive later, which uses a legacy hybrid calendar that | |
| is different from Spark 3.0+'s Proleptic Gregorian calendar. See more | |
| details in SPARK-31404. You can set "spark.sql.parquet.datetimeRebaseModeInWrite" to "LEGACY" to rebase the | |
| datetime values w.r.t. the calendar difference during writing, to get maximum | |
| interoperability. Or set the config to "CORRECTED" to write the datetime | |
| values as it is, if you are sure that the written files will only be read by | |
| Spark 3.0+ or other systems that use Proleptic Gregorian calendar. | |
| at org.apache.spark.sql.errors.QueryExecutionErrors$.sparkUpgradeInWritingDatesError(QueryExecutionErrors.scala:760) | |
| at org.apache.spark.sql.execution.datasources.DataSourceUtils$.newRebaseExceptionInWrite(DataSourceUtils.scala:187) | |
| at org.apache.spark.sql.execution.datasources.DataSourceUtils$.$anonfun$createDateRebaseFuncInWrite$1(DataSourceUtils.scala:207) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$4(ParquetWriteSupport.scala:184) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$4$adapted(ParquetWriteSupport.scala:183) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$writeFields$1(ParquetWriteSupport.scala:161) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeField(ParquetWriteSupport.scala:483) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.writeFields(ParquetWriteSupport.scala:161) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$write$1(ParquetWriteSupport.scala:151) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeMessage(ParquetWriteSupport.scala:471) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:151) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:53) | |
| at org.apache.parquet.hadoop.InternalParquetRecordWriter.write(InternalParquetRecordWriter.java:138) | |
| at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:181) | |
| at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:43) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.write(ParquetOutputWriter.scala:39) | |
| at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.write(FileFormatDataWriter.scala:175) | |
| at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithMetrics(FileFormatDataWriter.scala:85) | |
| at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithIterator(FileFormatDataWriter.scala:92) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeTask$1(FileFormatWriter.scala:403) | |
| at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1397) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:410) | |
| at org.apache.spark.sql.execution.datasources.WriteFilesExec.$anonfun$doExecuteWrite$1(WriteFiles.scala:100) | |
| at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:893) | |
| at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:893) | |
| at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
| at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367) | |
| at org.apache.spark.rdd.RDD.iterator(RDD.scala:331) | |
| at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93) | |
| at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166) | |
| at org.apache.spark.scheduler.Task.run(Task.scala:141) | |
| at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:621) | |
| at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64) | |
| at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61) | |
| at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94) | |
| at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:624) | |
| at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) | |
| at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) | |
| at java.base/java.lang.Thread.run(Thread.java:829) | |
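The exception above also spells out the remedy: Spark 3.x refuses to write dates before 1582-10-15 (or timestamps before 1900-01-01) to Parquet until spark.sql.parquet.datetimeRebaseModeInWrite is set to either LEGACY or CORRECTED. Below is a minimal PySpark sketch of the runtime fix; the tiny DataFrame, app name, and output path are illustrative placeholders, not taken from the job that produced this log.

import datetime
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("ancient-date-demo").getOrCreate()

# LEGACY rebases pre-1582-10-15 dates to the legacy hybrid (Julian/Gregorian)
# calendar so the files stay readable by Spark 2.x and legacy Hive.
# Use CORRECTED instead if only Spark 3.0+ (Proleptic Gregorian) will read them.
spark.conf.set("spark.sql.parquet.datetimeRebaseModeInWrite", "LEGACY")

# A date before 1582-10-15 is exactly what triggers WRITE_ANCIENT_DATETIME.
df = spark.createDataFrame([(1, datetime.date(1580, 1, 1))], ["id", "event_date"])
df.write.mode("overwrite").parquet("/tmp/ancient_date_demo.parquet")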
| 25/09/15 13:24:06 ERROR Utils: Aborting task | |
| org.apache.spark.SparkUpgradeException: [INCONSISTENT_BEHAVIOR_CROSS_VERSION.WRITE_ANCIENT_DATETIME] You may get a different result due to the upgrading to Spark >= 3.0: | |
| writing dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z | |
| into Parquet files can be dangerous, as the files may be read by Spark 2.x | |
| or legacy versions of Hive later, which uses a legacy hybrid calendar that | |
| is different from Spark 3.0+'s Proleptic Gregorian calendar. See more | |
| details in SPARK-31404. You can set "spark.sql.parquet.datetimeRebaseModeInWrite" to "LEGACY" to rebase the | |
| datetime values w.r.t. the calendar difference during writing, to get maximum | |
| interoperability. Or set the config to "CORRECTED" to write the datetime | |
| values as it is, if you are sure that the written files will only be read by | |
| Spark 3.0+ or other systems that use Proleptic Gregorian calendar. | |
| at org.apache.spark.sql.errors.QueryExecutionErrors$.sparkUpgradeInWritingDatesError(QueryExecutionErrors.scala:760) | |
| at org.apache.spark.sql.execution.datasources.DataSourceUtils$.newRebaseExceptionInWrite(DataSourceUtils.scala:187) | |
| at org.apache.spark.sql.execution.datasources.DataSourceUtils$.$anonfun$createDateRebaseFuncInWrite$1(DataSourceUtils.scala:207) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$4(ParquetWriteSupport.scala:184) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$4$adapted(ParquetWriteSupport.scala:183) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$writeFields$1(ParquetWriteSupport.scala:161) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeField(ParquetWriteSupport.scala:483) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.writeFields(ParquetWriteSupport.scala:161) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$write$1(ParquetWriteSupport.scala:151) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeMessage(ParquetWriteSupport.scala:471) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:151) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:53) | |
| at org.apache.parquet.hadoop.InternalParquetRecordWriter.write(InternalParquetRecordWriter.java:138) | |
| at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:181) | |
| at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:43) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.write(ParquetOutputWriter.scala:39) | |
| at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.write(FileFormatDataWriter.scala:175) | |
| at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithMetrics(FileFormatDataWriter.scala:85) | |
| at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithIterator(FileFormatDataWriter.scala:92) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeTask$1(FileFormatWriter.scala:403) | |
| at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1397) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:410) | |
| at org.apache.spark.sql.execution.datasources.WriteFilesExec.$anonfun$doExecuteWrite$1(WriteFiles.scala:100) | |
| at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:893) | |
| at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:893) | |
| at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
| at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367) | |
| at org.apache.spark.rdd.RDD.iterator(RDD.scala:331) | |
| at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93) | |
| at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166) | |
| at org.apache.spark.scheduler.Task.run(Task.scala:141) | |
| at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:621) | |
| at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64) | |
| at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61) | |
| at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94) | |
| at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:624) | |
| at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) | |
| at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) | |
| at java.base/java.lang.Thread.run(Thread.java:829) | |
| 25/09/15 13:24:06 ERROR Utils: Aborting task | |
| org.apache.spark.SparkUpgradeException: [INCONSISTENT_BEHAVIOR_CROSS_VERSION.WRITE_ANCIENT_DATETIME] You may get a different result due to the upgrading to Spark >= 3.0: | |
| writing dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z | |
| into Parquet files can be dangerous, as the files may be read by Spark 2.x | |
| or legacy versions of Hive later, which uses a legacy hybrid calendar that | |
| is different from Spark 3.0+'s Proleptic Gregorian calendar. See more | |
| details in SPARK-31404. You can set "spark.sql.parquet.datetimeRebaseModeInWrite" to "LEGACY" to rebase the | |
| datetime values w.r.t. the calendar difference during writing, to get maximum | |
| interoperability. Or set the config to "CORRECTED" to write the datetime | |
| values as it is, if you are sure that the written files will only be read by | |
| Spark 3.0+ or other systems that use Proleptic Gregorian calendar. | |
| at org.apache.spark.sql.errors.QueryExecutionErrors$.sparkUpgradeInWritingDatesError(QueryExecutionErrors.scala:760) | |
| at org.apache.spark.sql.execution.datasources.DataSourceUtils$.newRebaseExceptionInWrite(DataSourceUtils.scala:187) | |
| at org.apache.spark.sql.execution.datasources.DataSourceUtils$.$anonfun$createDateRebaseFuncInWrite$1(DataSourceUtils.scala:207) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$4(ParquetWriteSupport.scala:184) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$4$adapted(ParquetWriteSupport.scala:183) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$writeFields$1(ParquetWriteSupport.scala:161) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeField(ParquetWriteSupport.scala:483) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.writeFields(ParquetWriteSupport.scala:161) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$write$1(ParquetWriteSupport.scala:151) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeMessage(ParquetWriteSupport.scala:471) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:151) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:53) | |
| at org.apache.parquet.hadoop.InternalParquetRecordWriter.write(InternalParquetRecordWriter.java:138) | |
| at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:181) | |
| at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:43) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.write(ParquetOutputWriter.scala:39) | |
| at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.write(FileFormatDataWriter.scala:175) | |
| at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithMetrics(FileFormatDataWriter.scala:85) | |
| at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithIterator(FileFormatDataWriter.scala:92) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeTask$1(FileFormatWriter.scala:403) | |
| at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1397) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:410) | |
| at org.apache.spark.sql.execution.datasources.WriteFilesExec.$anonfun$doExecuteWrite$1(WriteFiles.scala:100) | |
| at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:893) | |
| at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:893) | |
| at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
| at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367) | |
| at org.apache.spark.rdd.RDD.iterator(RDD.scala:331) | |
| at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93) | |
| at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166) | |
| at org.apache.spark.scheduler.Task.run(Task.scala:141) | |
| at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:621) | |
| at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64) | |
| at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61) | |
| at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94) | |
| at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:624) | |
| at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) | |
| at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) | |
| at java.base/java.lang.Thread.run(Thread.java:829) | |
| 25/09/15 13:24:07 ERROR FileFormatWriter: Job job_202509151324051339976293129366793_0003 aborted. | |
| 25/09/15 13:24:07 ERROR Executor: Exception in task 5.0 in stage 3.0 (TID 15) | |
| org.apache.spark.SparkException: [TASK_WRITE_FAILED] Task failed while writing rows to file:/data/users/nicholsenpm/airflow_extractions/BTRIS_CB_Color_Coding_09152025_115611/output/PRES_subjects_processed/PRES_subjects_processed.parquet. | |
| at org.apache.spark.sql.errors.QueryExecutionErrors$.taskFailedWhileWritingRowsError(QueryExecutionErrors.scala:775) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:420) | |
| at org.apache.spark.sql.execution.datasources.WriteFilesExec.$anonfun$doExecuteWrite$1(WriteFiles.scala:100) | |
| at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:893) | |
| at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:893) | |
| at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
| at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367) | |
| at org.apache.spark.rdd.RDD.iterator(RDD.scala:331) | |
| at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93) | |
| at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166) | |
| at org.apache.spark.scheduler.Task.run(Task.scala:141) | |
| at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:621) | |
| at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64) | |
| at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61) | |
| at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94) | |
| at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:624) | |
| at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) | |
| at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) | |
| at java.base/java.lang.Thread.run(Thread.java:829) | |
| Caused by: org.apache.spark.SparkUpgradeException: [INCONSISTENT_BEHAVIOR_CROSS_VERSION.WRITE_ANCIENT_DATETIME] You may get a different result due to the upgrading to Spark >= 3.0: | |
| writing dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z | |
| into Parquet files can be dangerous, as the files may be read by Spark 2.x | |
| or legacy versions of Hive later, which uses a legacy hybrid calendar that | |
| is different from Spark 3.0+'s Proleptic Gregorian calendar. See more | |
| details in SPARK-31404. You can set "spark.sql.parquet.datetimeRebaseModeInWrite" to "LEGACY" to rebase the | |
| datetime values w.r.t. the calendar difference during writing, to get maximum | |
| interoperability. Or set the config to "CORRECTED" to write the datetime | |
| values as it is, if you are sure that the written files will only be read by | |
| Spark 3.0+ or other systems that use Proleptic Gregorian calendar. | |
| at org.apache.spark.sql.errors.QueryExecutionErrors$.sparkUpgradeInWritingDatesError(QueryExecutionErrors.scala:760) | |
| at org.apache.spark.sql.execution.datasources.DataSourceUtils$.newRebaseExceptionInWrite(DataSourceUtils.scala:187) | |
| at org.apache.spark.sql.execution.datasources.DataSourceUtils$.$anonfun$createDateRebaseFuncInWrite$1(DataSourceUtils.scala:207) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$4(ParquetWriteSupport.scala:184) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$4$adapted(ParquetWriteSupport.scala:183) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$writeFields$1(ParquetWriteSupport.scala:161) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeField(ParquetWriteSupport.scala:483) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.writeFields(ParquetWriteSupport.scala:161) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$write$1(ParquetWriteSupport.scala:151) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeMessage(ParquetWriteSupport.scala:471) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:151) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:53) | |
| at org.apache.parquet.hadoop.InternalParquetRecordWriter.write(InternalParquetRecordWriter.java:138) | |
| at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:181) | |
| at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:43) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.write(ParquetOutputWriter.scala:39) | |
| at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.write(FileFormatDataWriter.scala:175) | |
| at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithMetrics(FileFormatDataWriter.scala:85) | |
| at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithIterator(FileFormatDataWriter.scala:92) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeTask$1(FileFormatWriter.scala:403) | |
| at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1397) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:410) | |
| ... 17 more | |
| 25/09/15 13:24:07 ERROR FileFormatWriter: Job job_202509151324051339976293129366793_0003 aborted. | |
| 25/09/15 13:24:07 ERROR Executor: Exception in task 2.0 in stage 3.0 (TID 12) | |
| org.apache.spark.SparkException: [TASK_WRITE_FAILED] Task failed while writing rows to file:/data/users/nicholsenpm/airflow_extractions/BTRIS_CB_Color_Coding_09152025_115611/output/PRES_subjects_processed/PRES_subjects_processed.parquet. | |
| at org.apache.spark.sql.errors.QueryExecutionErrors$.taskFailedWhileWritingRowsError(QueryExecutionErrors.scala:775) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:420) | |
| at org.apache.spark.sql.execution.datasources.WriteFilesExec.$anonfun$doExecuteWrite$1(WriteFiles.scala:100) | |
| at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:893) | |
| at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:893) | |
| at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
| at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367) | |
| at org.apache.spark.rdd.RDD.iterator(RDD.scala:331) | |
| at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93) | |
| at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166) | |
| at org.apache.spark.scheduler.Task.run(Task.scala:141) | |
| at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:621) | |
| at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64) | |
| at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61) | |
| at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94) | |
| at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:624) | |
| at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) | |
| at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) | |
| at java.base/java.lang.Thread.run(Thread.java:829) | |
| Caused by: org.apache.spark.SparkUpgradeException: [INCONSISTENT_BEHAVIOR_CROSS_VERSION.WRITE_ANCIENT_DATETIME] You may get a different result due to the upgrading to Spark >= 3.0: | |
| writing dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z | |
| into Parquet files can be dangerous, as the files may be read by Spark 2.x | |
| or legacy versions of Hive later, which uses a legacy hybrid calendar that | |
| is different from Spark 3.0+'s Proleptic Gregorian calendar. See more | |
| details in SPARK-31404. You can set "spark.sql.parquet.datetimeRebaseModeInWrite" to "LEGACY" to rebase the | |
| datetime values w.r.t. the calendar difference during writing, to get maximum | |
| interoperability. Or set the config to "CORRECTED" to write the datetime | |
| values as it is, if you are sure that the written files will only be read by | |
| Spark 3.0+ or other systems that use Proleptic Gregorian calendar. | |
| at org.apache.spark.sql.errors.QueryExecutionErrors$.sparkUpgradeInWritingDatesError(QueryExecutionErrors.scala:760) | |
| at org.apache.spark.sql.execution.datasources.DataSourceUtils$.newRebaseExceptionInWrite(DataSourceUtils.scala:187) | |
| at org.apache.spark.sql.execution.datasources.DataSourceUtils$.$anonfun$createDateRebaseFuncInWrite$1(DataSourceUtils.scala:207) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$4(ParquetWriteSupport.scala:184) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$4$adapted(ParquetWriteSupport.scala:183) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$writeFields$1(ParquetWriteSupport.scala:161) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeField(ParquetWriteSupport.scala:483) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.writeFields(ParquetWriteSupport.scala:161) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$write$1(ParquetWriteSupport.scala:151) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeMessage(ParquetWriteSupport.scala:471) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:151) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:53) | |
| at org.apache.parquet.hadoop.InternalParquetRecordWriter.write(InternalParquetRecordWriter.java:138) | |
| at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:181) | |
| at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:43) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.write(ParquetOutputWriter.scala:39) | |
| at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.write(FileFormatDataWriter.scala:175) | |
| at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithMetrics(FileFormatDataWriter.scala:85) | |
| at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithIterator(FileFormatDataWriter.scala:92) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeTask$1(FileFormatWriter.scala:403) | |
| at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1397) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:410) | |
| ... 17 more | |
| 25/09/15 13:24:07 ERROR FileFormatWriter: Job job_202509151324051339976293129366793_0003 aborted. | |
| 25/09/15 13:24:07 ERROR Executor: Exception in task 1.0 in stage 3.0 (TID 11) | |
| org.apache.spark.SparkException: [TASK_WRITE_FAILED] Task failed while writing rows to file:/data/users/nicholsenpm/airflow_extractions/BTRIS_CB_Color_Coding_09152025_115611/output/PRES_subjects_processed/PRES_subjects_processed.parquet. | |
| at org.apache.spark.sql.errors.QueryExecutionErrors$.taskFailedWhileWritingRowsError(QueryExecutionErrors.scala:775) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:420) | |
| at org.apache.spark.sql.execution.datasources.WriteFilesExec.$anonfun$doExecuteWrite$1(WriteFiles.scala:100) | |
| at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:893) | |
| at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:893) | |
| at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
| at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367) | |
| at org.apache.spark.rdd.RDD.iterator(RDD.scala:331) | |
| at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93) | |
| at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166) | |
| at org.apache.spark.scheduler.Task.run(Task.scala:141) | |
| at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:621) | |
| at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64) | |
| at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61) | |
| at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94) | |
| at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:624) | |
| at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) | |
| at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) | |
| at java.base/java.lang.Thread.run(Thread.java:829) | |
| Caused by: org.apache.spark.SparkUpgradeException: [INCONSISTENT_BEHAVIOR_CROSS_VERSION.WRITE_ANCIENT_DATETIME] You may get a different result due to the upgrading to Spark >= 3.0: | |
| writing dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z | |
| into Parquet files can be dangerous, as the files may be read by Spark 2.x | |
| or legacy versions of Hive later, which uses a legacy hybrid calendar that | |
| is different from Spark 3.0+'s Proleptic Gregorian calendar. See more | |
| details in SPARK-31404. You can set "spark.sql.parquet.datetimeRebaseModeInWrite" to "LEGACY" to rebase the | |
| datetime values w.r.t. the calendar difference during writing, to get maximum | |
| interoperability. Or set the config to "CORRECTED" to write the datetime | |
| values as it is, if you are sure that the written files will only be read by | |
| Spark 3.0+ or other systems that use Proleptic Gregorian calendar. | |
| at org.apache.spark.sql.errors.QueryExecutionErrors$.sparkUpgradeInWritingDatesError(QueryExecutionErrors.scala:760) | |
| at org.apache.spark.sql.execution.datasources.DataSourceUtils$.newRebaseExceptionInWrite(DataSourceUtils.scala:187) | |
| at org.apache.spark.sql.execution.datasources.DataSourceUtils$.$anonfun$createDateRebaseFuncInWrite$1(DataSourceUtils.scala:207) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$4(ParquetWriteSupport.scala:184) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$4$adapted(ParquetWriteSupport.scala:183) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$writeFields$1(ParquetWriteSupport.scala:161) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeField(ParquetWriteSupport.scala:483) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.writeFields(ParquetWriteSupport.scala:161) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$write$1(ParquetWriteSupport.scala:151) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeMessage(ParquetWriteSupport.scala:471) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:151) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:53) | |
| at org.apache.parquet.hadoop.InternalParquetRecordWriter.write(InternalParquetRecordWriter.java:138) | |
| at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:181) | |
| at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:43) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.write(ParquetOutputWriter.scala:39) | |
| at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.write(FileFormatDataWriter.scala:175) | |
| at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithMetrics(FileFormatDataWriter.scala:85) | |
| at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithIterator(FileFormatDataWriter.scala:92) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeTask$1(FileFormatWriter.scala:403) | |
| at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1397) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:410) | |
| ... 17 more | |
| 25/09/15 13:24:07 WARN TaskSetManager: Lost task 2.0 in stage 3.0 (TID 12) (ncias-d3613-v.nci.nih.gov executor driver): org.apache.spark.SparkException: [TASK_WRITE_FAILED] Task failed while writing rows to file:/data/users/nicholsenpm/airflow_extractions/BTRIS_CB_Color_Coding_09152025_115611/output/PRES_subjects_processed/PRES_subjects_processed.parquet. | |
| at org.apache.spark.sql.errors.QueryExecutionErrors$.taskFailedWhileWritingRowsError(QueryExecutionErrors.scala:775) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:420) | |
| at org.apache.spark.sql.execution.datasources.WriteFilesExec.$anonfun$doExecuteWrite$1(WriteFiles.scala:100) | |
| at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:893) | |
| at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:893) | |
| at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
| at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367) | |
| at org.apache.spark.rdd.RDD.iterator(RDD.scala:331) | |
| at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93) | |
| at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166) | |
| at org.apache.spark.scheduler.Task.run(Task.scala:141) | |
| at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:621) | |
| at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64) | |
| at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61) | |
| at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94) | |
| at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:624) | |
| at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) | |
| at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) | |
| at java.base/java.lang.Thread.run(Thread.java:829) | |
| Caused by: org.apache.spark.SparkUpgradeException: [INCONSISTENT_BEHAVIOR_CROSS_VERSION.WRITE_ANCIENT_DATETIME] You may get a different result due to the upgrading to Spark >= 3.0: | |
| writing dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z | |
| into Parquet files can be dangerous, as the files may be read by Spark 2.x | |
| or legacy versions of Hive later, which uses a legacy hybrid calendar that | |
| is different from Spark 3.0+'s Proleptic Gregorian calendar. See more | |
| details in SPARK-31404. You can set "spark.sql.parquet.datetimeRebaseModeInWrite" to "LEGACY" to rebase the | |
| datetime values w.r.t. the calendar difference during writing, to get maximum | |
| interoperability. Or set the config to "CORRECTED" to write the datetime | |
| values as it is, if you are sure that the written files will only be read by | |
| Spark 3.0+ or other systems that use Proleptic Gregorian calendar. | |
| at org.apache.spark.sql.errors.QueryExecutionErrors$.sparkUpgradeInWritingDatesError(QueryExecutionErrors.scala:760) | |
| at org.apache.spark.sql.execution.datasources.DataSourceUtils$.newRebaseExceptionInWrite(DataSourceUtils.scala:187) | |
| at org.apache.spark.sql.execution.datasources.DataSourceUtils$.$anonfun$createDateRebaseFuncInWrite$1(DataSourceUtils.scala:207) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$4(ParquetWriteSupport.scala:184) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$4$adapted(ParquetWriteSupport.scala:183) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$writeFields$1(ParquetWriteSupport.scala:161) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeField(ParquetWriteSupport.scala:483) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.writeFields(ParquetWriteSupport.scala:161) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$write$1(ParquetWriteSupport.scala:151) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeMessage(ParquetWriteSupport.scala:471) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:151) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:53) | |
| at org.apache.parquet.hadoop.InternalParquetRecordWriter.write(InternalParquetRecordWriter.java:138) | |
| at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:181) | |
| at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:43) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.write(ParquetOutputWriter.scala:39) | |
| at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.write(FileFormatDataWriter.scala:175) | |
| at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithMetrics(FileFormatDataWriter.scala:85) | |
| at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithIterator(FileFormatDataWriter.scala:92) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeTask$1(FileFormatWriter.scala:403) | |
| at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1397) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:410) | |
| ... 17 more | |
| 25/09/15 13:24:07 ERROR TaskSetManager: Task 2 in stage 3.0 failed 1 times; aborting job | |
| 25/09/15 13:24:07 ERROR FileFormatWriter: Aborting job adcbb46f-5694-47cb-b97d-af019a4af85b. | |
| org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 3.0 failed 1 times, most recent failure: Lost task 2.0 in stage 3.0 (TID 12) (ncias-d3613-v.nci.nih.gov executor driver): org.apache.spark.SparkException: [TASK_WRITE_FAILED] Task failed while writing rows to file:/data/users/nicholsenpm/airflow_extractions/BTRIS_CB_Color_Coding_09152025_115611/output/PRES_subjects_processed/PRES_subjects_processed.parquet. | |
| at org.apache.spark.sql.errors.QueryExecutionErrors$.taskFailedWhileWritingRowsError(QueryExecutionErrors.scala:775) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:420) | |
| at org.apache.spark.sql.execution.datasources.WriteFilesExec.$anonfun$doExecuteWrite$1(WriteFiles.scala:100) | |
| at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:893) | |
| at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:893) | |
| at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
| at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367) | |
| at org.apache.spark.rdd.RDD.iterator(RDD.scala:331) | |
| at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93) | |
| at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166) | |
| at org.apache.spark.scheduler.Task.run(Task.scala:141) | |
| at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:621) | |
| at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64) | |
| at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61) | |
| at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94) | |
| at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:624) | |
| at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) | |
| at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) | |
| at java.base/java.lang.Thread.run(Thread.java:829) | |
| Caused by: org.apache.spark.SparkUpgradeException: [INCONSISTENT_BEHAVIOR_CROSS_VERSION.WRITE_ANCIENT_DATETIME] You may get a different result due to the upgrading to Spark >= 3.0: | |
| writing dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z | |
| into Parquet files can be dangerous, as the files may be read by Spark 2.x | |
| or legacy versions of Hive later, which uses a legacy hybrid calendar that | |
| is different from Spark 3.0+'s Proleptic Gregorian calendar. See more | |
| details in SPARK-31404. You can set "spark.sql.parquet.datetimeRebaseModeInWrite" to "LEGACY" to rebase the | |
| datetime values w.r.t. the calendar difference during writing, to get maximum | |
| interoperability. Or set the config to "CORRECTED" to write the datetime | |
| values as it is, if you are sure that the written files will only be read by | |
| Spark 3.0+ or other systems that use Proleptic Gregorian calendar. | |
| at org.apache.spark.sql.errors.QueryExecutionErrors$.sparkUpgradeInWritingDatesError(QueryExecutionErrors.scala:760) | |
| at org.apache.spark.sql.execution.datasources.DataSourceUtils$.newRebaseExceptionInWrite(DataSourceUtils.scala:187) | |
| at org.apache.spark.sql.execution.datasources.DataSourceUtils$.$anonfun$createDateRebaseFuncInWrite$1(DataSourceUtils.scala:207) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$4(ParquetWriteSupport.scala:184) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$4$adapted(ParquetWriteSupport.scala:183) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$writeFields$1(ParquetWriteSupport.scala:161) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeField(ParquetWriteSupport.scala:483) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.writeFields(ParquetWriteSupport.scala:161) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$write$1(ParquetWriteSupport.scala:151) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeMessage(ParquetWriteSupport.scala:471) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:151) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:53) | |
| at org.apache.parquet.hadoop.InternalParquetRecordWriter.write(InternalParquetRecordWriter.java:138) | |
| at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:181) | |
| at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:43) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.write(ParquetOutputWriter.scala:39) | |
| at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.write(FileFormatDataWriter.scala:175) | |
| at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithMetrics(FileFormatDataWriter.scala:85) | |
| at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithIterator(FileFormatDataWriter.scala:92) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeTask$1(FileFormatWriter.scala:403) | |
| at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1397) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:410) | |
| ... 17 more | |
| Driver stacktrace: | |
| at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2898) | |
| at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2834) | |
| at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2833) | |
| at scala.collection.immutable.List.foreach(List.scala:333) | |
| at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2833) | |
| at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1253) | |
| at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1253) | |
| at scala.Option.foreach(Option.scala:437) | |
| at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1253) | |
| at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3102) | |
| at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3036) | |
| at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3025) | |
| at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49) | |
| at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:995) | |
| at org.apache.spark.SparkContext.runJob(SparkContext.scala:2393) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeWrite$4(FileFormatWriter.scala:307) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.writeAndCommit(FileFormatWriter.scala:271) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeWrite(FileFormatWriter.scala:304) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:190) | |
| at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:190) | |
| at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113) | |
| at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111) | |
| at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125) | |
| at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:107) | |
| at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:125) | |
| at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201) | |
| at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108) | |
| at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900) | |
| at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66) | |
| at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:107) | |
| at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98) | |
| at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:461) | |
| at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76) | |
| at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:461) | |
| at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32) | |
| at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267) | |
| at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263) | |
| at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32) | |
| at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32) | |
| at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:437) | |
| at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:98) | |
| at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:85) | |
| at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:83) | |
| at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:142) | |
| at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:869) | |
| at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:391) | |
| at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:364) | |
| at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:243) | |
| at org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:802) | |
| at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
| at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) | |
| at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
| at java.base/java.lang.reflect.Method.invoke(Method.java:566) | |
| at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) | |
| at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374) | |
| at py4j.Gateway.invoke(Gateway.java:282) | |
| at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) | |
| at py4j.commands.CallCommand.execute(CallCommand.java:79) | |
| at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182) | |
| at py4j.ClientServerConnection.run(ClientServerConnection.java:106) | |
| at java.base/java.lang.Thread.run(Thread.java:829) | |
| Caused by: org.apache.spark.SparkException: [TASK_WRITE_FAILED] Task failed while writing rows to file:/data/users/nicholsenpm/airflow_extractions/BTRIS_CB_Color_Coding_09152025_115611/output/PRES_subjects_processed/PRES_subjects_processed.parquet. | |
| at org.apache.spark.sql.errors.QueryExecutionErrors$.taskFailedWhileWritingRowsError(QueryExecutionErrors.scala:775) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:420) | |
| at org.apache.spark.sql.execution.datasources.WriteFilesExec.$anonfun$doExecuteWrite$1(WriteFiles.scala:100) | |
| at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:893) | |
| at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:893) | |
| at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
| at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367) | |
| at org.apache.spark.rdd.RDD.iterator(RDD.scala:331) | |
| at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93) | |
| at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166) | |
| at org.apache.spark.scheduler.Task.run(Task.scala:141) | |
| at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:621) | |
| at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64) | |
| at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61) | |
| at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94) | |
| at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:624) | |
| at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) | |
| at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) | |
| ... 1 more | |
| Caused by: org.apache.spark.SparkUpgradeException: [INCONSISTENT_BEHAVIOR_CROSS_VERSION.WRITE_ANCIENT_DATETIME] You may get a different result due to the upgrading to Spark >= 3.0: | |
| writing dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z | |
| into Parquet files can be dangerous, as the files may be read by Spark 2.x | |
| or legacy versions of Hive later, which uses a legacy hybrid calendar that | |
| is different from Spark 3.0+'s Proleptic Gregorian calendar. See more | |
| details in SPARK-31404. You can set "spark.sql.parquet.datetimeRebaseModeInWrite" to "LEGACY" to rebase the | |
| datetime values w.r.t. the calendar difference during writing, to get maximum | |
| interoperability. Or set the config to "CORRECTED" to write the datetime | |
| values as it is, if you are sure that the written files will only be read by | |
| Spark 3.0+ or other systems that use Proleptic Gregorian calendar. | |
| at org.apache.spark.sql.errors.QueryExecutionErrors$.sparkUpgradeInWritingDatesError(QueryExecutionErrors.scala:760) | |
| at org.apache.spark.sql.execution.datasources.DataSourceUtils$.newRebaseExceptionInWrite(DataSourceUtils.scala:187) | |
| at org.apache.spark.sql.execution.datasources.DataSourceUtils$.$anonfun$createDateRebaseFuncInWrite$1(DataSourceUtils.scala:207) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$4(ParquetWriteSupport.scala:184) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$4$adapted(ParquetWriteSupport.scala:183) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$writeFields$1(ParquetWriteSupport.scala:161) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeField(ParquetWriteSupport.scala:483) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.writeFields(ParquetWriteSupport.scala:161) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$write$1(ParquetWriteSupport.scala:151) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeMessage(ParquetWriteSupport.scala:471) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:151) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:53) | |
| at org.apache.parquet.hadoop.InternalParquetRecordWriter.write(InternalParquetRecordWriter.java:138) | |
| at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:181) | |
| at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:43) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.write(ParquetOutputWriter.scala:39) | |
| at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.write(FileFormatDataWriter.scala:175) | |
| at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithMetrics(FileFormatDataWriter.scala:85) | |
| at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithIterator(FileFormatDataWriter.scala:92) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeTask$1(FileFormatWriter.scala:403) | |
| at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1397) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:410) | |
| ... 17 more | |
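The Python traceback that follows shows the failure surfacing from df.write.mode('overwrite').parquet(parquet_file) in csv_to_pyspark.py. That script is not included in this log, so the sketch below is only an assumption of how the rebase mode (and, while at it, a larger driver heap for the 95% MemoryManager warning earlier) could be applied where the SparkSession is built; the helper name, app name, and memory value are hypothetical.

from pyspark.sql import SparkSession

def build_session(app_name="csv_to_pyspark"):
    # Hypothetical session factory; not taken from the actual csv_to_pyspark.py.
    return (
        SparkSession.builder.appName(app_name)
        # Rebase pre-1582 dates / pre-1900 timestamps on write (see SPARK-31404).
        .config("spark.sql.parquet.datetimeRebaseModeInWrite", "LEGACY")
        # Analogous knob for timestamps stored as INT96, if the schema uses them.
        .config("spark.sql.parquet.int96RebaseModeInWrite", "LEGACY")
        # Illustrative heap bump for the MemoryManager warning; depending on how
        # the job is launched, this may need to go on spark-submit instead.
        .config("spark.driver.memory", "4g")
        .getOrCreate()
    )

spark = build_session()
# df = spark.read.csv(csv_file, header=True); df.write.mode("overwrite").parquet(parquet_file)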
| Traceback (most recent call last): | |
| File "/opt/pipelines/pipeline_extractor/src/csv_to_pyspark.py", line 62, in <module> | |
| main() | |
| File "/opt/pipelines/pipeline_extractor/src/csv_to_pyspark.py", line 56, in main | |
| df.write.mode('overwrite').parquet(parquet_file) | |
| File "/opt/miniconda/envs/pipeline_extractor/lib/python3.12/site-packages/pyspark/sql/readwriter.py", line 1721, in parquet | |
| self._jwrite.parquet(path) | |
| File "/opt/miniconda/envs/pipeline_extractor/lib/python3.12/site-packages/py4j/java_gateway.py", line 1322, in __call__ | |
| return_value = get_return_value( | |
| ^^^^^^^^^^^^^^^^^ | |
| File "/opt/miniconda/envs/pipeline_extractor/lib/python3.12/site-packages/pyspark/errors/exceptions/captured.py", line 179, in deco | |
| return f(*a, **kw) | |
| ^^^^^^^^^^^ | |
| File "/opt/miniconda/envs/pipeline_extractor/lib/python3.12/site-packages/py4j/protocol.py", line 326, in get_return_value | |
| raise Py4JJavaError( | |
| py4j.protocol.Py4JJavaError: An error occurred while calling o34.parquet. | |
| : org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 3.0 failed 1 times, most recent failure: Lost task 2.0 in stage 3.0 (TID 12) (ncias-d3613-v.nci.nih.gov executor driver): org.apache.spark.SparkException: [TASK_WRITE_FAILED] Task failed while writing rows to file:/data/users/nicholsenpm/airflow_extractions/BTRIS_CB_Color_Coding_09152025_115611/output/PRES_subjects_processed/PRES_subjects_processed.parquet. | |
| at org.apache.spark.sql.errors.QueryExecutionErrors$.taskFailedWhileWritingRowsError(QueryExecutionErrors.scala:775) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:420) | |
| at org.apache.spark.sql.execution.datasources.WriteFilesExec.$anonfun$doExecuteWrite$1(WriteFiles.scala:100) | |
| at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:893) | |
| at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:893) | |
| at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
| at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367) | |
| at org.apache.spark.rdd.RDD.iterator(RDD.scala:331) | |
| at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93) | |
| at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166) | |
| at org.apache.spark.scheduler.Task.run(Task.scala:141) | |
| at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:621) | |
| at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64) | |
| at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61) | |
| at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94) | |
| at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:624) | |
| at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) | |
| at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) | |
| at java.base/java.lang.Thread.run(Thread.java:829) | |
| Caused by: org.apache.spark.SparkUpgradeException: [INCONSISTENT_BEHAVIOR_CROSS_VERSION.WRITE_ANCIENT_DATETIME] You may get a different result due to the upgrading to Spark >= 3.0: | |
| writing dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z | |
| into Parquet files can be dangerous, as the files may be read by Spark 2.x | |
| or legacy versions of Hive later, which uses a legacy hybrid calendar that | |
| is different from Spark 3.0+'s Proleptic Gregorian calendar. See more | |
| details in SPARK-31404. You can set "spark.sql.parquet.datetimeRebaseModeInWrite" to "LEGACY" to rebase the | |
| datetime values w.r.t. the calendar difference during writing, to get maximum | |
| interoperability. Or set the config to "CORRECTED" to write the datetime | |
| values as it is, if you are sure that the written files will only be read by | |
| Spark 3.0+ or other systems that use Proleptic Gregorian calendar. | |
| at org.apache.spark.sql.errors.QueryExecutionErrors$.sparkUpgradeInWritingDatesError(QueryExecutionErrors.scala:760) | |
| at org.apache.spark.sql.execution.datasources.DataSourceUtils$.newRebaseExceptionInWrite(DataSourceUtils.scala:187) | |
| at org.apache.spark.sql.execution.datasources.DataSourceUtils$.$anonfun$createDateRebaseFuncInWrite$1(DataSourceUtils.scala:207) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$4(ParquetWriteSupport.scala:184) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$4$adapted(ParquetWriteSupport.scala:183) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$writeFields$1(ParquetWriteSupport.scala:161) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeField(ParquetWriteSupport.scala:483) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.writeFields(ParquetWriteSupport.scala:161) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$write$1(ParquetWriteSupport.scala:151) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeMessage(ParquetWriteSupport.scala:471) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:151) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:53) | |
| at org.apache.parquet.hadoop.InternalParquetRecordWriter.write(InternalParquetRecordWriter.java:138) | |
| at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:181) | |
| at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:43) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.write(ParquetOutputWriter.scala:39) | |
| at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.write(FileFormatDataWriter.scala:175) | |
| at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithMetrics(FileFormatDataWriter.scala:85) | |
| at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithIterator(FileFormatDataWriter.scala:92) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeTask$1(FileFormatWriter.scala:403) | |
| at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1397) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:410) | |
| ... 17 more | |
| Driver stacktrace: | |
| at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2898) | |
| at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2834) | |
| at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2833) | |
| at scala.collection.immutable.List.foreach(List.scala:333) | |
| at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2833) | |
| at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1253) | |
| at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1253) | |
| at scala.Option.foreach(Option.scala:437) | |
| at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1253) | |
| at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3102) | |
| at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3036) | |
| at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3025) | |
| at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49) | |
| at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:995) | |
| at org.apache.spark.SparkContext.runJob(SparkContext.scala:2393) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeWrite$4(FileFormatWriter.scala:307) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.writeAndCommit(FileFormatWriter.scala:271) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeWrite(FileFormatWriter.scala:304) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:190) | |
| at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:190) | |
| at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113) | |
| at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111) | |
| at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125) | |
| at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:107) | |
| at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:125) | |
| at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201) | |
| at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108) | |
| at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900) | |
| at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66) | |
| at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:107) | |
| at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98) | |
| at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:461) | |
| at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76) | |
| at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:461) | |
| at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32) | |
| at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267) | |
| at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263) | |
| at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32) | |
| at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32) | |
| at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:437) | |
| at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:98) | |
| at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:85) | |
| at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:83) | |
| at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:142) | |
| at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:869) | |
| at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:391) | |
| at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:364) | |
| at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:243) | |
| at org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:802) | |
| at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
| at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) | |
| at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
| at java.base/java.lang.reflect.Method.invoke(Method.java:566) | |
| at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) | |
| at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374) | |
| at py4j.Gateway.invoke(Gateway.java:282) | |
| at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) | |
| at py4j.commands.CallCommand.execute(CallCommand.java:79) | |
| at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182) | |
| at py4j.ClientServerConnection.run(ClientServerConnection.java:106) | |
| at java.base/java.lang.Thread.run(Thread.java:829) | |
| Caused by: org.apache.spark.SparkException: [TASK_WRITE_FAILED] Task failed while writing rows to file:/data/users/nicholsenpm/airflow_extractions/BTRIS_CB_Color_Coding_09152025_115611/output/PRES_subjects_processed/PRES_subjects_processed.parquet. | |
| at org.apache.spark.sql.errors.QueryExecutionErrors$.taskFailedWhileWritingRowsError(QueryExecutionErrors.scala:775) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:420) | |
| at org.apache.spark.sql.execution.datasources.WriteFilesExec.$anonfun$doExecuteWrite$1(WriteFiles.scala:100) | |
| at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:893) | |
| at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:893) | |
| at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
| at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367) | |
| at org.apache.spark.rdd.RDD.iterator(RDD.scala:331) | |
| at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93) | |
| at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166) | |
| at org.apache.spark.scheduler.Task.run(Task.scala:141) | |
| at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:621) | |
| at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64) | |
| at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61) | |
| at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94) | |
| at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:624) | |
| at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) | |
| at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) | |
| ... 1 more | |
| Caused by: org.apache.spark.SparkUpgradeException: [INCONSISTENT_BEHAVIOR_CROSS_VERSION.WRITE_ANCIENT_DATETIME] You may get a different result due to the upgrading to Spark >= 3.0: | |
| writing dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z | |
| into Parquet files can be dangerous, as the files may be read by Spark 2.x | |
| or legacy versions of Hive later, which uses a legacy hybrid calendar that | |
| is different from Spark 3.0+'s Proleptic Gregorian calendar. See more | |
| details in SPARK-31404. You can set "spark.sql.parquet.datetimeRebaseModeInWrite" to "LEGACY" to rebase the | |
| datetime values w.r.t. the calendar difference during writing, to get maximum | |
| interoperability. Or set the config to "CORRECTED" to write the datetime | |
| values as it is, if you are sure that the written files will only be read by | |
| Spark 3.0+ or other systems that use Proleptic Gregorian calendar. | |
| at org.apache.spark.sql.errors.QueryExecutionErrors$.sparkUpgradeInWritingDatesError(QueryExecutionErrors.scala:760) | |
| at org.apache.spark.sql.execution.datasources.DataSourceUtils$.newRebaseExceptionInWrite(DataSourceUtils.scala:187) | |
| at org.apache.spark.sql.execution.datasources.DataSourceUtils$.$anonfun$createDateRebaseFuncInWrite$1(DataSourceUtils.scala:207) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$4(ParquetWriteSupport.scala:184) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$4$adapted(ParquetWriteSupport.scala:183) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$writeFields$1(ParquetWriteSupport.scala:161) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeField(ParquetWriteSupport.scala:483) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.writeFields(ParquetWriteSupport.scala:161) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$write$1(ParquetWriteSupport.scala:151) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeMessage(ParquetWriteSupport.scala:471) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:151) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:53) | |
| at org.apache.parquet.hadoop.InternalParquetRecordWriter.write(InternalParquetRecordWriter.java:138) | |
| at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:181) | |
| at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:43) | |
| at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.write(ParquetOutputWriter.scala:39) | |
| at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.write(FileFormatDataWriter.scala:175) | |
| at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithMetrics(FileFormatDataWriter.scala:85) | |
| at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithIterator(FileFormatDataWriter.scala:92) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeTask$1(FileFormatWriter.scala:403) | |
| at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1397) | |
| at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:410) | |
| ... 17 more |