I/O error in saveAsTable
// This code is pasted into spark-shell
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.SaveMode
val ctx = sqlContext.asInstanceOf[HiveContext]
import ctx.implicits._
val devRoot = "/home/ubuntu/spx"
ctx.
  jsonFile("file://" + devRoot + "/data/swoop-ml-nlp/dimensions/component_variations.jsonlines").
  saveAsTable("dimension_components", SaveMode.Overwrite)
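The shell output below reports two deprecation warnings for this snippet (jsonFile and the two-argument saveAsTable). For reference, here is a sketch of the same write via the Spark 1.4 DataFrameReader/DataFrameWriter API; the table name and path are taken from the snippet above, and switching APIs does not by itself change where the managed table is written, which is what the Mkdirs failure below is about.

// Sketch only, not part of the original repro: the same save expressed with the
// non-deprecated reader/writer API available in Spark 1.4.
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.hive.HiveContext

val ctx = sqlContext.asInstanceOf[HiveContext]
val devRoot = "/home/ubuntu/spx"

val df = ctx.read.json("file://" + devRoot + "/data/swoop-ml-nlp/dimensions/component_variations.jsonlines")

// A managed table still lands under the Hive warehouse directory
// (file:/user/hive/warehouse in this setup), so this alone does not avoid the error.
df.write.mode(SaveMode.Overwrite).saveAsTable("dimension_components")

// Workaround sketch: write Parquet to an explicitly writable location instead of
// the default warehouse path; "/data/spark/tables" is an assumed, hypothetical dir.
df.write.mode(SaveMode.Overwrite).parquet("file:///data/spark/tables/dimension_components")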
15/07/25 04:53:08 ERROR InsertIntoHadoopFsRelation: Aborting task.
java.io.IOException: Mkdirs failed to create file:/user/hive/warehouse/dimension_components/_temporary/0/_temporary/attempt_201507250453_0001_m_000001_0 (exists=false, cwd=file:/home/ubuntu)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:442)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:428)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:908)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:889)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:786)
at parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:154)
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:279)
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:252)
at org.apache.spark.sql.parquet.ParquetOutputWriter.<init>(newParquet.scala:83)
at org.apache.spark.sql.parquet.ParquetRelation2$$anon$4.newInstance(newParquet.scala:229)
at org.apache.spark.sql.sources.DefaultWriterContainer.initWriters(commands.scala:470)
at org.apache.spark.sql.sources.BaseWriterContainer.executorSideSetup(commands.scala:360)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:172)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
at org.apache.spark.scheduler.Task.run(Task.scala:70)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
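The failing path uses the file: scheme and the reported working directory is file:/home/ubuntu, which suggests the save resolved to the local filesystem's default Hive warehouse location (/user/hive/warehouse) rather than HDFS, and the ubuntu user cannot create that directory. A minimal diagnostic sketch, assuming the same spark-shell session (sc and ctx as above):

// Diagnostic sketch (assumption, not from the original gist): check which
// filesystem the shell writes to by default and whether the warehouse dir exists.
import org.apache.hadoop.fs.{FileSystem, Path}

val fs = FileSystem.get(sc.hadoopConfiguration)
println(fs.getUri)                                   // file:/// when no HDFS is configured
println(fs.exists(new Path("/user/hive/warehouse"))) // Hive's default warehouse location

If that directory does not exist and cannot be created by the ubuntu user, pre-creating it with appropriate ownership, or pointing hive.metastore.warehouse.dir at a writable location, are the obvious things to try. The full spark-shell transcript follows.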
ubuntu@ip-10-88-50-23:~$ ~/spark/bin/spark-shell --packages com.databricks:spark-csv_2.10:1.0.3 --driver-memory 52g --conf "spark.driver.extraJavaOptions=-XX:MaxPermSize=512m" --conf "spark.local.dir=/data/spark/tmp"
Ivy Default Cache set to: /home/ubuntu/.ivy2/cache
The jars for the packages stored in: /home/ubuntu/.ivy2/jars
:: loading settings :: url = jar:file:/home/ubuntu/spark-1.4.1-bin-hadoop2.6/lib/spark-assembly-1.4.1-hadoop2.6.0.jar!/org/apache/ivy/core/settings/ivysettings.xml
com.databricks#spark-csv_2.10 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent;1.0
confs: [default]
found com.databricks#spark-csv_2.10;1.0.3 in central
found org.apache.commons#commons-csv;1.1 in central
:: resolution report :: resolve 213ms :: artifacts dl 6ms
:: modules in use:
com.databricks#spark-csv_2.10;1.0.3 from central in [default]
org.apache.commons#commons-csv;1.1 from central in [default]
---------------------------------------------------------------------
| | modules || artifacts |
| conf | number| search|dwnlded|evicted|| number|dwnlded|
---------------------------------------------------------------------
| default | 2 | 0 | 0 | 0 || 2 | 0 |
---------------------------------------------------------------------
:: retrieving :: org.apache.spark#spark-submit-parent
confs: [default]
0 artifacts copied, 2 already retrieved (0kB/6ms)
log4j:WARN No appenders could be found for logger (org.apache.hadoop.metrics2.lib.MutableMetricsFactory).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
15/07/25 04:52:37 INFO SecurityManager: Changing view acls to: ubuntu
15/07/25 04:52:37 INFO SecurityManager: Changing modify acls to: ubuntu
15/07/25 04:52:37 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(ubuntu); users with modify permissions: Set(ubuntu)
15/07/25 04:52:38 INFO HttpServer: Starting HTTP Server
15/07/25 04:52:38 INFO Utils: Successfully started service 'HTTP class server' on port 41293.
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/ '_/
   /___/ .__/\_,_/_/ /_/\_\   version 1.4.1
      /_/

Using Scala version 2.10.4 (OpenJDK 64-Bit Server VM, Java 1.7.0_79)
Type in expressions to have them evaluated.
Type :help for more information.
15/07/25 04:52:41 WARN Utils: Your hostname, ip-10-88-50-23 resolves to a loopback address: 127.0.0.1; using 10.88.50.23 instead (on interface eth0)
15/07/25 04:52:41 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
15/07/25 04:52:41 INFO SparkContext: Running Spark version 1.4.1
15/07/25 04:52:41 WARN SparkConf: In Spark 1.0 and later spark.local.dir will be overridden by the value set by the cluster manager (via SPARK_LOCAL_DIRS in mesos/standalone and LOCAL_DIRS in YARN).
15/07/25 04:52:41 INFO SecurityManager: Changing view acls to: ubuntu
15/07/25 04:52:41 INFO SecurityManager: Changing modify acls to: ubuntu
15/07/25 04:52:41 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(ubuntu); users with modify permissions: Set(ubuntu)
15/07/25 04:52:41 INFO Slf4jLogger: Slf4jLogger started
15/07/25 04:52:41 INFO Remoting: Starting remoting
15/07/25 04:52:41 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://[email protected]:48209]
15/07/25 04:52:41 INFO Utils: Successfully started service 'sparkDriver' on port 48209.
15/07/25 04:52:41 INFO SparkEnv: Registering MapOutputTracker
15/07/25 04:52:41 INFO SparkEnv: Registering BlockManagerMaster
15/07/25 04:52:41 INFO DiskBlockManager: Created local directory at /data/spark/tmp/spark-c63fc2de-44ae-4c4f-9519-7048e03eb139/blockmgr-7f0fab04-b3c9-41a9-a1ff-cac6a51f1064
15/07/25 04:52:41 INFO MemoryStore: MemoryStore started with capacity 26.9 GB
15/07/25 04:52:41 INFO HttpFileServer: HTTP File server directory is /data/spark/tmp/spark-c63fc2de-44ae-4c4f-9519-7048e03eb139/httpd-8dce885d-7dd5-4148-81e2-fc80c885311e
15/07/25 04:52:41 INFO HttpServer: Starting HTTP Server
15/07/25 04:52:41 INFO Utils: Successfully started service 'HTTP file server' on port 49127.
15/07/25 04:52:41 INFO SparkEnv: Registering OutputCommitCoordinator
15/07/25 04:52:41 INFO Utils: Successfully started service 'SparkUI' on port 4040.
15/07/25 04:52:41 INFO SparkUI: Started SparkUI at http://10.88.50.23:4040
15/07/25 04:52:41 INFO SparkContext: Added JAR file:/home/ubuntu/.ivy2/jars/com.databricks_spark-csv_2.10-1.0.3.jar at http://10.88.50.23:49127/jars/com.databricks_spark-csv_2.10-1.0.3.jar with timestamp 1437799961945
15/07/25 04:52:41 INFO SparkContext: Added JAR file:/home/ubuntu/.ivy2/jars/org.apache.commons_commons-csv-1.1.jar at http://10.88.50.23:49127/jars/org.apache.commons_commons-csv-1.1.jar with timestamp 1437799961945
15/07/25 04:52:41 INFO Executor: Starting executor ID driver on host localhost
15/07/25 04:52:41 INFO Executor: Using REPL class URI: http://10.88.50.23:41293
15/07/25 04:52:42 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 53047.
15/07/25 04:52:42 INFO NettyBlockTransferService: Server created on 53047
15/07/25 04:52:42 INFO BlockManagerMaster: Trying to register BlockManager
15/07/25 04:52:42 INFO BlockManagerMasterEndpoint: Registering block manager localhost:53047 with 26.9 GB RAM, BlockManagerId(driver, localhost, 53047)
15/07/25 04:52:42 INFO BlockManagerMaster: Registered BlockManager
15/07/25 04:52:42 INFO SparkILoop: Created spark context..
Spark context available as sc.
15/07/25 04:52:42 INFO HiveContext: Initializing execution hive, version 0.13.1
15/07/25 04:52:42 INFO HiveMetaStore: 0: Opening raw store with implemenation class:org.apache.hadoop.hive.metastore.ObjectStore
15/07/25 04:52:42 INFO ObjectStore: ObjectStore, initialize called
15/07/25 04:52:42 WARN General: Plugin (Bundle) "org.datanucleus.store.rdbms" is already registered. Ensure you dont have multiple JAR versions of the same plugin in the classpath. The URL "file:/home/ubuntu/spark/lib/datanucleus-rdbms-3.2.9.jar" is already registered, and you are trying to register an identical plugin located at URL "file:/home/ubuntu/spark-1.4.1-bin-hadoop2.6/lib/datanucleus-rdbms-3.2.9.jar."
15/07/25 04:52:42 WARN General: Plugin (Bundle) "org.datanucleus" is already registered. Ensure you dont have multiple JAR versions of the same plugin in the classpath. The URL "file:/home/ubuntu/spark/lib/datanucleus-core-3.2.10.jar" is already registered, and you are trying to register an identical plugin located at URL "file:/home/ubuntu/spark-1.4.1-bin-hadoop2.6/lib/datanucleus-core-3.2.10.jar."
15/07/25 04:52:43 WARN General: Plugin (Bundle) "org.datanucleus.api.jdo" is already registered. Ensure you dont have multiple JAR versions of the same plugin in the classpath. The URL "file:/home/ubuntu/spark/lib/datanucleus-api-jdo-3.2.6.jar" is already registered, and you are trying to register an identical plugin located at URL "file:/home/ubuntu/spark-1.4.1-bin-hadoop2.6/lib/datanucleus-api-jdo-3.2.6.jar."
15/07/25 04:52:43 INFO Persistence: Property datanucleus.cache.level2 unknown - will be ignored
15/07/25 04:52:43 INFO Persistence: Property hive.metastore.integral.jdo.pushdown unknown - will be ignored
15/07/25 04:52:43 WARN Connection: BoneCP specified but not present in CLASSPATH (or one of dependencies)
15/07/25 04:52:43 WARN Connection: BoneCP specified but not present in CLASSPATH (or one of dependencies)
15/07/25 04:52:44 INFO ObjectStore: Setting MetaStore object pin classes with hive.metastore.cache.pinobjtypes="Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order"
15/07/25 04:52:44 INFO MetaStoreDirectSql: MySQL check failed, assuming we are not on mysql: Lexical error at line 1, column 5. Encountered: "@" (64), after : "".
15/07/25 04:52:45 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table.
15/07/25 04:52:45 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table.
15/07/25 04:52:46 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table.
15/07/25 04:52:46 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table.
15/07/25 04:52:46 INFO ObjectStore: Initialized ObjectStore
15/07/25 04:52:46 WARN ObjectStore: Version information not found in metastore. hive.metastore.schema.verification is not enabled so recording the schema version 0.13.1aa
15/07/25 04:52:47 INFO HiveMetaStore: Added admin role in metastore
15/07/25 04:52:47 INFO HiveMetaStore: Added public role in metastore
15/07/25 04:52:47 INFO HiveMetaStore: No user is added in admin role, since config is empty
15/07/25 04:52:47 INFO SessionState: No Tez session required at this point. hive.execution.engine=mr.
15/07/25 04:52:47 INFO SparkILoop: Created sql context (with Hive support)..
SQL context available as sqlContext.
scala> import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.hive.HiveContext
scala> import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.SaveMode
scala>
scala> val ctx = sqlContext.asInstanceOf[HiveContext]
ctx: org.apache.spark.sql.hive.HiveContext = org.apache.spark.sql.hive.HiveContext@7ff31e2e
scala> import ctx.implicits._
import ctx.implicits._
scala>
scala> //val devRoot = "/Users/sim/dev/spx"
scala> val devRoot = "/home/ubuntu/spx"
devRoot: String = /home/ubuntu/spx
scala>
scala> ctx.
| jsonFile("file://" + devRoot + "/data/swoop-ml-nlp/dimensions/component_variations.jsonlines").
| saveAsTable("dimension_components", SaveMode.Overwrite)
warning: there were 2 deprecation warning(s); re-run with -deprecation for details
15/07/25 04:53:04 INFO MemoryStore: ensureFreeSpace(112568) called with curMem=0, maxMem=28894769971
15/07/25 04:53:04 INFO MemoryStore: Block broadcast_0 stored as values in memory (estimated size 109.9 KB, free 26.9 GB)
15/07/25 04:53:04 INFO MemoryStore: ensureFreeSpace(19865) called with curMem=112568, maxMem=28894769971
15/07/25 04:53:04 INFO MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 19.4 KB, free 26.9 GB)
15/07/25 04:53:04 INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on localhost:53047 (size: 19.4 KB, free: 26.9 GB)
15/07/25 04:53:04 INFO SparkContext: Created broadcast 0 from jsonFile at <console>:30
15/07/25 04:53:04 INFO FileInputFormat: Total input paths to process : 1
15/07/25 04:53:04 INFO SparkContext: Starting job: jsonFile at <console>:30
15/07/25 04:53:04 INFO DAGScheduler: Got job 0 (jsonFile at <console>:30) with 2 output partitions (allowLocal=false)
15/07/25 04:53:04 INFO DAGScheduler: Final stage: ResultStage 0(jsonFile at <console>:30)
15/07/25 04:53:04 INFO DAGScheduler: Parents of final stage: List()
15/07/25 04:53:04 INFO DAGScheduler: Missing parents: List()
15/07/25 04:53:04 INFO DAGScheduler: Submitting ResultStage 0 (MapPartitionsRDD[3] at jsonFile at <console>:30), which has no missing parents
15/07/25 04:53:04 INFO MemoryStore: ensureFreeSpace(4392) called with curMem=132433, maxMem=28894769971
15/07/25 04:53:04 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 4.3 KB, free 26.9 GB)
15/07/25 04:53:04 INFO MemoryStore: ensureFreeSpace(2444) called with curMem=136825, maxMem=28894769971
15/07/25 04:53:04 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 2.4 KB, free 26.9 GB)
15/07/25 04:53:04 INFO BlockManagerInfo: Added broadcast_1_piece0 in memory on localhost:53047 (size: 2.4 KB, free: 26.9 GB)
15/07/25 04:53:04 INFO SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:874
15/07/25 04:53:04 INFO DAGScheduler: Submitting 2 missing tasks from ResultStage 0 (MapPartitionsRDD[3] at jsonFile at <console>:30)
15/07/25 04:53:04 INFO TaskSchedulerImpl: Adding task set 0.0 with 2 tasks
15/07/25 04:53:04 INFO TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0, localhost, PROCESS_LOCAL, 1611 bytes)
15/07/25 04:53:04 INFO TaskSetManager: Starting task 1.0 in stage 0.0 (TID 1, localhost, PROCESS_LOCAL, 1611 bytes)
15/07/25 04:53:04 INFO Executor: Running task 0.0 in stage 0.0 (TID 0)
15/07/25 04:53:04 INFO Executor: Running task 1.0 in stage 0.0 (TID 1)
15/07/25 04:53:04 INFO Executor: Fetching http://10.88.50.23:49127/jars/org.apache.commons_commons-csv-1.1.jar with timestamp 1437799961945
15/07/25 04:53:04 INFO Utils: Fetching http://10.88.50.23:49127/jars/org.apache.commons_commons-csv-1.1.jar to /data/spark/tmp/spark-c63fc2de-44ae-4c4f-9519-7048e03eb139/userFiles-dac9e915-81f5-4514-b895-a89c2b6f9dd9/fetchFileTemp1827183294098735604.tmp
15/07/25 04:53:04 INFO Executor: Adding file:/data/spark/tmp/spark-c63fc2de-44ae-4c4f-9519-7048e03eb139/userFiles-dac9e915-81f5-4514-b895-a89c2b6f9dd9/org.apache.commons_commons-csv-1.1.jar to class loader
15/07/25 04:53:04 INFO Executor: Fetching http://10.88.50.23:49127/jars/com.databricks_spark-csv_2.10-1.0.3.jar with timestamp 1437799961945
15/07/25 04:53:04 INFO Utils: Fetching http://10.88.50.23:49127/jars/com.databricks_spark-csv_2.10-1.0.3.jar to /data/spark/tmp/spark-c63fc2de-44ae-4c4f-9519-7048e03eb139/userFiles-dac9e915-81f5-4514-b895-a89c2b6f9dd9/fetchFileTemp5532151215374070438.tmp
15/07/25 04:53:04 INFO Executor: Adding file:/data/spark/tmp/spark-c63fc2de-44ae-4c4f-9519-7048e03eb139/userFiles-dac9e915-81f5-4514-b895-a89c2b6f9dd9/com.databricks_spark-csv_2.10-1.0.3.jar to class loader
15/07/25 04:53:04 INFO HadoopRDD: Input split: file:/home/ubuntu/spx/data/swoop-ml-nlp/dimensions/component_variations.jsonlines:24834+24834
15/07/25 04:53:04 INFO HadoopRDD: Input split: file:/home/ubuntu/spx/data/swoop-ml-nlp/dimensions/component_variations.jsonlines:0+24834
15/07/25 04:53:04 INFO deprecation: mapred.tip.id is deprecated. Instead, use mapreduce.task.id
15/07/25 04:53:04 INFO deprecation: mapred.task.id is deprecated. Instead, use mapreduce.task.attempt.id
15/07/25 04:53:04 INFO deprecation: mapred.task.is.map is deprecated. Instead, use mapreduce.task.ismap
15/07/25 04:53:04 INFO deprecation: mapred.task.partition is deprecated. Instead, use mapreduce.task.partition
15/07/25 04:53:04 INFO deprecation: mapred.job.id is deprecated. Instead, use mapreduce.job.id
15/07/25 04:53:04 INFO Executor: Finished task 0.0 in stage 0.0 (TID 0). 3637 bytes result sent to driver
15/07/25 04:53:04 INFO Executor: Finished task 1.0 in stage 0.0 (TID 1). 3664 bytes result sent to driver
15/07/25 04:53:04 INFO TaskSetManager: Finished task 1.0 in stage 0.0 (TID 1) in 214 ms on localhost (1/2)
15/07/25 04:53:04 INFO TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 223 ms on localhost (2/2)
15/07/25 04:53:04 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool
15/07/25 04:53:04 INFO DAGScheduler: ResultStage 0 (jsonFile at <console>:30) finished in 0.231 s
15/07/25 04:53:04 INFO DAGScheduler: Job 0 finished: jsonFile at <console>:30, took 0.273572 s
15/07/25 04:53:04 INFO HiveContext: Initializing HiveMetastoreConnection version 0.13.1 using Spark classes.
15/07/25 04:53:05 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
15/07/25 04:53:05 INFO HiveMetaStore: 0: Opening raw store with implemenation class:org.apache.hadoop.hive.metastore.ObjectStore
15/07/25 04:53:05 INFO ObjectStore: ObjectStore, initialize called
15/07/25 04:53:05 WARN General: Plugin (Bundle) "org.datanucleus.store.rdbms" is already registered. Ensure you dont have multiple JAR versions of the same plugin in the classpath. The URL "file:/home/ubuntu/spark/lib/datanucleus-rdbms-3.2.9.jar" is already registered, and you are trying to register an identical plugin located at URL "file:/home/ubuntu/spark-1.4.1-bin-hadoop2.6/lib/datanucleus-rdbms-3.2.9.jar."
15/07/25 04:53:05 WARN General: Plugin (Bundle) "org.datanucleus" is already registered. Ensure you dont have multiple JAR versions of the same plugin in the classpath. The URL "file:/home/ubuntu/spark/lib/datanucleus-core-3.2.10.jar" is already registered, and you are trying to register an identical plugin located at URL "file:/home/ubuntu/spark-1.4.1-bin-hadoop2.6/lib/datanucleus-core-3.2.10.jar."
15/07/25 04:53:05 WARN General: Plugin (Bundle) "org.datanucleus.api.jdo" is already registered. Ensure you dont have multiple JAR versions of the same plugin in the classpath. The URL "file:/home/ubuntu/spark/lib/datanucleus-api-jdo-3.2.6.jar" is already registered, and you are trying to register an identical plugin located at URL "file:/home/ubuntu/spark-1.4.1-bin-hadoop2.6/lib/datanucleus-api-jdo-3.2.6.jar."
15/07/25 04:53:05 INFO Persistence: Property datanucleus.cache.level2 unknown - will be ignored
15/07/25 04:53:05 INFO Persistence: Property hive.metastore.integral.jdo.pushdown unknown - will be ignored
15/07/25 04:53:05 WARN Connection: BoneCP specified but not present in CLASSPATH (or one of dependencies)
15/07/25 04:53:05 WARN Connection: BoneCP specified but not present in CLASSPATH (or one of dependencies)
15/07/25 04:53:06 INFO ObjectStore: Setting MetaStore object pin classes with hive.metastore.cache.pinobjtypes="Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order"
15/07/25 04:53:06 INFO MetaStoreDirectSql: MySQL check failed, assuming we are not on mysql: Lexical error at line 1, column 5. Encountered: "@" (64), after : "".
15/07/25 04:53:06 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table.
15/07/25 04:53:06 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table.
15/07/25 04:53:07 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table.
15/07/25 04:53:07 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table.
15/07/25 04:53:07 INFO Query: Reading in results for query "org.datanucleus.store.rdbms.query.SQLQuery@0" since the connection used is closing
15/07/25 04:53:07 INFO ObjectStore: Initialized ObjectStore
15/07/25 04:53:07 INFO HiveMetaStore: Added admin role in metastore
15/07/25 04:53:07 INFO HiveMetaStore: Added public role in metastore
15/07/25 04:53:07 INFO HiveMetaStore: No user is added in admin role, since config is empty
15/07/25 04:53:07 INFO SessionState: No Tez session required at this point. hive.execution.engine=mr.
15/07/25 04:53:07 INFO HiveMetaStore: 0: get_table : db=default tbl=dimension_components
15/07/25 04:53:07 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=dimension_components
15/07/25 04:53:07 INFO HiveMetaStore: 0: get_table : db=default tbl=dimension_components
15/07/25 04:53:07 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=dimension_components
15/07/25 04:53:07 INFO HiveMetaStore: 0: get_database: default
15/07/25 04:53:07 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_database: default
15/07/25 04:53:07 INFO HiveMetaStore: 0: get_table : db=default tbl=dimension_components
15/07/25 04:53:07 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=dimension_components
15/07/25 04:53:08 INFO MemoryStore: ensureFreeSpace(294808) called with curMem=139269, maxMem=28894769971
15/07/25 04:53:08 INFO MemoryStore: Block broadcast_2 stored as values in memory (estimated size 287.9 KB, free 26.9 GB)
15/07/25 04:53:08 INFO MemoryStore: ensureFreeSpace(19865) called with curMem=434077, maxMem=28894769971
15/07/25 04:53:08 INFO MemoryStore: Block broadcast_2_piece0 stored as bytes in memory (estimated size 19.4 KB, free 26.9 GB)
15/07/25 04:53:08 INFO BlockManagerInfo: Added broadcast_2_piece0 in memory on localhost:53047 (size: 19.4 KB, free: 26.9 GB)
15/07/25 04:53:08 INFO SparkContext: Created broadcast 2 from saveAsTable at <console>:31
15/07/25 04:53:08 INFO ParquetRelation2: Using default output committer for Parquet: parquet.hadoop.ParquetOutputCommitter
15/07/25 04:53:08 INFO DefaultWriterContainer: Using user defined output committer class parquet.hadoop.ParquetOutputCommitter
15/07/25 04:53:08 ERROR FileOutputCommitter: Mkdirs failed to create file:/user/hive/warehouse/dimension_components/_temporary/0
15/07/25 04:53:08 INFO FileInputFormat: Total input paths to process : 1
15/07/25 04:53:08 INFO SparkContext: Starting job: saveAsTable at <console>:31
15/07/25 04:53:08 INFO DAGScheduler: Got job 1 (saveAsTable at <console>:31) with 2 output partitions (allowLocal=false)
15/07/25 04:53:08 INFO DAGScheduler: Final stage: ResultStage 1(saveAsTable at <console>:31)
15/07/25 04:53:08 INFO DAGScheduler: Parents of final stage: List()
15/07/25 04:53:08 INFO DAGScheduler: Missing parents: List()
15/07/25 04:53:08 INFO DAGScheduler: Submitting ResultStage 1 (MapPartitionsRDD[6] at saveAsTable at <console>:31), which has no missing parents
15/07/25 04:53:08 INFO MemoryStore: ensureFreeSpace(72024) called with curMem=453942, maxMem=28894769971
15/07/25 04:53:08 INFO MemoryStore: Block broadcast_3 stored as values in memory (estimated size 70.3 KB, free 26.9 GB)
15/07/25 04:53:08 INFO MemoryStore: ensureFreeSpace(25656) called with curMem=525966, maxMem=28894769971
15/07/25 04:53:08 INFO MemoryStore: Block broadcast_3_piece0 stored as bytes in memory (estimated size 25.1 KB, free 26.9 GB)
15/07/25 04:53:08 INFO BlockManagerInfo: Added broadcast_3_piece0 in memory on localhost:53047 (size: 25.1 KB, free: 26.9 GB)
15/07/25 04:53:08 INFO SparkContext: Created broadcast 3 from broadcast at DAGScheduler.scala:874
15/07/25 04:53:08 INFO DAGScheduler: Submitting 2 missing tasks from ResultStage 1 (MapPartitionsRDD[6] at saveAsTable at <console>:31)
15/07/25 04:53:08 INFO TaskSchedulerImpl: Adding task set 1.0 with 2 tasks
15/07/25 04:53:08 INFO TaskSetManager: Starting task 0.0 in stage 1.0 (TID 2, localhost, PROCESS_LOCAL, 1611 bytes)
15/07/25 04:53:08 INFO TaskSetManager: Starting task 1.0 in stage 1.0 (TID 3, localhost, PROCESS_LOCAL, 1611 bytes)
15/07/25 04:53:08 INFO Executor: Running task 0.0 in stage 1.0 (TID 2)
15/07/25 04:53:08 INFO Executor: Running task 1.0 in stage 1.0 (TID 3)
15/07/25 04:53:08 INFO HadoopRDD: Input split: file:/home/ubuntu/spx/data/swoop-ml-nlp/dimensions/component_variations.jsonlines:0+24834
15/07/25 04:53:08 INFO HadoopRDD: Input split: file:/home/ubuntu/spx/data/swoop-ml-nlp/dimensions/component_variations.jsonlines:24834+24834
15/07/25 04:53:08 INFO DefaultWriterContainer: Using user defined output committer class parquet.hadoop.ParquetOutputCommitter
15/07/25 04:53:08 INFO DefaultWriterContainer: Using user defined output committer class parquet.hadoop.ParquetOutputCommitter
15/07/25 04:53:08 ERROR InsertIntoHadoopFsRelation: Aborting task.
java.io.IOException: Mkdirs failed to create file:/user/hive/warehouse/dimension_components/_temporary/0/_temporary/attempt_201507250453_0001_m_000001_0 (exists=false, cwd=file:/home/ubuntu)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:442)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:428)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:908)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:889)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:786)
at parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:154)
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:279)
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:252)
at org.apache.spark.sql.parquet.ParquetOutputWriter.<init>(newParquet.scala:83)
at org.apache.spark.sql.parquet.ParquetRelation2$$anon$4.newInstance(newParquet.scala:229)
at org.apache.spark.sql.sources.DefaultWriterContainer.initWriters(commands.scala:470)
at org.apache.spark.sql.sources.BaseWriterContainer.executorSideSetup(commands.scala:360)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:172)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
at org.apache.spark.scheduler.Task.run(Task.scala:70)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/07/25 04:53:08 ERROR InsertIntoHadoopFsRelation: Aborting task.
java.io.IOException: Mkdirs failed to create file:/user/hive/warehouse/dimension_components/_temporary/0/_temporary/attempt_201507250453_0001_m_000000_0 (exists=false, cwd=file:/home/ubuntu)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:442)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:428)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:908)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:889)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:786)
at parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:154)
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:279)
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:252)
at org.apache.spark.sql.parquet.ParquetOutputWriter.<init>(newParquet.scala:83)
at org.apache.spark.sql.parquet.ParquetRelation2$$anon$4.newInstance(newParquet.scala:229)
at org.apache.spark.sql.sources.DefaultWriterContainer.initWriters(commands.scala:470)
at org.apache.spark.sql.sources.BaseWriterContainer.executorSideSetup(commands.scala:360)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:172)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
at org.apache.spark.scheduler.Task.run(Task.scala:70)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/07/25 04:53:08 WARN FileOutputCommitter: Could not delete file:/user/hive/warehouse/dimension_components/_temporary/0/_temporary/attempt_201507250453_0001_m_000001_0
15/07/25 04:53:08 WARN FileOutputCommitter: Could not delete file:/user/hive/warehouse/dimension_components/_temporary/0/_temporary/attempt_201507250453_0001_m_000000_0
15/07/25 04:53:08 ERROR DefaultWriterContainer: Task attempt attempt_201507250453_0001_m_000000_0 aborted.
15/07/25 04:53:08 ERROR DefaultWriterContainer: Task attempt attempt_201507250453_0001_m_000001_0 aborted.
15/07/25 04:53:08 ERROR Executor: Exception in task 0.0 in stage 1.0 (TID 2)
org.apache.spark.SparkException: Task failed while writing rows.
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:191)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
at org.apache.spark.scheduler.Task.run(Task.scala:70)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.io.IOException: Mkdirs failed to create file:/user/hive/warehouse/dimension_components/_temporary/0/_temporary/attempt_201507250453_0001_m_000000_0 (exists=false, cwd=file:/home/ubuntu)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:442)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:428)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:908)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:889)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:786)
at parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:154)
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:279)
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:252)
at org.apache.spark.sql.parquet.ParquetOutputWriter.<init>(newParquet.scala:83)
at org.apache.spark.sql.parquet.ParquetRelation2$$anon$4.newInstance(newParquet.scala:229)
at org.apache.spark.sql.sources.DefaultWriterContainer.initWriters(commands.scala:470)
at org.apache.spark.sql.sources.BaseWriterContainer.executorSideSetup(commands.scala:360)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:172)
... 8 more
15/07/25 04:53:08 ERROR Executor: Exception in task 1.0 in stage 1.0 (TID 3)
org.apache.spark.SparkException: Task failed while writing rows.
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:191)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
at org.apache.spark.scheduler.Task.run(Task.scala:70)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.io.IOException: Mkdirs failed to create file:/user/hive/warehouse/dimension_components/_temporary/0/_temporary/attempt_201507250453_0001_m_000001_0 (exists=false, cwd=file:/home/ubuntu)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:442)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:428)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:908)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:889)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:786)
at parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:154)
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:279)
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:252)
at org.apache.spark.sql.parquet.ParquetOutputWriter.<init>(newParquet.scala:83)
at org.apache.spark.sql.parquet.ParquetRelation2$$anon$4.newInstance(newParquet.scala:229)
at org.apache.spark.sql.sources.DefaultWriterContainer.initWriters(commands.scala:470)
at org.apache.spark.sql.sources.BaseWriterContainer.executorSideSetup(commands.scala:360)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:172)
... 8 more
15/07/25 04:53:08 WARN TaskSetManager: Lost task 0.0 in stage 1.0 (TID 2, localhost): org.apache.spark.SparkException: Task failed while writing rows.
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:191)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
at org.apache.spark.scheduler.Task.run(Task.scala:70)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.io.IOException: Mkdirs failed to create file:/user/hive/warehouse/dimension_components/_temporary/0/_temporary/attempt_201507250453_0001_m_000000_0 (exists=false, cwd=file:/home/ubuntu)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:442)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:428)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:908)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:889)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:786)
at parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:154)
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:279)
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:252)
at org.apache.spark.sql.parquet.ParquetOutputWriter.<init>(newParquet.scala:83)
at org.apache.spark.sql.parquet.ParquetRelation2$$anon$4.newInstance(newParquet.scala:229)
at org.apache.spark.sql.sources.DefaultWriterContainer.initWriters(commands.scala:470)
at org.apache.spark.sql.sources.BaseWriterContainer.executorSideSetup(commands.scala:360)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:172)
... 8 more
15/07/25 04:53:08 ERROR TaskSetManager: Task 0 in stage 1.0 failed 1 times; aborting job
15/07/25 04:53:08 INFO TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool
15/07/25 04:53:08 INFO TaskSetManager: Lost task 1.0 in stage 1.0 (TID 3) on executor localhost: org.apache.spark.SparkException (Task failed while writing rows.) [duplicate 1]
15/07/25 04:53:08 INFO TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool
15/07/25 04:53:08 INFO TaskSchedulerImpl: Cancelling stage 1
15/07/25 04:53:08 INFO DAGScheduler: ResultStage 1 (saveAsTable at <console>:31) failed in 0.103 s
15/07/25 04:53:08 INFO DAGScheduler: Job 1 failed: saveAsTable at <console>:31, took 0.134176 s
15/07/25 04:53:08 ERROR InsertIntoHadoopFsRelation: Aborting job.
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 1 times, most recent failure: Lost task 0.0 in stage 1.0 (TID 2, localhost): org.apache.spark.SparkException: Task failed while writing rows.
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:191)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
at org.apache.spark.scheduler.Task.run(Task.scala:70)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.io.IOException: Mkdirs failed to create file:/user/hive/warehouse/dimension_components/_temporary/0/_temporary/attempt_201507250453_0001_m_000000_0 (exists=false, cwd=file:/home/ubuntu)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:442)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:428)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:908)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:889)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:786)
at parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:154)
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:279)
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:252)
at org.apache.spark.sql.parquet.ParquetOutputWriter.<init>(newParquet.scala:83)
at org.apache.spark.sql.parquet.ParquetRelation2$$anon$4.newInstance(newParquet.scala:229)
at org.apache.spark.sql.sources.DefaultWriterContainer.initWriters(commands.scala:470)
at org.apache.spark.sql.sources.BaseWriterContainer.executorSideSetup(commands.scala:360)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:172)
... 8 more
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1273)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1264)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1263)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1263)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:730)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1457)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1418)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
15/07/25 04:53:08 ERROR DefaultWriterContainer: Job job_201507250453_0000 aborted.
org.apache.spark.SparkException: Job aborted.
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.insert(commands.scala:166)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.run(commands.scala:139)
at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:57)
at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:57)
at org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:68)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:88)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:88)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:87)
at org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:950)
at org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:950)
at org.apache.spark.sql.sources.ResolvedDataSource$.apply(ddl.scala:336)
at org.apache.spark.sql.hive.execution.CreateMetastoreDataSourceAsSelect.run(commands.scala:245)
at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:57)
at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:57)
at org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:68)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:88)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:88)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:87)
at org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:950)
at org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:950)
at org.apache.spark.sql.DataFrameWriter.saveAsTable(DataFrameWriter.scala:211)
at org.apache.spark.sql.DataFrame.saveAsTable(DataFrame.scala:1550)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:31)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:36)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:38)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:40)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:42)
at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:44)
at $iwC$$iwC$$iwC$$iwC.<init>(<console>:46)
at $iwC$$iwC$$iwC.<init>(<console>:48)
at $iwC$$iwC.<init>(<console>:50)
at $iwC.<init>(<console>:52)
at <init>(<console>:54)
at .<init>(<console>:58)
at .<clinit>(<console>)
at .<init>(<console>:7)
at .<clinit>(<console>)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1338)
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:875)
at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:875)
at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:665)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:170)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:193)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:112)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 1 times, most recent failure: Lost task 0.0 in stage 1.0 (TID 2, localhost): org.apache.spark.SparkException: Task failed while writing rows.
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:191)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
at org.apache.spark.scheduler.Task.run(Task.scala:70)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.io.IOException: Mkdirs failed to create file:/user/hive/warehouse/dimension_components/_temporary/0/_temporary/attempt_201507250453_0001_m_000000_0 (exists=false, cwd=file:/home/ubuntu)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:442)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:428)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:908)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:889)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:786)
at parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:154)
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:279)
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:252)
at org.apache.spark.sql.parquet.ParquetOutputWriter.<init>(newParquet.scala:83)
at org.apache.spark.sql.parquet.ParquetRelation2$$anon$4.newInstance(newParquet.scala:229)
at org.apache.spark.sql.sources.DefaultWriterContainer.initWriters(commands.scala:470)
at org.apache.spark.sql.sources.BaseWriterContainer.executorSideSetup(commands.scala:360)
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:172)
... 8 more
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1273)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1264)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1263)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1263)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:730)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1457)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1418)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
scala>