Last active
August 29, 2015 14:27
-
-
Save txomon/52324bc86e85ad9d6a8e to your computer and use it in GitHub Desktop.
Spark traceback on pickle
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Traceback (most recent call last): | |
File "/home/javier/projects/bigdata/bdml/ml/spark_pipeline.py", line 61, in <module> | |
preprocess() | |
File "/home/javier/projects/bigdata/bdml/ml/spark_pipeline.py", line 57, in preprocess | |
files_spark_pipeline(args.input_path, args.output_file) | |
File "/home/javier/projects/bigdata/bdml/ml/spark_pipeline.py", line 47, in files_spark_pipeline | |
'org.apache.hadoop.io.compress.GzipCodec' | |
File "/home/javier/projects/bigdata/spark/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 1484, in saveAsTextFile | |
File "/home/javier/projects/bigdata/spark/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py", line 538, in __call__ | |
File "/home/javier/projects/bigdata/spark/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py", line 300, in get_return_value | |
py4j.protocol.Py4JJavaError | |
2015-08-11 16:04:24,706:DEBUG:py4j.java_gateway:Command to send: p | |
ro42 | |
e | |
2015-08-11 16:04:24,739:DEBUG:py4j.java_gateway:Answer received: ysjava.io.IOException: Failed to rename DeprecatedRawLocalFileStatus{path=file:/srv/bdml/spark/output.json.gz/_temporary/0/task_201508111402_000 | |
1_m_000000/part-00000.gz; isDirectory=false; length=968071; replication=1; blocksize=33554432; modification_time=1439301864000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false} to file:/srv/ | |
bdml/spark/output.json.gz/part-00000.gz\n at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.mergePaths(FileOutputCommitter.java:346)\n at org.apache.hadoop.mapreduce.lib.output.FileOutpu | |
tCommitter.mergePaths(FileOutputCommitter.java:362)\n at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.commitJob(FileOutputCommitter.java:310)\n at org.apache.hadoop.mapred.FileOutputCommitter.com | |
mitJob(FileOutputCommitter.java:136)\n at org.apache.spark.SparkHadoopWriter.commitJob(SparkHadoopWriter.scala:112)\n at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply$mcV$sp(PairRD | |
DFunctions.scala:1125)\n at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1065)\n at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDatas | |
et$1.apply(PairRDDFunctions.scala:1065)\n at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)\n at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scal | |
a:108)\n at org.apache.spark.rdd.RDD.withScope(RDD.scala:286)\n at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopDataset(PairRDDFunctions.scala:1065)\n at org.apache.spark.rdd.PairRDDFunctions$$a | |
nonfun$saveAsHadoopFile$4.apply$mcV$sp(PairRDDFunctions.scala:989)\n at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:965)\n at org.apache.spark.rdd.Pai | |
rRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:965)\n at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)\n at org.apache.spark.rdd.RDDOperationScope$. | |
withScope(RDDOperationScope.scala:108)\n at org.apache.spark.rdd.RDD.withScope(RDD.scala:286)\n at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:965)\n at org.apache.spark | |
.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$3.apply$mcV$sp(PairRDDFunctions.scala:951)\n at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$3.apply(PairRDDFunctions.scala:951)\n at | |
org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$3.apply(PairRDDFunctions.scala:951)\n at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)\n at org.apache.spark | |
.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)\n at org.apache.spark.rdd.RDD.withScope(RDD.scala:286)\n at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:95 | |
0)\n at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$2.apply$mcV$sp(PairRDDFunctions.scala:909)\n at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$2.apply(PairRDDFunctions | |
.scala:907)\n at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$2.apply(PairRDDFunctions.scala:907)\n at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)\n | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)\n at org.apache.spark.rdd.RDD.withScope(RDD.scala:286)\n at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFu | |
nctions.scala:907)\n at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$2.apply$mcV$sp(RDD.scala:1422)\n at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$2.apply(RDD.scala:1410)\n at org.apache.spark | |
.rdd.RDD$$anonfun$saveAsTextFile$2.apply(RDD.scala:1410)\n at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)\n at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOpe | |
rationScope.scala:108)\n at org.apache.spark.rdd.RDD.withScope(RDD.scala:286)\n at org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1410)\n at org.apache.spark.api.java.JavaRDDLike$class.saveAsTextFi | |
le(JavaRDDLike.scala:527)\n at org.apache.spark.api.java.AbstractJavaRDDLike.saveAsTextFile(JavaRDDLike.scala:47)\n at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n at sun.reflect.Nati | |
veMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\n at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n at java.lang.reflect.Method.invoke(Method.java:497) | |
\n at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231)\n at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:379)\n at py4j.Gateway.invoke(Gateway.java:259)\n at py4j.com | |
mands.AbstractCommand.invokeMethod(AbstractCommand.java:133)\n at py4j.commands.CallCommand.execute(CallCommand.java:79)\n at py4j.GatewayConnection.run(GatewayConnection.java:207)\n at java.lang.Thread | |
.run(Thread.java:745)\n | |
: An error occurred while calling o41.saveAsTextFile. | |
: java.io.IOException: Failed to rename DeprecatedRawLocalFileStatus{path=file:/srv/bdml/spark/output.json.gz/_temporary/0/task_201508111402_0001_m_000000/part-00000.gz; isDirectory=false; length=968071; repli | |
cation=1; blocksize=33554432; modification_time=1439301864000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false} to file:/srv/bdml/spark/output.json.gz/part-00000.gz | |
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.mergePaths(FileOutputCommitter.java:346) | |
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.mergePaths(FileOutputCommitter.java:362) | |
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.commitJob(FileOutputCommitter.java:310) | |
at org.apache.hadoop.mapred.FileOutputCommitter.commitJob(FileOutputCommitter.java:136) | |
at org.apache.spark.SparkHadoopWriter.commitJob(SparkHadoopWriter.scala:112) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1125) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1065) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1065) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108) | |
at org.apache.spark.rdd.RDD.withScope(RDD.scala:286) | |
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopDataset(PairRDDFunctions.scala:1065) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply$mcV$sp(PairRDDFunctions.scala:989) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:965) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:965) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108) | |
at org.apache.spark.rdd.RDD.withScope(RDD.scala:286) | |
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:965) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$3.apply$mcV$sp(PairRDDFunctions.scala:951) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$3.apply(PairRDDFunctions.scala:951) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$3.apply(PairRDDFunctions.scala:951) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108) | |
at org.apache.spark.rdd.RDD.withScope(RDD.scala:286) | |
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:950) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$2.apply$mcV$sp(PairRDDFunctions.scala:909) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$2.apply(PairRDDFunctions.scala:907) | |
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$2.apply(PairRDDFunctions.scala:907) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108) | |
at org.apache.spark.rdd.RDD.withScope(RDD.scala:286) | |
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:907) | |
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$2.apply$mcV$sp(RDD.scala:1422) | |
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$2.apply(RDD.scala:1410) | |
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$2.apply(RDD.scala:1410) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) | |
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108) | |
at org.apache.spark.rdd.RDD.withScope(RDD.scala:286) | |
at org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1410) | |
at org.apache.spark.api.java.JavaRDDLike$class.saveAsTextFile(JavaRDDLike.scala:527) | |
at org.apache.spark.api.java.AbstractJavaRDDLike.saveAsTextFile(JavaRDDLike.scala:47) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:497) | |
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231) | |
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:379) | |
at py4j.Gateway.invoke(Gateway.java:259) | |
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133) | |
at py4j.commands.CallCommand.execute(CallCommand.java:79) | |
at py4j.GatewayConnection.run(GatewayConnection.java:207) | |
at java.lang.Thread.run(Thread.java:745) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment