@txomon
Last active August 29, 2015 14:27
Spark traceback on pickle
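
For context, this is the kind of call that produces the traceback below. The spark_pipeline.py source is not part of this gist, so the input path and function shape here are a guessed minimal sketch; only the output directory and the GzipCodec argument appear in the traceback itself.

    from pyspark import SparkContext

    def files_spark_pipeline(input_path, output_file):
        # Hypothetical reconstruction: read text input and write it back gzip-compressed.
        # The failure happens while committing the output, inside saveAsTextFile.
        sc = SparkContext(appName='spark_pipeline')
        rdd = sc.textFile(input_path)
        rdd.saveAsTextFile(
            output_file,  # e.g. file:/srv/bdml/spark/output.json.gz, as seen in the traceback
            'org.apache.hadoop.io.compress.GzipCodec'
        )

The rename failure is raised by Hadoop's FileOutputCommitter while it promotes the _temporary task output to its final location on the local filesystem, as the stack trace below shows.
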
Traceback (most recent call last):
File "/home/javier/projects/bigdata/bdml/ml/spark_pipeline.py", line 61, in <module>
preprocess()
File "/home/javier/projects/bigdata/bdml/ml/spark_pipeline.py", line 57, in preprocess
files_spark_pipeline(args.input_path, args.output_file)
File "/home/javier/projects/bigdata/bdml/ml/spark_pipeline.py", line 47, in files_spark_pipeline
'org.apache.hadoop.io.compress.GzipCodec'
File "/home/javier/projects/bigdata/spark/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 1484, in saveAsTextFile
File "/home/javier/projects/bigdata/spark/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py", line 538, in __call__
File "/home/javier/projects/bigdata/spark/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py", line 300, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o41.saveAsTextFile.
: java.io.IOException: Failed to rename DeprecatedRawLocalFileStatus{path=file:/srv/bdml/spark/output.json.gz/_temporary/0/task_201508111402_0001_m_000000/part-00000.gz; isDirectory=false; length=968071; replication=1; blocksize=33554432; modification_time=1439301864000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false} to file:/srv/bdml/spark/output.json.gz/part-00000.gz
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.mergePaths(FileOutputCommitter.java:346)
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.mergePaths(FileOutputCommitter.java:362)
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.commitJob(FileOutputCommitter.java:310)
at org.apache.hadoop.mapred.FileOutputCommitter.commitJob(FileOutputCommitter.java:136)
at org.apache.spark.SparkHadoopWriter.commitJob(SparkHadoopWriter.scala:112)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1125)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1065)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1065)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:286)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopDataset(PairRDDFunctions.scala:1065)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply$mcV$sp(PairRDDFunctions.scala:989)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:965)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:965)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:286)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:965)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$3.apply$mcV$sp(PairRDDFunctions.scala:951)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$3.apply(PairRDDFunctions.scala:951)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$3.apply(PairRDDFunctions.scala:951)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:286)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:950)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$2.apply$mcV$sp(PairRDDFunctions.scala:909)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$2.apply(PairRDDFunctions.scala:907)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$2.apply(PairRDDFunctions.scala:907)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:286)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:907)
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$2.apply$mcV$sp(RDD.scala:1422)
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$2.apply(RDD.scala:1410)
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$2.apply(RDD.scala:1410)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:286)
at org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1410)
at org.apache.spark.api.java.JavaRDDLike$class.saveAsTextFile(JavaRDDLike.scala:527)
at org.apache.spark.api.java.AbstractJavaRDDLike.saveAsTextFile(JavaRDDLike.scala:47)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:379)
at py4j.Gateway.invoke(Gateway.java:259)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:207)
at java.lang.Thread.run(Thread.java:745)