spark-s3a-error
The Hadoop configuration carrying the S3A credentials (placeholder values shown):
<configuration>
  <property>
    <name>fs.s3a.access.key</name>
    <value>S3_ACCESS_KEY</value>
  </property>
  <property>
    <name>fs.s3a.secret.key</name>
    <value>S3_SECRET_KEY</value>
  </property>
</configuration>
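If editing an XML file on every node is inconvenient, the same two properties can in principle be set at runtime on the driver's Hadoop configuration. A minimal sketch, assuming the placeholder values above stand in for real credentials (note that sc._jsc is a private PySpark attribute, so this is a common convention rather than a supported API):

from pyspark import SparkContext

sc = SparkContext('spark://spark-master-1:7077,spark-master-2:7077')

# Set the same fs.s3a.* properties programmatically instead of via XML.
# S3_ACCESS_KEY / S3_SECRET_KEY are the placeholders from the config above.
hadoop_conf = sc._jsc.hadoopConfiguration()
hadoop_conf.set('fs.s3a.access.key', 'S3_ACCESS_KEY')
hadoop_conf.set('fs.s3a.secret.key', 'S3_SECRET_KEY')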
The PySpark job that reads the file from S3:
from pyspark import SparkContext

sc = SparkContext('spark://spark-master-1:7077,spark-master-2:7077')

# Load the text file and grab its header (first line).
dataFile = "s3a://mybucket/out.csv"
input = sc.textFile(dataFile)
header = input.take(1)[0]
print(header)
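The take(1) call against the s3a:// path is what triggers the failure below. One hedged alternative for delivering the keys: Spark copies spark.hadoop.-prefixed configuration entries into the Hadoop configuration it hands to executors, so credentials can travel with the job instead of depending on each worker node's XML. A sketch under that assumption:

from pyspark import SparkConf, SparkContext

# Placeholders again; spark.hadoop.* entries should be forwarded into
# the executors' Hadoop configuration by Spark itself.
conf = (SparkConf()
        .setMaster('spark://spark-master-1:7077,spark-master-2:7077')
        .set('spark.hadoop.fs.s3a.access.key', 'S3_ACCESS_KEY')
        .set('spark.hadoop.fs.s3a.secret.key', 'S3_SECRET_KEY'))
sc = SparkContext(conf=conf)

print(sc.textFile('s3a://mybucket/out.csv').take(1)[0])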
The resulting driver stack trace:
Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1419)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1418)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
    at scala.Option.foreach(Option.scala:236)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:799)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1640)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1832)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1845)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1858)
    at org.apache.spark.api.python.PythonRDD$.runJob(PythonRDD.scala:393)
    at org.apache.spark.api.python.PythonRDD.runJob(PythonRDD.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:381)
    at py4j.Gateway.invoke(Gateway.java:259)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:209)
    at java.lang.Thread.run(Thread.java:745)
Caused by: com.cloudera.com.amazonaws.services.s3.model.AmazonS3Exception: The provided security credentials are not valid. (Service: Amazon S3; Status Code: 403; Error Code: InvalidSecurity; Request ID: null), S3 Extended Request ID: null
    at com.cloudera.com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1182)
    at com.cloudera.com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:770)
    at com.cloudera.com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489)
    at com.cloudera.com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310)
    at com.cloudera.com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785)
    at com.cloudera.com.amazonaws.services.s3.AmazonS3Client.getObject(AmazonS3Client.java:1191)
    at org.apache.hadoop.fs.s3a.S3AInputStream.reopen(S3AInputStream.java:91)
    at org.apache.hadoop.fs.s3a.S3AInputStream.openIfNeeded(S3AInputStream.java:62)
    at org.apache.hadoop.fs.s3a.S3AInputStream.read(S3AInputStream.java:156)
    at java.io.DataInputStream.read(DataInputStream.java:149)
    at org.apache.hadoop.mapreduce.lib.input.UncompressedSplitLineReader.fillBuffer(UncompressedSplitLineReader.java:59)
    at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:216)
    at org.apache.hadoop.util.LineReader.readLine(LineReader.java:174)
    at org.apache.hadoop.mapreduce.lib.input.UncompressedSplitLineReader.readLine(UncompressedSplitLineReader.java:91)
    at org.apache.hadoop.mapred.LineRecordReader.skipUtfByteOrderMark(LineRecordReader.java:208)
    at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:246)
    at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:48)
    at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:246)
    at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:208)
    at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
    at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
    at scala.collection.Iterator$class.foreach(Iterator.scala:727)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
    at org.apache.spark.api.python.PythonRDD$.writeIteratorToStream(PythonRDD.scala:452)
    at org.apache.spark.api.python.PythonRunner$WriterThread$$anonfun$run$3.apply(PythonRDD.scala:280)
    at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1741)
    at org.apache.spark.api.python.PythonRunner$WriterThread.run(PythonRDD.scala:239)
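The com.cloudera.com.amazonaws frames show the request going through Cloudera's shaded copy of the AWS SDK, and the 403 InvalidSecurity means S3 rejected whatever credentials the executor actually presented: either the keys themselves are wrong, or the fs.s3a.* properties never reached the worker JVMs. A quick way to separate the two cases is to test the keys outside Spark entirely; a minimal sketch assuming boto3 is installed (bucket and object names are the placeholders from the job above):

import boto3

# Uses the same placeholder credentials as the XML config; a failure here
# means the keys themselves are bad, while success points at propagation.
s3 = boto3.client(
    's3',
    aws_access_key_id='S3_ACCESS_KEY',
    aws_secret_access_key='S3_SECRET_KEY',
)
s3.head_object(Bucket='mybucket', Key='out.csv')  # raises ClientError on 403/404
print('credentials and object look OK')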