Skip to content

Instantly share code, notes, and snippets.

@dilip
dilip / SparkContext.scala.diff
Created July 11, 2012 05:57
Spark patch to support direct access to S3 data
diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala
index 6e019d6..8bd059e 100644
--- a/core/src/main/scala/spark/SparkContext.scala
+++ b/core/src/main/scala/spark/SparkContext.scala
@@ -124,6 +124,14 @@ class SparkContext(
FileInputFormat.setInputPaths(conf, path)
val bufferSize = System.getProperty("spark.buffer.size", "65536")
conf.set("io.file.buffer.size", bufferSize)
+ if (System.getProperty("awsAccessKeyId") != null) {
+ conf.set("fs.s3.awsAccessKeyId", System.getProperty("awsAccessKeyId"))
@dilip
dilip / SequenceFileKeyInputFormat.java
Created April 19, 2012 15:39
SequenceFileKeyInputFormat and SequenceFileKeyRecordReader for enabling Hive to access data stored in a sequence file's key
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
@dilip
dilip / mapred.FileInputFormat.diff
Created December 5, 2011 04:28
Patches to Hadoop 0.20.2-cdh3u2 for hierarchical Hive partitions
--- ./src/mapred/org/apache/hadoop/mapred/FileInputFormat.java 2011-12-02 16:10:39.000000000 -0800
+++ /Users/dilipjoseph/Downloads/hadoop-0.20.2-cdh3u2/src/mapred/org/apache/hadoop/mapred/FileInputFormat.java 2011-10-14 01:39:58.000000000 -0700
@@ -139,31 +139,6 @@ public abstract class FileInputFormat<K,
ReflectionUtils.newInstance(filterClass, conf) : null;
}
- /**
- * Add files in the input path recursively into the results.
- * @param result
- * The List to store all files.