Created
December 9, 2015 02:07
-
-
Save dirkgr/82021d7fa534cbb13975 to your computer and use it in GitHub Desktop.
Gets a file from S3, wrapped in a function
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.net.URI
import java.nio.file.{Files, Path, StandardCopyOption}

import com.amazonaws.services.s3.AmazonS3Client
import org.allenai.common.Resource

/** I/O helpers for working with objects stored in S3. */
object IO {
  private val s3 = new AmazonS3Client()
  private val acceptableUrlSchemes = Set("s3", "s3n", "s3a")

  /** Downloads the S3 object named by `url` into a temporary local file, passes that file to
    * `function`, and deletes the file once `function` has returned or thrown.
    *
    * The bucket is taken from the host part of the URL and the key from its path with leading
    * slashes removed.
    *
    * @param url      location of the object; its scheme must be one of `s3`, `s3n` or `s3a`
    * @param function consumer of the downloaded file; the path is only valid during this call
    * @tparam T       result type of `function`
    * @return whatever `function` returns
    * @throws IllegalArgumentException if the scheme is not accepted or the URL has no host part
    */
  def withFileFromS3[T](url: URI)(function: Path => T): T = {
    require(
      acceptableUrlSchemes.contains(url.getScheme),
      s"Unsupported scheme in $url; expected one of ${acceptableUrlSchemes.mkString(", ")}"
    )
    // java.net.URI reports a null host when the authority is not a valid hostname; fail early
    // with a clear message instead of building a "null" file name and a null bucket.
    val bucket = Option(url.getHost).getOrElse {
      throw new IllegalArgumentException(s"Cannot determine the S3 bucket from $url")
    }
    val key = url.getPath.dropWhile(_ == '/')

    // Write to file first, because we don't know how fast our consumer will read the file, and if
    // it reads too slowly, S3 will time out.
    val tempFilePrefix = s"pipeline-download-${bucket}#${url.getPath}".replace('/', '$')
    val tempFile = Files.createTempFile(tempFilePrefix, ".tmp")
    // Safety net in case the JVM exits before the finally block below gets to run.
    tempFile.toFile.deleteOnExit()
    try {
      Resource.using(s3.getObject(bucket, key).getObjectContent) { is =>
        // createTempFile has already created an empty file, and Files.copy refuses to write to
        // an existing target unless REPLACE_EXISTING is given.
        Files.copy(is, tempFile, StandardCopyOption.REPLACE_EXISTING)
      }
      function(tempFile)
    } finally {
      Files.deleteIfExists(tempFile)
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment