Skip to content

Instantly share code, notes, and snippets.

@dirkgr
Created December 9, 2015 02:07
Show Gist options
  • Save dirkgr/82021d7fa534cbb13975 to your computer and use it in GitHub Desktop.
Save dirkgr/82021d7fa534cbb13975 to your computer and use it in GitHub Desktop.
Downloads a file from S3 to a temporary local file, passes that file to a caller-supplied function, and deletes it afterwards
import org.allenai.common.Resource
import com.amazonaws.services.s3.AmazonS3Client
import java.net.URI
import java.nio.file.{Path, Files}
/** Helpers for reading S3 objects through temporary local files. */
object IO {
  // Local import so this object is self-contained and the file-level imports stay untouched.
  import java.nio.file.StandardCopyOption

  private val s3 = new AmazonS3Client()
  private val acceptableUrlSchemes = Set("s3", "s3n", "s3a")

  // The temp-file prefix embeds the bucket and key for easier debugging. Keys can be long, so the
  // prefix is capped to keep the generated file name within typical file-name length limits.
  private val maxTempFilePrefixLength = 100

  /** Downloads the S3 object named by `url` into a temporary file, applies `function` to that
    * file, and deletes the file afterwards, whether or not `function` succeeds.
    *
    * The URL host is used as the bucket name and the URL path, minus leading slashes, as the
    * object key.
    *
    * @param url      location of the object; its scheme must be one of `s3`, `s3n` or `s3a`
    * @param function consumer of the downloaded file; the path is only valid during the call
    * @tparam T       result type of `function`
    * @return whatever `function` returns
    * @throws IllegalArgumentException if the URL scheme is not an accepted S3 scheme
    */
  def withFileFromS3[T](url: URI)(function: Path => T): T = {
    require(
      acceptableUrlSchemes.contains(url.getScheme),
      s"Unsupported URL scheme in '$url'; expected one of ${acceptableUrlSchemes.mkString(", ")}"
    )

    // Write to file first, because we don't know how fast our consumer will read the file, and if
    // it reads too slowly, S3 will time out.
    val tempFilePrefix =
      s"pipeline-download-${url.getHost}#${url.getPath}"
        .replace('/', '$')
        .take(maxTempFilePrefixLength)
    val tempFile = Files.createTempFile(tempFilePrefix, ".tmp")
    // Safety net in case the JVM exits before the finally block below gets to run.
    tempFile.toFile.deleteOnExit()

    try {
      val key = url.getPath.dropWhile(_ == '/')
      // Resource.using is expected to close the stream once the copy finishes.
      Resource.using(s3.getObject(url.getHost, key).getObjectContent) { is =>
        // createTempFile has already created the file on disk, and Files.copy refuses to write to
        // an existing target unless REPLACE_EXISTING is given.
        Files.copy(is, tempFile, StandardCopyOption.REPLACE_EXISTING)
      }
      function(tempFile)
    } finally {
      Files.deleteIfExists(tempFile)
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment