Created
January 14, 2017 17:23
-
-
Save russellpierce/b947c05fa6e6ceb650b0a929e8416fb8 to your computer and use it in GitHub Desktop.
Connect to AWS Athena using R (with the option to use IAM credentials)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#repsych is on github and is here only for the glibrary idiom | |
library(repsych) | |
#install and load the following packages | |
glibrary(whisker, lubridate, magrittr, rappdirs, awsjavasdk, rJava) | |
if (!aws_sdk_present()) { | |
install_aws_sdk() | |
} | |
load_sdk() | |
#' @param s3_staging_dir required. \code{character}. The s3 staging bucket and prefix | |
#' @param region required, with default. \code{character}. The AWS region to use. This is used to create the connection url. For details refer to \link{http://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html#jdbc-url-format} | |
#' @param aws_access_key optional. \code{character}. | |
#' @param aws_secret_key optional. \code{character}. | |
#' @param aws_credentials_provider_class optional. \code{character}. The credentials provider class. | |
#' By default, if the parameters \code{aws_access_key} or \code{aws_secret_key} this function uses the default credential provider chain in the AWS Java SDK. | |
#' This will look for credentials in the following order: | |
#' \begin{enumerate} | |
#' \item{AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY Environmental variables} | |
#' \item{aws.accessKeyId and aws.secretKey java system properties} | |
#' \item{The default credential profiles file – typically located at ~/.aws/credentials} | |
#' \item{Amazon ECS container credentials} | |
#' \item{Instance profile credentials} | |
#' \end{enumerate} | |
#' For details, refer to \link{http://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/credentials.html#credentials-default}. | |
#' @param ... optional arguments passed to DBI::dbConnect. This function already covers the s3_staging_dir, user, password, and aws_credentials_provider_class arguments. For other optional arguments refer to the Amazon Athena JDBC documention, \link{http://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html}. | |
#' | |
#' @importFrom R.utils tempvar | |
#' @importFrom rJava .jpackage | |
#' @importFrom RJDBC JDBC | |
#' @importFrom DBI dbConnect | |
#' @example | |
#' athenaConn <- athenaConnect() | |
#' DBI::dbGetQuery(athenaConn, "select * from flow_node limit 1") | |
athenaConnect <- function(s3_staging_dir = "s3://recommender-data-analysis/athena-staging/", | |
region = "us-east-1", | |
aws_access_key = NULL, | |
aws_secret_key = NULL, | |
aws_credentials_provider_class = "com.amazonaws.auth.DefaultAWSCredentialsProviderChain", ...) { | |
if (!file.exists("~/Athena/AthenaJDBC41-1.0.0.jar")) { | |
suppressWarnings(dir.create("~/Athena/")) | |
download.file("https://s3.amazonaws.com/athena-downloads/drivers/AthenaJDBC41-1.0.0.jar", "~/Athena/AthenaJDBC41-1.0.0.jar") | |
} | |
if (!exists("verboseOption")) { | |
verboseOption <- R.utils::tempvar() | |
assign(verboseOption, TRUE) | |
} | |
if (get(verboseOption)) {message("Testing connection status")} | |
JDBCLib <- "~/Athena/" | |
.jpackage("rathena", morePaths= list.files(JDBCLib, full.names = TRUE)) | |
drv <- RJDBC::JDBC("com.amazonaws.athena.jdbc.AthenaDriver", paste0(JDBCLib,"AthenaJDBC41-1.0.0.jar"),"`") | |
connection_string <- paste0("jdbc:awsathena://athena.", region, ".amazonaws.com:443") | |
if (!is.null(aws_access_key) && !is.null(aws_secret_key)) { | |
athenaConn <- | |
dbConnect(drv, connection_string, | |
s3_staging_dir = s3_staging_dir, | |
user = aws_access_key, | |
password = aws_secret_key | |
) | |
} else { | |
athenaConn <- DBI::dbConnect(drv, connection_string, | |
s3_staging_dir = s3_staging_dir, | |
aws_credentials_provider_class = aws_credentials_provider_class | |
) | |
} | |
return(athenaConn) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks great share.
Note, latest driver is now AthenaJDBC41-1.1.0.jar and will likely update again soon.