-
-
Save jsta/5355a1f0daa489597a735f36eb15e3d2 to your computer and use it in GitHub Desktop.
Function to download a file from github via API, including large files and private repos
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://gist.github.com/noamross/73944d85cad545ae89efaa4d90b049db
#' Gets a file from a github repo via the GitHub API
#'
#' The file is requested from the repository contents endpoint, using [gh::gh]
#' to deal with authorization and such. If that response carries no inline
#' content (`encoding` of `"none"`, which GitHub documents for files above
#' the 1MB limit of the content API), the file is fetched from the Data API
#' blob endpoint instead, using the SHA returned by the first request.
#' See https://developer.github.com/v3/git/blobs/
#' @param url the URL of the file to download via API, of the form
#'   `:owner/:repo/blob/:ref/:path` (an optional leading `github.com/` is
#'   accepted)
#' @param ref the reference of a commit: a branch name, tag, or commit SHA.
#'   Overrides the value found in `url`. If neither is given, no `ref` is sent
#'   with the request.
#' @param owner,repo,path alternate way to specify the file. These will
#'   override values in `url`
#' @param to_disk,destfile write file to disk (default=TRUE)? If so, use the
#'   name in `destfile`, or the original filename by default
#' @param .token,.api_url,.method,.limit,.send_headers arguments passed on to
#'   [gh::gh]
#' @importFrom gh gh
#' @importFrom stringi stri_match_all_regex
#' @importFrom purrr %||% keep
#' @importFrom base64enc base64decode
#' @return Either the local path of the downloaded file (default), or a raw
#'   vector
gh_file <- function(url = NULL, ref = NULL,
                    owner = NULL, repo = NULL, path = NULL,
                    to_disk = TRUE, destfile = NULL,
                    .token = NULL, .api_url = NULL, .method = "GET",
                    .limit = NULL, .send_headers = NULL) {
  if (!is.null(url)) {
    # Columns of the match matrix: 1 = whole match, 2 = optional "github.com/",
    # 3 = owner, 4 = repo, 5 = ref, 6 = path. Only the first match is used.
    parts <- stringi::stri_match_all_regex(
      url,
      "(github\\.com/)?([^\\/]+)/([^\\/]+)/[^\\/]+/([^\\/]+)/([^\\?]+)"
    )[[1]]
    # Explicit arguments win; an unmatched URL component (NA) counts as absent.
    from_url <- function(given, column) {
      if (!is.null(given)) {
        return(given)
      }
      found <- parts[1, column]
      if (is.na(found)) NULL else found
    }
    owner <- from_url(owner, 3)
    repo <- from_url(repo, 4)
    ref <- from_url(ref, 5)
    path <- from_url(path, 6)
  }

  # Fail early with a clear message rather than sending NULL/NA to the API.
  required <- list(owner = owner, repo = repo, path = path)
  unusable <- vapply(
    required,
    function(x) is.null(x) || length(x) != 1L || is.na(x),
    logical(1)
  )
  if (any(unusable)) {
    stop(
      "Could not determine: ", paste(names(required)[unusable], collapse = ", "),
      ". Supply a `url` of the form :owner/:repo/blob/:ref/:path, ",
      "or `owner`, `repo` and `path`.",
      call. = FALSE
    )
  }

  # Forward the caller's gh() settings on every request.
  call_api <- function(endpoint, ...) {
    gh(
      endpoint, ...,
      .token = .token, .api_url = .api_url, .method = .method,
      .limit = .limit, .send_headers = .send_headers
    )
  }

  contents_endpoint <- paste0("/repos/:owner/:repo/contents/", path)
  blob <- if (is.null(ref)) {
    call_api(contents_endpoint, owner = owner, repo = repo)
  } else {
    call_api(contents_endpoint, owner = owner, repo = repo, ref = ref)
  }

  # No inline content: fall back to the blob endpoint, addressed by SHA.
  if (identical(blob[["encoding"]], "none") && !is.null(blob[["sha"]])) {
    blob <- call_api(
      "/repos/:owner/:repo/git/blobs/:sha",
      owner = owner, repo = repo, sha = blob[["sha"]]
    )
  }

  if (is.null(blob[["content"]])) {
    stop(
      "The API response for '", path, "' has no file content ",
      "(is `path` a directory?).",
      call. = FALSE
    )
  }

  bytes <- base64decode(blob[["content"]])
  if (to_disk) {
    # Default file name is derived here so it also works without `url`.
    destfile <- destfile %||% basename(path)
    writeBin(bytes, con = destfile)
    destfile
  } else {
    bytes
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment