Last active
November 3, 2023 18:55
-
-
Save noamross/e05d3884b8ec8b4dce8380e24163851d to your computer and use it in GitHub Desktop.
Read previous versions of targets from git repository
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Requires a development branch of git2r for now. Do `remotes::install_github('ropensci/git2r@raw-blob-content')` or | |
# `renv::install('ropensci/git2r@raw-blob-content')`. This installs from source. | |
# TODO: | |
# - Add a function to extract an arbitrary file/folder, e.g., copy_git_file(path, ref, repo) | |
# - Have tar_*_version check if the target is a local file and in that case extract it and return a temporary path (turn on/off with arguments) | |
# - Guardrails and informative error messages cases such as: Not a git repository, target not present at the given reference, remote version not available. What happens with shallow clones? | |
# - Maybe only extract stuff in the `objects/` directory when needed | |
library(git2r) | |
library(fs) | |
library(targets) | |
#' Get a target from a given git reference | |
#' | |
#' @param name The name of the target | |
#' @param ref The git reference: tag, branch, SHA, or revision like HEAD~1 (see https://git-scm.com/docs/git-rev-parse.html#_specifying_revisions) | |
#' @param ... Additional arguments passed to \code{\link{tar_load}} or \code{\link{tar_read}} | |
#' @export | |
tar_load_version <- function(name, ref = "HEAD", envir = parent.frame(), ..., repo = ".") { | |
name <- targets::tar_deparse_language(substitute(name)) | |
tmp_store <- make_temporary_store(ref, repo) | |
tar_load(name, store = tmp_store, envir = envir, ...) | |
} | |
#' @export | |
#' @rdname tar_load_version | |
tar_read_version <- function(name, ref = "HEAD", repo = ".", ...) { | |
name <- targets::tar_deparse_language(substitute(name)) | |
tmp_store <- make_temporary_store(ref, repo) | |
tar_read_raw(name, store = tmp_store, ...) | |
} | |
make_temporary_store <- function(ref = "HEAD", repo = ".", store = targets::tar_path_store()) { | |
current_store <- store | |
store_git_obj <- get_git_obj_at_reference(path = fs::path(current_store), ref = ref, repo = repo) | |
tmp_store_path <- fs::path(tar_tempdir(), sha(store_git_obj)) | |
# Don't bother copying if the store is already in the tempdir | |
if(fs::dir_exists(tmp_store_path)) { | |
return(tmp_store_path) | |
} else { | |
tmp_store <- copy_git_tree(store_git_obj, dirname = sha(store_git_obj), path = tar_tempdir()) | |
} | |
tmp_store | |
} | |
#' @returns A git2r object representing the file at the given reference, either a blob or, for a directory, a tree | |
get_git_obj_at_reference <- function(path, ref, repo = ".") { | |
# Parse the reference name to a commit object | |
if(is.character(ref)) ref <- revparse_single(repo, ref) | |
path <- fs::path(path) | |
path_parts <- fs::path_split(path)[[1]] | |
repo <- git2r::repository(repo) | |
repo_tree <- git2r::tree(ref) | |
# Find the file in the tree | |
obj <- repo_tree | |
splits <- path_parts | |
while (length(splits) >= 1) { | |
obj <- obj[splits[1]] | |
splits <- splits[-1] | |
} | |
return(obj) | |
} | |
#' Copy the contents of a git_blob object to disk | |
#' @return The path to the copied file, which is named for the SHA of the blob | |
copy_git_obj <- function(obj, filename = sha(obj), path = ".") { | |
if(inherits(obj, "git_tree")) return(copy_git_tree(obj, dirname = filename, path)) | |
stopifnot(inherits(obj, "git_blob")) | |
# stopifnot(!is_binary(obj)) | |
# Find the path of the git object in the .git/objects directory given its SHA | |
out_path <- fs::path(path, filename) | |
contents <- content(obj, split = FALSE, raw = TRUE) | |
if(is.raw(contents)) { | |
writeBin(contents, out_path) | |
} else { | |
cat(content(obj, split = FALSE), file = out_path) | |
} | |
out_path | |
} | |
#' Copy the contents of a git_tree object to a directory on disk recursively | |
#' @return The path to the directory, which is named for the SHA of the tree | |
copy_git_tree <- function(tree, dirname = sha(tree), path = ".") { | |
out_path <- fs::path(path, dirname) | |
fs::dir_create(out_path, recurse = TRUE) | |
# Recurse down tree and copy all blobs | |
for (i in seq_along(tree$name)) { | |
obj <- tree[i] | |
if (inherits(obj, "git_blob")) { | |
# Can't read binary files yet, see https://github.com/ropensci/git2r/issues/460 | |
#if(is_binary(obj)) message("Skipping binary file") | |
copy_git_obj(obj, filename = tree$name[i], path = out_path) | |
} else if (inherits(obj, "git_tree")) { | |
copy_git_tree(obj, dirname = tree$name[i], path = out_path) | |
} else { | |
warning("Unknown git object type") | |
} | |
} | |
return(out_path) | |
} | |
#' Get a system-level temporary directory | |
tar_tempdir <- function() { | |
p <- fs::path(fs::path_dir(tempdir()), "tar_versions") | |
if(!fs::dir_exists(p)) fs::dir_create(p) | |
p | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment