Last active
April 14, 2026 12:22
-
-
Save KEINOS/85e90e73ea6186ae5772d1c7790dee1e to your computer and use it in GitHub Desktop.
Git sub-command to copy a single directory to a local path.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env bash | |
| set -euo pipefail | |
| # ----------------------------------------------------------------------------- | |
| # git-sparse | |
| # ----------------------------------------------------------------------------- | |
| # | |
| # Git external subcommand to copy a single directory from a GitHub repository | |
| # to a local path, without cloning the entire repository. | |
| # | |
| # Install: | |
| # 1. Place this script as `git-sparse` in a directory that is included in | |
| # your system's PATH. | |
| # 2. Change script file mode: `chmod 0700 git-sparse` | |
| # 3. Check visibility: `type git-sparse` | |
| # 4. Check functionality: `git sparse -h` | |
| # | |
| # Usage: | |
| # git sparse <repo-url> <dir-url-or-path> [target-dir] | |
| # | |
| # Options: | |
| # <repo-url> URL of the GitHub repository (e.g. https://github.com/user/repo.git) | |
| # <dir-url-or-path> URL or path of the directory within the repository | |
| # [target-dir] Optional target directory (default: current directory) | |
| # | |
| # Example: | |
| # git sparse https://github.com/git/git.git templates/info ./foo | |
| # git sparse https://github.com/git/git.git https://github.com/git/git/tree/master/templates/info ./foo | |
| # | |
| # Notes: | |
| # To specify a branch, the directory URL must include the /tree/<branch>/ component. | |
| # See the second example above. | |
| # | |
| # Latest version: https://gist.github.com/KEINOS/85e90e73ea6186ae5772d1c7790dee1e | |
| # License: WTFPL v2, (c) 2026 KEINOS and the contributors | |
| # ----------------------------------------------------------------------------- | |
# ---- safe absolute path (avoid cd side effects) ----
#######################################
# Print the absolute form of a path: the physical (symlink-free) location of
# its parent directory joined with its final component. The final component
# itself is not resolved and does not have to exist.
# The cd runs inside a subshell, so the caller's working directory is never
# changed.
# Arguments: $1 - path to resolve (relative or absolute)
# Outputs:   the absolute path on stdout, one line
# Returns:   non-zero, printing nothing, if the parent directory cannot be
#            entered
#######################################
resolve_path() {
  (
    # CDPATH is cleared for this cd: otherwise a relative parent such as "bin"
    # could be looked up through CDPATH, landing in the wrong directory and
    # making cd echo that directory into the output the caller captures.
    # "--" keeps a path that starts with "-" from being parsed as an option.
    CDPATH='' cd -- "$(dirname -- "$1")" >/dev/null 2>&1 || exit 1
    parent="$(pwd -P)"
    leaf="$(basename -- "$1")"
    # For "/" itself and for entries directly below it, drop the duplicate
    # slash so the result is "/" or "/name" instead of "///" or "//name".
    if [[ "$parent" == "/" ]]; then
      parent=""
    fi
    if [[ "$leaf" == "/" ]]; then
      leaf=""
    fi
    printf '%s/%s\n' "$parent" "$leaf"
  )
}
# ---- detect command name ----
#######################################
# Report whether this script is the `git-sparse` that a command lookup finds.
# Both the `command -v git-sparse` result and "$0" are passed through
# resolve_path before being compared.
# Returns: 0 when both resolve to the same path; 1 otherwise, including when
#          git-sparse is not found or either path cannot be resolved.
#######################################
is_in_path_as_git_sparse() {
  local cmd_path script_path
  # A failed lookup is an expected outcome here, not an error.
  cmd_path="$(command -v git-sparse 2>/dev/null)" || return 1
  [[ -n "$cmd_path" ]] || return 1
  # Stop on a resolution failure. This function runs inside an `if`
  # condition, where `set -e` is inactive; without the explicit returns two
  # failed resolutions would compare as "" == "" and report a false match.
  cmd_path="$(resolve_path "$cmd_path")" || return 1
  script_path="$(resolve_path "$0")" || return 1
  [[ "$cmd_path" == "$script_path" ]]
}
#######################################
# Print the name under which the user should invoke this tool.
# Outputs: "git sparse" when is_in_path_as_git_sparse succeeds,
#          "git-sparse" otherwise.
#######################################
cmd_name() {
  local label="git-sparse"
  if is_in_path_as_git_sparse; then
    label="git sparse"
  fi
  echo "$label"
}
#######################################
# Print the usage summary and terminate the script.
# Outputs: two lines on stdout; the second starts with the invocation name
#          reported by cmd_name.
# Returns: never returns; exits with status 1.
#######################################
usage() {
  local invoked_as
  invoked_as="$(cmd_name)"
  printf '%s\n' \
    "Usage:" \
    "${invoked_as} <repo-url> <dir-url-or-path> [target-dir]"
  exit 1
}
# ---- arguments and prerequisites ----
# The first two positional arguments are mandatory; with fewer, print the
# usage text and stop.
if (( $# < 2 )); then
  usage
fi

REPO_URL="$1"        # repository to clone from
INPUT_PATH="$2"      # directory inside the repository (path or GitHub tree URL)
TARGET_DIR="${3:-.}" # destination root; current directory unless given
ORIG_PWD="$(pwd)"    # directory the command was started from

# Every later step shells out to git, so fail early when it is missing.
if ! command -v git >/dev/null 2>&1; then
  echo "Error: git is required" >&2
  exit 1
fi

# Filled in by the path parsing below; an empty BRANCH means no branch was
# named in the input.
BRANCH=""
SPARSE_PATH=""
# ---- parse GitHub URL robustly ----
# INPUT_PATH is either
#   * a GitHub tree URL: https://github.com/<user>/<repo>/tree/<branch>/<path>
#   * or a plain path inside the repository.
# On success SPARSE_PATH holds the repository-relative directory and BRANCH
# holds the branch taken from the URL (left empty for a plain path).
if [[ "$INPUT_PATH" =~ ^https://github\.com/[^/]+/[^/]+/tree/(.*)$ ]]; then
  # Take what follows the /tree/ that comes right after <user>/<repo>.
  # Capturing it from the anchored match, instead of cutting at the first
  # "tree/" anywhere in the URL, keeps repositories whose name ends in "tree"
  # working (e.g. https://github.com/user/nerdtree/tree/master/plugin).
  remaining_path="${BASH_REMATCH[1]}"
  # Strip trailing slashes so the branch-root check and SPARSE_PATH stripping
  # work consistently (.../tree/main/ behaves the same as .../tree/main).
  while [[ "$remaining_path" == */ ]]; do
    remaining_path="${remaining_path%/}"
  done
  # Get the branch list; git's own stderr reaches the terminal for context,
  # and a clear summary message is added on failure.
  BRANCHES="$(git ls-remote --heads "$REPO_URL")" || {
    echo "Error: cannot list remote branches for $REPO_URL" >&2
    exit 1
  }
  # Keep only the ref column and turn refs/heads/<name> into <name>.
  BRANCHES="$(printf '%s\n' "$BRANCHES" | awk '{print $2}' | sed 's|^refs/heads/||')"
  # A branch name may itself contain "/", so the URL alone does not say where
  # the branch ends and the path begins: pick the longest remote branch that
  # is a whole-component prefix of the remaining path.
  MATCH=""
  while IFS= read -r b; do
    [[ -n "$b" ]] || continue
    if [[ "$remaining_path" == "$b" || "$remaining_path" == "$b/"* ]]; then
      if [[ ${#b} -gt ${#MATCH} ]]; then
        MATCH="$b"
      fi
    fi
  done <<<"$BRANCHES"
  if [[ -z "$MATCH" ]]; then
    echo "Error: cannot parse branch from URL" >&2
    exit 1
  fi
  # Reject URLs that point to the branch root (no subdirectory specified).
  if [[ "$remaining_path" == "$MATCH" ]]; then
    echo "Error: no subdirectory specified in URL (use 'git clone' for full repo)" >&2
    exit 1
  fi
  BRANCH="$MATCH"
  SPARSE_PATH="${remaining_path#"$BRANCH"/}"
else
  # Reject any GitHub URL that is missing the required /tree/<branch>/
  # component. Such URLs look valid but would be treated as a bare path,
  # causing failures later (e.g. cp of a non-existent file named like the URL).
  if [[ "$INPUT_PATH" =~ ^https://github\.com/ ]]; then
    echo "Error: GitHub URL must contain /tree/<branch>/ before the path" >&2
    echo " Expected: https://github.com/<user>/<repo>/tree/<branch>/<path>" >&2
    echo " Got: $INPUT_PATH" >&2
    exit 1
  fi
  SPARSE_PATH="$INPUT_PATH"
fi

# Normalise SPARSE_PATH to a clean repository-relative path. It is later
# handed to `cp -R` from inside the clone, where
#   * a trailing "/" makes BSD cp copy the directory's contents instead of
#     the directory itself,
#   * a leading "/" would name a path on the local filesystem rather than in
#     the repository.
while [[ "$SPARSE_PATH" == */ ]]; do
  SPARSE_PATH="${SPARSE_PATH%/}"
done
while [[ "$SPARSE_PATH" == /* || "$SPARSE_PATH" == ./* ]]; do
  SPARSE_PATH="${SPARSE_PATH#/}"
  SPARSE_PATH="${SPARSE_PATH#./}"
done

# Guard against an empty sparse path (e.g. INPUT_PATH was blank or only "/").
if [[ -z "$SPARSE_PATH" ]]; then
  echo "Error: empty sparse path" >&2
  exit 1
fi

# "." and ".." components could point at the whole clone or outside of it.
case "/${SPARSE_PATH}/" in
  */./* | */../*)
    echo "Error: path must not contain '.' or '..' components: $SPARSE_PATH" >&2
    exit 1
    ;;
esac
# ---- validate target ----
# Work out where the copy will land. An absolute TARGET_DIR is used as given;
# a relative one is anchored at the directory the command was started from.
# The directory layout of SPARSE_PATH is reproduced below that root.
case "$TARGET_DIR" in
  /*) dest_root="${TARGET_DIR}" ;;
  *) dest_root="${ORIG_PWD}/${TARGET_DIR}" ;;
esac
DEST_PARENT="${dest_root}/$(dirname "$SPARSE_PATH")"
DEST_DIR="${DEST_PARENT}/$(basename "$SPARSE_PATH")"

# Refuse to continue when something already occupies the destination.
if [[ -e "$DEST_DIR" ]]; then
  echo "Error: destination already exists: $DEST_DIR" >&2
  exit 1
fi
# ---- clone, sparse checkout, copy ----
# All git work happens in a throw-away directory that is removed on every
# exit path.
TMP_DIR="$(mktemp -d)" || {
  echo "Error: cannot create temporary directory" >&2
  exit 1
}
trap 'rm -rf -- "$TMP_DIR"' EXIT

echo "==> Cloning repository (partial)..."
# No blobs, one commit, one branch, nothing checked out yet: file contents are
# only fetched for what the sparse checkout below selects.
CLONE_ARGS=(--filter=blob:none --depth 1 --single-branch --no-checkout)
if [[ -n "$BRANCH" ]]; then
  CLONE_ARGS+=(--branch "$BRANCH")
fi
git clone "${CLONE_ARGS[@]}" "$REPO_URL" "$TMP_DIR"

cd "$TMP_DIR" || {
  echo "Error: cannot enter temporary directory: $TMP_DIR" >&2
  exit 1
}
git sparse-checkout init --cone
git sparse-checkout set "$SPARSE_PATH"
if [[ -n "$BRANCH" ]]; then
  git checkout "$BRANCH"
else
  git checkout
fi

# Verify the requested path really exists in the checkout BEFORE touching the
# destination; otherwise a typo would leave freshly created empty directories
# behind and surface only as a raw cp error.
if [[ ! -e "$SPARSE_PATH" ]]; then
  echo "Error: path not found in repository: $SPARSE_PATH" >&2
  exit 1
fi

mkdir -p -- "$DEST_PARENT"
echo "==> Copying directory..."
# "--" keeps a path starting with "-" from being read as a cp option.
cp -R -- "$SPARSE_PATH" "${DEST_PARENT}/"
# ---- LICENSE handling ----
# Copy the repository's top-level LICENSE*/COPYING* files (any letter case)
# next to the copied directory, i.e. into DEST_PARENT.
echo "==> Fetching LICENSE files..."
# Use process substitution to avoid a subshell (preserves variable scope).
# Filter to blobs only so directories (e.g. LICENSE.d/) are not matched.
while IFS= read -r file; do
  case "$file" in
    [Ll][Ii][Cc][Ee][Nn][Ss][Ee]* | [Cc][Oo][Pp][Yy][Ii][Nn][Gg]*)
      license_dest="${DEST_PARENT}/${file}"
      # DEST_PARENT may be a directory the user already owns (with the default
      # target and a top-level path it is the current directory). Never
      # truncate a file that is already there; report it and move on.
      if [[ -e "$license_dest" ]]; then
        echo " -> ${file} (skipped, already exists: ${license_dest})" >&2
        continue
      fi
      echo " -> ${file}"
      git show "HEAD:$file" > "$license_dest"
      ;;
  esac
  # git ls-tree format: "<mode> <type> <hash>\t<filename>"
  # Split on TAB so filenames containing spaces are preserved correctly.
done < <(git ls-tree HEAD | awk -F'\t' '{split($1,a," "); if(a[2]=="blob") print $2}')

echo "==> Done."
echo "Output: ${DEST_DIR}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment