Skip to content

Instantly share code, notes, and snippets.

@KEINOS
Last active April 14, 2026 02:22
Show Gist options
  • Select an option

  • Save KEINOS/85e90e73ea6186ae5772d1c7790dee1e to your computer and use it in GitHub Desktop.

Select an option

Save KEINOS/85e90e73ea6186ae5772d1c7790dee1e to your computer and use it in GitHub Desktop.
Git sub-command to copy a single directory to a local path.
#!/usr/bin/env bash
set -euo pipefail
# -----------------------------------------------------------------------------
# git-sparse
# -----------------------------------------------------------------------------
#
# Copy a single directory from a GitHub repository to a local path, without
# cloning the entire repository.
#
# Install:
# 1. Place this script as `git-sparse` in a directory that is included in your
# system's PATH.
# 2. Change script file mode: `chmod 0700 git-sparse`
# 3. Check visibility: `type git-sparse`
# 4. Check functionality: `git sparse -h`
#
# Usage:
# git sparse <repo-url> <dir-url-or-path> [target-dir]
#
# Example:
# git sparse https://github.com/HKUDS/CLI-Anything.git blender/agent-harness ./foo
# git sparse https://github.com/git/git.git https://github.com/git/git/tree/master/templates/info ./foo
#
# Latest version: https://gist.github.com/KEINOS/85e90e73ea6186ae5772d1c7790dee1e
# License: WTFPL v2, 2026 KEINOS and the contributors
# -----------------------------------------------------------------------------
# ---- safe absolute path (avoid cd side effects) ----
# Print the absolute form of the path given as $1 on stdout.
#
# The parent directory is resolved physically with `pwd -P`; the final path
# component is appended unchanged. The work happens in a subshell, so the
# caller's working directory is not affected.
#
# Arguments: $1 - path to resolve (its parent directory must be enterable)
# Outputs:   "<physical parent dir>/<last component>" on stdout
# Returns:   0 on success, 1 if the parent directory cannot be entered
resolve_path() {
  (
    # `--` stops option parsing so a path that starts with "-" is still
    # treated as a path by cd, dirname and basename.
    cd -- "$(dirname -- "$1")" 2>/dev/null || exit 1
    local parent
    parent="$(pwd -P)"
    # In the root directory `pwd -P` prints "/"; strip it so the result is
    # "/name" instead of "//name".
    printf '%s/%s\n' "${parent%/}" "$(basename -- "$1")"
  )
}
# ---- detect command name ----
# Succeed (status 0) when a `git-sparse` command is found via `command -v`
# and its resolved path equals the resolved path of the running script ($0).
# Returns 1 when no `git-sparse` is found; otherwise the status of the final
# path comparison.
is_in_path_as_git_sparse() {
  local found self
  # `|| true` keeps a failed lookup from aborting; emptiness is checked next.
  found="$(command -v git-sparse 2>/dev/null || true)"
  if [[ -z "$found" ]]; then
    return 1
  fi
  found="$(resolve_path "$found")"
  self="$(resolve_path "$0")"
  [[ "$found" = "$self" ]]
}
# Print the name to show in help text: "git sparse" when
# is_in_path_as_git_sparse succeeds, otherwise "git-sparse".
cmd_name() {
  local label="git-sparse"
  if is_in_path_as_git_sparse; then
    label="git sparse"
  fi
  echo "$label"
}
# Print the usage synopsis on stdout, using the name reported by cmd_name,
# then terminate the script with exit status 1.
usage() {
  local invoked_as
  invoked_as="$(cmd_name)"
  printf 'Usage:\n%s <repo-url> <dir-url-or-path> [target-dir]\n' "$invoked_as"
  exit 1
}
# ---- arguments and prerequisites ----
# At least <repo-url> and <dir-url-or-path> are required; otherwise show the
# usage text (usage exits with status 1).
if (( $# < 2 )); then
  usage
fi
REPO_URL="$1"
INPUT_PATH="$2"
# Optional third argument; defaults to the current directory.
TARGET_DIR="${3:-.}"
# Remember the starting directory; the script changes directory later.
ORIG_PWD="$(pwd)"
if ! command -v git >/dev/null 2>&1; then
  echo "Error: git is required" >&2
  exit 1
fi
# Filled in by the URL/path parsing that follows.
BRANCH=""
SPARSE_PATH=""
# ---- parse GitHub URL robustly ----
# Derive BRANCH and SPARSE_PATH from INPUT_PATH:
#   - A GitHub "tree" URL (https://github.com/<owner>/<repo>/tree/<rest>):
#     BRANCH is the longest remote branch name that is a prefix of <rest>
#     (branch names may themselves contain "/"); SPARSE_PATH is what follows.
#   - Anything else is taken as a directory path inside the repository and
#     BRANCH stays empty.
# The dots are escaped so only the literal host "github.com" matches.
github_tree_re='^https://github\.com/[^/]+/[^/]+/tree/(.*)$'
if [[ "$INPUT_PATH" =~ $github_tree_re ]]; then
  # Take the regex capture instead of "${INPUT_PATH#*tree/}": that expansion
  # cuts at the FIRST "tree/" in the URL and therefore mis-parses an owner or
  # repository whose name is or ends in "tree"
  # (e.g. https://github.com/acme/mytree/tree/main/docs).
  remaining_path="${BASH_REMATCH[1]}"
  # Strip trailing slashes so branch-root check and SPARSE_PATH stripping work
  # consistently (e.g. .../tree/main/ behaves the same as .../tree/main)
  while [[ "$remaining_path" == */ ]]; do
    remaining_path="${remaining_path%/}"
  done
  # get branch list; let git's own stderr reach the terminal for context,
  # then add a clear summary message on failure
  BRANCHES="$(git ls-remote --heads "$REPO_URL")" || {
    echo "Error: cannot list remote branches for $REPO_URL" >&2
    exit 1
  }
  # Keep only the ref column and drop the leading "refs/heads/".
  BRANCHES="$(printf '%s\n' "$BRANCHES" | awk '{print $2}' | sed 's|^refs/heads/||')"
  MATCH=""
  # Read one branch per line (no reliance on unquoted word-splitting).
  while IFS= read -r b; do
    [[ -n "$b" ]] || continue
    if [[ "$remaining_path" == "$b" || "$remaining_path" == "$b/"* ]]; then
      # Prefer the longest matching branch name.
      if [[ ${#b} -gt ${#MATCH} ]]; then
        MATCH="$b"
      fi
    fi
  done <<< "$BRANCHES"
  if [[ -z "$MATCH" ]]; then
    echo "Error: cannot parse branch from URL" >&2
    exit 1
  fi
  # Reject URLs that point to the branch root (no subdirectory specified)
  if [[ "$remaining_path" == "$MATCH" ]]; then
    echo "Error: no subdirectory specified in URL (use 'git clone' for full repo)" >&2
    exit 1
  fi
  BRANCH="$MATCH"
  SPARSE_PATH="${remaining_path#"$BRANCH"/}"
else
  SPARSE_PATH="$INPUT_PATH"
fi
# ---- normalise the repository-relative path ----
# SPARSE_PATH is later used as a path relative to the clone directory, so it
# must stay relative: drop any leading "/" or "./" (an absolute path would
# refer to the local filesystem instead of the clone).
while [[ "$SPARSE_PATH" == /* || "$SPARSE_PATH" == ./* ]]; do
  SPARSE_PATH="${SPARSE_PATH#/}"
  SPARSE_PATH="${SPARSE_PATH#./}"
done
# Drop trailing slashes: BSD cp copies the contents of a source directory
# written with a trailing "/" rather than the directory itself.
while [[ "$SPARSE_PATH" == */ ]]; do
  SPARSE_PATH="${SPARSE_PATH%/}"
done
# Refuse ".." components, which could point outside the clone.
case "/${SPARSE_PATH}/" in
  */../*)
    echo "Error: sparse path must not contain '..': $SPARSE_PATH" >&2
    exit 1
    ;;
esac
# Guard against an empty sparse path (e.g. INPUT_PATH was blank)
[[ -z "$SPARSE_PATH" ]] && { echo "Error: empty sparse path" >&2; exit 1; }
# ---- validate target ----
# The copy keeps the repository-relative layout below TARGET_DIR:
#   DEST_PARENT = <target>/<dirname of SPARSE_PATH>
#   DEST_DIR    = DEST_PARENT/<basename of SPARSE_PATH>
# An absolute TARGET_DIR is used as given; a relative one is anchored at the
# directory the script was started from (ORIG_PWD).
rel_parent="$(dirname "$SPARSE_PATH")"
case "$TARGET_DIR" in
  /*)
    DEST_PARENT="${TARGET_DIR}/${rel_parent}"
    ;;
  *)
    DEST_PARENT="${ORIG_PWD}/${TARGET_DIR}/${rel_parent}"
    ;;
esac
DEST_DIR="${DEST_PARENT}/$(basename "$SPARSE_PATH")"
# Never write into something that already exists.
[[ ! -e "$DEST_DIR" ]] || {
  echo "Error: destination already exists: $DEST_DIR" >&2
  exit 1
}
# ---- clone, sparse checkout and copy ----
# Work in a temporary clone that the EXIT trap removes on every exit path.
TMP_DIR="$(mktemp -d)"
trap 'rm -rf "$TMP_DIR"' EXIT
echo "==> Cloning repository (partial)..."
# Clone without checking anything out; the sparse-checkout settings below
# decide which directory gets populated by the checkout.
git clone --filter=blob:none --no-checkout "$REPO_URL" "$TMP_DIR"
cd "$TMP_DIR"
git sparse-checkout init --cone
git sparse-checkout set "$SPARSE_PATH"
# Check out the branch parsed from the URL, or whatever HEAD the clone has.
if [[ -n "$BRANCH" ]]; then
  git checkout "$BRANCH"
else
  git checkout
fi
# Fail with a clear message (and before creating any destination directory)
# when the requested path is not present in the checkout.
if [[ ! -e "$SPARSE_PATH" ]]; then
  echo "Error: path not found in repository: $SPARSE_PATH" >&2
  exit 1
fi
mkdir -p "$DEST_PARENT"
echo "==> Copying directory..."
# `--` keeps a path that starts with "-" from being parsed as a cp option.
cp -R -- "$SPARSE_PATH" "${DEST_PARENT}/"
# ---- LICENSE handling ----
# Copy top-level LICENSE*/COPYING* files of the repository next to the copied
# directory (into DEST_PARENT).
echo "==> Fetching LICENSE files..."
# Use process substitution to avoid a subshell (preserves variable scope).
# Filter to blobs only so directories (e.g. LICENSE.d/) are not matched.
while IFS= read -r file; do
  case "$file" in
    [Ll][Ii][Cc][Ee][Nn][Ss][Ee]*|[Cc][Oo][Pp][Yy][Ii][Nn][Gg]*)
      license_dest="${DEST_PARENT}/${file}"
      # Do not clobber a file the user already has there (for example their
      # own LICENSE when copying into an existing project directory).
      if [[ -e "$license_dest" ]]; then
        echo "Warning: ${license_dest} already exists; not overwritten" >&2
        continue
      fi
      echo " -> ${file}"
      git show "HEAD:$file" > "$license_dest"
      ;;
  esac
  # git ls-tree format: "<mode> <type> <hash>\t<filename>"
  # Split on TAB so filenames containing spaces are preserved correctly.
done < <(git ls-tree HEAD | awk -F'\t' '{split($1,a," "); if(a[2]=="blob") print $2}')
echo "==> Done."
echo "Output: ${DEST_DIR}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment