Skip to content

Instantly share code, notes, and snippets.

@allain
Created December 18, 2025 15:13
Show Gist options
  • Select an option

  • Save allain/b2b1f05754814dae8c02fe8f890a2188 to your computer and use it in GitHub Desktop.

Select an option

Save allain/b2b1f05754814dae8c02fe8f890a2188 to your computer and use it in GitHub Desktop.
Clone all accessible gitlab repos
#!/bin/bash
#
# Clone (or update) every GitLab project the token can access, mirroring the
# group/subgroup hierarchy on disk.
#
# Required environment variables:
#   GITLAB_URL         Base URL of the GitLab instance
#   GITLAB_TOKEN       Access token for the GitLab API
#
# Optional environment variables (default in parentheses):
#   CLONE_DIR          Directory where repos will be cloned (.)
#   PAGE_SIZE          Projects requested per API page, 1-100 (100)
#   CLONE_METHOD       "ssh_url_to_repo" or "http_url_to_repo" (ssh_url_to_repo)
#   MAX_PARALLEL_JOBS  Number of parallel clone/update operations (10)

# --- Configuration ---
# GITLAB_URL and GITLAB_TOKEN must be set as environment variables
for REQUIRED_VAR in GITLAB_URL GITLAB_TOKEN; do
  if [ -z "${!REQUIRED_VAR}" ]; then
    echo "ERROR: $REQUIRED_VAR environment variable is not set" >&2
    exit 1
  fi
done
unset REQUIRED_VAR

# Drop one trailing slash so "$GITLAB_URL/api/..." never contains "//".
GITLAB_URL="${GITLAB_URL%/}"

# Check for required dependencies
MISSING_DEPS=()
for DEP in curl jq git; do
  if ! command -v "$DEP" > /dev/null 2>&1; then
    MISSING_DEPS+=("$DEP")
  fi
done
unset DEP
if [ "${#MISSING_DEPS[@]}" -gt 0 ]; then
  echo "ERROR: Missing required dependencies: ${MISSING_DEPS[*]}" >&2
  echo "Please install the missing dependencies and try again." >&2
  exit 1
fi

# Each setting keeps its previous hard-coded value unless overridden from the
# environment.
CLONE_DIR="${CLONE_DIR:-.}"
PAGE_SIZE="${PAGE_SIZE:-100}"
CLONE_METHOD="${CLONE_METHOD:-ssh_url_to_repo}"
MAX_PARALLEL_JOBS="${MAX_PARALLEL_JOBS:-10}"

# The clone method is used as a JSON key of the project objects, so only the
# two documented values are accepted.
case "$CLONE_METHOD" in
  ssh_url_to_repo | http_url_to_repo) ;;
  *)
    echo "ERROR: CLONE_METHOD must be \"ssh_url_to_repo\" or \"http_url_to_repo\" (got \"$CLONE_METHOD\")" >&2
    exit 1
    ;;
esac

# A page size above 100 would make a full page look like a short (last) page,
# ending pagination early; 100 is the maximum the script is designed for.
case "$PAGE_SIZE" in
  '' | *[!0-9]*)
    echo "ERROR: PAGE_SIZE must be an integer between 1 and 100 (got \"$PAGE_SIZE\")" >&2
    exit 1
    ;;
esac
if [ "$PAGE_SIZE" -lt 1 ] || [ "$PAGE_SIZE" -gt 100 ]; then
  echo "ERROR: PAGE_SIZE must be an integer between 1 and 100 (got \"$PAGE_SIZE\")" >&2
  exit 1
fi

# A job limit of 0 would make the job-slot wait loop spin forever.
case "$MAX_PARALLEL_JOBS" in
  '' | *[!0-9]*)
    echo "ERROR: MAX_PARALLEL_JOBS must be a positive integer (got \"$MAX_PARALLEL_JOBS\")" >&2
    exit 1
    ;;
esac
if [ "$MAX_PARALLEL_JOBS" -lt 1 ]; then
  echo "ERROR: MAX_PARALLEL_JOBS must be a positive integer (got \"$MAX_PARALLEL_JOBS\")" >&2
  exit 1
fi
# ---------------------

echo "Starting GitLab repository cloning with group/namespace structure..."
echo "Clones will be placed under: $CLONE_DIR"
echo "Parallel jobs: $MAX_PARALLEL_JOBS"
echo "--------------------------------------------------"

if ! mkdir -p -- "$CLONE_DIR"; then
  echo "ERROR: Could not create clone directory: $CLONE_DIR" >&2
  exit 1
fi
# All later paths are relative to the clone directory.
cd -- "$CLONE_DIR" || exit 1
#######################################
# Clone a repository, or update it with `git pull` when a clean clone
# already exists at the target path.
# Arguments:
#   $1 - repository URL handed to `git clone`
#   $2 - project path including its namespace (e.g. group/subgroup/project);
#        used as the clone location relative to the current directory
# Outputs:
#   Progress messages on stdout, error messages on stderr.
# Returns:
#   0 when the repository was skipped; otherwise the exit status of
#   `git clone` / `git pull`; 1 when an argument is empty or the working
#   tree status cannot be read.
#######################################
clone_or_update_repo() {
  local repo_url=$1
  local namespace_path=$2
  # Declared local so nothing leaks into the caller when the function is run
  # in the foreground.
  local dir_path repo_name clone_path work_tree_status

  if [ -z "$repo_url" ] || [ -z "$namespace_path" ]; then
    echo "ERROR: clone_or_update_repo requires a repository URL and a namespace path" >&2
    return 1
  fi

  # Everything before the last '/' is the directory to create ...
  dir_path=$(dirname -- "$namespace_path")
  # ... and the part after it is the repository name.
  repo_name=$(basename -- "$namespace_path")

  if [ "$dir_path" = "." ]; then
    # Project without a group component: clone straight into the current dir.
    clone_path=$repo_name
  else
    clone_path="$dir_path/$repo_name"
  fi

  # Fresh clone: the target directory does not exist yet.
  if [ ! -d "$clone_path" ]; then
    echo "PREPARING: Directory $dir_path"
    # Create the nested directory structure (e.g., group/subgroup)
    mkdir -p -- "$dir_path" || return
    echo "CLONING: $repo_url into $clone_path"
    # --depth 1 gives a faster, shallow clone (remove it if full history is
    # needed). "--" stops a URL that starts with "-" being parsed as an option.
    git clone --depth 1 -- "$repo_url" "$clone_path"
    return
  fi

  if [ ! -d "$clone_path/.git" ]; then
    echo "SKIPPING: $clone_path exists but is not a git repository"
    return 0
  fi

  # A failing `git status` prints nothing on stdout, which would otherwise be
  # indistinguishable from a clean working tree.
  if ! work_tree_status=$(git -C "$clone_path" status --porcelain); then
    echo "ERROR: Could not read git status of $clone_path" >&2
    return 1
  fi

  if [ -n "$work_tree_status" ]; then
    echo "SKIPPING: $clone_path (dirty repo - has uncommitted changes)"
    return 0
  fi

  echo "UPDATING: $clone_path (clean repo)"
  git -C "$clone_path" pull
}
# Block until the number of running background jobs is below
# MAX_PARALLEL_JOBS, polling every 0.1 seconds.
wait_for_job_slot() {
  local running_jobs
  while true; do
    running_jobs=$(jobs -r | wc -l)
    # Negated -ge (rather than -lt) so a failed comparison also ends the wait.
    if ! [ "$running_jobs" -ge "$MAX_PARALLEL_JOBS" ]; then
      return 0
    fi
    sleep 0.1
  done
}
# Walk the paginated project list and clone/update every project, running up
# to MAX_PARALLEL_JOBS operations at a time.
PAGE=1
FAILED_JOBS=0
while :; do
  echo "Fetching page $PAGE of projects..."
  # Note: simple=false is better for getting all path info
  API_ENDPOINT="/api/v4/projects?per_page=$PAGE_SIZE&page=$PAGE&min_access_level=20&archived=false&simple=false"

  # --fail turns HTTP errors (e.g. a rejected token) into a non-zero exit
  # status, so an error is not mistaken for an empty project list.
  if ! RESPONSE=$(curl --silent --show-error --fail \
    --header "PRIVATE-TOKEN: $GITLAB_TOKEN" "$GITLAB_URL$API_ENDPOINT"); then
    echo "ERROR: Failed to fetch page $PAGE of projects from $GITLAB_URL" >&2
    exit 1
  fi

  # Use jq to print one line per project containing the URL and the namespace path.
  # The output format is: "git@gitlab.example.com:group/project.git,group/project"
  # jq exits non-zero when the response is not a JSON array of objects.
  if ! PROJECT_DATA=$(printf '%s\n' "$RESPONSE" | jq -r \
    --arg method "$CLONE_METHOD" \
    '.[] | .[$method] + "," + .path_with_namespace'); then
    echo "ERROR: Unexpected API response on page $PAGE (expected a JSON array of projects)" >&2
    exit 1
  fi

  # Count the projects received in this page
  PROJECT_COUNT=$(printf '%s\n' "$PROJECT_DATA" | grep -c ',')
  if [ "$PROJECT_COUNT" -eq 0 ]; then
    echo "Finished. No more projects found."
    break
  fi

  # Loop through the extracted data (URL,Namespace) and clone in parallel
  JOB_PIDS=()
  while IFS=',' read -r REPO_URL NAMESPACE_PATH; do
    # A null URL or path in the API response yields an empty field; there is
    # nothing to clone for such an entry.
    if [ -z "$REPO_URL" ] || [ -z "$NAMESPACE_PATH" ]; then
      echo "WARNING: Skipping entry with missing URL or path: '$REPO_URL,$NAMESPACE_PATH'" >&2
      continue
    fi
    # Wait for a job slot to be available
    wait_for_job_slot
    # Launch clone/update in background and remember its PID
    clone_or_update_repo "$REPO_URL" "$NAMESPACE_PATH" &
    JOB_PIDS+=("$!")
  done < <(printf '%s\n' "$PROJECT_DATA")

  # Wait for all background jobs from this page to complete before moving to
  # the next page, counting the ones that exited non-zero.
  for JOB_PID in "${JOB_PIDS[@]}"; do
    if ! wait "$JOB_PID"; then
      FAILED_JOBS=$((FAILED_JOBS + 1))
    fi
  done

  # Check for pagination end: a short page is the last page
  if [ "$PROJECT_COUNT" -lt "$PAGE_SIZE" ]; then
    echo "Reached the end of the project list."
    break
  fi
  PAGE=$((PAGE + 1))
done

# Final wait to ensure all background jobs are complete
wait
echo "--------------------------------------------------"
echo "All accessible repositories have been processed and organized by group."
if [ "$FAILED_JOBS" -gt 0 ]; then
  echo "WARNING: $FAILED_JOBS clone/update operation(s) failed; see messages above." >&2
  exit 1
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment