Created
December 18, 2025 15:13
-
-
Save allain/b2b1f05754814dae8c02fe8f890a2188 to your computer and use it in GitHub Desktop.
Clone all accessible gitlab repos
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Clone (or update) every GitLab project the token can access, mirroring the
# group/subgroup hierarchy on disk.
#
# Required environment:
#   GITLAB_URL         Base URL of the GitLab instance
#   GITLAB_TOKEN       Access token, sent as the PRIVATE-TOKEN header
#
# Optional environment (default in parentheses):
#   CLONE_DIR          Directory the clones are placed under (.)
#   PAGE_SIZE          Projects requested per API page (100)
#   CLONE_METHOD       "ssh_url_to_repo" or "http_url_to_repo" (ssh_url_to_repo)
#   MAX_PARALLEL_JOBS  Number of parallel clone/update operations (10)

# --- Configuration ---
# GITLAB_URL and GITLAB_TOKEN must be set as environment variables
if [ -z "$GITLAB_URL" ]; then
  echo "ERROR: GITLAB_URL environment variable is not set" >&2
  exit 1
fi
if [ -z "$GITLAB_TOKEN" ]; then
  echo "ERROR: GITLAB_TOKEN environment variable is not set" >&2
  exit 1
fi
# The API path appended later starts with '/', so drop one trailing slash to
# avoid requesting "//api/v4/...".
GITLAB_URL="${GITLAB_URL%/}"

# Check for required dependencies
MISSING_DEPS=()
for dep in curl jq git; do
  if ! command -v "$dep" &> /dev/null; then
    MISSING_DEPS+=("$dep")
  fi
done
if [ ${#MISSING_DEPS[@]} -gt 0 ]; then
  echo "ERROR: Missing required dependencies: ${MISSING_DEPS[*]}" >&2
  echo "Please install the missing dependencies and try again." >&2
  exit 1
fi

# Tunables: each keeps its previous hard-coded value unless the caller
# overrides it through the environment.
CLONE_DIR="${CLONE_DIR:-.}"                        # Directory where repos will be cloned
PAGE_SIZE="${PAGE_SIZE:-100}"                      # Max items per page. 100 is the recommended max.
CLONE_METHOD="${CLONE_METHOD:-ssh_url_to_repo}"    # Options: "ssh_url_to_repo" or "http_url_to_repo"
MAX_PARALLEL_JOBS="${MAX_PARALLEL_JOBS:-10}"       # Number of parallel clone/update operations

# CLONE_METHOD is used as a JSON key; any other value would silently match
# no project at all.
case "$CLONE_METHOD" in
  ssh_url_to_repo | http_url_to_repo) ;;
  *)
    echo "ERROR: CLONE_METHOD must be \"ssh_url_to_repo\" or \"http_url_to_repo\" (got '$CLONE_METHOD')" >&2
    exit 1
    ;;
esac

# Both values feed integer comparisons later on; reject anything that is not
# a positive integer up front.
for setting in PAGE_SIZE MAX_PARALLEL_JOBS; do
  value=${!setting}
  case "$value" in
    '' | *[!0-9]*)
      echo "ERROR: $setting must be a positive integer (got '$value')" >&2
      exit 1
      ;;
  esac
  if [ "$value" -lt 1 ]; then
    echo "ERROR: $setting must be a positive integer (got '$value')" >&2
    exit 1
  fi
done
# ---------------------

echo "Starting GitLab repository cloning with group/namespace structure..."
echo "Clones will be placed under: $CLONE_DIR"
echo "Parallel jobs: $MAX_PARALLEL_JOBS"
echo "--------------------------------------------------"

if ! mkdir -p -- "$CLONE_DIR"; then
  echo "ERROR: Could not create directory: $CLONE_DIR" >&2
  exit 1
fi
if ! cd -- "$CLONE_DIR"; then
  echo "ERROR: Could not change into directory: $CLONE_DIR" >&2
  exit 1
fi
#######################################
# Clone a repository into a path that mirrors its namespace, or update an
# existing clean checkout with `git pull`.
# Arguments:
#   $1 - clone URL of the repository
#   $2 - project path including namespace (e.g. group/subgroup/project);
#        used as the clone path relative to the current directory
# Outputs:
#   Progress lines on stdout, error lines on stderr.
# Returns:
#   0 when the repository was cloned, updated or deliberately skipped;
#   non-zero when an argument is missing or a git/mkdir step fails.
#######################################
clone_or_update_repo() {
  local REPO_URL=${1-}
  local NAMESPACE_PATH=${2-}
  local DIR_PATH REPO_NAME CLONE_PATH PORCELAIN

  # An empty URL or path would otherwise create stray directories and hand
  # git an empty argument.
  if [ -z "$REPO_URL" ] || [ -z "$NAMESPACE_PATH" ]; then
    echo "ERROR: missing repository URL or namespace path (url='$REPO_URL', path='$NAMESPACE_PATH')" >&2
    return 1
  fi

  # Remove the repository name (the part after the last '/') to get the directory path
  DIR_PATH=$(dirname "$NAMESPACE_PATH")
  # Get the final repo name (the part after the last '/')
  REPO_NAME=$(basename "$NAMESPACE_PATH")

  if [ "$DIR_PATH" == "." ]; then
    # Handle projects not in a group (under the user namespace)
    CLONE_PATH="$REPO_NAME"
  else
    CLONE_PATH="$DIR_PATH/$REPO_NAME"
  fi

  # Target directory already exists: update it only if it is a clean git repo.
  if [ -d "$CLONE_PATH" ]; then
    if [ ! -d "$CLONE_PATH/.git" ]; then
      echo "SKIPPING: $CLONE_PATH exists but is not a git repository"
      return 0
    fi

    # Capture the status separately so a failing `git status` is not mistaken
    # for "no uncommitted changes".
    if ! PORCELAIN=$(git -C "$CLONE_PATH" status --porcelain); then
      echo "ERROR: could not determine status of $CLONE_PATH" >&2
      return 1
    fi

    if [ -n "$PORCELAIN" ]; then
      echo "SKIPPING: $CLONE_PATH (dirty repo - has uncommitted changes)"
      return 0
    fi

    echo "UPDATING: $CLONE_PATH (clean repo)"
    if ! git -C "$CLONE_PATH" pull; then
      echo "ERROR: failed to update $CLONE_PATH" >&2
      return 1
    fi
    return 0
  fi

  echo "PREPARING: Directory $DIR_PATH"
  # Create the nested directory structure (e.g., group/subgroup)
  if ! mkdir -p -- "$DIR_PATH"; then
    echo "ERROR: could not create directory $DIR_PATH" >&2
    return 1
  fi

  echo "CLONING: $REPO_URL into $CLONE_PATH"
  # We use --depth 1 for faster cloning (can be removed if full history is needed).
  # The `--` keeps a URL that starts with '-' from being parsed as an option.
  if ! git clone --depth 1 -- "$REPO_URL" "$CLONE_PATH"; then
    echo "ERROR: failed to clone $REPO_URL into $CLONE_PATH" >&2
    return 1
  fi
}
#######################################
# Block until fewer than MAX_PARALLEL_JOBS background jobs of this shell are
# running, polling every 0.1 seconds.
# Globals:
#   MAX_PARALLEL_JOBS (read) - upper bound on concurrently running jobs
# Returns:
#   0 once a slot is free.
#######################################
wait_for_job_slot() {
  local limit=${MAX_PARALLEL_JOBS:-1}

  # A limit of 0 can never be satisfied (the loop below would spin forever),
  # and a non-numeric limit makes the integer test error out, which disables
  # throttling altogether. Fall back to one job at a time in both cases.
  case "$limit" in
    '' | *[!0-9]*) limit=1 ;;
  esac
  if [ "$limit" -lt 1 ]; then
    limit=1
  fi

  while [ "$(jobs -r | wc -l)" -ge "$limit" ]; do
    sleep 0.1
  done
}
# Walk the paginated project list, launching one background clone/update per
# project (throttled by wait_for_job_slot) and counting the jobs that fail.
PAGE=1
FAILED_JOBS=0
while : ; do
  echo "Fetching page $PAGE of projects..."
  # Note: simple=false is better for getting all path info
  API_ENDPOINT="/api/v4/projects?per_page=$PAGE_SIZE&page=$PAGE&min_access_level=20&archived=false&simple=false"

  # --fail turns HTTP errors (bad token, wrong URL, ...) into a non-zero exit
  # instead of an error body that would later look like an empty page.
  if ! RESPONSE=$(curl --silent --show-error --fail \
    --header "PRIVATE-TOKEN: $GITLAB_TOKEN" \
    "${GITLAB_URL%/}$API_ENDPOINT"); then
    echo "ERROR: Request for page $PAGE of projects failed" >&2
    exit 1
  fi

  # Count the projects received in this page; anything other than a JSON
  # array is treated as an error rather than as "no more projects".
  if ! PROJECT_COUNT=$(jq 'if type == "array" then length else error("expected a JSON array") end' \
    <<<"$RESPONSE" 2>/dev/null); then
    echo "ERROR: Unexpected response for page $PAGE (not a JSON array of projects)" >&2
    exit 1
  fi

  if [ "$PROJECT_COUNT" -eq 0 ]; then
    echo "Finished. No more projects found."
    break
  fi

  # Use jq to emit one line per project: clone URL, a tab, then the namespace path,
  # e.g. "git@gitlab.example.com:group/project.git<TAB>group/project".
  # Projects that lack the selected URL field are dropped here.
  PROJECT_DATA=$(jq -r \
    --arg method "$CLONE_METHOD" \
    '.[]
     | select(.[$method] != null and .path_with_namespace != null)
     | "\(.[$method])\t\(.path_with_namespace)"' <<<"$RESPONSE")

  # Loop through the extracted data (URL, namespace) and clone in parallel
  PAGE_PIDS=()
  while IFS=$'\t' read -r REPO_URL NAMESPACE_PATH; do
    # Skip the blank line produced when no project on the page was usable.
    [ -n "$REPO_URL" ] || continue
    # Wait for a job slot to be available
    wait_for_job_slot
    # Launch clone/update in background and remember its PID for the status check
    clone_or_update_repo "$REPO_URL" "$NAMESPACE_PATH" &
    PAGE_PIDS+=("$!")
  done < <(printf '%s\n' "$PROJECT_DATA")

  # Wait for all background jobs from this page to complete before moving to
  # the next page, counting the ones that exited non-zero.
  for PID in "${PAGE_PIDS[@]}"; do
    wait "$PID" || FAILED_JOBS=$((FAILED_JOBS + 1))
  done

  # Check for pagination end. GitLab's documented per_page maximum is 100, so
  # with a larger PAGE_SIZE every page looks "short"; in that case keep
  # paging until an empty page is returned instead.
  if [ "$PAGE_SIZE" -le 100 ] && [ "$PROJECT_COUNT" -lt "$PAGE_SIZE" ]; then
    echo "Reached the end of the project list."
    break
  fi
  PAGE=$((PAGE + 1))
done

# Final wait to ensure all background jobs are complete
wait
echo "--------------------------------------------------"
if [ "$FAILED_JOBS" -gt 0 ]; then
  echo "WARNING: $FAILED_JOBS repository operation(s) failed; see the messages above." >&2
  exit 1
fi
echo "All accessible repositories have been processed and organized by group."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment