allain · December 18, 2025 15:13
diff --git a/clone-all-gitlab.sh b/clone-all-gitlab.sh
 #!/bin/bash

 # --- Configuration ---
 # Both variables are required and must be provided through the environment:
 #   GITLAB_URL   - prefix of every API request (the endpoint path is appended to it)
 #   GITLAB_TOKEN - value sent in the PRIVATE-TOKEN request header
 # Diagnostics go to stderr so they never end up in captured stdout.
 # The ${VAR:-} form keeps the checks working even if the script is run with `set -u`.
 if [ -z "${GITLAB_URL:-}" ]; then
     echo "ERROR: GITLAB_URL environment variable is not set" >&2
     exit 1
 fi

 if [ -z "${GITLAB_TOKEN:-}" ]; then
     echo "ERROR: GITLAB_TOKEN environment variable is not set" >&2
     exit 1
 fi

 # Verify that every external tool this script invokes is available on PATH
 # before doing any work. All missing tools are collected first so they can be
 # reported together in a single message (on stderr).
 MISSING_DEPS=()

 for REQUIRED_TOOL in curl jq git; do
     if ! command -v "$REQUIRED_TOOL" &> /dev/null; then
         MISSING_DEPS+=("$REQUIRED_TOOL")
     fi
 done
 unset REQUIRED_TOOL

 if [ ${#MISSING_DEPS[@]} -gt 0 ]; then
     echo "ERROR: Missing required dependencies: ${MISSING_DEPS[*]}" >&2
     echo "Please install the missing dependencies and try again." >&2
     exit 1
 fi

 # Tunables: edit these values to change where and how repositories are fetched.
 CLONE_DIR="."                   # Directory the repositories are cloned into
 PAGE_SIZE=100                   # Max items per page. 100 is the recommended max.
 CLONE_METHOD="ssh_url_to_repo"  # Options: "ssh_url_to_repo" or "http_url_to_repo"
 MAX_PARALLEL_JOBS=10            # Number of parallel clone/update operations
 # ---------------------

 # Startup banner: one argument per output line.
 printf '%s\n' \
     "Starting GitLab repository cloning with group/namespace structure..." \
     "Clones will be placed under: $CLONE_DIR" \
     "Parallel jobs: $MAX_PARALLEL_JOBS" \
     "--------------------------------------------------"

 # Create the target directory if needed and work from inside it; every
 # relative path used later is resolved against this directory.
 mkdir -p "$CLONE_DIR"
 cd "$CLONE_DIR" || exit 1

 #######################################
 # Clone a project into a directory tree that mirrors its namespace, or
 # refresh an already existing, clean checkout with `git pull`.
 # Arguments:
 #   $1 - clone URL handed to `git clone`
 #   $2 - project path including its namespace (e.g. group/subgroup/project);
 #        the checkout path is rebuilt from its dirname and basename and is
 #        relative to the current working directory
 # Outputs:
 #   Progress lines (PREPARING/CLONING/UPDATING/SKIPPING) on stdout,
 #   argument errors on stderr.
 # Returns:
 #   1 on missing arguments or if the parent directory cannot be created;
 #   otherwise the status of the last command run (git for clone/update,
 #   echo for the skip paths).
 #######################################
 clone_or_update_repo() {
     local REPO_URL=$1
     local NAMESPACE_PATH=$2
     # Declared local so repeated calls never leak or share state through globals.
     local DIR_PATH REPO_NAME CLONE_PATH

     # An empty URL or path would make git clone into an unintended location.
     if [ -z "$REPO_URL" ] || [ -z "$NAMESPACE_PATH" ]; then
         echo "ERROR: clone_or_update_repo requires a clone URL and a namespace path" >&2
         return 1
     fi

     # Everything before the last '/' is the group/subgroup directory ...
     DIR_PATH=$(dirname "$NAMESPACE_PATH")
     # ... and the part after it is the repository name.
     REPO_NAME=$(basename "$NAMESPACE_PATH")

     if [ "$DIR_PATH" == "." ]; then
         # Path without any '/': the checkout sits directly in the working directory.
         CLONE_PATH="$REPO_NAME"
     else
         CLONE_PATH="$DIR_PATH/$REPO_NAME"
     fi

     if [ -d "$CLONE_PATH" ]; then
         if [ -d "$CLONE_PATH/.git" ]; then
             # Only pull when `git status --porcelain` prints nothing, i.e. there
             # are no uncommitted or untracked changes that a pull could disturb.
             if [ -z "$(cd "$CLONE_PATH" && git status --porcelain)" ]; then
                 echo "UPDATING: $CLONE_PATH (clean repo)"
                 (cd "$CLONE_PATH" && git pull)
             else
                 echo "SKIPPING: $CLONE_PATH (dirty repo - has uncommitted changes)"
             fi
         else
             echo "SKIPPING: $CLONE_PATH exists but is not a git repository"
         fi
     else
         echo "PREPARING: Directory $DIR_PATH"
         # Create the nested directory structure (e.g., group/subgroup)
         mkdir -p -- "$DIR_PATH" || return 1

         echo "CLONING: $REPO_URL into $CLONE_PATH"
         # --depth 1 keeps the clone fast (drop it if full history is needed).
         # `--` stops option parsing so a URL starting with '-' is never read as a flag.
         git clone --depth 1 -- "$REPO_URL" "$CLONE_PATH"
     fi
 }

 # Block the caller until fewer than MAX_PARALLEL_JOBS background jobs are
 # running. The job table is polled every 0.1 seconds.
 wait_for_job_slot() {
     local running_jobs
     while : ; do
         running_jobs=$(jobs -r | wc -l)
         # A slot is free as soon as the running count drops below the limit.
         [ "$running_jobs" -ge "$MAX_PARALLEL_JOBS" ] || break
         sleep 0.1
     done
 }

 # Walk the paginated project list and clone/update every project found.
 # The loop ends on the first empty or short page and aborts with a non-zero
 # status when the API call fails or the reply is not a JSON array of projects.
 PAGE=1
 while : ; do
     echo "Fetching page $PAGE of projects..."

     # Note: simple=false is better for getting all path info
     API_ENDPOINT="/api/v4/projects?per_page=$PAGE_SIZE&page=$PAGE&min_access_level=20&archived=false&simple=false"

     # --fail makes curl exit non-zero on HTTP errors (bad token, wrong URL, ...)
     # instead of passing an error document on to jq; --show-error keeps the
     # reason visible despite --silent.
     if ! RESPONSE=$(curl --silent --show-error --fail \
             --header "PRIVATE-TOKEN: $GITLAB_TOKEN" "$GITLAB_URL$API_ENDPOINT"); then
         echo "ERROR: Failed to fetch page $PAGE from $GITLAB_URL" >&2
         exit 1
     fi

     # Emit one line per project in the form "<clone url>,<path_with_namespace>",
     # e.g. "ssh://git@gitlab.example.com/group/project.git,group/project".
     if ! PROJECT_DATA=$(printf '%s\n' "$RESPONSE" | jq -r \
             --arg method "$CLONE_METHOD" \
             '.[] | .[$method] + "," + .path_with_namespace'); then
         echo "ERROR: Unexpected API response on page $PAGE (expected a JSON array of projects)" >&2
         exit 1
     fi

     # Count the projects received in this page (one comma-separated line each)
     PROJECT_COUNT=$(printf '%s\n' "$PROJECT_DATA" | grep -c ',')

     if [ "$PROJECT_COUNT" -eq 0 ]; then
         echo "Finished. No more projects found."
         break
     fi

     # Loop through the extracted data (URL,Namespace) and clone in parallel
     while IFS=',' read -r REPO_URL NAMESPACE_PATH; do
         # Ignore blank lines and records lacking either field
         if [ -z "$REPO_URL" ] || [ -z "$NAMESPACE_PATH" ]; then
             continue
         fi

         # Wait for a job slot to be available
         wait_for_job_slot

         # Launch clone/update in background
         clone_or_update_repo "$REPO_URL" "$NAMESPACE_PATH" &
     done <<< "$PROJECT_DATA"

     # Wait for all background jobs from this page to complete before moving to next page
     wait

     # A page shorter than PAGE_SIZE is the last one
     if [ "$PROJECT_COUNT" -lt "$PAGE_SIZE" ]; then
         echo "Reached the end of the project list."
         break
     fi

     PAGE=$((PAGE + 1))
 done

 # Final wait to ensure all background jobs are complete
 wait

 echo "--------------------------------------------------"
 echo "All accessible repositories have been processed and organized by group."
	#!/bin/bash

	# --- Configuration ---
	# Both variables are required and must be provided through the environment:
	#   GITLAB_URL   - prefix of every API request (the endpoint path is appended to it)
	#   GITLAB_TOKEN - value sent in the PRIVATE-TOKEN request header
	# Diagnostics go to stderr so they never end up in captured stdout.
	# The ${VAR:-} form keeps the checks working even if the script is run with `set -u`.
	if [ -z "${GITLAB_URL:-}" ]; then
	    echo "ERROR: GITLAB_URL environment variable is not set" >&2
	    exit 1
	fi

	if [ -z "${GITLAB_TOKEN:-}" ]; then
	    echo "ERROR: GITLAB_TOKEN environment variable is not set" >&2
	    exit 1
	fi

	# Verify that every external tool this script invokes is available on PATH
	# before doing any work. All missing tools are collected first so they can be
	# reported together in a single message (on stderr).
	MISSING_DEPS=()

	for REQUIRED_TOOL in curl jq git; do
	    if ! command -v "$REQUIRED_TOOL" &> /dev/null; then
	        MISSING_DEPS+=("$REQUIRED_TOOL")
	    fi
	done
	unset REQUIRED_TOOL

	if [ ${#MISSING_DEPS[@]} -gt 0 ]; then
	    echo "ERROR: Missing required dependencies: ${MISSING_DEPS[*]}" >&2
	    echo "Please install the missing dependencies and try again." >&2
	    exit 1
	fi

	# Tunables: edit these values to change where and how repositories are fetched.
	CLONE_DIR="." # OPTIONAL: Change the directory where repos will be cloned
	PAGE_SIZE=100 # Max items per page. 100 is the recommended max.
	CLONE_METHOD="ssh_url_to_repo" # Options: "ssh_url_to_repo" or "http_url_to_repo"
	MAX_PARALLEL_JOBS=10 # OPTIONAL: Number of parallel clone/update operations
	# ---------------------

	echo "Starting GitLab repository cloning with group/namespace structure..."
	echo "Clones will be placed under: $CLONE_DIR"
	echo "Parallel jobs: $MAX_PARALLEL_JOBS"
	echo "--------------------------------------------------"

	# Create the target directory if needed and work from inside it. The `||`
	# must be a real operator (not the escaped text `\|\|`), otherwise `cd`
	# receives extra arguments and a failure no longer stops the script.
	mkdir -p "$CLONE_DIR"
	cd "$CLONE_DIR" || exit 1

	#######################################
	# Clone a project into a directory tree that mirrors its namespace, or
	# refresh an already existing, clean checkout with `git pull`.
	# Arguments:
	#   $1 - clone URL handed to `git clone`
	#   $2 - project path including its namespace (e.g. group/subgroup/project);
	#        the checkout path is rebuilt from its dirname and basename and is
	#        relative to the current working directory
	# Outputs:
	#   Progress lines (PREPARING/CLONING/UPDATING/SKIPPING) on stdout,
	#   argument errors on stderr.
	# Returns:
	#   1 on missing arguments or if the parent directory cannot be created;
	#   otherwise the status of the last command run (git for clone/update,
	#   echo for the skip paths).
	#######################################
	clone_or_update_repo() {
	    local REPO_URL=$1
	    local NAMESPACE_PATH=$2
	    # Declared local so repeated calls never leak or share state through globals.
	    local DIR_PATH REPO_NAME CLONE_PATH

	    # An empty URL or path would make git clone into an unintended location.
	    if [ -z "$REPO_URL" ] || [ -z "$NAMESPACE_PATH" ]; then
	        echo "ERROR: clone_or_update_repo requires a clone URL and a namespace path" >&2
	        return 1
	    fi

	    # Everything before the last '/' is the group/subgroup directory ...
	    DIR_PATH=$(dirname "$NAMESPACE_PATH")
	    # ... and the part after it is the repository name.
	    REPO_NAME=$(basename "$NAMESPACE_PATH")

	    if [ "$DIR_PATH" == "." ]; then
	        # Path without any '/': the checkout sits directly in the working directory.
	        CLONE_PATH="$REPO_NAME"
	    else
	        CLONE_PATH="$DIR_PATH/$REPO_NAME"
	    fi

	    if [ -d "$CLONE_PATH" ]; then
	        if [ -d "$CLONE_PATH/.git" ]; then
	            # Only pull when `git status --porcelain` prints nothing, i.e. there
	            # are no uncommitted or untracked changes that a pull could disturb.
	            if [ -z "$(cd "$CLONE_PATH" && git status --porcelain)" ]; then
	                echo "UPDATING: $CLONE_PATH (clean repo)"
	                (cd "$CLONE_PATH" && git pull)
	            else
	                echo "SKIPPING: $CLONE_PATH (dirty repo - has uncommitted changes)"
	            fi
	        else
	            echo "SKIPPING: $CLONE_PATH exists but is not a git repository"
	        fi
	    else
	        echo "PREPARING: Directory $DIR_PATH"
	        # Create the nested directory structure (e.g., group/subgroup)
	        mkdir -p -- "$DIR_PATH" || return 1

	        echo "CLONING: $REPO_URL into $CLONE_PATH"
	        # --depth 1 keeps the clone fast (drop it if full history is needed).
	        # `--` stops option parsing so a URL starting with '-' is never read as a flag.
	        git clone --depth 1 -- "$REPO_URL" "$CLONE_PATH"
	    fi
	}

	# Block the caller until fewer than MAX_PARALLEL_JOBS background jobs are
	# running. The job table is polled every 0.1 seconds.
	# The pipe between `jobs -r` and `wc -l` must be a real `|`: written as the
	# escaped text `\|` it is passed to `jobs` as an argument, the count comes
	# back empty, the numeric test errors out and no throttling ever happens.
	wait_for_job_slot() {
	    while [ "$(jobs -r | wc -l)" -ge "$MAX_PARALLEL_JOBS" ]; do
	        sleep 0.1
	    done
	}

	# Walk the paginated project list and clone/update every project found.
	# The loop ends on the first empty or short page and aborts with a non-zero
	# status when the API call fails or the reply is not a JSON array of projects.
	PAGE=1
	while : ; do
	    echo "Fetching page $PAGE of projects..."

	    # Note: simple=false is better for getting all path info
	    API_ENDPOINT="/api/v4/projects?per_page=$PAGE_SIZE&page=$PAGE&min_access_level=20&archived=false&simple=false"

	    # --fail makes curl exit non-zero on HTTP errors (bad token, wrong URL, ...)
	    # instead of passing an error document on to jq; --show-error keeps the
	    # reason visible despite --silent.
	    if ! RESPONSE=$(curl --silent --show-error --fail \
	            --header "PRIVATE-TOKEN: $GITLAB_TOKEN" "$GITLAB_URL$API_ENDPOINT"); then
	        echo "ERROR: Failed to fetch page $PAGE from $GITLAB_URL" >&2
	        exit 1
	    fi

	    # Emit one line per project in the form "<clone url>,<path_with_namespace>",
	    # e.g. "ssh://git@gitlab.example.com/group/project.git,group/project".
	    # The pipes here and inside the jq filter are real `|` operators.
	    if ! PROJECT_DATA=$(printf '%s\n' "$RESPONSE" | jq -r \
	            --arg method "$CLONE_METHOD" \
	            '.[] | .[$method] + "," + .path_with_namespace'); then
	        echo "ERROR: Unexpected API response on page $PAGE (expected a JSON array of projects)" >&2
	        exit 1
	    fi

	    # Count the projects received in this page (one comma-separated line each)
	    PROJECT_COUNT=$(printf '%s\n' "$PROJECT_DATA" | grep -c ',')

	    if [ "$PROJECT_COUNT" -eq 0 ]; then
	        echo "Finished. No more projects found."
	        break
	    fi

	    # Loop through the extracted data (URL,Namespace) and clone in parallel
	    while IFS=',' read -r REPO_URL NAMESPACE_PATH; do
	        # Ignore blank lines and records lacking either field
	        if [ -z "$REPO_URL" ] || [ -z "$NAMESPACE_PATH" ]; then
	            continue
	        fi

	        # Wait for a job slot to be available
	        wait_for_job_slot

	        # Launch clone/update in background
	        clone_or_update_repo "$REPO_URL" "$NAMESPACE_PATH" &
	    done <<< "$PROJECT_DATA"

	    # Wait for all background jobs from this page to complete before moving to next page
	    wait

	    # A page shorter than PAGE_SIZE is the last one
	    if [ "$PROJECT_COUNT" -lt "$PAGE_SIZE" ]; then
	        echo "Reached the end of the project list."
	        break
	    fi

	    PAGE=$((PAGE + 1))
	done

	# Final wait to ensure all background jobs are complete
	wait

	echo "--------------------------------------------------"
	echo "All accessible repositories have been processed and organized by group."
No results found