Skip to content

Instantly share code, notes, and snippets.

@secemp9
Created February 12, 2025 05:31
Show Gist options
  • Save secemp9/9cff25e0651645eac0edcdaa9e022992 to your computer and use it in GitHub Desktop.
Save secemp9/9cff25e0651645eac0edcdaa9e022992 to your computer and use it in GitHub Desktop.
Clone large repository in depth
import os
import sys
import logging
from git import Repo, GitCommandError
def extract_repo_name(repo_url):
    """
    Extracts the repository name from the given Git URL.

    Supports URLs ending with '.git' and those without it, URLs with
    trailing slashes, paths pointing at a '.git' directory, and scp-like
    remotes such as 'git@host:user/repo.git' or 'host:repo.git'.

    Only a literal '.git' suffix is removed, so dotted repository names
    (e.g. 'socket.io') are returned intact.

    Args:
        repo_url: Remote URL or path of the repository.

    Returns:
        The last path component of the URL without a trailing '.git'.
        May be an empty string if the URL has no usable path component.
    """
    git_suffix = ".git"

    # A trailing slash would otherwise make basename() return ''.
    trimmed = repo_url.strip().rstrip("/")

    # "path/to/repo/.git" names the repository "repo", not ".git".
    if trimmed.endswith("/" + git_suffix):
        trimmed = trimmed[:-len("/" + git_suffix)].rstrip("/")

    repo_name = os.path.basename(trimmed)

    # scp-like remotes without a slash ("host:repo.git") keep the host in
    # the basename; the repository is whatever follows the last colon.
    repo_name = repo_name.rsplit(":", 1)[-1]

    # Strip only the '.git' suffix; splitext() would also eat legitimate
    # dotted names such as 'socket.io'.
    if repo_name.endswith(git_suffix) and len(repo_name) > len(git_suffix):
        repo_name = repo_name[:-len(git_suffix)]

    return repo_name
def is_shallow(repo_path):
    """
    Reports whether the working copy at ``repo_path`` is a shallow clone.

    The check looks for a ``shallow`` entry inside the ``.git`` directory
    directly under ``repo_path``; its presence is treated as "shallow".

    Returns:
        True if ``<repo_path>/.git/shallow`` exists, otherwise False.
    """
    git_dir = os.path.join(repo_path, '.git')
    marker = os.path.join(git_dir, 'shallow')
    if os.path.exists(marker):
        return True
    return False
def main():
    """
    Clones a repository shallowly, then deepens it until it is complete.

    Reads the repository URL from ``sys.argv[1]``, clones it with depth 1
    into a directory named after the repository, and keeps doubling the
    fetch depth while ``is_shallow`` reports the clone as shallow.

    Exits with status 1 on wrong usage, when no directory name can be
    derived from the URL, or when a git command fails.
    """
    if len(sys.argv) != 2:
        print("Usage: python script.py <repository_url>")
        sys.exit(1)

    # Imported here so the tracing flag can be set on the command wrapper.
    from git import Git

    # Enable GitPython command tracing. GitPython reads GIT_PYTHON_TRACE
    # once, when the package is imported, so setting the environment
    # variable this late has no effect on its own; the class attribute
    # must be set as well.
    os.environ['GIT_PYTHON_TRACE'] = 'full'
    Git.GIT_PYTHON_TRACE = 'full'
    logging.basicConfig(level=logging.INFO)

    repo_url = sys.argv[1]
    clone_dir = extract_repo_name(repo_url)
    if not clone_dir:
        # An empty target would make git clone into an unintended location.
        print(f"Could not derive a directory name from {repo_url!r}",
              file=sys.stderr)
        sys.exit(1)

    # Initial depth
    depth = 1
    try:
        # Clone the repository with the initial depth
        print(f"Cloning {repo_url} into {clone_dir} with depth {depth}...")
        repo = Repo.clone_from(repo_url, clone_dir, depth=depth)

        # Incrementally fetch more history; doubling keeps the number of
        # round trips logarithmic in the length of the history.
        while is_shallow(clone_dir):
            depth *= 2
            print(f"Increasing depth to {depth}...")
            repo.git.fetch(depth=depth)
    except GitCommandError as e:
        print(f"An error occurred: {e}")
        # Signal the failure to the calling shell instead of exiting 0.
        sys.exit(1)

    print("Repository is fully cloned.")


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment