Created
July 4, 2024 10:30
-
-
Save FeepingCreature/74e04a7167e5f63dea306e656dc5df18 to your computer and use it in GitHub Desktop.
git centralize tool, written by opus 3.5 sonnet
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Warning: Undertested! May corrupt your git repos! | |
import os | |
import sys | |
import subprocess | |
import re | |
import argparse | |
import shutil | |
from pathlib import Path | |
from urllib.parse import urlparse | |
# Directory holding the shared object cache: one bare repository per submodule
# URL is created below it.  The XDG Base Directory spec says an unset *or empty*
# XDG_DATA_HOME means "use ~/.local/share"; an empty value must not turn into
# Path('') (i.e. a directory relative to the current working directory).
DATA_DIR = Path(os.environ.get('XDG_DATA_HOME') or Path.home() / '.local' / 'share') / 'git-submodules'

# Global variables for dry run and verbose modes (overwritten by the CLI entry point)
DRY_RUN = False
VERBOSE = False

# Global cache for fetched URLs, so every URL is fetched at most once per run
FETCHED_URLS = set()
def run_command(cmd, dry_run=None, verbose=None):
    """Run *cmd* as a subprocess, honouring the dry-run and verbose switches.

    Args:
        cmd: Sequence of command arguments; each element is converted with
            ``str()`` so ``Path`` objects can be passed directly.
        dry_run: Overrides the module-level ``DRY_RUN`` flag when not ``None``.
        verbose: Overrides the module-level ``VERBOSE`` flag when not ``None``.

    Returns:
        The ``subprocess.CompletedProcess`` of the command (stdout/stderr
        captured as text), or ``None`` when the command was only announced
        because of dry-run mode.

    Raises:
        subprocess.CalledProcessError: The command exited with a non-zero status.
    """
    if dry_run is None:
        dry_run = DRY_RUN
    if verbose is None:
        verbose = VERBOSE

    argv = list(map(str, cmd))
    printable = ' '.join(argv)

    if verbose:
        print(f"Running command: {printable}")
    if dry_run:
        # Announce only; nothing is executed.
        print(f"[DRY RUN] Would run: {printable}")
        return None

    completed = subprocess.run(argv, check=True, capture_output=True, text=True)
    output = completed.stdout.strip()
    if verbose and output:
        print(f"Command output:\n{output}")
    return completed
def get_submodules(repo_path):
    """Read the submodules declared in the ``.gitmodules`` file of *repo_path*.

    Args:
        repo_path: ``Path`` of the work tree to inspect.

    Returns:
        A dict mapping each submodule name to ``{'path': ..., 'url': ...}``.
        Submodules missing either key are left out.  An empty dict is
        returned when there is no ``.gitmodules`` file.
    """
    if not (repo_path / '.gitmodules').exists():
        return {}

    # This is a read-only query, so it is executed even in dry-run mode.
    listing = run_command(
        ['git', '-C', repo_path, 'config', '--file', '.gitmodules', '--list'],
        dry_run=False,
    )

    entry_re = re.compile(r'^submodule\.(.+?)\.(path|url)=(.+)$')
    found = {}
    for raw_line in listing.stdout.splitlines():
        hit = entry_re.match(raw_line)
        if hit is None:
            continue
        name, key, value = hit.groups()
        found.setdefault(name, {})[key] = value

    # Keep only the submodules for which both settings were seen.
    complete = {}
    for name, info in found.items():
        if 'path' in info and 'url' in info:
            complete[name] = info
    return complete
def encode_url(url):
    """Turn a submodule URL into a name usable as a cache directory.

    The result is built from the URL's host name and its path (leading
    slashes and a trailing ``.git`` removed); every run of characters other
    than ASCII letters and digits is collapsed into a single underscore.

    Args:
        url: The submodule URL as written in ``.gitmodules``.

    Returns:
        The encoded name as a string.
    """
    parts = urlparse(url)
    host = parts.hostname if parts.hostname else ''
    repo = re.sub(r'\.git$', '', parts.path.lstrip('/'))
    combined = f"{host}_{repo}"
    return re.sub(r'[^a-zA-Z0-9]+', '_', combined)
def update_cache(submodule_url):
    """Fetch *submodule_url* into its bare cache repository under ``DATA_DIR``.

    The cache repository is created (``git init --bare`` plus an ``origin``
    remote) the first time its directory does not exist.  Each URL is fetched
    at most once per run; ``FETCHED_URLS`` records what was already done.

    Note that every git command here is issued with ``dry_run=False``, so the
    cache is created and fetched even when the tool runs in dry-run mode.

    Args:
        submodule_url: URL of the submodule to mirror.

    Raises:
        subprocess.CalledProcessError: One of the git commands failed.
    """
    if submodule_url in FETCHED_URLS:
        if VERBOSE:
            print(f"Skipping already fetched URL: {submodule_url}")
        return

    cache_path = DATA_DIR / encode_url(submodule_url)
    if VERBOSE:
        print(f"Updating cache for {submodule_url}")

    needs_setup = not cache_path.exists()
    if needs_setup:
        if VERBOSE:
            print(f"Creating directory: {cache_path}")
        cache_path.mkdir(parents=True, exist_ok=True)
        setup_steps = (
            ['init', '--bare'],
            ['remote', 'add', 'origin', submodule_url],
        )
        for step in setup_steps:
            run_command(['git', '-C', cache_path, *step], dry_run=False)

    run_command(['git', '-C', cache_path, 'fetch', '--tags', 'origin'], dry_run=False)
    FETCHED_URLS.add(submodule_url)
def update_alternates(git_path, submodule_name, submodule_url, submodule_path):
    """Make a submodule borrow objects from the shared cache and drop local copies.

    The cache's ``objects`` directory is registered in the submodule's
    ``objects/info/alternates`` file, then ``git repack -a -d --local``
    rewrites the submodule's packs without the objects the alternate provides.

    Args:
        git_path: Git directory of the repository that owns the submodule.
        submodule_name: Submodule name; its git directory is taken to be
            ``<git_path>/modules/<submodule_name>``.
        submodule_url: Submodule URL, which selects the cache repository.
        submodule_path: Work tree of the submodule.  Kept for interface
            compatibility; the repack addresses the git directory directly.

    Raises:
        subprocess.CalledProcessError: The repack failed.
    """
    cache_objects = str(DATA_DIR / encode_url(submodule_url) / 'objects')
    module_git_dir = git_path / 'modules' / submodule_name
    alternates_path = module_git_dir / 'objects' / 'info' / 'alternates'

    if VERBOSE:
        print(f"Updating alternates for {submodule_name} in {git_path}")
    if DRY_RUN:
        print(f"[DRY RUN] Would update {alternates_path} with content: {cache_objects}")
        return

    alternates_path.parent.mkdir(parents=True, exist_ok=True)

    # Keep entries that are already there (e.g. from `git clone --reference`):
    # overwriting them would cut the repository off from objects it depends on.
    entries = []
    if alternates_path.exists():
        entries = [line for line in alternates_path.read_text().splitlines() if line.strip()]
    if cache_objects not in entries:
        entries.append(cache_objects)
    # The alternates file holds one object directory per line.
    alternates_path.write_text('\n'.join(entries) + '\n')

    # Address the submodule's git directory explicitly.  With `-C <work tree>`
    # git would silently discover the superproject instead whenever the
    # submodule work tree is not checked out, and repack the wrong repository.
    run_command(['git', f'--git-dir={module_git_dir}', 'repack', '-a', '-d', '--local'])
def remove_alternates(git_path, submodule_name, submodule_path):
    """Make a submodule self-contained again and delete its alternates file.

    Args:
        git_path: Git directory of the repository that owns the submodule.
        submodule_name: Submodule name; its git directory is taken to be
            ``<git_path>/modules/<submodule_name>``.
        submodule_path: Work tree of the submodule.  Kept for interface
            compatibility; the repack addresses the git directory directly.

    Raises:
        subprocess.CalledProcessError: The repack failed; the alternates file
            is left untouched in that case.
    """
    module_git_dir = git_path / 'modules' / submodule_name

    # `repack -a -d` without --local also packs the objects borrowed from
    # alternates, so afterwards the alternates are no longer referenced.
    #
    # The git directory is addressed explicitly: with `-C <work tree>` git
    # would discover the superproject whenever the submodule work tree is not
    # checked out, repack the wrong repository, and the alternates file would
    # then be deleted while the submodule still depends on it.
    run_command(['git', f'--git-dir={module_git_dir}', 'repack', '-a', '-d'])

    alternates_path = module_git_dir / 'objects' / 'info' / 'alternates'
    if VERBOSE:
        print(f"Removing alternates for {submodule_name} in {git_path}")
    if DRY_RUN:
        print(f"[DRY RUN] Would remove {alternates_path}")
        return

    if alternates_path.exists():
        alternates_path.unlink()

    # Tidy up the `info` directory if nothing else is left in it.
    info_dir = alternates_path.parent
    if info_dir.exists() and not any(info_dir.iterdir()):
        info_dir.rmdir()
def process_repository(repo_path, git_path=None, revert=False):
    """Centralize (or revert) every initialized submodule of a repository.

    Nested submodules are handled by recursing with the submodule's work
    tree and its git directory under ``<git_path>/modules``.

    Args:
        repo_path: ``Path`` of the repository work tree.
        git_path: Git directory belonging to *repo_path*; defaults to
            ``repo_path / '.git'``.
        revert: When true, alternates are removed instead of installed.
    """
    git_path = repo_path / '.git' if git_path is None else git_path
    print(f"[process] {repo_path} in {git_path}")

    for name, info in get_submodules(repo_path).items():
        module_dir = git_path / 'modules' / name
        if not module_dir.exists():
            # No git directory for this submodule: it is not initialized.
            continue

        worktree = repo_path / info['path']
        if revert:
            remove_alternates(git_path, name, worktree)
        else:
            url = info['url']
            update_cache(url)
            update_alternates(git_path, name, url, worktree)

        # Recursively process submodules
        process_repository(worktree, module_dir, revert)
def main(folder, revert=False):
    """Process every git repository found below *folder*.

    A directory counts as a repository when it contains a ``.git``
    directory.  The walk continues below repositories, so nested independent
    repositories are processed as well.

    Args:
        folder: Directory to search.
        revert: Forwarded to ``process_repository``.
    """
    for root, dirs, _files in os.walk(folder):
        if '.git' not in dirs:
            continue
        # Prune the walk: there is nothing to discover inside a git
        # directory, and its object store can hold thousands of directories.
        dirs.remove('.git')
        process_repository(Path(root), revert=revert)
if __name__ == '__main__':
    # Command-line entry point: parse the options, publish the dry-run and
    # verbose switches as module globals, then walk the requested folder.
    cli = argparse.ArgumentParser(
        description='Centralize or decentralize git submodules to reduce disk usage.',
    )
    cli.add_argument(
        'folder',
        help='The folder containing git repositories to process',
    )
    cli.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='Enable verbose output',
    )
    cli.add_argument(
        '-n', '--dry-run',
        action='store_true',
        help='Perform a dry run without making changes',
    )
    cli.add_argument(
        '-r', '--revert',
        action='store_true',
        help='Revert the centralization process',
    )
    options = cli.parse_args()

    DRY_RUN, VERBOSE = options.dry_run, options.verbose

    main(options.folder, revert=options.revert)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment