Skip to content

Instantly share code, notes, and snippets.

@ink-splatters
Created November 12, 2024 06:48
Show Gist options
  • Save ink-splatters/8d66ce165f29a27f3cabc3916db0eb3a to your computer and use it in GitHub Desktop.
Save ink-splatters/8d66ce165f29a27f3cabc3916db0eb3a to your computer and use it in GitHub Desktop.
Fast population of git submodules using gitoxide
# SPDX-FileCopyrightText: © 2024 Peter A. (@ink-splatters)
#
# SPDX-License-Identifier: MIT
#
# -----------------------------------------------------------------------------
# gix_populate_submodules
# -----------------------------------------------------------------------------
#
# MIT License
#
# © 2024 Peter A. (@ink-splatters)
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# -----------------------------------------------------------------------------
#
# gitoxide hasn't (yet?) implemented a drop-in replacement for
# `git submodule update`. This may negate the whole purpose of using it on repos
# containing a large number of heavy submodules.
#
# This script tricks git by manually cloning all the submodules using gitoxide.
# NOTE: Recursive clones are not supported yet.
#
# -----------------------------------------------------------------------------
from __future__ import annotations
import dataclasses as D
import pathlib
import typing as T
import subprocess
import shutil
@D.dataclass
class Option:
    """A single command-line option in its human-readable form.

    A one-character name is rendered as a short option (``-x``); any other
    name is rendered as a long option (``--name``).
    """

    # Option name, without leading dashes.
    name: str
    # Optional value; a falsy value (``None`` or ``""``) renders the bare flag.
    value: T.Optional[str] = None

    def __str__(self) -> str:
        """Return the dash-prefixed name, plus a space and the value if set."""
        dashes = "--" if len(self.name) != 1 else "-"
        flag = f"{dashes}{self.name}"
        if not self.value:
            return flag
        return flag + f" {self.value}"
@D.dataclass
class Command:
    """An external command with positional arguments and options.

    The command is validated on construction and executed with ``run()``.
    ``append_args`` and ``append_options`` return ``self`` so calls can be
    chained.

    Raises:
        FileNotFoundError: on construction, if ``cmd`` is neither an absolute
            path nor resolvable through ``PATH``.
    """

    # Executable name (looked up in PATH) or absolute path.
    cmd: str
    # Positional arguments, placed right after the executable.
    args: list[str] = D.field(default_factory=list)
    # Options, placed after the positional arguments.
    opts: list[Option] = D.field(default_factory=list)

    def __post_init__(self):
        """Fail early when the executable cannot be located."""
        if not (pathlib.Path(self.cmd).is_absolute() or shutil.which(self.cmd)):
            raise FileNotFoundError(f"Command {self.cmd} not found in PATH")

    def append_args(self, *args: str) -> Command:
        """Append positional arguments and return ``self``."""
        self.args.extend(args)
        return self

    def append_options(self, *args: str, **kwargs: str) -> Command:
        """Append options and return ``self``.

        Positional names become value-less flags; keyword arguments become
        options carrying the given value.
        """
        self.opts.extend(Option(name=name) for name in args)
        self.opts.extend(Option(name=k, value=v) for k, v in kwargs.items())
        return self

    def _argv(self) -> list[str]:
        """Build the argument vector handed to the child process.

        An option and its value are emitted as two separate elements.
        Joining them into one string (``"--depth 1"``) would reach the child
        as a single, unknown argument because no shell splits it.
        """
        argv = [str(self.cmd), *self.args]
        for opt in self.opts:
            # Same prefix rule as the option's textual form: one-character
            # names are short options, everything else is a long option.
            dashes = "-" if len(opt.name) == 1 else "--"
            argv.append(f"{dashes}{opt.name}")
            if opt.value:
                argv.append(opt.value)
        return argv

    def run(self) -> int:
        """Run the command and return its exit code.

        A non-zero exit code is reported on stdout and returned; it does not
        raise.
        """
        argv = self._argv()
        print(f"Running command: {' '.join(argv)}")
        returncode = subprocess.run(argv, check=False).returncode
        if returncode != 0:
            print(f"Command failed with return code {returncode}")
        return returncode
def parse_modules(repo_dir: pathlib.Path) -> T.List[T.Tuple[str, str]]:
    """Read ``.gitmodules`` in *repo_dir* and list the declared submodules.

    Only the ``path`` and ``url`` keys of ``[submodule ...]`` sections are
    used. Keys are matched case-insensitively and regardless of the
    whitespace around ``=``, so ``path=x``, ``path = x`` and ``PATH  =  x``
    are all accepted. Sections missing either key are skipped.

    Args:
        repo_dir: Directory expected to contain the ``.gitmodules`` file.

    Returns:
        ``(path, url)`` tuples in file order.

    Raises:
        FileNotFoundError: if ``.gitmodules`` does not exist in *repo_dir*.
    """
    modules_path = repo_dir / ".gitmodules"
    if not modules_path.exists():
        raise FileNotFoundError(".gitmodules file not found")
    print("Parsing .gitmodules to obtain the list of submodules...")

    submodules: T.List[T.Tuple[str, str]] = []
    entry: T.Dict[str, str] = {}

    def flush() -> None:
        # Keep the section collected so far only if it is complete.
        if entry.get("path") and entry.get("url"):
            submodules.append((entry["path"], entry["url"]))
        entry.clear()

    in_submodule = False
    with open(modules_path, "r", encoding="utf-8") as file:
        for raw_line in file:
            line = raw_line.strip()
            # Blank lines and full-line comments carry no data.
            if not line or line[0] in "#;":
                continue
            if line.startswith("["):
                # A new section header closes the previous section.
                flush()
                in_submodule = line.lower().startswith("[submodule")
                continue
            if not in_submodule:
                continue
            key, sep, value = line.partition("=")
            key = key.strip().lower()
            if sep and key in ("path", "url"):
                entry[key] = value.strip()
    # The last section has no following header to trigger a flush.
    flush()
    return submodules
def update(repo_dir: T.Optional[pathlib.Path] = None, shallow_clones: bool = False):
    """Clone every submodule listed in ``.gitmodules`` that is not present yet.

    A submodule counts as present when its directory exists and contains a
    ``.git`` entry; such submodules are left untouched. Each remaining one is
    cloned by running ``gix clone <url> <destination>``.

    Args:
        repo_dir: Repository root; the current working directory when omitted.
        shallow_clones: When true, a ``depth`` option with value ``"1"`` is
            added to each clone command.

    Raises:
        AttributeError: if ``.gitmodules`` yields no submodules.

    A missing ``.gitmodules`` file is reported on stdout and the function
    returns without raising. A failed clone is reported and the loop moves on
    to the next submodule.
    """
    root = repo_dir if repo_dir else pathlib.Path.cwd()
    print(f"Repository: {root}")

    try:
        modules = parse_modules(root)
    except FileNotFoundError as missing:
        print(missing)
        return

    if not modules:
        raise AttributeError("The repository does not have any submodules.")

    for rel_path, url in modules:
        destination = root / rel_path
        already_cloned = destination.exists() and (destination / ".git").exists()
        if already_cloned:
            continue

        print(
            f"{'Shallow-c' if shallow_clones else 'C'}loning submodule: {url} to: {rel_path}"
        )
        clone = Command(cmd="gix").append_args("clone", url, str(destination))
        if shallow_clones:
            clone.append_options(depth="1")

        if clone.run() == 0:
            print(f"Cloned submodule {rel_path} successfully")
        else:
            print(f"Failed to clone submodule {rel_path}")
if __name__ == "__main__":
    # Clone the submodules of the current working directory, then tell the
    # user which git command registers them.
    update()
    print(
        "To register the submodules, run:\n\n\tgit submodule update --init --recursive\n"
    )
    print(
        "NOTE: the clones of recursive submodules, if any, won't be optimized as it's not implemented yet (by us)."
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment