Skip to content

Instantly share code, notes, and snippets.

@bobisme
Created May 31, 2026 22:34
Show Gist options
  • Select an option

  • Save bobisme/f25f2d2c41de7f4ad5a8b0c8c28ec13a to your computer and use it in GitHub Desktop.

Select an option

Save bobisme/f25f2d2c41de7f4ad5a8b0c8c28ec13a to your computer and use it in GitHub Desktop.
combined_range_stats.py
#!/usr/bin/env -S uv run --script
#
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "rich",
# ]
# ///
import argparse
from datetime import date
from pathlib import Path
import re
import subprocess
import sys
from rich.console import Console
from rich.panel import Panel
from rich.table import Table
# Directory whose immediate children are scanned for git repositories.
SRC_ROOT = "/home/bob/src"

# Repositories nested deeper than one level below SRC_ROOT are not picked up
# by the top-level scan; list them here (relative to SRC_ROOT) to include them.
EXTRA_REPOS = ()
def discover_repos():
    """Return the names of the repositories to scan.

    Every immediate subdirectory of ``SRC_ROOT`` that contains a ``.git``
    entry is included, sorted by name, followed by the ``EXTRA_REPOS``
    entries in their declared order.

    Returns:
        A tuple of repository names.
    """
    names = []
    for entry in Path(SRC_ROOT).iterdir():
        if not entry.is_dir():
            continue
        if (entry / ".git").exists():
            names.append(entry.name)
    names.sort()
    return tuple(names) + EXTRA_REPOS
def humanize(n):
    """Format a count compactly for display.

    Values of at least 1,000,000 are shown in millions with an ``M`` suffix
    and values of at least 10,000 in thousands with a ``k`` suffix, both with
    at most two decimals and no trailing zeros. Smaller values are printed in
    full with thousands separators.

    Args:
        n: The number to format.

    Returns:
        The formatted string, e.g. ``"1.5M"``, ``"12k"`` or ``"9,999"``.
    """
    if n >= 1_000_000:
        scaled, suffix = n / 1_000_000, "M"
    elif n >= 10_000:
        scaled, suffix = n / 1_000, "k"
    else:
        return f"{n:,}"
    # Trim trailing zeros (and a dangling ".") BEFORE appending the suffix:
    # stripping afterwards is a no-op because the text then ends in a letter.
    digits = f"{scaled:.2f}".rstrip("0").rstrip(".")
    return f"{digits}{suffix}"
def pluralize(n, singular, plural=None):
    """Pick the singular or plural form of a word for the count ``n``.

    Args:
        n: The count the word describes.
        singular: Word to use when ``n`` is exactly 1.
        plural: Word to use otherwise; when omitted (or empty), ``singular``
            with an ``s`` appended is used.

    Returns:
        The chosen word.
    """
    if n != 1:
        return plural if plural else f"{singular}s"
    return singular
def run_git(repo, args, stdin_text=None):
    """Run ``git`` with ``args`` inside ``repo`` and return its stdout as text.

    Args:
        repo: Directory used as the working directory for the command.
        args: Iterable of arguments placed after ``git``.
        stdin_text: Optional text fed to the command's standard input.

    Returns:
        The command's standard output, decoded as text.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    command = ["git"]
    command.extend(args)
    return subprocess.check_output(
        command,
        input=stdin_text,
        text=True,
        cwd=repo,
    )
def get_author_email(repo):
    """Look up ``user.email`` from git config as seen from ``repo``.

    The command's exit status is not checked; only its output is used.

    Args:
        repo: Directory used as the working directory for ``git config``.

    Returns:
        The configured e-mail with surrounding whitespace removed, or ``None``
        when the command printed nothing.
    """
    completed = subprocess.run(
        ["git", "config", "user.email"],
        capture_output=True,
        text=True,
        cwd=repo,
    )
    email = completed.stdout.strip()
    return email if email else None
def month_range(month_str):
    """Convert a ``YYYY-MM`` string into a half-open date range.

    Args:
        month_str: The month, formatted as four digits, a dash and two digits.

    Returns:
        A ``(start, end)`` pair of ISO dates: the first day of the month and
        the first day of the following month.

    Raises:
        ValueError: If ``month_str`` is not shaped like ``YYYY-MM``, or if
            ``date`` rejects the resulting year/month.
    """
    if re.fullmatch(r"\d{4}-\d{2}", month_str) is None:
        raise ValueError(f"--month must be YYYY-MM, got {month_str!r}")
    year_text, month_text = month_str.split("-")
    year = int(year_text)
    month = int(month_text)
    first_day = date(year, month, 1)
    if month == 12:
        # December rolls over into January of the next year.
        next_first_day = date(year + 1, 1, 1)
    else:
        next_first_day = date(year, month + 1, 1)
    return first_day.isoformat(), next_first_day.isoformat()
def at_midnight(date_expr):
    """Pin a date expression to midnight unless it already carries a time.

    An expression counts as having a time when a digit, a colon and another
    digit appear in sequence somewhere in it (as in ``12:30``).

    Args:
        date_expr: The date expression to normalize.

    Returns:
        ``date_expr`` unchanged if it has a time, otherwise ``date_expr``
        followed by `` 00:00:00``.
    """
    has_clock_time = re.search(r"\d:\d", date_expr) is not None
    return date_expr if has_clock_time else f"{date_expr} 00:00:00"
def has_head(repo):
    """Report whether ``HEAD`` resolves in ``repo``.

    Args:
        repo: Directory used as the working directory for the git command.

    Returns:
        ``True`` when ``git rev-parse --verify --quiet HEAD`` exits with
        status 0, ``False`` otherwise.
    """
    probe = subprocess.run(
        ["git", "rev-parse", "--verify", "--quiet", "HEAD"],
        capture_output=True,
        cwd=repo,
    )
    return probe.returncode == 0
def combined_revs(repo, start, end, cutoff, author):
    """List the commits reachable from HEAD that fall in the requested window.

    The window is ``[start, end)``; when ``cutoff`` is given, commits before
    the cutoff are added as well. Date expressions without a time are pinned
    to midnight first.

    Args:
        repo: Repository directory.
        start: Lower bound, passed to git as ``--since-as-filter``.
        end: Upper bound, passed to git as ``--before``.
        cutoff: Optional extra ``--before`` bound whose commits are merged in.
        author: Optional ``--author`` filter; falsy disables the filter.

    Returns:
        A list of revision hashes. It is empty when the repository has no
        HEAD. Without ``cutoff`` it is in ``git rev-list`` order; with
        ``cutoff`` it is the de-duplicated union, sorted by hash.
    """
    if not has_head(repo):
        return []

    author_filter = []
    if author:
        author_filter.append(f"--author={author}")

    since = at_midnight(start)
    until = at_midnight(end)
    in_range = run_git(
        repo,
        [
            "rev-list",
            *author_filter,
            f"--since-as-filter={since}",
            f"--before={until}",
            "HEAD",
        ],
    ).splitlines()
    if not cutoff:
        return in_range

    before = at_midnight(cutoff)
    older = run_git(
        repo,
        ["rev-list", *author_filter, f"--before={before}", "HEAD"],
    ).splitlines()
    # The two listings may overlap, so merge through a set.
    return sorted({*in_range, *older})
def numstat_for_revs(repo, revs):
    """Sum the ``git show --numstat`` figures over the given revisions.

    Args:
        repo: Repository directory.
        revs: Revision hashes; they are fed to git on standard input.

    Returns:
        A ``(files, insertions, deletions)`` tuple. ``files`` counts numstat
        lines, so a file touched by several commits is counted once per
        commit. ``(0, 0, 0)`` is returned for an empty ``revs`` without
        running git.
    """
    if not revs:
        return 0, 0, 0

    raw = run_git(
        repo,
        ["show", "--pretty=tformat:", "--numstat", "--stdin"],
        stdin_text="".join(f"{rev}\n" for rev in revs),
    )

    file_count = 0
    added = 0
    removed = 0
    for record in raw.splitlines():
        if not record.strip():
            continue
        fields = record.split("\t")
        if len(fields) < 3:
            continue
        added_text, removed_text = fields[:2]
        # Non-numeric counts (presumably the "-" git prints for binary files)
        # contribute no lines, but the entry still counts as a changed file.
        added += int(added_text) if added_text.isdigit() else 0
        removed += int(removed_text) if removed_text.isdigit() else 0
        file_count += 1
    return file_count, added, removed
def compute(repo, start, end, cutoff, author):
    """Gather the stats row for one repository.

    Args:
        repo: Repository directory.
        start: Start of the date range.
        end: End of the date range.
        cutoff: Optional date; commits before it are counted as well.
        author: Optional author filter.

    Returns:
        A dict with the keys ``repo``, ``author``, ``commits``, ``files``,
        ``insertions``, ``deletions`` and ``changed_lines`` (insertions plus
        deletions).
    """
    selected = combined_revs(repo, start, end, cutoff, author)
    file_count, added, removed = numstat_for_revs(repo, selected)
    row = {"repo": repo, "author": author, "commits": len(selected)}
    row["files"] = file_count
    row["insertions"] = added
    row["deletions"] = removed
    row["changed_lines"] = added + removed
    return row
def compute_totals(results):
    """Aggregate per-repository rows into a single ``TOTAL`` row.

    Args:
        results: Sequence of row dicts carrying the numeric keys ``commits``,
            ``files``, ``insertions``, ``deletions`` and ``changed_lines``.

    Returns:
        A dict with ``repo`` set to ``"TOTAL"``, ``active_projects`` (the
        number of rows with at least one changed file) and the column sums.
    """

    def column_sum(key):
        return sum(entry[key] for entry in results)

    active_count = len([entry for entry in results if entry["files"] > 0])
    totals = {"repo": "TOTAL", "active_projects": active_count}
    for key in ("commits", "files", "insertions", "deletions", "changed_lines"):
        totals[key] = column_sum(key)
    return totals
def display_repo(repo):
    """Shorten a repository path for display.

    Args:
        repo: Repository path as a string.

    Returns:
        The path relative to the current working directory when it lies
        beneath it, otherwise ``repo`` unchanged.
    """
    target = Path(repo)
    try:
        relative = target.relative_to(Path.cwd())
    except ValueError:
        # Not located under the working directory: show it as given.
        return repo
    return str(relative)
def parse_args():
    """Parse the command line and fill in the effective date range.

    ``--month`` defaults to the current month. After parsing, ``start`` and
    ``end`` are taken from that month's range unless they were given
    explicitly.

    Returns:
        The parsed ``argparse.Namespace`` with ``start`` and ``end`` always
        set.

    Raises:
        ValueError: If ``--month`` is not a valid ``YYYY-MM`` value (raised by
            ``month_range``).
    """
    now = date.today()
    current_month = f"{now.year:04d}-{now.month:02d}"

    description = (
        "Compute stats for a date range, optionally including commits before a cutoff date. "
        "Uses --since-as-filter to avoid traversal pruning."
    )
    parser = argparse.ArgumentParser(description=description)
    add = parser.add_argument

    add("repos", nargs="*", help="Repo paths (default: built-in list)")
    add(
        "--month",
        default=current_month,
        help="Month in YYYY-MM format (default: current month)",
    )
    add("--start", default=None, help="Range start date (overrides --month)")
    add("--end", default=None, help="Range end date (overrides --month)")
    add("--cutoff", default=None, help="Also count commits before this date")
    add(
        "--author",
        default=None,
        help="Filter by author email (default: per-repo git config user.email)",
    )
    add("--all-authors", action="store_true", help="Disable author filtering")
    add("--csv", action="store_true", help="Print CSV output")
    add(
        "--show-all",
        action="store_true",
        help="Show all detected repos, including those with 0 commits",
    )

    parsed = parser.parse_args()

    # The month is validated even when both explicit bounds are supplied.
    fallback_start, fallback_end = month_range(parsed.month)
    if parsed.start is None:
        parsed.start = fallback_start
    if parsed.end is None:
        parsed.end = fallback_end
    return parsed
def print_rich_output(args, results):
    """Render the per-repository table and the totals panel with rich.

    Args:
        args: Parsed command-line namespace; ``start``, ``end``, ``cutoff``,
            ``author``, ``all_authors`` and ``show_all`` are read.
        results: List of per-repository row dicts.

    Rows without commits are hidden unless ``args.show_all`` is set, and rows
    without changed files are dimmed. The totals panel is computed over all
    ``results`` (not only the visible rows) and is skipped when ``results``
    is empty.
    """
    console = Console()

    if args.show_all:
        shown = results
    else:
        shown = [entry for entry in results if entry["commits"] > 0]

    # Assemble the caption from its parts; they are joined with "; ".
    caption_parts = [f"{args.start} to {args.end}"]
    if args.cutoff:
        caption_parts.append(f"cutoff before {args.cutoff}")
    if args.all_authors:
        caption_parts.append("all authors")
    elif args.author:
        caption_parts.append(f"author={args.author}")
    else:
        caption_parts.append("author=per-repo user.email")
    range_text = "; ".join(caption_parts)

    table = Table(
        title="Repository Stats",
        caption=f"Range: {range_text}",
        show_lines=False,
    )
    table.add_column("Project", style="cyan", no_wrap=True)
    numeric_columns = (
        ("Commits", {}),
        ("Files", {}),
        ("Insertions", {"style": "green"}),
        ("Deletions", {"style": "red"}),
        ("Changed lines", {"style": "bold"}),
    )
    for heading, extra in numeric_columns:
        table.add_column(heading, justify="right", **extra)

    for entry in shown:
        if entry["files"] > 0:
            emphasis = None
        else:
            emphasis = "dim"
        table.add_row(
            display_repo(entry["repo"]),
            humanize(entry["commits"]),
            humanize(entry["files"]),
            f"+{humanize(entry['insertions'])}",
            f"-{humanize(entry['deletions'])}",
            humanize(entry["changed_lines"]),
            style=emphasis,
        )
    console.print(table)

    if not results:
        return

    totals = compute_totals(results)
    project_word = pluralize(totals["active_projects"], "project")
    commit_word = pluralize(totals["commits"], "commit")
    file_word = pluralize(totals["files"], "file")
    headline = (
        f"[bold]{totals['active_projects']}[/bold] {project_word}. "
        f"[bold]{humanize(totals['commits'])}[/bold] {commit_word}. "
        f"[bold]{humanize(totals['changed_lines'])}[/bold] changed lines.\n"
    )
    breakdown = (
        f"[green]+{humanize(totals['insertions'])}[/green] / "
        f"[red]-{humanize(totals['deletions'])}[/red] "
        f"in [bold]{humanize(totals['files'])}[/bold] {file_word}"
    )
    console.print(Panel(headline + breakdown, title="Total", expand=False))
def main():
    """Entry point: collect stats for every repository and print them.

    Repositories come from the command line, or from ``discover_repos()``
    when none are given. Each one is processed independently: a failure is
    reported on stderr as ``ERROR|<repo>|<message>`` and the remaining
    repositories are still processed. Output is CSV with ``--csv``, otherwise
    a rich table.

    Exits with status 1 if any repository failed.
    """
    args = parse_args()
    repo_names = args.repos or discover_repos()
    # Join with pathlib rather than "/".join: bare names still resolve under
    # SRC_ROOT, but an absolute path given on the command line is kept as-is
    # instead of being mangled into "<SRC_ROOT>//abs/path".
    repos = [str(Path(SRC_ROOT) / name) for name in repo_names]

    results = []
    had_error = False
    for repo in repos:
        try:
            if args.all_authors:
                author = None
            else:
                author = args.author or get_author_email(repo)
                if author is None:
                    print(
                        f"WARN|{repo}|no user.email configured; including all authors",
                        file=sys.stderr,
                    )
            results.append(compute(repo, args.start, args.end, args.cutoff, author))
        except Exception as exc:
            # Top-level boundary: report the failure and keep going so one
            # broken repository does not hide the stats for the others.
            had_error = True
            print(f"ERROR|{repo}|{exc}", file=sys.stderr)

    if args.csv:
        header = f"# range: {args.start} to {args.end}"
        if args.cutoff:
            header += f"; cutoff before {args.cutoff}"
        print(header)
        print("repo,commits,files,insertions,deletions,changed_lines")
        visible = results if args.show_all else [r for r in results if r["commits"] > 0]
        for row in visible:
            print(
                f"{row['repo']},{row['commits']},{row['files']},"
                f"{row['insertions']},{row['deletions']},{row['changed_lines']}"
            )
        if results:
            # Totals cover every processed repository, not only visible rows.
            totals = compute_totals(results)
            project_word = pluralize(totals["active_projects"], "project")
            print(
                f"{totals['repo']} ({totals['active_projects']} "
                f"{project_word} with changes),"
                f"{totals['commits']},{totals['files']},"
                f"{totals['insertions']},{totals['deletions']},{totals['changed_lines']}"
            )
    else:
        print_rich_output(args, results)

    if had_error:
        sys.exit(1)
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment