Skip to content

Instantly share code, notes, and snippets.

@joeyparrish
Created February 25, 2026 04:31
Show Gist options
  • Select an option

  • Save joeyparrish/48c320b8b7d98d53ccd8cb8d102b2a61 to your computer and use it in GitHub Desktop.

Select an option

Save joeyparrish/48c320b8b7d98d53ccd8cb8d102b2a61 to your computer and use it in GitHub Desktop.
Git commit stats with agentic co-authors
#!/usr/bin/env python3
"""
Analyze git log and count absolute line deltas by author.
Attribution rules (per commit):
- If Co-authored-by tags are present, each unique co-author gets the commit's
full delta counted toward their total (lines may be counted more than once
for commits with multiple co-authors); the git author receives no credit
for that commit.
- If no Co-authored-by tags, the commit is attributed solely to the git author.
Co-author names are extracted from "Co-authored-by: Name <email>" lines.
Pre-configured aliases let you normalize names (e.g. several Claude variants ->
one label). Any co-author not matching an alias is shown by their raw name.
Usage:
./git_stats.py [git-log args...]
Examples:
./git_stats.py
./git_stats.py --since="30 days ago"
./git_stats.py -- src/
This tool was authored entirely by Claude Code under direction from Joey Parrish.
"""
import subprocess
import sys
import re
from collections import defaultdict
from dataclasses import dataclass
# --- Configuration -----------------------------------------------------------
# Alias rules: each entry is (regex, label). The regex is searched
# case-insensitively anywhere in the co-author name, and the label is the
# canonical display name used in the report. Rules are tried in order and the
# first match wins; names matching no rule are displayed verbatim.
AUTHOR_ALIASES: list[tuple[str, str]] = [
(r"claude", "Claude"),
(r"dependabot", "Dependabot"),
]
# -----------------------------------------------------------------------------
@dataclass
class Stats:
    """Running totals of commits and changed lines for one author label."""

    commits: int = 0  # number of commits attributed to this label
    added: int = 0  # total inserted lines
    removed: int = 0  # total deleted lines

    @property
    def delta(self):
        """Absolute line churn: inserted plus deleted lines."""
        return sum((self.added, self.removed))

    def accum(self, added, removed):
        """Fold one commit's line counts into the totals.

        Only the line counters change; ``commits`` is left for the caller
        to increment.
        """
        self.added, self.removed = self.added + added, self.removed + removed
def compile_aliases(aliases):
    """Turn (regex source, label) pairs into (compiled pattern, label) pairs.

    Every pattern is compiled case-insensitively; input order is preserved
    in the returned list.
    """
    compiled = []
    for source, label in aliases:
        compiled.append((re.compile(source, re.IGNORECASE), label))
    return compiled
def canonical_name(raw_name: str, compiled_aliases) -> str:
    """Map *raw_name* to the label of the first alias whose pattern is found in it.

    ``compiled_aliases`` is an iterable of (compiled pattern, label) pairs,
    tried in order. When no pattern is found, *raw_name* is returned unchanged.
    """
    matching_labels = (
        label
        for pattern, label in compiled_aliases
        if pattern.search(raw_name)
    )
    return next(matching_labels, raw_name)
def parse_coauthors(body: str) -> list[str]:
    """Collect co-author names from a commit message.

    Each line is stripped and matched, case-insensitively, against the form
    "Co-authored-by: Name <email>". The optional "<email>" part is dropped.
    Names are returned in order of appearance, duplicates included.
    """
    trailer = re.compile(
        r"co-authored-by:\s*(.+?)\s*(?:<[^>]*>)?\s*$",
        re.IGNORECASE,
    )
    hits = (trailer.match(raw_line.strip()) for raw_line in body.splitlines())
    return [hit.group(1).strip() for hit in hits if hit]
def get_commits(extra_args: list[str]):
    """Yield (hash, author_name, body) for each commit reported by ``git log``.

    Args:
        extra_args: Extra command-line arguments appended verbatim to
            ``git log`` (revision ranges, ``--since=...``, pathspecs, ...).

    Yields:
        Tuples of (commit hash, author name, raw commit message). Hash and
        author are whitespace-stripped; the message is passed through as-is.

    If ``git log`` exits non-zero, its stderr is echoed to stderr and the
    process exits with status 1.
    """
    # %aN = author name respecting .mailmap
    cmd = ["git", "log", "--format=%H%x00%aN%x00%B%x00%x00"] + extra_args
    # Decode explicitly as UTF-8 (git's default log output encoding) instead
    # of the locale codec, and never crash on a stray undecodable byte.
    result = subprocess.run(
        cmd,
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
    )
    if result.returncode != 0:
        print(result.stderr.strip(), file=sys.stderr)
        sys.exit(1)
    yield from _parse_log_records(result.stdout)


def _parse_log_records(output: str):
    """Split raw ``git log`` output into (hash, author, body) tuples."""
    # Every record is "hash NUL author NUL body NUL NUL", i.e. exactly four
    # NUL-terminated fields, the last one empty. Reading fixed groups of four
    # keeps commits with an empty message: splitting on a double NUL would
    # cut such a record ("hash NUL author NUL NUL NUL") right after the
    # author and lose the commit.
    fields = output.split("\x00")
    for start in range(0, len(fields) - 3, 4):
        # git ends each record with a newline, which therefore shows up in
        # front of the next hash; strip() removes it.
        commit_hash = fields[start].strip()
        if not commit_hash:
            continue
        yield commit_hash, fields[start + 1].strip(), fields[start + 2]
def get_numstat(commit_hash: str) -> tuple[int, int]:
    """Return the (added, removed) line totals of one commit.

    Runs ``git diff-tree --numstat`` on *commit_hash* and sums the per-file
    counts. Files reported with "-" counts (binary files) are skipped.

    If git exits non-zero, a warning is written to stderr and (0, 0) is
    returned so the remaining commits are still processed.
    """
    result = subprocess.run(
        [
            "git", "diff-tree", "--no-commit-id",
            # Without --root, diff-tree prints nothing for a parentless
            # (initial) commit, so its lines would be counted as zero.
            "--root",
            "-r", "--numstat", commit_hash,
        ],
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
    )
    if result.returncode != 0:
        print(
            f"warning: git diff-tree failed for {commit_hash}: "
            f"{result.stderr.strip()}",
            file=sys.stderr,
        )
        return 0, 0

    added = removed = 0
    for line in result.stdout.splitlines():
        # Each line is "<added>\t<removed>\t<path>".
        parts = line.split("\t")
        if len(parts) < 2:
            continue
        plus, minus = parts[0], parts[1]
        # Binary files report "-" for both counts; ignore anything that is
        # not a plain number rather than crashing in int().
        if not (plus.isdecimal() and minus.isdecimal()):
            continue
        added += int(plus)
        removed += int(minus)
    return added, removed
def print_report(stats: "dict[str, Stats]") -> None:
    """Print the per-author table of commits and line changes to stdout.

    Args:
        stats: Mapping of author label to an object exposing ``commits``,
            ``added``, ``removed`` and ``delta``.

    Rows are sorted by descending ``delta``. "Share" is each label's delta
    as a percentage of the summed deltas, or "n/a" when that sum is zero.
    A final "Total" row sums every column except "Share". Prints
    "No commits found." and nothing else when *stats* is empty.
    """
    if not stats:
        print("No commits found.")
        return

    entries = stats.values()
    total_commits = sum(s.commits for s in entries)
    total_added = sum(s.added for s in entries)
    total_removed = sum(s.removed for s in entries)
    total_delta = sum(s.delta for s in entries)

    # Size the first column for the widest label *and* the fixed "Author" /
    # "Total" captions; otherwise very short labels let the header overflow
    # its column and the numbers no longer line up under their headings.
    col_w = max(len("Author"), len("Total"), *(len(name) for name in stats)) + 2

    header = f"{'Author':<{col_w}} {'Commits':>8} {'Added':>10} {'Removed':>10} {'Delta':>10} {'Share':>7}"
    divider = "-" * len(header)
    print("===== Git Line Change Stats =====\n")
    print(header)
    print(divider)
    for label, s in sorted(stats.items(), key=lambda item: -item[1].delta):
        pct = f"{s.delta * 100 / total_delta:.1f}%" if total_delta else "n/a"
        print(f"{label:<{col_w}} {s.commits:>8} {s.added:>10} {s.removed:>10} {s.delta:>10} {pct:>7}")
    print(divider)
    print(
        f"{'Total':<{col_w}} {total_commits:>8}"
        f" {total_added:>10}"
        f" {total_removed:>10}"
        f" {total_delta:>10}"
    )
def main():
    """Aggregate per-author line stats over the git log and print the report.

    Command-line arguments are forwarded to ``git log``. A commit carrying
    Co-authored-by tags is credited, in full, to each distinct canonical
    co-author label; a commit without them is credited to its git author.
    """
    alias_rules = compile_aliases(AUTHOR_ALIASES)
    totals: dict[str, Stats] = defaultdict(Stats)
    shared_commits = 0

    for sha, author, message in get_commits(sys.argv[1:]):
        added, removed = get_numstat(sha)
        coauthors = parse_coauthors(message)

        if coauthors:
            # dict.fromkeys de-duplicates labels while keeping first-seen order.
            labels = list(dict.fromkeys(
                canonical_name(name, alias_rules) for name in coauthors
            ))
        else:
            labels = [author]

        if len(labels) > 1:
            shared_commits += 1

        for label in labels:
            entry = totals[label]
            entry.commits += 1
            entry.accum(added, removed)

    print_report(dict(totals))

    if not shared_commits:
        return
    print(
        f"\nNote: {shared_commits} commit(s) had multiple co-authors; "
        "their line deltas are counted once per co-author."
    )
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment