Skip to content

Instantly share code, notes, and snippets.

@nicksteffens
Created March 17, 2026 16:00
Show Gist options
  • Select an option

  • Save nicksteffens/12c1c0574f148661dd4522ae76c11437 to your computer and use it in GitHub Desktop.

Select an option

Save nicksteffens/12c1c0574f148661dd4522ae76c11437 to your computer and use it in GitHub Desktop.
PR time-to-merge audit script - analyzes merge velocity by author and PR size
#!/usr/bin/env python3
"""
PR Time-to-Merge Audit
Analyzes merged PR data from GitHub CLI JSON output.
Usage:
gh pr list --repo ORG/REPO --state merged --limit 500 \
--json number,author,createdAt,mergedAt,additions,deletions,changedFiles,title \
> prs.json
python3 pr_merge_audit.py prs.json
"""
import json
import sys
from datetime import datetime, timedelta
from collections import defaultdict
def parse_dt(s):
    """Parse an ISO-8601 timestamp, mapping a trailing 'Z' suffix to UTC.

    GitHub's API emits Zulu-style timestamps ("...Z"), which
    datetime.fromisoformat cannot digest on older Pythons, so the suffix
    is normalized to an explicit "+00:00" offset first.
    """
    normalized = s.replace("Z", "+00:00")
    return datetime.fromisoformat(normalized)
def hours_to_friendly(hours):
    """Render a duration given in hours as a compact human string.

    Under one hour -> whole minutes ("45m"); under one day -> decimal
    hours ("3.5h"); otherwise decimal days ("2.1d").
    """
    if hours >= 24:
        return f"{hours / 24:.1f}d"
    if hours >= 1:
        return f"{hours:.1f}h"
    return f"{int(hours * 60)}m"
def main(path):
    """Print a markdown time-to-merge report for merged PRs.

    Args:
        path: Path to a JSON file produced by `gh pr list --json ...`
            (see the module docstring for the exact field list).

    Prints overall merge-velocity stats plus breakdowns by PR size and
    by author, and lists the slowest/fastest individual PRs.
    """
    with open(path) as f:
        prs = json.load(f)

    # Drop bot-authored PRs; gh CLI exposes the flag as author.is_bot.
    prs = [p for p in prs if not p["author"].get("is_bot", False)]
    if not prs:
        # Guard: the aggregates below (avg, min, max) crash on empty input.
        print("No non-bot merged PRs found.")
        return

    author_stats = defaultdict(lambda: {
        "prs": 0,
        "merge_hours": [],
        "additions": 0,
        "deletions": 0,
        "changed_files": 0,
    })
    all_hours = []
    size_buckets = {"xs": [], "s": [], "m": [], "l": [], "xl": []}
    # Cache each PR's merge time (keyed by PR number) so the slowest/fastest
    # sections don't re-parse both timestamps for every sort comparison.
    pr_hours = {}

    for pr in prs:
        created = parse_dt(pr["createdAt"])
        merged = parse_dt(pr["mergedAt"])
        hours = (merged - created).total_seconds() / 3600
        pr_hours[pr["number"]] = hours

        login = pr["author"]["login"]
        # Fall back to login when the display name is missing/empty.
        name = pr["author"].get("name") or login
        key = f"{name} ({login})"
        author_stats[key]["prs"] += 1
        author_stats[key]["merge_hours"].append(hours)
        author_stats[key]["additions"] += pr["additions"]
        author_stats[key]["deletions"] += pr["deletions"]
        author_stats[key]["changed_files"] += pr["changedFiles"]
        all_hours.append(hours)

        # Size bucket by total lines changed (additions + deletions).
        lines = pr["additions"] + pr["deletions"]
        if lines <= 10:
            size_buckets["xs"].append(hours)
        elif lines <= 50:
            size_buckets["s"].append(hours)
        elif lines <= 200:
            size_buckets["m"].append(hours)
        elif lines <= 500:
            size_buckets["l"].append(hours)
        else:
            size_buckets["xl"].append(hours)

    all_hours.sort()

    def median(lst):
        """Median of a list of numbers; 0 for an empty list."""
        if not lst:
            return 0
        s = sorted(lst)
        n = len(s)
        if n % 2 == 0:
            return (s[n // 2 - 1] + s[n // 2]) / 2
        return s[n // 2]

    def p95(lst):
        """95th-percentile value (nearest-rank method); 0 for an empty list."""
        if not lst:
            return 0
        s = sorted(lst)
        idx = int(len(s) * 0.95)
        return s[min(idx, len(s) - 1)]

    # Overall stats
    print("## PR Time-to-Merge Audit")
    print()
    print(f"**Total PRs (non-bot):** {len(prs)}")
    print(f"**Median time to merge:** {hours_to_friendly(median(all_hours))}")
    print(f"**Average time to merge:** {hours_to_friendly(sum(all_hours) / len(all_hours))}")
    print(f"**P95 time to merge:** {hours_to_friendly(p95(all_hours))}")
    print(f"**Fastest:** {hours_to_friendly(min(all_hours))}")
    print(f"**Slowest:** {hours_to_friendly(max(all_hours))}")
    print()

    # By size. The Lines column was previously printed empty; the size
    # label and its line range are now split into their own columns.
    print("### By PR Size (lines changed)")
    print("| Size | Lines | PRs | Median | Avg | P95 |")
    print("|------|-------|-----|--------|-----|-----|")
    labels = {
        "xs": ("XS", "1-10"),
        "s": ("S", "11-50"),
        "m": ("M", "51-200"),
        "l": ("L", "201-500"),
        "xl": ("XL", "500+"),
    }
    for bucket in ("xs", "s", "m", "l", "xl"):
        h = size_buckets[bucket]
        if not h:
            continue
        size_label, line_range = labels[bucket]
        print(
            f"| {size_label} | {line_range} | {len(h)} "
            f"| {hours_to_friendly(median(h))} "
            f"| {hours_to_friendly(sum(h) / len(h))} "
            f"| {hours_to_friendly(p95(h))} |"
        )
    print()

    # By author (sorted by PR count descending, top 20)
    sorted_authors = sorted(author_stats.items(), key=lambda x: -x[1]["prs"])
    print("### By Author (top 20)")
    print("| Author | PRs | Median | Avg | P95 | Avg Lines |")
    print("|--------|-----|--------|-----|-----|-----------|")
    for author, stats in sorted_authors[:20]:
        h = stats["merge_hours"]
        avg_lines = (stats["additions"] + stats["deletions"]) // stats["prs"]
        print(
            f"| {author} | {stats['prs']} "
            f"| {hours_to_friendly(median(h))} "
            f"| {hours_to_friendly(sum(h) / len(h))} "
            f"| {hours_to_friendly(p95(h))} "
            f"| {avg_lines} |"
        )
    print()

    # Slowest PRs (uses the cached hours instead of re-parsing timestamps)
    slow = sorted(prs, key=lambda p: pr_hours[p["number"]], reverse=True)
    print("### Slowest PRs")
    print("| PR | Author | Time | Title |")
    print("|----|--------|------|-------|")
    for pr in slow[:15]:
        # Truncate long titles so the table stays readable.
        title = pr["title"][:60]
        login = pr["author"]["login"]
        print(f"| #{pr['number']} | {login} | {hours_to_friendly(pr_hours[pr['number']])} | {title} |")
    print()

    # Fastest PRs
    fast = sorted(prs, key=lambda p: pr_hours[p["number"]])
    print("### Fastest PRs")
    print("| PR | Author | Time | Title |")
    print("|----|--------|------|-------|")
    for pr in fast[:10]:
        title = pr["title"][:60]
        login = pr["author"]["login"]
        print(f"| #{pr['number']} | {login} | {hours_to_friendly(pr_hours[pr['number']])} | {title} |")
if __name__ == "__main__":
    # Exactly one positional argument is required: the gh JSON dump.
    args = sys.argv[1:]
    if not args:
        print("Usage: python3 pr_merge_audit.py <prs.json>")
        sys.exit(1)
    main(args[0])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment