Skip to content

Instantly share code, notes, and snippets.

@haldean
Last active October 21, 2015 21:45
Show Gist options
  • Save haldean/a298dda15bff8021b228 to your computer and use it in GitHub Desktop.
Save haldean/a298dda15bff8021b228 to your computer and use it in GitHub Desktop.
"""
Most useful as:
git ls-tree --name-only -r HEAD | xargs python /path/to/authordensity.py
When run from the root of your git repository. If people show up under
multiple names, use the synonyms dict to map their aliases to a canonical
name.
Needs no external libs.
"""
from __future__ import division, print_function
import collections
import re
import subprocess
import sys
# Maps an alias that shows up in `git blame` output to the canonical author
# name that should be reported instead, e.g. {"jdoe": "Jane Doe"}.
synonyms = {
}
# Only paths ending in one of these source-file extensions are analysed.
match_files = re.compile(r"\.(cc|h|cpp|hpp|c|py|pxi|pyx)$")
# Matches the "author <name>" header of a --line-porcelain record. The
# trailing space keeps "author-mail", "author-time", ... from matching.
author_line_re = re.compile("^author ")
# Maximum number of files listed under each author.
count = 30
# path -> {author: fraction of that file's blamed lines}
file_densities = dict()
# Every (canonical) author name encountered so far.
authors_found = set()
# author -> number of blamed lines across all analysed files
total_freq = collections.defaultdict(lambda: 0)


def count_authors(blame):
    """Return {author: line count} for `git blame --line-porcelain` output.

    `blame` may be bytes (what subprocess returns on Python 3) or str.
    Aliases listed in the module-level `synonyms` dict are folded into their
    canonical name.
    """
    # On Python 2 `bytes is str`, so this only decodes on Python 3.
    if isinstance(blame, bytes) and not isinstance(blame, str):
        blame = blame.decode("utf-8", "replace")
    freq = {}
    # Split on "\n" only: splitlines() would also break on a bare "\r" or
    # form feed inside a blamed source line, and text after such a break
    # could then be mistaken for an "author " header.
    for line in blame.split("\n"):
        if not author_line_re.match(line):
            continue
        author = line.split(" ", 1)[1]
        author = synonyms.get(author, author)
        freq[author] = freq.get(author, 0) + 1
    return freq


def line_shares(authors_freq):
    """Convert {author: line count} into {author: fraction of all lines}.

    An empty mapping yields an empty mapping (no division takes place).
    """
    total_lines = sum(authors_freq.values())
    return {author: n / total_lines for author, n in authors_freq.items()}


def top_files(author, densities, limit):
    """Return up to `limit` (share, path) pairs for `author`, largest first.

    `densities` maps path -> {author: share}; files in which the author has
    no share are left out.
    """
    rows = [(shares[author], path)
            for path, shares in densities.items()
            if shares.get(author)]
    rows.sort(reverse=True)
    return rows[:limit]


def ranked_totals(freq):
    """Return (author, line count) pairs, highest count first.

    Ties are broken by author name, in descending order.
    """
    return sorted(freq.items(), key=lambda p: (p[1], p[0]), reverse=True)


def main(argv=None):
    """Blame every matching path and print the per-author reports.

    `argv` is the list of candidate paths; it defaults to `sys.argv[1:]`.
    Results are accumulated in the module-level `file_densities`,
    `authors_found` and `total_freq` containers before being printed.

    Raises subprocess.CalledProcessError if `git blame` fails with any exit
    status other than 128.
    """
    paths = sys.argv[1:] if argv is None else argv
    for f in paths:
        if not match_files.search(f):
            continue
        try:
            blame = subprocess.check_output(
                ["git", "blame", "--line-porcelain", f],
                stderr=subprocess.PIPE)
        except subprocess.CalledProcessError as e:
            # Exit status 128 is skipped on purpose (git's status for fatal
            # errors, presumably a path it cannot blame); anything else is
            # unexpected and propagates.
            if e.returncode == 128:
                continue
            raise
        authors_freq = count_authors(blame)
        for author, n in authors_freq.items():
            authors_found.add(author)
            total_freq[author] += n
        file_densities[f] = line_shares(authors_freq)

    # Per-author section: the files in which each author owns the largest
    # share of lines. Sorted so the report order is stable between runs.
    for author in sorted(authors_found):
        print("\n%s" % author)
        print("\n".join("%3.0f%%\t%s" % (100. * share, path)
                        for share, path
                        in top_files(author, file_densities, count)))
    print()

    # Overall section: total blamed lines per author and their percentage.
    total_lines = sum(total_freq.values())
    for author, freq in ranked_totals(total_freq):
        print("%s\t%.3f%%\t\t%s" % (freq, 100. * freq / total_lines, author))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment