|
""" |
|
Report, per author, the files in which they own the largest share of lines
(according to ``git blame``), followed by each author's overall line count.

Most useful as:

    git ls-tree --name-only -r HEAD | xargs python /path/to/authordensity.py

Run it from the root of your git repository. If people show up under
multiple names, use the synonyms dict to map their aliases to a canonical
name.

Needs no external libs.
|
""" |
|
|
|
from __future__ import division, print_function |
|
|
|
import collections |
|
import re |
|
import subprocess |
|
import sys |
|
|
|
# Maps an author alias (as reported by git) to the canonical name to report
# under.  Empty by default; fill in as needed, e.g. {"jdoe": "Jane Doe"}.
synonyms = {
}

# Only paths ending in one of these extensions are analysed.
match_files = re.compile(r"\.(cc|h|cpp|hpp|c|py|pxi|pyx)$")
# Matches lines starting with "author " (note the trailing space, so
# "author-mail ..." style lines do not match).
author_line_re = re.compile(r"^author ")
# Maximum number of files listed per author.
count = 30


def blame_file(path):
    """Return the output of ``git blame --line-porcelain`` for *path*.

    Returns None when git exits with status 128 (git's fatal-error status,
    presumably a path git cannot blame) so the caller can skip the file.
    Any other non-zero exit status is re-raised as CalledProcessError.
    """
    try:
        return subprocess.check_output(
            ["git", "blame", "--line-porcelain", path],
            stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        if e.returncode == 128:
            return None
        raise


def count_authors(blame, aliases=None):
    """Count blamed lines per canonical author.

    *blame* is the text (or, on Python 3, the bytes) printed by
    ``git blame --line-porcelain``.  *aliases* maps alias -> canonical name
    and defaults to the module-level ``synonyms`` dict.

    Returns a ``collections.Counter`` of {author: number of lines}.
    """
    if aliases is None:
        aliases = synonyms
    # On Python 3 subprocess hands back bytes, which cannot be matched
    # against a str pattern; on Python 2 bytes *is* str, so leave it alone.
    if bytes is not str and isinstance(blame, bytes):
        blame = blame.decode("utf-8", "replace")
    freq = collections.Counter()
    # Split on "\n" only: splitlines() would also break on a bare "\r" or
    # form feed inside file content, which could fabricate an "author " line.
    for line in blame.split("\n"):
        if author_line_re.match(line):
            # rstrip keeps CRLF-terminated input working as splitlines() did.
            name = line.split(" ", 1)[1].rstrip("\r")
            freq[aliases.get(name, name)] += 1
    return freq


def file_density(freq):
    """Turn {author: line count} into {author: fraction of the file's lines}.

    An empty mapping yields an empty dict (no division takes place).
    True division relies on the file's ``from __future__ import division``
    when run under Python 2.
    """
    total = sum(freq.values())
    return {author: lines / total for author, lines in freq.items()}


def author_section(author, file_densities, limit=None):
    """Build the report section for one author.

    *file_densities* maps path -> {author: fraction}.  Files where the author
    has no share are left out; the rest are sorted by (fraction, path) in
    descending order and truncated to *limit* entries (default: ``count``).

    Returns the section as one string, meant to be passed to ``print``: a
    blank line, the author name, one "<percent>%<TAB><path>" row per file,
    and a final newline that becomes the trailing blank line once printed.
    """
    if limit is None:
        limit = count
    ranked = sorted(
        ((density[author], path)
         for path, density in file_densities.items()
         if density.get(author)),
        reverse=True)
    rows = ["%3.0f%%\t%s" % (100. * share, path)
            for share, path in ranked[:limit]]
    return "\n%s\n%s\n" % (author, "\n".join(rows))


def totals_lines(total_freq):
    """Build the overall summary rows, one per author.

    Rows are ordered by (line count, author name) descending and formatted
    as "<lines><TAB><percent of all lines>%<TAB><TAB><author>".
    """
    total = sum(total_freq.values())
    ranked = sorted(total_freq.items(),
                    key=lambda item: (item[1], item[0]), reverse=True)
    return ["%s\t%.3f%%\t\t%s" % (lines, 100. * lines / total, author)
            for author, lines in ranked]


def main(paths):
    """Blame every matching path in *paths* and print both reports to stdout."""
    file_densities = {}
    total_freq = collections.Counter()
    for path in paths:
        if not match_files.search(path):
            continue
        blame = blame_file(path)
        if blame is None:
            continue
        freq = count_authors(blame)
        total_freq.update(freq)
        file_densities[path] = file_density(freq)

    # Sorted so the report order is reproducible from run to run.
    for author in sorted(total_freq):
        print(author_section(author, file_densities, count))

    for row in totals_lines(total_freq):
        print(row)


if __name__ == "__main__":
    main(sys.argv[1:])