Created
May 31, 2021 03:50
-
-
Save nhoffman/e641df575d49e99ee21746da28362488 to your computer and use it in GitHub Desktop.
Visualize author contributions to one or more git repositories.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
"""Describe author contributions for one or more git repositories by date | |
Output is a csv with columns (repo, author, timestamp, churn) where | |
'churn' is the sum of lines added and lines removed. | |
Requires pandas and plotnine | |
""" | |
import sys | |
import argparse | |
import subprocess | |
from pathlib import Path | |
import re | |
import csv | |
import pandas as pd | |
from plotnine import (ggplot, geom_bar, aes, facet_wrap, theme, | |
element_text, scale_y_log10) | |
import plotnine as p9 | |
def parse_log(text, exclude=None):
    """Parse ``git log --numstat`` output into per-commit churn records.

    Every commit is expected to begin with a header line of the form
    ``#<repo>|<author>|<timestamp>``, followed by zero or more numstat
    lines (``<added> <removed> <filename>``). Blank lines are ignored.

    Args:
        text: The captured log output.
        exclude: Optional regular expression (as a string). Files whose
            name matches it (``re.search``) do not count toward churn.

    Yields:
        ``(repo, author, timestamp, churn)`` tuples, one per header line,
        where ``churn`` is lines added plus lines removed over the files
        that were not excluded. Nothing is yielded when ``text`` contains
        no header line.

    Raises:
        ValueError: if a header line has fewer than three ``|``-separated
            fields or a numstat line has fewer than three columns.
    """
    pattern = re.compile(exclude) if exclude else None
    repo = author = ts = None
    churn = 0

    for line in text.splitlines():
        if line.startswith('#'):
            # Compare against None rather than relying on truthiness: an
            # empty repo label is legal and must not swallow the record.
            if repo is not None:
                yield (repo, author, ts, churn)
            # Split from the right so a '|' inside the repo label cannot
            # shift the author and timestamp fields.
            repo, author, ts = line[1:].rsplit('|', 2)
            churn = 0
        elif line.strip():
            added, removed, filename = line.split(None, 2)
            if pattern and pattern.search(filename):
                continue
            try:
                churn += int(added) + int(removed)
            except ValueError:
                # eg, binary files report line changes using '-'
                churn += 1

    # Emit the trailing commit, but only if at least one header was seen.
    if repo is not None:
        yield (repo, author, ts, churn)
def main(arguments):
    """Collect per-commit churn from git repositories; write a CSV and/or plot.

    For each repository, runs ``git log --numstat`` on the selected branch,
    parses the output with ``parse_log`` and gathers the records into a
    DataFrame with columns (repo, author, timestamp, churn). The unfiltered
    table is written to ``--outfile`` if given. If ``--plotfile`` is given,
    churn is summed per (repo, author, year-month) and saved as a stacked
    bar chart with one facet per repository; the author map, when
    provided, is applied to the plotted data only.

    Args:
        arguments: Command-line arguments, excluding the program name.

    Returns:
        None, so ``sys.exit(main(...))`` exits with status 0.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('repos', help="Path to one or more repositories", nargs='+')
    parser.add_argument('-x', '--exclude', help='regular expression excluding filenames')
    parser.add_argument('-a', '--author-map', type=argparse.FileType(),
                        help="""headerless csv file with columns
                        (label, author-email) providing instructions
                        for consolidating or (if the first column is
                        left empty) excluding authors. """)
    parser.add_argument('-b', '--branch', default='master',
                        help="branch or revision to inspect (default: %(default)s)")
    parser.add_argument('-o', '--outfile', help="CSV output file")
    parser.add_argument('-p', '--plotfile', help="plot output file")
    args = parser.parse_args(arguments)

    # Always bind author_map so the check further down cannot raise
    # NameError when --author-map was not supplied.
    author_map = {}
    if args.author_map:
        with args.author_map as mapfile:
            for row in csv.reader(mapfile):
                if not row:
                    continue  # tolerate blank lines in the map file
                label, email = row
                if label:
                    # rows with an empty label mark authors to exclude
                    author_map[email] = label

    data = []
    for path in args.repos:
        pth = Path(path)
        repo = pth.name
        if repo in ('', '..'):
            # '.' and '..' carry no usable name; use the resolved directory
            repo = pth.resolve().name
        # '%' introduces a placeholder in git's pretty format; escape it
        label = repo.replace('%', '%%')
        cmd = ['git', '-C', path, '--no-pager', 'log', '--numstat',
               f'--format=format:#{label}|%ce|%ci', args.branch, '--']
        job = subprocess.run(cmd, capture_output=True, text=True)
        if job.returncode != 0:
            # Report and skip rather than recording a bogus empty entry.
            print(f'warning: git log failed for {path}: {job.stderr.strip()}',
                  file=sys.stderr)
            continue
        data.extend(parse_log(job.stdout, exclude=args.exclude))

    df = pd.DataFrame(data, columns=['repo', 'author', 'timestamp', 'churn'])

    if args.outfile:
        df.to_csv(args.outfile, index=False)

    # Everything below only serves the plot.
    if not args.plotfile:
        return

    if author_map:
        # .copy() so the assignment below does not write into a slice
        df = df[df['author'].isin(author_map)].copy()
        df['author'] = df['author'].map(author_map)

    if df.empty:
        print('warning: no commits to plot', file=sys.stderr)
        return

    # %ci timestamps look like 'YYYY-MM-DD HH:MM:SS +ZZZZ', so the first
    # seven characters are the year-month in the committer's local time.
    # Slicing avoids parsing a column with mixed UTC offsets.
    df['date'] = df['timestamp'].str[:7]
    tab = (df.drop(columns=['timestamp'])
           .groupby(['repo', 'author', 'date'])
           .sum()
           .reset_index())

    plt = (ggplot(tab, aes('date', 'churn', fill='author')) +
           geom_bar(position='stack', stat='identity') +
           facet_wrap('~repo', ncol=1) +
           scale_y_log10() +
           p9.theme_538() +
           theme(
               axis_text_x=element_text(rotation=90),
               axis_text_y=element_text(size=0),
               axis_title_y=element_text(size=0),
               legend_position='top'))
    plt.save(args.plotfile)
if __name__ == '__main__':
    # Script entry point: hand the command-line arguments (minus the
    # program name) to main() and use its return value as the exit status.
    cli_args = sys.argv[1:]
    exit_status = main(cli_args)
    sys.exit(exit_status)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment