Skip to content

Instantly share code, notes, and snippets.

@highfestiva
Created December 8, 2023 14:57
Show Gist options
  • Save highfestiva/953917793281fe6afa2009da1dfd842b to your computer and use it in GitHub Desktop.
Save highfestiva/953917793281fe6afa2009da1dfd842b to your computer and use it in GitHub Desktop.
Measures the quantity of git changes, both summarized and over time.
#!/usr/bin/env python
import pandas as pd
import sys
# File extensions (lower-case, without the dot) whose changes are counted.
# The trailing comma matters: ('java') is just the string 'java', which would
# turn `ext in goodext` into a substring test matching 'a', 'av', 'va', ...
goodext = ('java',)
removalfactor = 2.0 # Paraphrasing Tolstoy: "nothing can improve a piece of software as much as code removal".
commitfactor = 0.1 # A commit is only as good as its content.
def score(adds, removes, commits):
    """Return the weighted activity score for a set of change counts.

    Added lines count once, removed lines are scaled by ``removalfactor``
    and commits are scaled by ``commitfactor``.
    """
    weighted_removes = removes * removalfactor
    weighted_commits = commits * commitfactor
    return adds + weighted_removes + weighted_commits
def printflatstats(peopledata):
    """Print one tab-separated line of totals per person to stdout.

    ``peopledata`` maps a person's name to a dict holding the integer
    counters ``'adds'``, ``'removes'`` and ``'commits'``. A header line is
    printed first.
    """
    print('name\tadds\tremoves\tcommits')
    for person in peopledata:
        counters = peopledata[person]
        fields = (person, counters['adds'], counters['removes'], counters['commits'])
        print('%s\t%6.i\t%6.i\t%4.i' % fields)
def createdatetable(peopledata, dates):
    """Build a row-oriented score table with one row per commit date.

    The first row is the header: ``'dates'`` followed by every person in
    ``peopledata``. Each following row holds the date formatted as
    ``YYYY-MM-DD`` and, per person, the ``score`` of that person's changes
    on that date (``0.0`` when the person has no entry for the date).
    Rows are emitted in the reverse of the order of ``dates``.
    """
    people = list(peopledata.keys())
    table = [['dates'] + people]
    for day in reversed(dates):
        # day.date is a compact 'YYYYMMDD' string; split it into its parts.
        row = ['%s-%s-%s' % (day.date[:4], day.date[4:6], day.date[6:])]
        for name in people:
            counters = day.peopledata.get(name)
            if not counters:
                row.append(0.0)
                continue
            row.append(score(counters['adds'], counters['removes'], counters['commits']))
        table.append(row)
    return table
def transpose(table):
    """Return an iterator over the columns of ``table``.

    Each yielded item is a tuple holding the i-th element of every row.
    """
    columns = zip(*table)
    return columns
def combinemonths(table):
    """Collapse a per-day score table into per-month sums.

    ``table`` is a list of rows whose first row is the header and whose
    ``'dates'`` column holds ``YYYY-MM-DD`` strings. Returns a DataFrame
    indexed by the first day of each month, with every other column summed
    over that month.
    """
    # Turn the row-oriented table into {header: column values}.
    columns = {}
    for column in zip(*table):
        columns[column[0]] = column[1:]
    frame = pd.DataFrame(columns)
    # Snap every date to the first of its month so groupby merges the days.
    frame['dates'] = [day[:7] + '-01' for day in frame['dates']]
    frame['dates'] = pd.to_datetime(frame['dates'])
    return frame.groupby('dates').sum()
def printdatestats(peopledata, dates):
    """Print the monthly score table as tab-separated text on stdout.

    Columns (people) are ordered by their summed score over the last 13
    monthly rows, highest first.
    """
    monthly = combinemonths(createdatetable(peopledata, dates))
    recent_totals = monthly.iloc[-13:, :].sum() # last year-ish
    ranked = recent_totals.sort_values().index[::-1]
    monthly = monthly[ranked]
    print(monthly.reset_index().to_csv(index=False, sep='\t'))
class CommitDate:
    """Per-person change counters collected for a single commit date."""

    def __init__(self, date):
        # date: the date key supplied by the caller (cat() passes 'YYYYMMDD').
        # peopledata: person name -> counter dict, filled in by the caller.
        self.date, self.peopledata = date, {}
def addcommit(peopledata, person):
    """Record one more commit for ``person`` in ``peopledata`` (in place).

    A person without a (non-empty) entry gets fresh counters with zero
    adds/removes and a commit count of one; otherwise the existing commit
    count is incremented.
    """
    counters = peopledata.get(person)
    if counters:
        counters['commits'] += 1
        return
    peopledata[person] = {'adds':0, 'removes':0, 'commits':1}
def cat(inf):
    """Parse ``git log -p`` style text and tally changes per author.

    ``inf`` is an iterable of text lines. ``Author:`` and ``Date:`` header
    lines start a commit; ``+++``/``---`` lines select the current file, and
    added/removed lines are counted only while that file's extension is
    listed in ``goodext``.

    Returns ``(peopledata, dates)``: ``peopledata`` maps author name to a
    dict with ``'adds'``, ``'removes'`` and ``'commits'``; ``dates`` is a
    list of ``CommitDate`` objects, one per run of consecutive commits that
    share a date, in input order.

    Known limitation: a removed line whose text starts with ``-- `` (or an
    added one starting with ``++ ``) looks like a file header and is treated
    as one.
    """
    import signal

    # goodext may be configured as a bare string (``('java')`` is just
    # 'java'); wrap it so the check below is an exact extension match and
    # not a substring match.
    extensions = (goodext,) if isinstance(goodext, str) else tuple(goodext)
    months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    peopledata = {}
    dates = []
    # Ignore Ctrl-C for the rest of the process; presumably so that an
    # interrupt aimed at the upstream producer does not kill this script
    # before it has reported what it read.
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    try:
        linecnt = 0
        goodfile = False
        for line in inf:
            # Only trailing whitespace is removed: column 0 is what tells a
            # header or a '+'/'-' diff line apart from an indented commit
            # message line or a diff context line (e.g. '     + "text"').
            line = line.rstrip()
            if line.startswith('Author:'):
                goodfile = False
                person = line.partition(' ')[2].partition('<')[0].strip()
                continue
            if line.startswith('Date:'):
                goodfile = False
                # Expects the layout 'Date: <weekday> <month> <day> <time> <year> ...'.
                dateline = line.split()
                month, day, year = dateline[2], dateline[3], dateline[5]
                month = months.index(month) + 1
                date = '%.4i%.2i%.2i' % (int(year), month, int(day))
                if not dates or dates[-1].date != date:
                    dates.append(CommitDate(date))
                addcommit(peopledata, person)
                addcommit(dates[-1].peopledata, person)
                continue
            if line.startswith(('+++ ', '--- ')):
                # File header: it only selects whether the following hunk
                # lines count. '/dev/null' (created/deleted file) keeps the
                # decision made by the other header of the pair.
                if not line.endswith('/dev/null'):
                    ext = line.rsplit('.', 1)[-1].lower()
                    goodfile = ext in extensions
                # The header itself is not a changed line, so never count it.
                continue
            if not goodfile:
                continue
            if line[:1] == '+':
                peopledata[person]['adds'] += 1
                dates[-1].peopledata[person]['adds'] += 1
            elif line[:1] == '-':
                peopledata[person]['removes'] += 1
                dates[-1].peopledata[person]['removes'] += 1
            else:
                continue
            # Progress indicator: one dot per 10000 counted lines.
            linecnt += 1
            if linecnt % 10000 == 0:
                print('.', file=sys.stderr)
    except KeyboardInterrupt:
        # Normally unreachable while SIGINT is ignored; kept so partial
        # results are still returned if an interrupt is raised another way.
        pass
    return peopledata, dates
if __name__ == '__main__':
    import codecs
    # Re-wrap stdin as UTF-8 text, dropping undecodable bytes instead of failing.
    utf8_reader = codecs.getreader('utf8')
    sys.stdin = utf8_reader(sys.stdin.detach(), errors='ignore')
    peopledata, dates = cat(sys.stdin)
    # Two blank lines separate the report from what was printed before it.
    print()
    print()
    printflatstats(peopledata)
    print()
    printdatestats(peopledata, dates)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment