Skip to content

Instantly share code, notes, and snippets.

@lamchau
Last active May 12, 2020 08:04
Show Gist options
  • Save lamchau/7d8dd7b65443803897efac7d91980513 to your computer and use it in GitHub Desktop.
Save lamchau/7d8dd7b65443803897efac7d91980513 to your computer and use it in GitHub Desktop.
Creates a table of total git differences for a file or directory, per author (with net changes).
#!/usr/bin/env python
import git
import logging
import os
import re
import sys
from collections import Counter
from prettytable import PrettyTable
# dependencies:
# - prettytable
# - GitPython
# Logging setup: millisecond-resolution timestamps, level name, then a
# tab-separated message. The root logger is configured once at import time.
LOG_LEVEL = logging.DEBUG
DATE_FORMAT = '%Y-%m-%d %H:%M:%S'
LOG_FORMAT = '%(asctime)s.%(msecs)03d %(levelname)s:\t%(message)s'

logging.basicConfig(
    format=LOG_FORMAT,
    datefmt=DATE_FORMAT,
    level=LOG_LEVEL,
)
# Module-level logger; obtaining it is independent of the root configuration.
logger = logging.getLogger(__name__)
class GitStat:
    """Per-author totals of ``git log --shortstat`` output for a path.

    The instance wraps a ``git.Git`` command object whose working directory
    is the repository root located by :meth:`find_repo_root`.
    """

    # A normalized ``--shortstat`` summary line starts with the file count,
    # e.g. "21 files changed, 215 insertions, 37 deletions".
    _STAT_LINE = re.compile(r'^\d+\s+')

    @staticmethod
    def find_repo_root(path):
        """Return the nearest directory at or above *path* containing ``.git``.

        `git rev-parse --show-toplevel` is unavailable at instantiation, so
        the tree is walked upwards instead. If *path* is not an existing
        directory the search starts from its parent directory.

        Returns:
            The absolute path of the repository root, or ``None`` when no
            ancestor contains a ``.git`` entry.
        """
        current = os.path.abspath(path)
        if not os.path.isdir(current):
            current = os.path.dirname(current)
        while True:
            if os.path.exists(os.path.join(current, '.git')):
                return current
            parent = os.path.dirname(current)
            # dirname() of a filesystem root is the root itself; this also
            # terminates on Windows drive roots, unlike comparing to '/'.
            if parent == current:
                return None
            current = parent

    def __init__(self, path_to_repo):
        """Bind a ``git.Git`` object to the repository containing *path_to_repo*."""
        # we need the git repo to be a directory; when no repository is
        # found, None is passed through to git.Git unchanged.
        self.g = git.Git(GitStat.find_repo_root(path_to_repo))

    def create_table(self):
        """Return an empty, left-aligned PrettyTable sorted by 'net' descending."""
        table = PrettyTable()
        table.field_names = [
            'author',
            'files modified',
            'inserted',
            'deleted',
            'net'
        ]
        for column in table.field_names:
            table.align[column] = 'l'
        table.sortby = 'net'
        table.reversesort = True
        return table

    def generate_stats(self, filepath):
        """Return a table of per-author change totals for *filepath*.

        Each row holds the author, files modified, lines inserted, lines
        deleted and the net change (inserted - deleted).
        """
        totals = self._aggregate(self.get_log(filepath))
        table = self.create_table()
        for author, stats in totals.items():
            inserted = stats['inserted']
            deleted = stats['deleted']
            table.add_row(
                [author, stats['files'], inserted, deleted, inserted - deleted])
        return table

    def _aggregate(self, log_lines):
        """Sum the stats of normalized log lines into ``{author: Counter}``.

        An author entry containing '+' is split and every part is credited
        with the full stats of that commit.
        """
        totals = {}
        for authors, stats in self.pairwise(log_lines):
            for author in authors.split('+'):
                totals.setdefault(author, Counter()).update(stats)
        return totals

    def get_log(self, filepath):
        """Run ``git log`` for *filepath* and return its normalized, non-blank lines."""
        # `--shortstat` gives files changed/insertion/deleted
        # `--pretty='%aE'` uses author email (normalized with mailmap)
        # `--no-merges` needed otherwise we'll have inconsistent formatting
        #               author names will double up breaking `pairwise`
        # `--` marks everything after it as a path, so a path that no longer
        #      exists in the worktree is not mistaken for a revision
        log = self.g.log('--shortstat',
                         '--pretty="%aE"',
                         '--no-merges',
                         '--', filepath)
        return [self.normalize(line)
                for line in log.split('\n') if line.strip()]

    def as_int(self, text, suffix):
        """Return the integer preceding the regex *suffix* in *text*, or 0.

        Example: ``as_int("3 files changed", "files? changed")`` returns 3.
        """
        match = re.search(r"(\d+) %s" % suffix, text)
        return int(match.group(1)) if match else 0

    def normalize(self, s):
        """Normalize one line of `git log` output.

        - Removes the domain from email addresses
        - Removes the (+) and (-) indicators and any double quotes
            - From: 21 files changed, 215 insertions(+), 37 deletions(-)
            - To:   21 files changed, 215 insertions, 37 deletions
        """
        return re.sub(r'@.+|\([+-]\)|"', '', s.strip())

    def pairwise(self, seq):
        """Yield ``(author, stats)`` pairs from normalized log lines.

        Assumes the input is a sequence of 'author' lines, each followed by
        its '<git stats>' line. ``stats`` is a dict with the keys 'files',
        'inserted' and 'deleted'. An author line that is not followed by a
        stats line is skipped rather than mis-paired with the next line, and
        a trailing author line ends iteration cleanly.
        """
        author = None
        for line in seq:
            if not self._STAT_LINE.match(line):
                # Not a stats summary, so it names the author of a commit.
                author = line
            elif author is not None:
                yield author, {
                    'files': self.as_int(line, "files? changed"),
                    'inserted': self.as_int(line, "insertions?"),
                    'deleted': self.as_int(line, "deletions?")
                }
                author = None
def main(argv=None):
    """Print the absolute target path followed by its per-author stats table.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``. The first argument after the program name is the
            file or directory to analyse.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit('usage: %s <file-or-directory>' % os.path.basename(argv[0]))
    filepath = os.path.abspath(argv[1])
    g = GitStat(filepath)
    print(filepath)
    print(g.generate_stats(filepath))


# Guarded so that importing this module does not run git as a side effect.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment