Last active
May 12, 2020 08:04
-
-
Save lamchau/7d8dd7b65443803897efac7d91980513 to your computer and use it in GitHub Desktop.
Creates a table of total git differences (with net changes) for a file or directory.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
import logging
import os
import re
import sys
from collections import Counter, defaultdict

import git
from prettytable import PrettyTable
# dependencies:
# - prettytable
# - GitPython
# Layout of every log record: timestamp with milliseconds, level name, a tab,
# then the message.
LOG_FORMAT = '%(asctime)s.%(msecs)03d %(levelname)s:\t%(message)s'
# strftime pattern used to render %(asctime)s in LOG_FORMAT.
DATE_FORMAT = '%Y-%m-%d %H:%M:%S'
# Threshold handed to logging.basicConfig below.
LOG_LEVEL = logging.DEBUG

logger = logging.getLogger(__name__)
# Configures the root logger as an import-time side effect of this module.
logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT, datefmt=DATE_FORMAT)
class GitStat:
    """Aggregate `git log --shortstat` totals per author for a file or directory.

    The repository is located by walking up from the given path until a
    `.git` entry is found; `git log` is then run from that directory.
    """

    # Matches a normalized `--shortstat` summary line, which starts with a
    # count followed by whitespace (e.g. "21 files changed, ...").
    _STAT_LINE = re.compile(r'^\d+\s+')
    # Text removed by `normalize`: the email domain, "(+)"/"(-)" and quotes.
    _NOISE = re.compile(r"(\@.+|\((\+|\-)\))|\"", flags=re.IGNORECASE)

    @staticmethod
    def find_repo_root(path):
        """Return the nearest directory at or above `path` containing `.git`.

        `git rev-parse --show-toplevel` is unavailable at instantiation, so
        the tree is walked upwards instead.

        :param path: file or directory expected to live inside a repository
        :return: absolute path of the repository root, or None if no
            directory up to the filesystem root contains a `.git` entry
        """
        current = os.path.abspath(path)
        # Start at the path itself when it is a directory, so that passing
        # the repository root finds its own `.git`; for files (or paths that
        # no longer exist) start at the containing directory.
        if not os.path.isdir(current):
            current = os.path.dirname(current)
        while True:
            if os.path.exists(os.path.join(current, '.git')):
                return current
            parent = os.path.dirname(current)
            # dirname() of a filesystem root returns the root itself; this
            # also terminates on Windows drive roots, unlike comparing to '/'.
            if parent == current:
                return None
            current = parent

    def __init__(self, path_to_repo):
        """Bind a git command wrapper to the repository containing the path.

        :param path_to_repo: file or directory inside a git repository
        :raises ValueError: if no repository is found at or above the path
        """
        root = GitStat.find_repo_root(path_to_repo)
        if root is None:
            # Passing None on would make git run in the current directory and
            # could silently report statistics for an unrelated repository.
            raise ValueError('not inside a git repository: %s' % path_to_repo)
        # we need the git repo to be a directory
        self.g = git.Git(root)

    def create_table(self):
        """Return an empty, left-aligned table sorted by `net`, descending."""
        table = PrettyTable()
        table.field_names = [
            'author',
            'files modified',
            'inserted',
            'deleted',
            'net',
        ]
        for column in table.field_names:
            table.align[column] = 'l'
        table.sortby = 'net'
        table.reversesort = True
        return table

    def generate_stats(self, filepath):
        """Build the per-author totals table for `filepath`.

        Author entries joined with '+' are split, and each part is credited
        with the commit's full statistics.

        :param filepath: file or directory to collect the history of
        :return: the table from `create_table`, one row per author
        """
        totals = defaultdict(Counter)
        for authors, stats in self.pairwise(self.get_log(filepath)):
            for author in authors.split('+'):
                totals[author].update(stats)

        table = self.create_table()
        for author, stats in totals.items():
            inserted = stats['inserted']
            deleted = stats['deleted']
            table.add_row(
                [author, stats['files'], inserted, deleted, inserted - deleted])
        return table

    def get_log(self, filepath):
        """Return the non-blank, normalized lines of `git log` for `filepath`.

        :param filepath: file or directory to restrict the log to
        :return: list of normalized author and shortstat lines
        """
        # `--shortstat` gives files changed/insertion/deleted
        # `--pretty="%aE"` uses author email (normalized with mailmap); the
        #     quotes reach git literally and are stripped by `normalize`
        # `--no-merges` needed otherwise we'll have inconsistent formatting
        #     author names will double up breaking `pairwise`
        # `--` keeps the path from being misread as a revision or an option
        log = self.g.log('--shortstat',
                         '--pretty="%aE"',
                         '--no-merges',
                         '--', filepath)
        return [self.normalize(line) for line in log.split('\n') if line.strip()]

    def as_int(self, text, suffix):
        """Return the integer preceding `suffix` in `text`, or 0 if absent.

        :param text: normalized shortstat line
        :param suffix: regular-expression fragment that follows the number,
            e.g. "insertions?"
        """
        match = re.search(r"(\d+) %s" % suffix, text)
        return int(match.group(1)) if match else 0

    def normalize(self, s):
        """Normalize one line of `git log` output.

        - Strips surrounding whitespace and double quotes
        - Removes the domain from email addresses
        - Removes ascii indicators (e.g. (+) and (-))
            - From: 21 files changed, 215 insertions(+), 37 deletions(-)
            - To: 21 files changed, 215 insertions, 37 deletions
        """
        return self._NOISE.sub('', s.strip())

    def _parse_stats(self, line):
        """Convert a normalized shortstat line into a dict of counts."""
        return {
            'files': self.as_int(line, "files? changed"),
            'inserted': self.as_int(line, "insertions?"),
            'deleted': self.as_int(line, "deletions?"),
        }

    def pairwise(self, seq):
        """Yield `(author, stats)` pairs from normalized log lines.

        The expected layout is an author line followed by its shortstat
        line. An author line with no shortstat line after it is yielded with
        all-zero statistics instead of shifting the pairing of every later
        line, and a shortstat line with no preceding author is skipped.

        :param seq: iterable of normalized lines, as returned by `get_log`
        :return: generator of `(author, {'files', 'inserted', 'deleted'})`
        """
        author = None
        for line in seq:
            if self._STAT_LINE.match(line):
                if author is not None:
                    yield author, self._parse_stats(line)
                    author = None
                continue
            if author is not None:
                # The previous author line had no statistics of its own.
                yield author, self._parse_stats('')
            author = line
        if author is not None:
            yield author, self._parse_stats('')
def main(argv=None):
    """Print the per-author statistics table for the path given on the CLI.

    :param argv: command-line arguments without the program name; defaults
        to `sys.argv[1:]`
    :return: process exit status (0 on success, 2 when the path is missing)
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        # Report the missing argument instead of failing with an IndexError.
        sys.stderr.write(
            'usage: %s <file-or-directory>\n' % os.path.basename(sys.argv[0]))
        return 2
    filepath = os.path.abspath(args[0])
    g = GitStat(filepath)
    print(filepath)
    print(g.generate_stats(filepath))
    return 0


# Guarded so that importing this module does not run git.
if __name__ == '__main__':
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment