Last active
July 7, 2022 20:55
-
-
Save bendichter/96953f94dc1cdde79f381013d17ddd25 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Before running this script, generate the log file it parses
# (e.g. from a notebook cell, inside the repository):
#
#   !git log --all --numstat --pretty=format:'--%h--%ad--%aN' --no-renames > git.log
import datetime | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
from pprint import pprint | |
import matplotlib.ticker | |
import numpy as np | |
# Maps each raw git username to the display name it should be counted under,
# so that several usernames belonging to one person collapse into one row.
author_map = {
    **dict.fromkeys(("alfred_username1", "alfred_username2"), "Alfred"),
    **dict.fromkeys(("bobby_username1",), "Bobby"),
}

# Raw author names that are left out of the timesheet.
# NOTE(review): the first entry looks like a pasted shell command rather than
# an author name -- confirm whether it is intentional.
blacklisted = [
    "!git for-each-ref --format='%(refname:short)' `git symbolic-ref HEAD`",
    "root",
    "add-bots-here",
    "dependabot[bot]",
]

# Location of the log file to parse.
fpath = "path/to/git.log"
# Parse the git log into pandas DataFrames.
#
# Every line of the log is loaded as one row of a single "raw" column. The
# separator is the control character U+0012, presumably chosen because it does
# not occur in the log, so read_csv never splits a line into several fields.
commits = pd.read_csv(fpath, sep="\u0012", header=None, names=["raw"])

# Lines starting with "--" are treated as commit headers of the form
# "--<sha>--<date>--<author>"; na=False treats missing values as non-matching.
commit_marker = commits[commits["raw"].str.startswith("--", na=False)]
commit_info = commit_marker["raw"].str.extract(
    r"^--(?P<sha>.*?)--(?P<date>.*?)--(?P<author>.*?)$",
    expand=True,
)
# utc=True normalises dates written with different UTC offsets into a single
# timezone-aware column; without it a log with mixed offsets does not parse
# into a proper datetime column.
commit_info["date"] = pd.to_datetime(commit_info["date"], utc=True)

# Every remaining line is split on tabs into insertions / deletions / filename.
file_stats_marker = commits[~commits.index.isin(commit_info.index)]
file_stats = file_stats_marker["raw"].str.split("\t", expand=True)
file_stats = file_stats.rename(columns={0: "insertions", 1: "deletions", 2: "filename"})
# errors="coerce" turns non-numeric counts into NaN instead of raising.
file_stats["insertions"] = pd.to_numeric(file_stats["insertions"], errors="coerce")
file_stats["deletions"] = pd.to_numeric(file_stats["deletions"], errors="coerce")

# Attach each commit's sha/date/author to the file rows that follow it:
# spread the header rows over the full index, forward-fill the gaps, then drop
# the header rows themselves. `.ffill()` replaces the deprecated
# `fillna(method="ffill")`.
commit_data = commit_info.reindex(commits.index).ffill()
commit_data = commit_data[~commit_data.index.isin(commit_info.index)]
commit_data = commit_data.join(file_stats)
# get total authors and weeks
# Display names of all authors (after applying author_map), sorted and
# de-duplicated; raw usernames listed in `blacklisted` are skipped.
all_authors = commit_data["author"].unique()
all_authors = list(np.unique([author_map.get(x, x) for x in all_authors if x not in blacklisted]))

dates = commit_data["date"]
start = dates.min()
stop = dates.max()
# +1 so the final (possibly partial) week -- the one containing `stop` --
# gets its own column instead of being dropped by the floor division.
n_weeks = (stop - start).days // 7 + 1

# timesheet[i, k] == 1 when all_authors[i] has at least one commit in week k.
timesheet = np.zeros((len(all_authors), n_weeks))

# handle different usernames: translate every row's author once, up front
canonical_authors = commit_data["author"].map(lambda name: author_map.get(name, name))

# iterate over weeks and mark the authors active in each one
one_week = datetime.timedelta(weeks=1)
for week_n in range(n_weeks):
    # Week k is the half-open interval [start + k weeks, start + (k + 1) weeks):
    # column 0 begins at the first commit, and a commit falling exactly on a
    # boundary is counted once, in the week that starts there.
    week_start = start + week_n * one_week
    week_stop = week_start + one_week
    in_week = (week_start <= dates) & (dates < week_stop)
    authors_for_week = set(canonical_authors[in_week])
    for i, author in enumerate(all_authors):
        if author in authors_for_week:
            timesheet[i, week_n] = 1
# Draw the timesheet as an image: one row per author, one column per week.
fig, ax = plt.subplots(figsize=(15, 10))
ax.imshow(timesheet, cmap="Greys")

# Label each row with its author's name.
author_rows = range(len(all_authors))
ax.set_yticks(author_rows)
_ = ax.set_yticklabels(all_authors)
ax.set_xlabel("weeks")

# Minor ticks at every integer on both axes, with grid lines drawn on major
# and minor ticks alike.
ax.minorticks_on()
for axis in (ax.xaxis, ax.yaxis):
    axis.set_minor_locator(matplotlib.ticker.MultipleLocator(1))
ax.grid(which="both", linewidth=0.25, color="k")

plt.show()
# Number of active weeks per author: the row sums of `timesheet`, keyed by the
# author's display name.
total_weeks = dict(zip(all_authors, timesheet.sum(axis=1)))

print("total weeks:")
print("============")
# The loop variable is deliberately not called `total_weeks`, so the mapping
# built above is not overwritten while it is being printed.
for author, n_active_weeks in total_weeks.items():
    print(f"{author}: {n_active_weeks}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
total weeks:
Alejandro Sánchez Yalí: 32.0
Amelia Ardath: 2.0
Angel Rey: 1.0
Anthony Anabila Abeo: 9.0
Ben Dichter: 19.0
Cesar Uribe: 1.0
Cristian: 7.0
César Alfredo Uribe León: 8.0
Daniel Lopez: 8.0
Joyce Obi: 5.0
Juan David Arias: 35.0
Karim Marzouq: 1.0
Kevin c: 7.0
Max: 6.0
Nick Sweeting: 5.0
Rey Messon: 3.0
Sergey Mankovsky: 3.0
Thomas Lisankie: 1.0
ana: 1.0
apkallum: 1.0
dnl-molina: 1.0
jdcaballerov: 17.0