bendichter · July 7, 2022 20:55 · bendichter · Jul 1, 2022
diff --git a/git_timesheet.py b/git_timesheet.py
 # first run:
 #
 #!git log --all --numstat --pretty=format:'--%h--%ad--%aN' --no-renames > git.log 


 import datetime
 import matplotlib.pyplot as plt
 import pandas as pd
 from pprint import pprint
 import matplotlib.ticker
 import numpy as np

 # Git usernames grouped under the display name they should be reported as.
 # One person may have committed under several usernames; list them all.
 _aliases_by_author = {
    "Alfred": ("alfred_username1", "alfred_username2"),
    "Bobby": ("bobby_username1",),
 }

 # Flat username -> display-name lookup derived from the grouping above.
 author_map = {
    alias: author
    for author, aliases in _aliases_by_author.items()
    for alias in aliases
 }

 # Usernames that are left out of the author list (bots, system accounts).
 # NOTE(review): the first entry looks like a pasted shell command rather
 # than a username -- confirm it is intended.
 blacklisted = [
    "!git for-each-ref --format='%(refname:short)' `git symbolic-ref HEAD`",
    "root",
    "add-bots-here",
    "dependabot[bot]",
 ]

 # Location of the git log dump that gets parsed.
 fpath = "path/to/git.log"

 # Parse the git log into pandas DataFrames.
 #
 # The log interleaves two kinds of lines:
 #   * commit markers of the form "--<sha>--<date>--<author>"
 #   * after each marker, one "<insertions>\t<deletions>\t<filename>" line
 #     per file touched by that commit
 #
 # "\u0012" is used as the separator so that every log line lands, unsplit,
 # in the single "raw" column (assumes that control character never occurs
 # in the log itself).
 commits = pd.read_csv(fpath, sep="\u0012", header=None, names=["raw"])

 # Rows that open a commit, split into sha / date / author columns.
 commit_marker = commits[commits["raw"].str.startswith("--", na=False)]
 commit_info = commit_marker["raw"].str.extract(
    r"^--(?P<sha>.*?)--(?P<date>.*?)--(?P<author>.*?)$", expand=True
 )
 # Author dates carry each author's own UTC offset. Converting everything to
 # UTC yields a single timezone-aware datetime column that can be compared
 # and subtracted even when the offsets differ between commits.
 commit_info["date"] = pd.to_datetime(commit_info["date"], utc=True)

 # Every remaining row is a per-file stat line belonging to the marker above it.
 file_stats_marker = commits[~commits.index.isin(commit_info.index)]
 file_stats = file_stats_marker["raw"].str.split("\t", expand=True)
 file_stats = file_stats.rename(columns={0: "insertions", 1: "deletions", 2: "filename"})
 # Non-numeric counts (git prints "-" for binary files) become NaN.
 for count_column in ("insertions", "deletions"):
    file_stats[count_column] = pd.to_numeric(file_stats[count_column], errors="coerce")

 # Spread each marker's sha/date/author down over the stat rows that follow it
 # (.ffill() replaces the deprecated fillna(method="ffill")), then drop the
 # marker rows themselves and attach the per-file stats. A commit without any
 # stat rows therefore contributes no rows to commit_data.
 commit_data = commit_info.reindex(commits.index).ffill()
 commit_data = commit_data[~commit_data.index.isin(commit_info.index)]
 commit_data = commit_data.join(file_stats)

 # Build the author x week activity matrix.
 #
 # Blacklisted usernames are removed first; the remaining usernames are then
 # collapsed onto their display names via author_map (unmapped usernames are
 # kept as they are). np.unique de-duplicates and sorts the result.
 all_authors = commit_data["author"].unique()
 all_authors = list(np.unique([author_map.get(x, x) for x in all_authors if x not in blacklisted]))

 dates = commit_data["date"]
 start = dates.min()
 stop = dates.max()

 # +1 so the trailing (possibly partial) week that contains `stop` gets a
 # column of its own instead of being cut off by the floor division.
 n_weeks = (stop - start).days // 7 + 1

 # timesheet[i, w] == 1 when all_authors[i] has at least one row in week w.
 timesheet = np.zeros((len(all_authors), n_weeks))

 one_week = datetime.timedelta(weeks=1)
 for week_n in range(n_weeks):
    # Column week_n covers the half-open interval
    # [start + week_n weeks, start + (week_n + 1) weeks): the lower bound is
    # inclusive so the very first commit and commits that fall exactly on a
    # week boundary are counted once, in the week they start.
    week_start = start + week_n * one_week
    week_stop = week_start + one_week
    in_week = (week_start <= commit_data["date"]) & (commit_data["date"] < week_stop)
    commit_data_for_week = commit_data[in_week]
    authors_for_week = commit_data_for_week["author"].unique()
    # handle different usernames
    authors_for_week = list(np.unique([author_map.get(x, x) for x in authors_for_week]))
    for i, author in enumerate(all_authors):
        if author in authors_for_week:
            timesheet[i, week_n] = 1

 # Draw the activity matrix: one row per author, one column per week,
 # rendered with the "Greys" colormap.
 fig, ax = plt.subplots(figsize=(15, 10))
 ax.imshow(timesheet, cmap="Greys")

 # Label every row with its author.
 author_rows = range(len(all_authors))
 ax.set_yticks(author_rows)
 _ = ax.set_yticklabels(all_authors)
 ax.set_xlabel("weeks")

 # Minor ticks at every integer on both axes, with grid lines on major and
 # minor ticks alike. `ax` is the only axes of the figure just created, so it
 # is the same object plt.gca() would return.
 ax.minorticks_on()
 for axis in (ax.xaxis, ax.yaxis):
    axis.set_minor_locator(matplotlib.ticker.MultipleLocator(1))
 ax.grid(which="both", linewidth=0.25, color="k")

 plt.show()

 # Report the number of active weeks per author.
 # Each row of the 0/1 timesheet sums to that author's count of active weeks.
 total_weeks = dict(zip(all_authors, timesheet.sum(axis=1)))
 print("total weeks:")
 print("============")
 # The loop variable is deliberately not called total_weeks, so the mapping
 # built above is still available after the loop.
 for author, weeks_active in total_weeks.items():
    print(f"{author}: {weeks_active}")
	# first run:
	#
	#!git log --all --numstat --pretty=format:'--%h--%ad--%aN' --no-renames > git.log


	import datetime
	import matplotlib.pyplot as plt
	import pandas as pd
	from pprint import pprint
	import matplotlib.ticker
	import numpy as np

	# Git usernames grouped under the display name they should be reported as.
	# One person may have committed under several usernames; list them all.
	_aliases_by_author = {
		"Alfred": ("alfred_username1", "alfred_username2"),
		"Bobby": ("bobby_username1",),
	}

	# Flat username -> display-name lookup derived from the grouping above.
	author_map = {
		alias: author
		for author, aliases in _aliases_by_author.items()
		for alias in aliases
	}

	# Usernames that are left out of the author list (bots, system accounts).
	# NOTE(review): the first entry looks like a pasted shell command rather
	# than a username -- confirm it is intended.
	blacklisted = [
		"!git for-each-ref --format='%(refname:short)' `git symbolic-ref HEAD`",
		"root",
		"add-bots-here",
		"dependabot[bot]",
	]

	# Location of the git log dump that gets parsed.
	fpath = "path/to/git.log"

	# Parse the git log into pandas DataFrames.
	#
	# The log interleaves two kinds of lines:
	#   * commit markers of the form "--<sha>--<date>--<author>"
	#   * after each marker, one "<insertions>\t<deletions>\t<filename>" line
	#     per file touched by that commit
	#
	# "\u0012" is used as the separator so that every log line lands, unsplit,
	# in the single "raw" column (assumes that control character never occurs
	# in the log itself).
	commits = pd.read_csv(fpath, sep="\u0012", header=None, names=["raw"])

	# Rows that open a commit, split into sha / date / author columns.
	# Each group is a lazy ".*?" so it stops at the next "--" delimiter; a bare
	# ".?" would capture at most one character and never match a real marker.
	commit_marker = commits[commits["raw"].str.startswith("--", na=False)]
	commit_info = commit_marker["raw"].str.extract(
		r"^--(?P<sha>.*?)--(?P<date>.*?)--(?P<author>.*?)$", expand=True
	)
	# Author dates carry each author's own UTC offset. Converting everything to
	# UTC yields a single timezone-aware datetime column that can be compared
	# and subtracted even when the offsets differ between commits.
	commit_info["date"] = pd.to_datetime(commit_info["date"], utc=True)

	# Every remaining row is a per-file stat line belonging to the marker above it.
	file_stats_marker = commits[~commits.index.isin(commit_info.index)]
	file_stats = file_stats_marker["raw"].str.split("\t", expand=True)
	file_stats = file_stats.rename(columns={0: "insertions", 1: "deletions", 2: "filename"})
	# Non-numeric counts (git prints "-" for binary files) become NaN.
	for count_column in ("insertions", "deletions"):
		file_stats[count_column] = pd.to_numeric(file_stats[count_column], errors="coerce")

	# Spread each marker's sha/date/author down over the stat rows that follow it
	# (.ffill() replaces the deprecated fillna(method="ffill")), then drop the
	# marker rows themselves and attach the per-file stats. A commit without any
	# stat rows therefore contributes no rows to commit_data.
	commit_data = commit_info.reindex(commits.index).ffill()
	commit_data = commit_data[~commit_data.index.isin(commit_info.index)]
	commit_data = commit_data.join(file_stats)

	# Build the author x week activity matrix.
	#
	# Blacklisted usernames are removed first; the remaining usernames are then
	# collapsed onto their display names via author_map (unmapped usernames are
	# kept as they are). np.unique de-duplicates and sorts the result.
	all_authors = commit_data["author"].unique()
	all_authors = list(np.unique([author_map.get(x, x) for x in all_authors if x not in blacklisted]))

	dates = commit_data["date"]
	start = dates.min()
	stop = dates.max()

	# +1 so the trailing (possibly partial) week that contains `stop` gets a
	# column of its own instead of being cut off by the floor division.
	n_weeks = (stop - start).days // 7 + 1

	# timesheet[i, w] == 1 when all_authors[i] has at least one row in week w.
	timesheet = np.zeros((len(all_authors), n_weeks))

	one_week = datetime.timedelta(weeks=1)
	for week_n in range(n_weeks):
		# Column week_n covers the half-open interval
		# [start + week_n weeks, start + (week_n + 1) weeks): the lower bound is
		# inclusive so the very first commit and commits that fall exactly on a
		# week boundary are counted once, in the week they start.
		week_start = start + week_n * one_week
		week_stop = week_start + one_week
		in_week = (week_start <= commit_data["date"]) & (commit_data["date"] < week_stop)
		commit_data_for_week = commit_data[in_week]
		authors_for_week = commit_data_for_week["author"].unique()
		# handle different usernames
		authors_for_week = list(np.unique([author_map.get(x, x) for x in authors_for_week]))
		for i, author in enumerate(all_authors):
			if author in authors_for_week:
				timesheet[i, week_n] = 1

	# Draw the activity matrix: one row per author, one column per week,
	# rendered with the "Greys" colormap.
	fig, ax = plt.subplots(figsize=(15, 10))
	ax.imshow(timesheet, cmap="Greys")

	# Label every row with its author.
	author_rows = range(len(all_authors))
	ax.set_yticks(author_rows)
	_ = ax.set_yticklabels(all_authors)
	ax.set_xlabel("weeks")

	# Minor ticks at every integer on both axes, with grid lines on major and
	# minor ticks alike. `ax` is the only axes of the figure just created, so it
	# is the same object plt.gca() would return.
	ax.minorticks_on()
	for axis in (ax.xaxis, ax.yaxis):
		axis.set_minor_locator(matplotlib.ticker.MultipleLocator(1))
	ax.grid(which="both", linewidth=0.25, color="k")

	plt.show()

	# Report the number of active weeks per author.
	# Each row of the 0/1 timesheet sums to that author's count of active weeks.
	total_weeks = dict(zip(all_authors, timesheet.sum(axis=1)))
	print("total weeks:")
	print("============")
	# The loop variable is deliberately not called total_weeks, so the mapping
	# built above is still available after the loop.
	for author, weeks_active in total_weeks.items():
		print(f"{author}: {weeks_active}")
No results found