Skip to content

Instantly share code, notes, and snippets.

@JnyJny
Created June 26, 2025 23:26
Show Gist options
  • Save JnyJny/6d615daf645c812b1e4a6a2cf8784b7e to your computer and use it in GitHub Desktop.
Save JnyJny/6d615daf645c812b1e4a6a2cf8784b7e to your computer and use it in GitHub Desktop.
Parse a git commit log and return a tuple of YYYY-MM strings: the least active month and the most active month.
import re
from collections import Counter
from datetime import datetime

import dateutil
import dateutil.parser
# Template for the per-month counter key: formats an object ``d`` exposing
# ``year`` and ``month`` as "<year>-<zero-padded month>", e.g. "2024-03".
key_fmt = "{d.year}-{d.month:02d}"
def parse_log(path: str, target_year: int = 0) -> tuple[str, str]:
    """Return the (least active, most active) month keys found in a commit log.

    Every line is searched for the shape::

        Date: <date> | N file(s) changed, A insertion(s)(+), D deletion(s)(-)

    where the insertion and deletion counts are each optional. Lines that do
    not match are ignored. A month's activity is the sum of its insertions
    and deletions.

    Args:
        path: Path of the log file to read.
        target_year: When non-zero, only commits dated in that year count.

    Returns:
        A ``(least, most)`` tuple of month keys built with ``key_fmt``.

    Raises:
        ValueError: If no line contributed to the tally.
    """
    pattern = re.compile(
        r"Date:\s+(.*?)\s+\|\s+"  # Date part
        r"(\d+)\s+files?\s+changed(?:,\s+)?"  # Files changed
        r"(?:(\d+)\s+insertions?\(\+\))?(?:,\s+)?"  # Optional insertions
        r"(?:(\d+)\s+deletions?\(\-\))?"  # Optional deletions
    )
    counter = Counter()
    with open(path, "r") as fp:
        for line in fp:
            if match := pattern.search(line):
                date = dateutil.parser.parse(match.group(1).strip())
                if target_year and target_year != date.year:
                    continue
                key = key_fmt.format(d=date)
                insertions = int(match.group(3)) if match.group(3) else 0
                deletions = int(match.group(4)) if match.group(4) else 0
                # Deletions are activity too: add them rather than netting
                # them against insertions, so a month of heavy churn is not
                # scored as idle.
                counter[key] += insertions + deletions
    if not counter:
        raise ValueError(f"no matching commit lines found in {path!r}")
    return (min(counter, key=counter.get), max(counter, key=counter.get))
def parse_log2(path: str, target_year: int = 0) -> tuple[str, str]:
    """Return the (least active, most active) month keys found in a commit log.

    Regex-free variant: each line is split at the first ``|`` into a date
    part and a comma-separated stats part. A month's activity is the sum of
    the leading integers of every stats field after the first one.

    Args:
        path: Path of the log file to read.
        target_year: When non-zero, only commits dated in that year count.

    Returns:
        A ``(least, most)`` tuple of month keys built with ``key_fmt``.

    Raises:
        ValueError: If no line contributed to the tally, or a date part does
            not match the expected ``%a %b %d %H:%M:%S %Y`` layout.
    """
    counter = Counter()
    date_fmt = "%a %b %d %H:%M:%S %Y"
    with open(path, "r") as fp:
        for line in fp:
            date_part, sep, updates = line.partition("|")
            if not sep:
                # No pipe separator (e.g. a blank line): not a stats line.
                continue
            # Drop the first token (the "Date:" label) and the last token
            # (presumably the UTC offset - confirm against the log format)
            # so the remainder matches date_fmt.
            date = datetime.strptime(" ".join(date_part.split()[1:-1]), date_fmt)
            if target_year and target_year != date.year:
                continue
            key = key_fmt.format(d=date)
            # Skip the first stats field (the files-changed count). Summing
            # in one step also registers the month when no counts follow.
            counter[key] += sum(
                int(update.split()[0]) for update in updates.split(",")[1:]
            )
    if not counter:
        raise ValueError(f"no matching commit lines found in {path!r}")
    return (min(counter, key=counter.get), max(counter, key=counter.get))
def parse_log3(path: str, target_year: int = 0) -> tuple[str, str]:
    """Return the (least active, most active) month keys found in a commit log.

    Lines are normalized by replacing '|' with ',' and removing the
    "Date: " prefix, so each line becomes one comma-separated record:
    the date, the files-changed field, then the insertion/deletion fields.
    A month's activity is the sum of the leading integers of the
    insertion/deletion fields.

    Args:
        path: Path of the log file to read.
        target_year: When non-zero, only commits dated in that year count.

    Returns:
        A ``(least, most)`` tuple of month keys built with ``key_fmt``.

    Raises:
        ValueError: If no line contributed to the tally.
    """
    counter = Counter()
    # Read inside a context manager so the file handle is closed promptly.
    with open(path, "r") as fp:
        content = fp.read()
    for line in content.replace("|", ",").splitlines():
        if not line.strip():
            # Blank lines carry no date and would make the date parser fail.
            continue
        line = line.removeprefix("Date: ")
        date_part, *updates = line.split(",")
        date = dateutil.parser.parse(date_part)
        if target_year and target_year != date.year:
            continue
        # updates[0] is the files-changed field; only the rest is tallied.
        counter[key_fmt.format(d=date)] += sum(
            int(update.split()[0]) for update in updates[1:]
        )
    if not counter:
        raise ValueError(f"no matching commit lines found in {path!r}")
    return (min(counter, key=counter.get), max(counter, key=counter.get))
def get_min_max_amount_of_commits(
    commit_log: str, year: int | None = None
) -> tuple[str, str]:
    """
    Calculate the amount of inserts / deletes per month from the
    provided commit log.

    Takes optional year arg, if provided only look at lines for
    that year, if not, use the entire file.

    Returns a tuple of (least_active_month, most_active_month). When the
    log covers a single month, that month is returned in both positions.

    Raises ValueError if no line contributed to the tally.
    """
    cnt = Counter()
    with open(commit_log) as f:
        for line in f:
            if not line.strip():
                # Blank lines have no "|" to split on.
                continue
            date_field, stats = line.split("|")
            # Discard the label before the first colon; later colons belong
            # to the time of day and must be kept.
            _, date_text = date_field.split(":", 1)
            date = dateutil.parser.parse(date_text.strip())
            if year is not None and year != date.year:
                continue
            stat_parts = [s.strip() for s in stats.split(",")]
            ym = key_fmt.format(d=date)
            # The first part is the files-changed count; the rest are the
            # insertion/deletion counts that make up the activity.
            cnt[ym] += sum(int(el.split()[0]) for el in stat_parts[1:])
    ranked = cnt.most_common()
    if not ranked:
        raise ValueError(f"no matching commit lines found in {commit_log!r}")
    # Index instead of unpacking so a single-month log does not fail.
    return (ranked[-1][0], ranked[0][0])
if __name__ == "__main__":
    # Run every implementation against the same log file and print each
    # result next to its label so the answers can be compared.
    log_path = "git_log_stat.out"
    implementations = (
        ("1", parse_log),
        ("2", parse_log2),
        ("3", parse_log3),
        ("B", get_min_max_amount_of_commits),
    )
    for label, implementation in implementations:
        print(label, implementation(log_path))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment