Created
June 26, 2025 23:26
-
-
Save JnyJny/6d615daf645c812b1e4a6a2cf8784b7e to your computer and use it in GitHub Desktop.
Parse a git commit log and return a tuple of YYYY-MM for least activity and most activity.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
from collections import Counter
from datetime import datetime

import dateutil
import dateutil.parser
# Format template rendering a date/datetime passed as ``d`` into a
# "YYYY-MM" key (month zero-padded to two digits).
key_fmt = "{d.year}-{d.month:02d}"
def parse_log(path: str, target_year: int = 0) -> tuple[str, str]:
    """Return the (least, most) active "YYYY-MM" months found in a log file.

    Every line of *path* is searched with a regular expression for a
    record shaped like
    ``Date: <date> | N file(s) changed[, X insertion(s)(+)][, Y deletion(s)(-)]``.
    Lines that do not match are ignored.  A month's activity is the sum of
    the insertions and deletions of its matching lines.

    Args:
        path: Path of the text file to read.
        target_year: When non-zero, only lines dated in this year are counted.

    Returns:
        A ``(least_active, most_active)`` tuple of "YYYY-MM" keys.

    Raises:
        ValueError: If no line contributes (nothing matched, or the year
            filter excluded every match).
    """
    pattern = re.compile(
        r"Date:\s+(.*?)\s+\|\s+"  # Date part
        r"(\d+)\s+files?\s+changed(?:,\s+)?"  # Files changed
        r"(?:(\d+)\s+insertions?\(\+\))?(?:,\s+)?"  # Optional insertions
        r"(?:(\d+)\s+deletions?\(\-\))?"  # Optional deletions
    )
    counter = Counter()
    with open(path, "r") as fp:
        for line in fp:
            if match := pattern.search(line):
                date = dateutil.parser.parse(match.group(1).strip())
                if target_year and target_year != date.year:
                    continue
                key = key_fmt.format(d=date)
                # Either count may be absent from the line; treat it as zero.
                insertions = int(match.group(3)) if match.group(3) else 0
                deletions = int(match.group(4)) if match.group(4) else 0
                # Activity is total churn (insertions plus deletions), the
                # same measure the other parsers in this file use.
                counter[key] += insertions + deletions
    if not counter:
        raise ValueError(f"no commit activity found in {path!r}")
    return (min(counter, key=counter.get), max(counter, key=counter.get))
def parse_log2(path: str, target_year: int = 0) -> tuple[str, str]:
    """Return the (least, most) active "YYYY-MM" months found in a log file.

    Each line of *path* is split at the first ``|``: the left part holds
    the date, the right part the comma-separated change summary.  The date
    is parsed with ``datetime.strptime`` only (no regular expression, no
    dateutil).

    Args:
        path: Path of the text file to read.
        target_year: When non-zero, only lines dated in this year are counted.

    Returns:
        A ``(least_active, most_active)`` tuple of "YYYY-MM" keys.

    Raises:
        ValueError: If a line's date does not fit the expected format, or
            if no line contributes any activity.
    """
    counter = Counter()
    date_fmt = "%a %b %d %H:%M:%S %Y"
    with open(path, "r") as fp:
        for line in fp:
            date_part, _, updates = line.partition("|")
            # Drop the first and last whitespace-separated tokens, which
            # date_fmt does not describe (presumably the "Date:" label and
            # the UTC offset -- confirm against the log format).
            date = datetime.strptime(" ".join(date_part.split()[1:-1]), date_fmt)
            if target_year and target_year != date.year:
                continue
            key = key_fmt.format(d=date)
            # The first comma-separated field is skipped; every remaining
            # field starts with the number to add.
            for update in updates.split(",")[1:]:
                counter[key] += int(update.split()[0])
    if not counter:
        raise ValueError(f"no commit activity found in {path!r}")
    return (min(counter, key=counter.get), max(counter, key=counter.get))
def parse_log3(path: str, target_year: int = 0) -> tuple[str, str]:
    """Return the (least, most) active "YYYY-MM" months found in a log file.

    The lines are normalized by replacing '|' with ',' and removing the
    "Date: " prefix, so that each line becomes one comma-separated list:
    the date first, then the change summary fields.

    Args:
        path: Path of the text file to read.
        target_year: When non-zero, only lines dated in this year are counted.

    Returns:
        A ``(least_active, most_active)`` tuple of "YYYY-MM" keys.

    Raises:
        ValueError: If no line contributes a month (for example the year
            filter excluded everything).
    """
    counter = Counter()
    # Read inside a context manager so the file handle is always closed.
    with open(path, "r") as fp:
        lines = fp.read().replace("|", ",").splitlines()
    for line in lines:
        line = line.removeprefix("Date: ")
        date_part, *updates = line.split(",")
        date = dateutil.parser.parse(date_part)
        if target_year and target_year != date.year:
            continue
        # updates[0] is skipped; every later field starts with the number
        # to add.
        counter[key_fmt.format(d=date)] += sum(
            int(update.split()[0]) for update in updates[1:]
        )
    if not counter:
        raise ValueError(f"no commit activity found in {path!r}")
    return (min(counter, key=counter.get), max(counter, key=counter.get))
def get_min_max_amount_of_commits(
    commit_log: str, year: int | None = None
) -> tuple[str, str]:
    """
    Calculate the amount of inserts / deletes per month from the
    provided commit log.

    Takes optional year arg, if provided only look at lines for
    that year, if not, use the entire file.

    Returns a tuple of (least_active_month, most_active_month).
    When only one month is present it is returned for both positions.

    Raises ValueError if a line does not contain exactly one '|', or if
    no line contributes a month.
    """
    cnt = Counter()
    with open(commit_log) as f:
        for line in f:
            date, counter = line.split("|")
            # Keep only what follows the first ':' (the text after the
            # leading label), then let dateutil parse it.
            _, date = date.split(":", 1)
            date = dateutil.parser.parse(date.strip())
            if year is not None and year != date.year:
                # An optional year was provided and this line is outside it.
                continue

            counter_parts = [s.strip() for s in counter.split(",")]
            ym = key_fmt.format(d=date)
            # The first field is skipped; each remaining field starts with
            # the number to add.
            insert_and_deletes = counter_parts[1:]
            cnt[ym] += sum(int(el.split()[0]) for el in insert_and_deletes)

    ranked = cnt.most_common()
    if not ranked:
        raise ValueError(f"no commit activity found in {commit_log!r}")
    # most_common() orders from highest to lowest count; indexing both
    # ends (instead of star-unpacking) also works for a single month.
    return (ranked[-1][0], ranked[0][0])
if __name__ == "__main__":
    # Run every implementation against the same log file and print each
    # result next to its label so they can be compared by eye.
    log_path = "git_log_stat.out"
    implementations = (
        ("1", parse_log),
        ("2", parse_log2),
        ("3", parse_log3),
        ("B", get_min_max_amount_of_commits),
    )
    for label, implementation in implementations:
        print(label, implementation(log_path))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment