Last active
October 1, 2024 11:41
-
-
Save mkorpela/5ca8c596181864fce3fa057e5d0653ed to your computer and use it in GitHub Desktop.
Repository Activity Measure
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip install gitpython numpy tqdm
import argparse
from collections import defaultdict, deque
from datetime import timedelta

import git
import numpy as np
from tqdm import tqdm
def _quartile_summary(values):
    """Return (25th percentile, median, 75th percentile) of *values*.

    An empty sample has no defined percentiles, so all three results are NaN
    instead of triggering NumPy warnings or errors.
    """
    if len(values) == 0:
        nan = float('nan')
        return nan, nan, nan
    return (
        np.percentile(values, 25),
        np.median(values),
        np.percentile(values, 75),
    )


def analyze_repo(repo_path, branch='main'):
    """Compute commit-size and commit-cadence statistics for one branch.

    Every commit reachable from *branch* is visited oldest-first. For each
    commit the ``lines`` total reported by ``commit.stats`` is added to the
    bucket of the clock hour in which it was committed, and the commit
    timestamp is recorded so the gaps between consecutive commits can be
    measured. A progress bar shows the current commit date and the median
    change size over the most recent 100 commits.

    Args:
        repo_path: Path handed to ``git.Repo``.
        branch: Revision passed to ``repo.iter_commits`` (default ``'main'``).

    Returns:
        A dict with the keys ``total_commits``, ``total_changes``,
        ``active_hours`` (number of distinct hours containing a commit),
        ``avg_changes_per_hour``, ``median_changes_per_hour``,
        ``percentile_25_changes``, ``percentile_75_changes``,
        ``median_time_between_commits``, ``percentile_25_time`` and
        ``percentile_75_time``. Time values are in hours. Percentile values
        are NaN when there is nothing to measure (for example the time
        between commits of a single-commit history).
    """
    repo = git.Repo(repo_path)
    # Oldest first, so consecutive entries yield non-negative time gaps.
    commits = sorted(repo.iter_commits(branch), key=lambda c: c.committed_datetime)

    print(f"Analyzing {len(commits)} commits on branch '{branch}'")

    hourly_changes = defaultdict(int)
    commit_times = []
    total_changes = 0
    # Bounded window: the deque silently drops the oldest entry past 100.
    recent_changes = deque(maxlen=100)

    pbar = tqdm(commits, desc="Processing commits")
    for commit in pbar:
        committed_at = commit.committed_datetime
        hour = committed_at.replace(minute=0, second=0, microsecond=0)
        changes = commit.stats.total['lines']

        hourly_changes[hour] += changes
        total_changes += changes
        commit_times.append(committed_at)
        recent_changes.append(changes)

        median_changes = np.median(list(recent_changes))
        pbar.set_postfix({
            'Date': f"{committed_at.strftime('%Y-%m-%d')}",
            'Median changes/100': f"{median_changes:.2f}"
        }, refresh=True)

    active_hours = len(hourly_changes)
    avg_changes_per_hour = total_changes / active_hours if active_hours > 0 else 0

    # Percentiles of changes per active hour.
    percentile_25, median_changes_per_hour, percentile_75 = _quartile_summary(
        list(hourly_changes.values())
    )

    # Percentiles of the time between consecutive commits, in hours.
    time_between_commits = [
        (later - earlier).total_seconds() / 3600
        for earlier, later in zip(commit_times, commit_times[1:])
    ]
    percentile_25_time, median_time_between_commits, percentile_75_time = (
        _quartile_summary(time_between_commits)
    )

    return {
        'total_commits': len(commits),
        'total_changes': total_changes,
        'active_hours': active_hours,
        'avg_changes_per_hour': avg_changes_per_hour,
        'median_changes_per_hour': median_changes_per_hour,
        'percentile_25_changes': percentile_25,
        'percentile_75_changes': percentile_75,
        'median_time_between_commits': median_time_between_commits,
        'percentile_25_time': percentile_25_time,
        'percentile_75_time': percentile_75_time
    }
def print_summary(stats):
    """Print a human-readable report of the statistics dict to stdout.

    Args:
        stats: Mapping with the keys ``total_commits``, ``total_changes``,
            ``active_hours``, ``avg_changes_per_hour``,
            ``median_changes_per_hour``, ``percentile_25_changes``,
            ``percentile_75_changes``, ``median_time_between_commits``,
            ``percentile_25_time`` and ``percentile_75_time``.

    Raises:
        KeyError: If any of the keys above is missing from *stats*.
    """
    report = [
        # Leading empty entry reproduces the blank line before the heading.
        "",
        "Overall Summary:",
        f"Total commits: {stats['total_commits']}",
        f"Total changes: {stats['total_changes']} lines",
        f"Active hours: {stats['active_hours']}",
        f"Average changes per active hour: {stats['avg_changes_per_hour']:.2f} lines",
        "Changes per active hour:",
        f" 25th percentile: {stats['percentile_25_changes']:.2f} lines",
        f" 50th percentile (median): {stats['median_changes_per_hour']:.2f} lines",
        f" 75th percentile: {stats['percentile_75_changes']:.2f} lines",
        "Time between commits:",
        f" 25th percentile: {stats['percentile_25_time']:.2f} hours",
        f" 50th percentile (median): {stats['median_time_between_commits']:.2f} hours",
        f" 75th percentile: {stats['percentile_75_time']:.2f} hours",
    ]
    print("\n".join(report))
if __name__ == "__main__":
    # Command-line entry point: parse the repository path and optional branch,
    # run the analysis, then print the summary report.
    parser = argparse.ArgumentParser(description="Analyze Git repository commit patterns.")
    parser.add_argument("repo_path", help="Path to the Git repository")
    parser.add_argument("-b", "--branch", default="main", help="Branch to analyze (default: main)")
    args = parser.parse_args()

    print(f"Analyzing repository: {args.repo_path}")
    print(f"Branch: {args.branch}")

    stats = analyze_repo(args.repo_path, args.branch)
    print_summary(stats)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment