Skip to content

Instantly share code, notes, and snippets.

@mkorpela
Last active October 1, 2024 11:41
Show Gist options
  • Save mkorpela/5ca8c596181864fce3fa057e5d0653ed to your computer and use it in GitHub Desktop.
Save mkorpela/5ca8c596181864fce3fa057e5d0653ed to your computer and use it in GitHub Desktop.
Repository Activity Measure
# pip install gitpython numpy tqdm
import argparse
from collections import defaultdict, deque
from datetime import timedelta

import git
import numpy as np
from tqdm import tqdm
def _quartiles(values):
    """Return the 25th percentile, median and 75th percentile of *values*.

    An empty input yields ``(0.0, 0.0, 0.0)`` rather than NaN or a NumPy
    error, mirroring the 0 fallback used for the per-hour average.
    """
    if len(values) == 0:
        return 0.0, 0.0, 0.0
    return np.percentile(values, 25), np.median(values), np.percentile(values, 75)


def analyze_repo(repo_path, branch='main'):
    """Collect commit-activity statistics for one branch of a Git repository.

    Every commit reachable from *branch* is visited in order of commit time.
    The per-commit ``lines`` total reported by ``commit.stats`` is summed
    into buckets keyed by the commit timestamp truncated to the hour; an
    "active hour" is a bucket that received at least one commit.

    Args:
        repo_path: Path passed to ``git.Repo``.
        branch: Revision passed to ``repo.iter_commits`` (default ``'main'``).

    Returns:
        A dict with the keys ``total_commits``, ``total_changes``,
        ``active_hours``, ``avg_changes_per_hour``,
        ``median_changes_per_hour``, ``percentile_25_changes``,
        ``percentile_75_changes``, ``median_time_between_commits``,
        ``percentile_25_time`` and ``percentile_75_time``.  The time values
        are in hours.  Statistics that have no underlying data (no commits,
        or a single commit for the time-between-commits values) are 0.
    """
    repo = git.Repo(repo_path)
    # Oldest first, so consecutive entries give the gap between commits.
    commits = sorted(repo.iter_commits(branch), key=lambda c: c.committed_datetime)
    print(f"Analyzing {len(commits)} commits on branch '{branch}'")

    hourly_changes = defaultdict(int)
    commit_times = []
    total_changes = 0
    # Sliding window over the most recent 100 commits; only used for the
    # progress-bar readout. maxlen drops the oldest entry automatically.
    recent_changes = deque(maxlen=100)

    pbar = tqdm(commits, desc="Processing commits")
    for commit in pbar:
        committed_at = commit.committed_datetime
        hour = committed_at.replace(minute=0, second=0, microsecond=0)
        changes = commit.stats.total['lines']

        hourly_changes[hour] += changes
        total_changes += changes
        commit_times.append(committed_at)
        recent_changes.append(changes)

        median_changes = np.median(recent_changes)
        pbar.set_postfix({
            'Date': f"{committed_at.strftime('%Y-%m-%d')}",
            'Median changes/100': f"{median_changes:.2f}"
        }, refresh=True)

    active_hours = len(hourly_changes)
    avg_changes_per_hour = total_changes / active_hours if active_hours > 0 else 0

    # Distribution of changed lines per active hour.
    percentile_25, median_changes_per_hour, percentile_75 = _quartiles(
        list(hourly_changes.values())
    )

    # Distribution of the gaps between consecutive commits, in hours.
    # With fewer than two commits there are no gaps and the result is all 0.
    time_between_commits = [
        (later - earlier).total_seconds() / 3600
        for earlier, later in zip(commit_times, commit_times[1:])
    ]
    percentile_25_time, median_time_between_commits, percentile_75_time = _quartiles(
        time_between_commits
    )

    return {
        'total_commits': len(commits),
        'total_changes': total_changes,
        'active_hours': active_hours,
        'avg_changes_per_hour': avg_changes_per_hour,
        'median_changes_per_hour': median_changes_per_hour,
        'percentile_25_changes': percentile_25,
        'percentile_75_changes': percentile_75,
        'median_time_between_commits': median_time_between_commits,
        'percentile_25_time': percentile_25_time,
        'percentile_75_time': percentile_75_time
    }
def print_summary(stats):
    """Print the statistics dict produced by ``analyze_repo`` to stdout.

    Args:
        stats: Mapping that provides the keys ``total_commits``,
            ``total_changes``, ``active_hours``, ``avg_changes_per_hour``,
            ``percentile_25_changes``, ``median_changes_per_hour``,
            ``percentile_75_changes``, ``percentile_25_time``,
            ``median_time_between_commits`` and ``percentile_75_time``.
    """
    def report_lines():
        # Lazily formatted so each line is printed before the next one is
        # built, exactly like a sequence of individual print calls.
        yield "\nOverall Summary:"
        yield f"Total commits: {stats['total_commits']}"
        yield f"Total changes: {stats['total_changes']} lines"
        yield f"Active hours: {stats['active_hours']}"
        yield f"Average changes per active hour: {stats['avg_changes_per_hour']:.2f} lines"

        yield f"Changes per active hour:"
        yield f" 25th percentile: {stats['percentile_25_changes']:.2f} lines"
        yield f" 50th percentile (median): {stats['median_changes_per_hour']:.2f} lines"
        yield f" 75th percentile: {stats['percentile_75_changes']:.2f} lines"

        yield f"Time between commits:"
        yield f" 25th percentile: {stats['percentile_25_time']:.2f} hours"
        yield f" 50th percentile (median): {stats['median_time_between_commits']:.2f} hours"
        yield f" 75th percentile: {stats['percentile_75_time']:.2f} hours"

    for text in report_lines():
        print(text)
if __name__ == "__main__":
    # Command-line entry point: parse the repository path and optional
    # branch, echo them, then run the analysis and print its summary.
    cli = argparse.ArgumentParser(
        description="Analyze Git repository commit patterns."
    )
    cli.add_argument("repo_path", help="Path to the Git repository")
    cli.add_argument(
        "-b",
        "--branch",
        default="main",
        help="Branch to analyze (default: main)",
    )
    options = cli.parse_args()

    print(f"Analyzing repository: {options.repo_path}")
    print(f"Branch: {options.branch}")

    print_summary(analyze_repo(options.repo_path, options.branch))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment