@ecelis
Created January 22, 2025 17:21
#!/usr/bin/env python3
"""
Git Time Analyzer - Enhanced Version
Based on original work by MiKatre (https://gist.github.com/MiKatre/0d6bdd4664cef1ed5d6d67ba531821b8)
Features:
- Commit size analysis
- Working hours consideration (including overnight shifts)
- Smart time allocation
- Data visualization
- Commit message analysis
- Automated commit detection
- Merge commit filtering
"""
import subprocess
import re
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from collections import defaultdict
import numpy as np
class Config:
    """Configuration parameters for the time estimation algorithm."""
    # Time thresholds
    MAX_COMMIT_DIFF_MINUTES = 2 * 60
    ISOLATED_COMMIT_MINUTES = 60
    MIN_COMMIT_MINUTES = 15
    MAX_COMMIT_MINUTES = 4 * 60
    # Working hours (can handle overnight shifts)
    WORK_START_HOUR = 13    # 1 PM
    WORK_END_HOUR = 5       # 5 AM next day
    OVERNIGHT_SHIFT = True  # Flag to indicate that work hours cross midnight
    # Commit size thresholds (total lines changed)
    SMALL_COMMIT_CHANGES = 50
    MEDIUM_COMMIT_CHANGES = 200
    LARGE_COMMIT_CHANGES = 500
    # Commit message patterns
    AUTOMATED_PATTERNS = [
        r'automated',
        r'auto-generated',
        r'bot:',
        r'dependabot',
        r'^\[automated\]',
        r'ci:',
        r'chore\(deps\)',
        r'build\(deps\)',
        r'yarn upgrade',
        r'npm update',
        r'package-lock\.json',
        r'poetry\.lock',
        r'bump version',
    ]
    # Merge patterns
    MERGE_PATTERNS = [
        r'^Merge branch',
        r'^Merge pull request',
        r'^Merge remote-tracking',
        r'^\[maven-release-plugin\]',
        r'^Automatic merge',
        r'^Auto-merge',
    ]
    # Time multipliers for different commit types
    TIME_MULTIPLIERS = {
        'feature': 1.0,
        'fix': 0.8,
        'refactor': 1.2,
        'docs': 0.5,
        'style': 0.3,
        'test': 0.7,
        'chore': 0.4,
        'automated': 0.0,
        'merge': 0.1,
    }

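# Example: for a standard 09:00-17:00 schedule, the working-hours settings above
# would be WORK_START_HOUR = 9, WORK_END_HOUR = 17, OVERNIGHT_SHIFT = False
# (illustrative values; the defaults above model an overnight 13:00-05:00 shift).
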
class CommitAnalyzer:
    """Analyzes commit messages and metadata to determine commit type and importance."""
    def __init__(self):
        """Initialize regex patterns for commit analysis."""
        self.automated_patterns = [re.compile(pattern, re.IGNORECASE)
                                   for pattern in Config.AUTOMATED_PATTERNS]
        self.merge_patterns = [re.compile(pattern, re.IGNORECASE)
                               for pattern in Config.MERGE_PATTERNS]

    def get_commit_type(self, message, commit_hash):
        """Determine the type of commit based on its message and metadata."""
        # Check for automated commits
        if any(pattern.search(message) for pattern in self.automated_patterns):
            return 'automated', Config.TIME_MULTIPLIERS['automated']
        # Check for merge commits
        if any(pattern.search(message) for pattern in self.merge_patterns):
            return 'merge', Config.TIME_MULTIPLIERS['merge']
        # Conventional Commits analysis
        conventional_match = re.match(r'^(feat|fix|docs|style|refactor|test|chore)(\(.*\))?: ', message)
        if conventional_match:
            commit_type = conventional_match.group(1)
            if commit_type == 'feat':
                return 'feature', Config.TIME_MULTIPLIERS['feature']
            return commit_type, Config.TIME_MULTIPLIERS.get(commit_type, 1.0)
        # Analyze message content for keywords
        message_lower = message.lower()
        if 'fix' in message_lower or 'bug' in message_lower:
            return 'fix', Config.TIME_MULTIPLIERS['fix']
        if 'refactor' in message_lower:
            return 'refactor', Config.TIME_MULTIPLIERS['refactor']
        if 'test' in message_lower:
            return 'test', Config.TIME_MULTIPLIERS['test']
        if 'doc' in message_lower:
            return 'docs', Config.TIME_MULTIPLIERS['docs']
        # Default to feature type
        return 'feature', Config.TIME_MULTIPLIERS['feature']

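    # Example classifications with the default patterns (messages are illustrative):
    #   "feat(auth): add OAuth login"          -> ('feature', 1.0)
    #   "Merge branch 'develop' into main"     -> ('merge', 0.1)
    #   "chore(deps): bump lodash to 4.17.21"  -> ('automated', 0.0)
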
    def get_commit_message(self, commit_hash):
        """Get the full commit message for a given hash."""
        try:
            return execute_git_command(f"git log --format=%B -n 1 {commit_hash}").strip()
        except subprocess.CalledProcessError:
            return ""

def execute_git_command(command):
    """Execute a Git command and return its output."""
    output = subprocess.check_output(command, shell=True, universal_newlines=True)
    return output

def get_commit_size(commit_hash):
    """Calculate the size of a commit by counting lines changed."""
    try:
        diff_stats = execute_git_command(f"git show --shortstat {commit_hash}")
        insertions = deletions = 0
        # Match both "1 insertion(+)" and "n insertions(+)"
        if "insertion" in diff_stats:
            insertions = int(re.search(r"(\d+) insertion", diff_stats).group(1))
        if "deletion" in diff_stats:
            deletions = int(re.search(r"(\d+) deletion", diff_stats).group(1))
        return insertions + deletions
    except (subprocess.CalledProcessError, AttributeError):
        return 0

def is_working_hours(dt):
    """Check if a given datetime falls within defined working hours."""
    hour = dt.hour
    if Config.OVERNIGHT_SHIFT:
        if Config.WORK_START_HOUR > Config.WORK_END_HOUR:
            # Handle overnight shift (e.g., 13:00 to 05:00 next day)
            return hour >= Config.WORK_START_HOUR or hour <= Config.WORK_END_HOUR
        else:
            # Handle same-day shift
            return Config.WORK_START_HOUR <= hour < Config.WORK_END_HOUR
    else:
        # Original behavior for regular day shifts
        return Config.WORK_START_HOUR <= hour < Config.WORK_END_HOUR

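# Example: with the default 13:00-05:00 overnight window, commits at 23:15 or
# 02:30 count as working hours, while a commit at 09:00 does not.
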
def estimate_commit_time(commit_size, commit_type_multiplier):
    """Estimate the time (in minutes) spent on a commit based on its size and type."""
    if commit_size <= Config.SMALL_COMMIT_CHANGES:
        base_time = max(Config.MIN_COMMIT_MINUTES, commit_size * 0.5)
    elif commit_size <= Config.MEDIUM_COMMIT_CHANGES:
        base_time = commit_size * 0.4
    elif commit_size <= Config.LARGE_COMMIT_CHANGES:
        base_time = commit_size * 0.3
    else:
        base_time = min(Config.MAX_COMMIT_MINUTES, commit_size * 0.2)
    return base_time * commit_type_multiplier

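# Worked example with the defaults above: a 100-line change falls in the medium
# tier, so the base estimate is 100 * 0.4 = 40 minutes; with the 'fix'
# multiplier of 0.8 the estimate becomes 32 minutes.
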
def estimate_hours(dates, commit_hashes):
    """Estimate working hours based on commit dates, sizes, and types."""
    if len(dates) < 2:
        return 0, [], {}
    analyzer = CommitAnalyzer()
    hours = 0
    commit_times = []
    commit_stats = defaultdict(int)
    for i in range(len(dates) - 1):
        current_date = dates[i]
        next_date = dates[i + 1]
        # Handle overnight time difference calculation
        diff_minutes = (next_date - current_date).total_seconds() / 60
        if Config.OVERNIGHT_SHIFT:
            # If commits are on consecutive days during work hours, adjust the time difference
            if (is_working_hours(current_date) and is_working_hours(next_date) and
                    next_date.date() == current_date.date() + timedelta(days=1)):
                # Calculate time until end of day plus time from start of next day
                minutes_until_midnight = (24 - current_date.hour) * 60 - current_date.minute
                minutes_after_midnight = next_date.hour * 60 + next_date.minute
                diff_minutes = min(minutes_until_midnight + minutes_after_midnight,
                                   Config.MAX_COMMIT_DIFF_MINUTES)
        commit_message = analyzer.get_commit_message(commit_hashes[i])
        commit_type, type_multiplier = analyzer.get_commit_type(commit_message, commit_hashes[i])
        commit_stats[commit_type] += 1
        commit_size = get_commit_size(commit_hashes[i])
        # base_time already includes the commit-type multiplier
        base_time = estimate_commit_time(commit_size, type_multiplier)
        if diff_minutes < Config.MAX_COMMIT_DIFF_MINUTES and is_working_hours(current_date):
            # Scale the gap-based estimate by the same multiplier so the commit
            # type is applied exactly once to whichever candidate value wins
            gap_time = min(diff_minutes, Config.MAX_COMMIT_MINUTES) * type_multiplier
            time_to_add = max(base_time, gap_time)
        else:
            time_to_add = max(base_time, Config.ISOLATED_COMMIT_MINUTES * type_multiplier)
        hours += time_to_add / 60
        commit_times.append((current_date, time_to_add / 60, commit_type))
    # Handle the last commit, which has no following commit to diff against
    last_message = analyzer.get_commit_message(commit_hashes[-1])
    last_type, last_multiplier = analyzer.get_commit_type(last_message, commit_hashes[-1])
    commit_stats[last_type] += 1
    last_commit_size = get_commit_size(commit_hashes[-1])
    last_commit_time = max(estimate_commit_time(last_commit_size, last_multiplier),
                           Config.ISOLATED_COMMIT_MINUTES * last_multiplier) / 60
    hours += last_commit_time
    commit_times.append((dates[-1], last_commit_time, last_type))
    return round(hours, 1), commit_times, dict(commit_stats)

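# The return value is (total_hours, commit_times, commit_stats), for example
# (12.5, [(datetime(...), 0.75, 'fix'), ...], {'fix': 3, 'feature': 5});
# the shapes match the code above, the numbers are made up for illustration.
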
def create_visualization(author_commit_times):
    """Create visualizations of work patterns and commit types."""
    plt.figure(figsize=(15, 12))
    # Weekly pattern
    plt.subplot(3, 1, 1)
    weekly_hours = defaultdict(float)
    for author, commit_data in author_commit_times.items():
        for date, hours, _ in commit_data:
            week_num = date.isocalendar()[1]
            weekly_hours[week_num] += hours
    weeks = sorted(weekly_hours.keys())
    hours = [weekly_hours[w] for w in weeks]
    plt.bar(weeks, hours, alpha=0.7)
    plt.title('Weekly Work Pattern')
    plt.xlabel('Week Number')
    plt.ylabel('Hours')
    # Daily distribution
    plt.subplot(3, 1, 2)
    hours_by_hour = defaultdict(float)
    for author, commit_data in author_commit_times.items():
        for date, hours, _ in commit_data:
            hours_by_hour[date.hour] += hours
    hours_range = range(24)
    hourly_dist = [hours_by_hour[h] for h in hours_range]
    # Reorder hours to show work hours in the middle for overnight shifts
    if Config.OVERNIGHT_SHIFT:
        start_idx = Config.WORK_START_HOUR
        reordered_hours = list(range(start_idx, 24)) + list(range(0, start_idx))
        reordered_dist = [hourly_dist[h] for h in reordered_hours]
        plt.bar(range(24), reordered_dist, alpha=0.7)
        plt.xticks(range(24), [f"{h:02d}:00" for h in reordered_hours])
    else:
        plt.bar(hours_range, hourly_dist, alpha=0.7)
        plt.xticks(hours_range, [f"{h:02d}:00" for h in hours_range])
    plt.title('Daily Work Distribution')
    plt.xlabel('Hour of Day')
    plt.ylabel('Total Hours')
    # Commit types distribution
    plt.subplot(3, 1, 3)
    commit_types = defaultdict(float)
    for author, commit_data in author_commit_times.items():
        for _, hours, commit_type in commit_data:
            commit_types[commit_type] += hours
    types = list(commit_types.keys())
    type_hours = [commit_types[t] for t in types]
    plt.bar(types, type_hours, alpha=0.7)
    plt.title('Time Distribution by Commit Type')
    plt.xlabel('Commit Type')
    plt.ylabel('Hours')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig('work_patterns.png')
    plt.close()

def main():
    """Main function to run the Git time analysis."""
    print("Analyzing Git repository...")
    # --date=iso-strict emits ISO 8601 timestamps that datetime.fromisoformat
    # can parse; --reverse lists commits oldest-first, which the gap-based
    # estimate in estimate_hours() expects
    commit_logs = execute_git_command(
        "git log --reverse --format='%H|%an|%ad' --date=iso-strict")
    commit_dates = defaultdict(list)
    commit_hashes = defaultdict(list)
    for line in commit_logs.strip().split('\n'):
        commit_hash, author, date_str = line.split('|')
        commit_date = datetime.fromisoformat(date_str.strip())
        commit_dates[author].append(commit_date)
        commit_hashes[author].append(commit_hash)
    hours_worked = {}
    author_commit_times = {}
    author_commit_stats = {}
    for author in commit_dates:
        print(f"\nAnalyzing commits for {author}...")
        hours, commit_times, commit_stats = estimate_hours(
            commit_dates[author],
            commit_hashes[author]
        )
        hours_worked[author] = hours
        author_commit_times[author] = commit_times
        author_commit_stats[author] = commit_stats
    print("\nEstimated Hours Worked:")
    print("-" * 40)
    for author, hours in hours_worked.items():
        print(f"\n{author}: {hours} hours")
        print("Commit breakdown:")
        for commit_type, count in author_commit_stats[author].items():
            print(f"  {commit_type}: {count} commits")
    print("\nGenerating visualization...")
    create_visualization(author_commit_times)
    print("Visualization saved as 'work_patterns.png' in the current directory")


if __name__ == "__main__":
    main()