#!/usr/bin/env python3
"""
Git Time Analyzer - Enhanced Version
Based on original work by MiKatre (https://gist.github.com/MiKatre/0d6bdd4664cef1ed5d6d67ba531821b8)
Features:
- Commit size analysis
- Working hours consideration (including overnight shifts)
- Smart time allocation
- Data visualization
- Commit message analysis
- Automated commit detection
- Merge commit filtering
"""
import subprocess
import re
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from collections import defaultdict
import numpy as np


class Config:
    """Configuration parameters for time estimation algorithm."""

    # Time thresholds
    MAX_COMMIT_DIFF_MINUTES = 2 * 60
    ISOLATED_COMMIT_MINUTES = 60
    MIN_COMMIT_MINUTES = 15
    MAX_COMMIT_MINUTES = 4 * 60

    # Working hours (can handle overnight shifts)
    WORK_START_HOUR = 13  # 1 PM
    WORK_END_HOUR = 5  # 5 AM next day
    OVERNIGHT_SHIFT = True  # Flag to indicate if work hours cross midnight

    # Commit size thresholds
    SMALL_COMMIT_CHANGES = 50
    MEDIUM_COMMIT_CHANGES = 200
    LARGE_COMMIT_CHANGES = 500

    # Commit message patterns
    AUTOMATED_PATTERNS = [
        r'automated',
        r'auto-generated',
        r'bot:',
        r'dependabot',
        r'^\[automated\]',
        r'ci:',
        r'chore\(deps\)',
        r'build\(deps\)',
        r'yarn upgrade',
        r'npm update',
        r'package-lock\.json',
        r'poetry\.lock',
        r'bump version',
    ]

    # Merge patterns
    MERGE_PATTERNS = [
        r'^Merge branch',
        r'^Merge pull request',
        r'^Merge remote-tracking',
        r'^\[maven-release-plugin\]',
        r'^Automatic merge',
        r'^Auto-merge',
    ]

    # Time multipliers for different commit types
    TIME_MULTIPLIERS = {
        'feature': 1.0,
        'fix': 0.8,
        'refactor': 1.2,
        'docs': 0.5,
        'style': 0.3,
        'test': 0.7,
        'chore': 0.4,
        'automated': 0.0,
        'merge': 0.1,
    }


class CommitAnalyzer:
    """Analyzes commit messages and metadata to determine commit type and importance."""

    def __init__(self):
        """Initialize regex patterns for commit analysis."""
        self.automated_patterns = [re.compile(pattern, re.IGNORECASE)
                                   for pattern in Config.AUTOMATED_PATTERNS]
        self.merge_patterns = [re.compile(pattern, re.IGNORECASE)
                               for pattern in Config.MERGE_PATTERNS]

    def get_commit_type(self, message, commit_hash):
        """Determine the type of commit based on its message and metadata."""
        # Check for automated commits
        if any(pattern.search(message) for pattern in self.automated_patterns):
            return 'automated', Config.TIME_MULTIPLIERS['automated']

        # Check for merge commits
        if any(pattern.search(message) for pattern in self.merge_patterns):
            return 'merge', Config.TIME_MULTIPLIERS['merge']

        # Conventional commits analysis
        conventional_match = re.match(r'^(feat|fix|docs|style|refactor|test|chore)(\(.*\))?: ', message)
        if conventional_match:
            commit_type = conventional_match.group(1)
            if commit_type == 'feat':
                return 'feature', Config.TIME_MULTIPLIERS['feature']
            return commit_type, Config.TIME_MULTIPLIERS.get(commit_type, 1.0)

        # Analyze message content for keywords
        message_lower = message.lower()
        if 'fix' in message_lower or 'bug' in message_lower:
            return 'fix', Config.TIME_MULTIPLIERS['fix']
        if 'refactor' in message_lower:
            return 'refactor', Config.TIME_MULTIPLIERS['refactor']
        if 'test' in message_lower:
            return 'test', Config.TIME_MULTIPLIERS['test']
        if 'doc' in message_lower:
            return 'docs', Config.TIME_MULTIPLIERS['docs']

        # Default to feature type
        return 'feature', Config.TIME_MULTIPLIERS['feature']
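
    # Example classifications (for illustration; derived from the rules above):
    #   "feat(api): add pagination"  -> ('feature', 1.0)
    #   "fix: handle empty response" -> ('fix', 0.8)
    #   "Merge pull request ..."     -> ('merge', 0.1)
    #   "chore(deps): bump lodash"   -> ('automated', 0.0), since AUTOMATED_PATTERNS are checked first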

    def get_commit_message(self, commit_hash):
        """Get the full commit message for a given hash."""
        try:
            return execute_git_command(f"git log --format=%B -n 1 {commit_hash}").strip()
        except subprocess.CalledProcessError:
            return ""


def execute_git_command(command):
    """Execute a Git command and return its output."""
    output = subprocess.check_output(command, shell=True, universal_newlines=True)
    return output


def get_commit_size(commit_hash):
    """Calculate the size of a commit by counting lines changed."""
    try:
        diff_stats = execute_git_command(f"git show --shortstat {commit_hash}")
        insertions = deletions = 0
        # git prints the singular "1 insertion(+)" / "1 deletion(-)", so match the singular form
        if "insertion" in diff_stats:
            insertions = int(re.search(r"(\d+) insertion", diff_stats).group(1))
        if "deletion" in diff_stats:
            deletions = int(re.search(r"(\d+) deletion", diff_stats).group(1))
        return insertions + deletions
    except (subprocess.CalledProcessError, AttributeError):
        return 0


def is_working_hours(dt):
    """Check if a given datetime falls within defined working hours."""
    hour = dt.hour
    if Config.OVERNIGHT_SHIFT:
        if Config.WORK_START_HOUR > Config.WORK_END_HOUR:
            # Handle overnight shift (e.g., 13:00 to 03:00 next day)
            return hour >= Config.WORK_START_HOUR or hour <= Config.WORK_END_HOUR
        else:
            # Handle same-day shift
            return Config.WORK_START_HOUR <= hour < Config.WORK_END_HOUR
    else:
        # Original behavior for regular day shifts
        return Config.WORK_START_HOUR <= hour < Config.WORK_END_HOUR
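
# Example for is_working_hours(): with WORK_START_HOUR = 13, WORK_END_HOUR = 5 and
# OVERNIGHT_SHIFT = True, commits at 14:00 or 02:00 count as working hours; 09:00 does not.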


def estimate_commit_time(commit_size, commit_type_multiplier):
    """Estimate time spent on a commit based on its size and type."""
    base_time = 0
    if commit_size <= Config.SMALL_COMMIT_CHANGES:
        base_time = max(Config.MIN_COMMIT_MINUTES, commit_size * 0.5)
    elif commit_size <= Config.MEDIUM_COMMIT_CHANGES:
        base_time = commit_size * 0.4
    elif commit_size <= Config.LARGE_COMMIT_CHANGES:
        base_time = commit_size * 0.3
    else:
        base_time = min(Config.MAX_COMMIT_MINUTES, commit_size * 0.2)
    return base_time * commit_type_multiplier
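
# Worked example for estimate_commit_time(): a 100-line change classified as 'fix'
# falls in the medium bracket, so estimate_commit_time(100, 0.8) = 100 * 0.4 * 0.8 = 32 minutes.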


def estimate_hours(dates, commit_hashes):
    """Estimate working hours based on commit dates, sizes, and types."""
    if len(dates) < 2:
        return 0, [], {}

    analyzer = CommitAnalyzer()
    hours = 0
    commit_times = []
    commit_stats = defaultdict(int)

    for i in range(len(dates) - 1):
        current_date = dates[i]
        next_date = dates[i + 1]

        # Handle overnight time difference calculation
        diff_minutes = (next_date - current_date).total_seconds() / 60
        if Config.OVERNIGHT_SHIFT:
            # If commits are on consecutive days during work hours, adjust the time difference
            if (is_working_hours(current_date) and is_working_hours(next_date) and
                    next_date.date() == current_date.date() + timedelta(days=1)):
                # Calculate time until end of day plus time from start of next day
                minutes_until_midnight = (24 - current_date.hour) * 60 - current_date.minute
                minutes_after_midnight = next_date.hour * 60 + next_date.minute
                diff_minutes = min(minutes_until_midnight + minutes_after_midnight,
                                   Config.MAX_COMMIT_DIFF_MINUTES)

        commit_message = analyzer.get_commit_message(commit_hashes[i])
        commit_type, type_multiplier = analyzer.get_commit_type(commit_message, commit_hashes[i])
        commit_stats[commit_type] += 1

        commit_size = get_commit_size(commit_hashes[i])
        base_time = estimate_commit_time(commit_size, type_multiplier)
        if diff_minutes < Config.MAX_COMMIT_DIFF_MINUTES and is_working_hours(current_date):
            # base_time already includes the type multiplier, so scale only the gap-based estimate
            gap_time = min(diff_minutes, Config.MAX_COMMIT_MINUTES) * type_multiplier
            time_to_add = max(base_time, gap_time)
        else:
            time_to_add = max(base_time, Config.ISOLATED_COMMIT_MINUTES * type_multiplier)

        hours += time_to_add / 60
        commit_times.append((current_date, time_to_add / 60, commit_type))

    # Handle last commit
    last_message = analyzer.get_commit_message(commit_hashes[-1])
    last_type, last_multiplier = analyzer.get_commit_type(last_message, commit_hashes[-1])
    commit_stats[last_type] += 1
    last_commit_size = get_commit_size(commit_hashes[-1])
    last_commit_time = max(estimate_commit_time(last_commit_size, last_multiplier),
                           Config.ISOLATED_COMMIT_MINUTES) / 60
    hours += last_commit_time
    commit_times.append((dates[-1], last_commit_time, last_type))

    return round(hours, 1), commit_times, dict(commit_stats)
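
# estimate_hours() returns a tuple like
# (12.5, [(datetime(...), 0.5, 'fix'), ...], {'fix': 3, 'feature': 5})  # illustrative values
# i.e. total estimated hours, per-commit (date, hours, type) entries, and commit counts per type.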


def create_visualization(author_commit_times):
    """Create visualizations of work patterns and commit types."""
    plt.figure(figsize=(15, 12))

    # Weekly pattern
    plt.subplot(3, 1, 1)
    weekly_hours = defaultdict(float)
    for author, commit_data in author_commit_times.items():
        for date, hours, _ in commit_data:
            week_num = date.isocalendar()[1]
            weekly_hours[week_num] += hours
    weeks = sorted(weekly_hours.keys())
    hours = [weekly_hours[w] for w in weeks]
    plt.bar(weeks, hours, alpha=0.7)
    plt.title('Weekly Work Pattern')
    plt.xlabel('Week Number')
    plt.ylabel('Hours')

    # Daily distribution
    plt.subplot(3, 1, 2)
    hours_by_hour = defaultdict(float)
    for author, commit_data in author_commit_times.items():
        for date, hours, _ in commit_data:
            hours_by_hour[date.hour] += hours
    hours_range = range(24)
    hourly_dist = [hours_by_hour[h] for h in hours_range]

    # Reorder hours to show work hours in the middle for overnight shifts
    if Config.OVERNIGHT_SHIFT:
        start_idx = Config.WORK_START_HOUR
        reordered_hours = list(range(start_idx, 24)) + list(range(0, start_idx))
        reordered_dist = [hourly_dist[h] for h in reordered_hours]
        plt.bar(range(24), reordered_dist, alpha=0.7)
        plt.xticks(range(24), [f"{h:02d}:00" for h in reordered_hours])
    else:
        plt.bar(hours_range, hourly_dist, alpha=0.7)
        plt.xticks(hours_range, [f"{h:02d}:00" for h in hours_range])
    plt.title('Daily Work Distribution')
    plt.xlabel('Hour of Day')
    plt.ylabel('Total Hours')

    # Commit types distribution
    plt.subplot(3, 1, 3)
    commit_types = defaultdict(float)
    for author, commit_data in author_commit_times.items():
        for _, hours, commit_type in commit_data:
            commit_types[commit_type] += hours
    types = list(commit_types.keys())
    type_hours = [commit_types[t] for t in types]
    plt.bar(types, type_hours, alpha=0.7)
    plt.title('Time Distribution by Commit Type')
    plt.xlabel('Commit Type')
    plt.ylabel('Hours')
    plt.xticks(rotation=45)

    plt.tight_layout()
    plt.savefig('work_patterns.png')
    plt.close()


def main():
    """Main function to run the Git time analysis."""
    print("Analyzing Git repository...")

    # --date=iso-strict gives timestamps datetime.fromisoformat() can parse;
    # --reverse yields commits oldest-first, as estimate_hours() expects
    commit_logs = execute_git_command(
        "git log --reverse --format='%H|%an|%ad' --date=iso-strict"
    )
    commit_dates = defaultdict(list)
    commit_hashes = defaultdict(list)
    for line in commit_logs.strip().split('\n'):
        commit_hash, author, date_str = line.split('|')
        commit_date = datetime.fromisoformat(date_str.strip())
        commit_dates[author].append(commit_date)
        commit_hashes[author].append(commit_hash)

    hours_worked = {}
    author_commit_times = {}
    author_commit_stats = {}
    for author in commit_dates:
        print(f"\nAnalyzing commits for {author}...")
        hours, commit_times, commit_stats = estimate_hours(
            commit_dates[author],
            commit_hashes[author]
        )
        hours_worked[author] = hours
        author_commit_times[author] = commit_times
        author_commit_stats[author] = commit_stats

    print("\nEstimated Hours Worked:")
    print("-" * 40)
    for author, hours in hours_worked.items():
        print(f"\n{author}: {hours} hours")
        print("Commit breakdown:")
        for commit_type, count in author_commit_stats[author].items():
            print(f"  {commit_type}: {count} commits")

    print("\nGenerating visualization...")
    create_visualization(author_commit_times)
    print("Visualization saved as 'work_patterns.png' in the current directory")


if __name__ == "__main__":
    main()