Last active
November 3, 2024 18:16
-
-
Save potat-dev/c12827dc690eefe7342d9df1238060c7 to your computer and use it in GitHub Desktop.
Git Repo changes frequency analyzer for files and folders
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import subprocess | |
import os | |
from collections import Counter | |
import argparse | |
from typing import List, Tuple, Dict | |
import sys | |
import unicodedata | |
import logging | |
# Configure logging: the root logger gets a compact "LEVEL: message" format
# at INFO level. The module-level logger below is what the rest of this file
# logs through; its own level can be adjusted separately (see --verbose).
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
logger = logging.getLogger(__name__)
class GitChangesAnalyzer:
    """Rank files and directories of a Git repository by how often they change.

    A path's change count is the number of times it appears in the output of
    ``git log --name-only``. Directory counts are the sum of the counts of
    every file located anywhere beneath that directory.
    """

    def __init__(self, repo_path: str = "."):
        """
        Initialize Git repository changes analyzer

        Args:
            repo_path (str): Path to Git repository (default: current directory)

        Raises:
            ValueError: If ``repo_path`` is not an existing directory or
                ``git rev-parse --git-dir`` fails when run inside it.
        """
        self.repo_path = repo_path
        if not self._is_git_repo():
            raise ValueError(f"'{repo_path}' is not a git repository")

    def _is_git_repo(self) -> bool:
        """Checks if the specified directory is a git repository.

        Returns:
            bool: True when ``git rev-parse --git-dir`` exits successfully
            with ``repo_path`` as working directory, False otherwise.
        """
        # subprocess raises OSError (not CalledProcessError) when `cwd` does
        # not exist or is not a directory; report that case as "not a repo"
        # so that __init__ raises its documented ValueError.
        if not os.path.isdir(self.repo_path):
            return False
        try:
            subprocess.run(
                ["git", "rev-parse", "--git-dir"],
                cwd=self.repo_path,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                check=True,
            )
        except subprocess.CalledProcessError:
            return False
        return True

    def _normalize_path(self, path: str) -> str:
        """
        Normalizes file path for consistent handling across different platforms.

        The path is converted to Unicode NFC form, backslashes are replaced by
        forward slashes, and leading/trailing quote characters (``"`` and
        ``'``) are stripped. The value before and after is logged at DEBUG.

        Args:
            path (str): Path as reported by Git or derived from such a path.

        Returns:
            str: The normalized path.
        """
        # Lazy %-style arguments: nothing is formatted unless DEBUG is enabled.
        logger.debug("Original path: %s", path)
        path = unicodedata.normalize("NFC", path)
        path = path.replace("\\", "/")
        path = path.strip("\"'")
        logger.debug("Normalized path: %s", path)
        return path

    def get_file_changes(self) -> Dict[str, int]:
        """Gets all changed files and their change count.

        Runs ``git log --name-only`` with an empty pretty format so the output
        consists only of path lines, then counts how often each normalized
        path occurs.

        Returns:
            Dict[str, int]: A ``Counter`` mapping {file_path: change_count}.

        Note:
            If the Git command fails, the error output is printed to stderr
            and the process exits with status 1.
        """
        # core.quotepath=false keeps non-ASCII bytes in paths unescaped; it is
        # passed with -c before the subcommand so it applies to this call only.
        command = [
            "git",
            "-c",
            "core.quotepath=false",
            "log",
            "--pretty=format:",
            "--name-only",
            "--no-abbrev",
            "--",
        ]
        try:
            # Capture raw bytes; decoding is done explicitly below.
            result = subprocess.run(
                command,
                cwd=self.repo_path,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                check=True,
            )
        except subprocess.CalledProcessError as e:
            decoded_stderr = e.stderr.decode("utf-8", "replace") if e.stderr else ""
            print(
                f"Error getting change history: {decoded_stderr}", file=sys.stderr
            )
            sys.exit(1)

        # errors="replace" means decoding can never raise; undecodable bytes
        # become U+FFFD instead.
        output = result.stdout.decode("utf-8", errors="replace")
        logger.debug("Raw file list from git log (decoded): %s", output)
        # Blank lines separate commits in the output and carry no path.
        files = [self._normalize_path(f) for f in output.split("\n") if f.strip()]
        logger.debug("Processed file list: %s", files)
        return Counter(files)

    def get_directory_changes(self, file_changes: Dict[str, int]) -> Dict[str, int]:
        """
        Counts changes for each directory based on file changes

        Every file contributes its count to each of its ancestor directories
        and to the repository root, which is reported as ``"."``.

        Args:
            file_changes (Dict[str, int]): Dictionary of file changes

        Returns:
            Dict[str, int]: Dictionary {directory_path: change_count}
        """
        dir_changes = Counter()
        for file_path, count in file_changes.items():
            # Walk upwards through all parent directories of the file.
            current_path = self._normalize_path(os.path.dirname(file_path))
            while current_path:
                dir_changes[current_path] += count
                current_path = os.path.dirname(current_path.rstrip("/"))
            # The root contains every file, including top-level ones, so it
            # is credited unconditionally (consistent with how every other
            # directory counts its direct children).
            dir_changes["."] += count
        return dir_changes

    def _quote_path_if_needed(self, path: str) -> str:
        """Wraps the path in double quotes if it contains a space character."""
        if " " in path:
            return f'"{path}"'
        return path

    def _print_ranking(self, heading: str, changes: Dict[str, int], top_n: int) -> None:
        """Prints ``heading``, a separator line and the ``top_n`` highest counts."""
        # sorted() is stable, so entries with equal counts keep dict order.
        ranked = sorted(changes.items(), key=lambda item: item[1], reverse=True)[
            :top_n
        ]
        # Separator spans the longest listed path plus room for the count column.
        longest = max((len(path) for path, _ in ranked), default=0)
        print(heading)
        print("-" * (longest + 20))
        for path, count in ranked:
            print(f"{count:6d} changes: {self._quote_path_if_needed(path)}")

    def print_top_changes(self, top_n: int = 10):
        """
        Prints top_n most frequently changed files and directories

        Args:
            top_n (int): Number of items to display
        """
        file_changes = self.get_file_changes()
        dir_changes = self.get_directory_changes(file_changes)

        # Exclude empty paths
        file_changes = {k: v for k, v in file_changes.items() if k.strip()}
        dir_changes = {k: v for k, v in dir_changes.items() if k.strip()}

        self._print_ranking("\nMost frequently changed files:", file_changes, top_n)
        self._print_ranking(
            "\nMost frequently changed directories:", dir_changes, top_n
        )
def main():
    """Command-line entry point.

    Parses ``-n/--number``, ``-p/--path`` and ``-v/--verbose``, then prints
    the most frequently changed files and directories of the repository.
    A negative ``--number`` is rejected through ``parser.error``; any
    exception raised during analysis is reported on stderr and the process
    exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description="Analyze most frequently changed files and directories in a Git repository"
    )
    parser.add_argument(
        "-n",
        "--number",
        type=int,
        default=10,
        help="Number of files and directories to display (default: 10)",
    )
    parser.add_argument(
        "-p",
        "--path",
        default=".",
        help="Path to Git repository (default: current directory)",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Enable verbose logging"
    )
    args = parser.parse_args()

    # The count is used as a slice bound downstream; a negative value would
    # silently drop entries from the end instead of limiting the list.
    if args.number < 0:
        parser.error("--number must be a non-negative integer")

    if args.verbose:
        logger.setLevel(logging.DEBUG)

    try:
        analyzer = GitChangesAnalyzer(args.path)
        analyzer.print_top_changes(args.number)
    except ValueError as e:
        # Raised by GitChangesAnalyzer when the path is not a repository.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report anything else and fail with status 1.
        print(f"Unexpected error: {e}", file=sys.stderr)
        sys.exit(1)
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment