Skip to content

Instantly share code, notes, and snippets.

@potat-dev
Last active November 3, 2024 18:16
Show Gist options
  • Save potat-dev/c12827dc690eefe7342d9df1238060c7 to your computer and use it in GitHub Desktop.
Save potat-dev/c12827dc690eefe7342d9df1238060c7 to your computer and use it in GitHub Desktop.
Git Repo changes frequency analyzer for files and folders
#!/usr/bin/env python3
import subprocess
import os
from collections import Counter
import argparse
from typing import List, Tuple, Dict
import sys
import unicodedata
import logging
# Logging setup: records at INFO and above, rendered as "LEVEL: message".
# main() lowers this module's logger to DEBUG when --verbose is given.
logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.INFO)
logger = logging.getLogger(__name__)
class GitChangesAnalyzer:
    """Rank the files and directories of a Git repository by change frequency.

    A "change" is one appearance of a path in the output of
    ``git log --name-only``. A directory's count is the sum of the counts of
    every file located anywhere beneath it; ``"."`` stands for the repository
    root and therefore accumulates every file.
    """

    def __init__(self, repo_path: str = "."):
        """
        Initialize Git repository changes analyzer

        Args:
            repo_path (str): Path to Git repository (default: current directory)

        Raises:
            ValueError: If ``repo_path`` is not an existing directory or
                ``git rev-parse --git-dir`` fails when run inside it.
        """
        self.repo_path = repo_path
        if not self._is_git_repo():
            raise ValueError(f"'{repo_path}' is not a git repository")

    def _is_git_repo(self) -> bool:
        """Checks if the specified directory is a git repository"""
        # subprocess raises OSError (not CalledProcessError) when ``cwd`` is
        # missing or is not a directory, so rule that case out up front and
        # let the constructor report it as "not a git repository".
        if not os.path.isdir(self.repo_path):
            return False
        try:
            subprocess.run(
                ["git", "rev-parse", "--git-dir"],
                cwd=self.repo_path,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                check=True,
            )
        except subprocess.CalledProcessError:
            return False
        return True

    def _normalize_path(self, path: str) -> str:
        """
        Normalizes file path for consistent handling across different platforms.

        The path is NFC-normalized, backslashes become forward slashes, and
        leading/trailing single or double quotes are stripped. Both the input
        and the result are logged at DEBUG level.
        """
        # Lazy %-style arguments: nothing is formatted unless DEBUG is enabled.
        logger.debug("Original path: %s", path)
        path = unicodedata.normalize("NFC", path)
        path = path.replace("\\", "/")
        path = path.strip("\"'")
        logger.debug("Normalized path: %s", path)
        return path

    def get_file_changes(self) -> Dict[str, int]:
        """Gets a list of all changed files and their change count.

        Returns:
            Dict[str, int]: A ``Counter`` mapping each normalized path to the
            number of times ``git log --name-only`` listed it.

        If the git command fails, its stderr is printed and the process exits
        with status 1.
        """
        # The empty pretty format suppresses commit headers so only path names
        # are printed; core.quotepath=false is passed so git does not
        # octal-escape non-ASCII path bytes (see git-config documentation).
        # The arguments stay ``str``: subprocess encodes them itself, and
        # bytes arguments are rejected on Windows before Python 3.8.
        command = [
            "git",
            "-c",
            "core.quotepath=false",
            "log",
            "--pretty=format:",
            "--name-only",
            "--no-abbrev",
            "--",
        ]
        try:
            result = subprocess.run(
                command,
                cwd=self.repo_path,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                check=True,
            )
        except subprocess.CalledProcessError as e:
            decoded_stderr = e.stderr.decode("utf-8", "replace") if e.stderr else ""
            print(
                f"Error getting change history: {decoded_stderr}", file=sys.stderr
            )  # Print detailed error
            sys.exit(1)

        # errors="replace" means decoding can never raise, so no
        # UnicodeDecodeError handling is needed here.
        output = result.stdout.decode("utf-8", errors="replace")
        logger.debug("Raw file list from git log (decoded): %s", output)
        # Blank lines separate commits in the output; drop them.
        files = [self._normalize_path(f) for f in output.split("\n") if f.strip()]
        logger.debug("Processed file list: %s", files)
        return Counter(files)

    def get_directory_changes(self, file_changes: Dict[str, int]) -> Dict[str, int]:
        """
        Counts changes for each directory based on file changes

        Args:
            file_changes (Dict[str, int]): Dictionary of file changes

        Returns:
            Dict[str, int]: Dictionary {directory_path: change_count}. The key
            ``"."`` is the repository root and receives the count of every
            file, including files that sit directly in the root.
        """
        dir_changes = Counter()
        for file_path, count in file_changes.items():
            # Walk up through every parent directory of the file.
            current_path = self._normalize_path(os.path.dirname(file_path))
            while current_path:
                dir_changes[current_path] += count
                current_path = os.path.dirname(current_path.rstrip("/"))
            # Every file lives under the root, whether or not it is nested.
            dir_changes["."] += count
        return dir_changes

    def _quote_path_if_needed(self, path: str) -> str:
        """Quotes the path if it contains whitespace."""
        if " " in path:
            return f'"{path}"'
        return path

    def _print_ranking(self, title: str, changes: Dict[str, int], top_n: int) -> None:
        """Prints one report section: heading, rule, and the top_n entries."""
        # Exclude empty paths, then order by count (ties keep insertion order).
        ranked = sorted(
            ((path, count) for path, count in changes.items() if path.strip()),
            key=lambda item: item[1],
            reverse=True,
        )[:top_n]
        # The rule is sized from the longest path plus room for the count prefix.
        longest = max((len(path) for path, _ in ranked), default=0)
        print(f"\n{title}")
        print("-" * (longest + 20))
        for path, count in ranked:
            print(f"{count:6d} changes: {self._quote_path_if_needed(path)}")

    def print_top_changes(self, top_n: int = 10):
        """
        Prints top_n most frequently changed files and directories

        Args:
            top_n (int): Number of items to display

        Raises:
            ValueError: If ``top_n`` is negative.
        """
        # A negative value would make the [:top_n] slice silently drop the
        # last entries instead of limiting the list, so reject it early.
        if top_n < 0:
            raise ValueError(f"top_n must be non-negative, got {top_n}")
        file_changes = self.get_file_changes()
        dir_changes = self.get_directory_changes(file_changes)
        self._print_ranking("Most frequently changed files:", file_changes, top_n)
        self._print_ranking(
            "Most frequently changed directories:", dir_changes, top_n
        )
def main():
    """Command-line entry point: parse options and print the change report.

    Options are -n/--number (how many entries to show), -p/--path (repository
    location) and -v/--verbose (switch this module's logger to DEBUG). Any
    exception raised while building or running the analyzer is reported on
    stderr and the process exits with status 1.
    """
    cli = argparse.ArgumentParser(
        description="Analyze most frequently changed files and directories in a Git repository"
    )
    # (flags, keyword settings) pairs, registered in this order.
    option_specs = [
        (
            ("-n", "--number"),
            {
                "type": int,
                "default": 10,
                "help": "Number of files and directories to display (default: 10)",
            },
        ),
        (
            ("-p", "--path"),
            {
                "default": ".",
                "help": "Path to Git repository (default: current directory)",
            },
        ),
        (
            ("-v", "--verbose"),
            {"action": "store_true", "help": "Enable verbose logging"},
        ),
    ]
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)
    opts = cli.parse_args()

    if opts.verbose:
        logger.setLevel(logging.DEBUG)

    try:
        GitChangesAnalyzer(opts.path).print_top_changes(opts.number)
    except Exception as exc:
        # ValueError is the analyzer's own "bad input" signal; anything else
        # is reported as unexpected. Both end the process with status 1.
        label = "Error" if isinstance(exc, ValueError) else "Unexpected error"
        print(f"{label}: {exc}", file=sys.stderr)
        sys.exit(1)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment