Created
May 6, 2025 22:27
-
-
Save tommylees112/e2965114d75cc537daf70932c5172c16 to your computer and use it in GitHub Desktop.
used_utils.py — run it with `uv run --with rich --with pandas --with click used_utils.py utils`
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ast
import json
import re
from pathlib import Path
from typing import Dict, List, Set

import click  # Added click
import pandas as pd  # Added pandas
from rich.console import Console
from rich.table import Table
# Configuration
# The utils directory and the repository root are not configured here; they
# are supplied on the command line (see the click argument/option on `main`).

# Directory names to exclude from the search for function *usage*. A file is
# skipped when any component of its path relative to the search root equals
# one of these names.
EXCLUDE_DIRS_SEARCH: Set[str] = {".venv", "__pycache__"}
# File names (base name only, matched in any directory) to exclude from the
# search for function *usage*. Presumably "used_utils.py" is this script itself.
EXCLUDE_FILES_SEARCH: Set[str] = {"used_utils.py"}
def get_functions_from_py_file(file_path: Path) -> List[str]:
    """Return the names of all functions defined in a Python source file.

    The file is parsed with ``ast`` and every ``def`` and ``async def`` node is
    collected, in ``ast.walk`` order. Because the whole tree is walked, methods
    and nested functions are included alongside module-level functions.

    Args:
        file_path: Path of the Python file to inspect.

    Returns:
        The function names found. An empty list is returned (and a message is
        printed) when the file is missing, cannot be read, or is not valid
        Python.
    """
    try:
        source = file_path.read_text(encoding="utf-8")
        tree = ast.parse(source, filename=str(file_path))
    except FileNotFoundError:
        # Plain print() does not interpret rich markup, so messages are emitted
        # without colour tags.
        print(f"Error: File not found: {file_path}")
    except SyntaxError as e:
        print(f"Warning: Could not parse {file_path} due to SyntaxError: {e}")
    except Exception as e:
        # Best effort: one unreadable file must not abort the whole analysis.
        print(f"Warning: Could not process {file_path}: {e}")
    else:
        return [
            node.name
            for node in ast.walk(tree)
            # Coroutines are a separate node type and would otherwise be missed.
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
        ]
    return []
def find_files_to_search(
    root_dir: Path, exclude_dirs: Set[str], exclude_files: Set[str]
) -> List[Path]:
    """
    Collects the .py and .ipynb files below root_dir that are eligible for searching.

    A candidate is dropped when it is not a regular file, when any component of
    its path (relative to root_dir) appears in exclude_dirs, or when its file
    name appears in exclude_files. All .py matches come before all .ipynb
    matches in the returned list.
    """

    def _components(candidate: Path):
        # Prefer the path relative to the search root so that directories
        # above root_dir never trigger an exclusion.
        try:
            return candidate.relative_to(root_dir).parts
        except ValueError:  # Should not happen if rglob starts from root_dir
            return candidate.parts

    def _keep(candidate: Path) -> bool:
        if not candidate.is_file():
            return False
        if any(piece in exclude_dirs for piece in _components(candidate)):
            return False
        return candidate.name not in exclude_files

    return [
        candidate
        for pattern in ("*.py", "*.ipynb")
        for candidate in root_dir.rglob(pattern)
        if _keep(candidate)
    ]
def search_function_in_file(file_path: Path, func_name: str) -> bool:
    """Report whether a function name occurs as a whole identifier in a file.

    For ``.py`` files the entire text is searched. For ``.ipynb`` files only
    the source of cells whose ``cell_type`` is ``"code"`` is searched. Files
    with any other suffix are never reported as a match.

    The name must not be directly preceded or followed by a word character, so
    searching for ``get`` does not match inside ``get_data``.

    Args:
        file_path: File to search.
        func_name: Function name to look for.

    Returns:
        True if the name is found; False otherwise, including when the name is
        empty or the file cannot be read or parsed (a message is printed for
        read/parse failures).
    """
    if not func_name:
        # An empty pattern would match everywhere; there is nothing to look for.
        return False

    # Lookarounds instead of \b so the boundary rule is explicit and escaping
    # keeps any regex metacharacters in the name literal.
    name_pattern = re.compile(rf"(?<!\w){re.escape(func_name)}(?!\w)")

    try:
        content = file_path.read_text(encoding="utf-8")
        if file_path.suffix == ".py":
            return name_pattern.search(content) is not None
        if file_path.suffix == ".ipynb":
            notebook = json.loads(content)
            for cell in notebook.get("cells", []):
                if cell.get("cell_type") != "code":
                    continue
                source = cell.get("source", [])
                # Notebook cell source may be a list of lines or a single string.
                cell_content = (
                    "".join(source) if isinstance(source, list) else str(source)
                )
                if name_pattern.search(cell_content):
                    return True
    except FileNotFoundError:
        # Plain print() does not interpret rich markup, so messages are emitted
        # without colour tags.
        print(f"Error: File not found during search: {file_path}")
    except json.JSONDecodeError:
        print(f"Warning: Could not parse JSON from {file_path}")
    except Exception as e:
        # Best effort: an unreadable file counts as "no match" rather than
        # aborting the analysis.
        print(f"Warning: Could not read or process {file_path} for search: {e}")
    return False
@click.command()
@click.argument(
    "utils_path_arg",
    type=click.Path(
        exists=True, file_okay=False, dir_okay=True, readable=True, resolve_path=True
    ),
)
@click.option(
    "-r",
    "--repo-root",
    "repo_root_path_option",
    default=".",
    show_default=True,
    type=click.Path(
        exists=True, file_okay=False, dir_okay=True, readable=True, resolve_path=True
    ),
    help="Path to the repository root directory.",
)
def main(utils_path_arg: str, repo_root_path_option: str):
    """Report which functions defined in a utils directory are used elsewhere.

    Every function found in the utils directory is searched for, by name, in
    the .py and .ipynb files of the repository that live outside that
    directory. The result is printed as two tables and a summary.
    \f

    Args:
        utils_path_arg: Directory whose ``*.py`` files (except ``__init__.py``)
            are scanned for function definitions. Resolved by click.
        repo_root_path_option: Directory searched for usages. Resolved by click.
    """
    utils_dir = Path(utils_path_arg)
    repo_root = Path(repo_root_path_option)
    console = Console()

    console.print("[bold blue]Starting utility function usage analysis...[/bold blue]")
    console.print(f"Scanning for functions in: [green]{utils_dir.resolve()}[/green]")
    console.print(f"Searching for usage in: [green]{repo_root.resolve()}[/green]")
    console.print(
        f"Excluding directories from search: [cyan]{', '.join(EXCLUDE_DIRS_SEARCH)}[/cyan]"
    )
    console.print(
        f"Excluding files from search: [cyan]{', '.join(EXCLUDE_FILES_SEARCH)}[/cyan]"
    )

    # __init__.py is skipped: it typically does not define many functions directly.
    utils_py_files = [
        p for p in utils_dir.rglob("*.py") if p.is_file() and p.name != "__init__.py"
    ]

    # One record per unique function name; the first definition encountered wins.
    records: List[dict] = []
    first_seen_in: Dict[str, str] = {}
    for py_file in utils_py_files:
        shown_path = _path_for_display(py_file, repo_root)
        for func in get_functions_from_py_file(py_file):
            if func in first_seen_in:
                console.print(
                    f"[yellow]Warning: Duplicate function definition for [bold]'{func}'[/bold]. "
                    f"Found in [magenta]{shown_path}[/magenta] and "
                    f"[magenta]{first_seen_in[func]}[/magenta]. Keeping the first one found for analysis.[/yellow]"
                )
                continue
            first_seen_in[func] = shown_path
            records.append(
                {
                    "function_name": func,
                    "defined_in_file": shown_path,
                    "is_used": False,
                }
            )

    if not records:
        console.print(
            f"[red]No functions found in Python files within '{utils_dir}'. Exiting.[/red]"
        )
        return

    console.print(
        f"\nFound [bold]{len(records)}[/bold] unique function(s) in [green]{utils_dir}[/green] directory (excluding __init__.py by default)."
    )

    # Files inside the utils directory are left out of the usage search:
    # otherwise each function would match its own definition and nothing could
    # ever be reported as unused.
    files_to_search = [
        candidate
        for candidate in find_files_to_search(
            repo_root, EXCLUDE_DIRS_SEARCH, EXCLUDE_FILES_SEARCH
        )
        if utils_dir not in candidate.parents
    ]
    console.print(f"Searching for usage in [bold]{len(files_to_search)}[/bold] files.")

    if not files_to_search:
        console.print(
            "[yellow]No files found to search for usage (after exclusions). All utility functions will be marked as unused.[/yellow]"
        )
    else:
        total = len(records)
        with console.status("[bold green]Analyzing function usage...") as status:
            for position, record in enumerate(records, start=1):
                func_name = record["function_name"]
                status.update(
                    f"[bold green]Analyzing function usage... ({position}/{total}) [cyan]{func_name}[/cyan][/bold green]"
                )
                # any() stops at the first file that mentions the name.
                record["is_used"] = any(
                    search_function_in_file(search_file_path, func_name)
                    for search_file_path in files_to_search
                )

    df_all_functions = pd.DataFrame(records)
    used_mask = df_all_functions["is_used"].astype(bool)
    df_used = df_all_functions[used_mask]
    df_unused = df_all_functions[~used_mask]

    used_table = _build_usage_table(
        "[bold green]Used Utility Functions[/bold green]",
        _group_names_by_file(df_used),
        "green",
    )
    unused_table = _build_usage_table(
        "[bold red]Potentially Unused Utility Functions[/bold red]",
        _group_names_by_file(df_unused),
        "red",
    )

    console.print("\n--- Results ---")
    console.print(used_table)
    console.print(unused_table)

    console.print("\n[bold]Summary:[/bold]")
    console.print(
        f" - Total utility functions analyzed: [bold]{len(df_all_functions)}[/bold]"
    )
    console.print(
        f" - Functions found used elsewhere: [bold green]{len(df_used)}[/bold green]"
    )
    console.print(
        f" - Functions potentially unused: [bold yellow]{len(df_unused)}[/bold yellow]"
    )
    console.print(
        f"\n[italic]Note: 'Potentially Unused' means the function name was not found in .py or .ipynb files outside of the specified utils directory '{utils_dir.name}' (and other excluded paths).[/italic]"
    )
    console.print(
        "[italic]This script uses string matching for function names. Review results carefully, especially for very common or short function names.\n"
        "A function might be marked 'potentially unused' if it is:\n"
        " - Called indirectly (e.g., using `getattr`, through function aliases).\n"
        " - Used in complex metaprogramming or by decorators that obscure the direct call.\n"
        " - Only used in file types not scanned by this script (e.g., shell scripts, compiled extensions).\n"
        " - Only used in directories excluded from the search (e.g., other utility libraries not part of this analysis).\n"
        " - Part of conditional import/usage paths that are not easily detected statically.[/italic]"
    )
    console.print(
        f"\nTo run again, for example: [blue]python {Path(__file__).name} {str(utils_dir)} --repo-root {str(repo_root)}[/blue]"
    )


def _path_for_display(path: Path, root: Path) -> str:
    """Return ``path`` relative to ``root``, or unchanged when it is not under ``root``."""
    try:
        return str(path.relative_to(root))
    except ValueError:
        # The utils directory may live outside the repository root.
        return str(path)


def _group_names_by_file(frame: "pd.DataFrame") -> "pd.DataFrame":
    """Collapse function rows into one row per file with a sorted, comma-joined name list."""
    if frame.empty:
        return pd.DataFrame(columns=["defined_in_file", "function_name"])
    return (
        frame.groupby("defined_in_file")["function_name"]
        .apply(lambda names: ", ".join(sorted(names)))
        .reset_index()
    )


def _build_usage_table(title: str, grouped: "pd.DataFrame", row_style: str) -> "Table":
    """Build the two-column rich table (file, function names) for one usage category."""
    table = Table(title=title, show_lines=True)
    table.add_column("Defined in File (relative to repo root)", style="magenta")
    table.add_column("Function Names", style="cyan")
    for _idx, row in grouped.iterrows():
        table.add_row(row["defined_in_file"], row["function_name"], style=row_style)
    return table
# Script entry point: invoke the click command when this file is executed directly.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment