Skip to content

Instantly share code, notes, and snippets.

@tommylees112
Created May 6, 2025 22:27
Show Gist options
  • Save tommylees112/e2965114d75cc537daf70932c5172c16 to your computer and use it in GitHub Desktop.
Save tommylees112/e2965114d75cc537daf70932c5172c16 to your computer and use it in GitHub Desktop.
# used_utils.py -- run with: `uv run --with rich --with pandas --with click used_utils.py utils`
import ast
import json
from pathlib import Path
from typing import Dict, List, Set
import click # Added click
import pandas as pd # Added pandas
from rich.console import Console
from rich.table import Table
# --- Search configuration ---
# The utils directory and the repository root are supplied on the command
# line (click argument / option on `main`), so only the exclusions live here.

# Directory names that are skipped when looking for function *usage*.
EXCLUDE_DIRS_SEARCH: Set[str] = {
    ".venv",
    "__pycache__",
}

# File names that are skipped when looking for function *usage*.
EXCLUDE_FILES_SEARCH: Set[str] = {
    "used_utils.py",
}
def get_functions_from_py_file(file_path: Path) -> List[str]:
    """
    Parse a Python file and return the names of the functions defined in it.

    Every ``def`` and ``async def`` reachable through ``ast.walk`` is
    reported, which includes methods and nested functions, in the order
    ``ast.walk`` visits them.

    Args:
        file_path: Path of the Python source file to inspect.

    Returns:
        The function names found. An empty list is returned (after printing a
        warning) when the file is missing, cannot be read, or cannot be parsed.
    """
    try:
        content = file_path.read_text(encoding="utf-8")
        tree = ast.parse(content, filename=str(file_path))
    except FileNotFoundError:
        # The messages carry rich markup, so they must go through a rich
        # Console; the builtin print() would emit the tags literally.
        Console().print(f"[red]Error: File not found: {file_path}[/red]")
        return []
    except SyntaxError as e:
        Console().print(
            f"[yellow]Warning: Could not parse {file_path} due to SyntaxError: {e}[/yellow]"
        )
        return []
    except Exception as e:
        # Best effort: one unreadable file must not abort the whole analysis.
        Console().print(
            f"[yellow]Warning: Could not process {file_path}: {e}[/yellow]"
        )
        return []

    # `async def` produces AsyncFunctionDef nodes, which are functions too.
    return [
        node.name
        for node in ast.walk(tree)
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
    ]
def find_files_to_search(
    root_dir: Path, exclude_dirs: Set[str], exclude_files: Set[str]
) -> List[Path]:
    """
    Collect every .py and .ipynb file below ``root_dir`` that is not excluded.

    A file is excluded when any component of its path relative to
    ``root_dir`` appears in ``exclude_dirs``, or when its file name appears in
    ``exclude_files``. All ``*.py`` matches are listed before the ``*.ipynb``
    matches.
    """

    def is_excluded(candidate: Path) -> bool:
        # Compare path components relative to the root so that the location of
        # the root itself can never trigger an exclusion.
        try:
            segments = candidate.relative_to(root_dir).parts
        except ValueError:  # Should not happen if rglob starts from root_dir
            segments = candidate.parts
        if any(segment in exclude_dirs for segment in segments):
            return True
        return candidate.name in exclude_files

    matches: List[Path] = []
    for pattern in ("*.py", "*.ipynb"):
        matches.extend(
            candidate
            for candidate in root_dir.rglob(pattern)
            if candidate.is_file() and not is_excluded(candidate)
        )
    return matches
def search_function_in_file(file_path: Path, func_name: str) -> bool:
    """
    Report whether ``func_name`` occurs as a whole identifier in a file.

    For ``.py`` files the full text is searched. For ``.ipynb`` files only the
    ``source`` of cells whose ``cell_type`` is ``"code"`` is searched. Files
    with any other suffix are never considered a match.

    The name is matched on word boundaries, so looking for ``load`` does not
    match ``download`` or ``load_data``.

    Args:
        file_path: File to inspect.
        func_name: Function name to look for.

    Returns:
        True if the name was found; False otherwise, including when the file
        is missing, is not valid JSON (notebooks), or cannot be read. Those
        failures print a warning instead of raising.
    """
    import re  # Local import keeps this block self-contained.

    name_pattern = re.compile(rf"\b{re.escape(func_name)}\b")

    try:
        content = file_path.read_text(encoding="utf-8")
        if file_path.suffix == ".py":
            return name_pattern.search(content) is not None
        if file_path.suffix == ".ipynb":
            notebook = json.loads(content)
            for cell in notebook.get("cells", []):
                if cell.get("cell_type") != "code":
                    continue
                source = cell.get("source", [])
                # The cell source is handled both as a list of lines and as a
                # single value.
                cell_content = (
                    "".join(source) if isinstance(source, list) else str(source)
                )
                if name_pattern.search(cell_content):
                    return True
            return False
    except FileNotFoundError:
        # This should ideally not be hit if find_files_to_search works correctly.
        # The messages carry rich markup, so they go through a rich Console;
        # the builtin print() would emit the tags literally.
        Console().print(f"[red]Error: File not found during search: {file_path}[/red]")
        return False
    except json.JSONDecodeError:
        Console().print(
            f"[yellow]Warning: Could not parse JSON from {file_path}[/yellow]"
        )
        return False
    except Exception as e:
        # Best effort: one unreadable file must not abort the whole search.
        Console().print(
            f"[yellow]Warning: Could not read or process {file_path} for search: {e}[/yellow]"
        )
        return False
    # Neither a .py nor an .ipynb file.
    return False
def _group_function_names(df: pd.DataFrame) -> pd.DataFrame:
    """Collapse one-row-per-function data into one row per defining file."""
    if df.empty:
        return pd.DataFrame(columns=["defined_in_file", "function_name"])
    return (
        df.groupby("defined_in_file")["function_name"]
        .apply(lambda names: ", ".join(sorted(names)))
        .reset_index()
    )


def _build_results_table(title: str, display_df: pd.DataFrame, row_style: str) -> Table:
    """Render a grouped (file -> function names) frame as a rich table."""
    table = Table(title=title, show_lines=True)
    table.add_column("Defined in File (relative to repo root)", style="magenta")
    table.add_column("Function Names", style="cyan")
    for defined_in, names in zip(
        display_df["defined_in_file"], display_df["function_name"]
    ):
        table.add_row(defined_in, names, style=row_style)
    return table


@click.command()
@click.argument(
    "utils_path_arg",
    type=click.Path(
        exists=True, file_okay=False, dir_okay=True, readable=True, resolve_path=True
    ),
)
@click.option(
    "-r",
    "--repo-root",
    "repo_root_path_option",
    default=".",
    show_default=True,
    type=click.Path(
        exists=True, file_okay=False, dir_okay=True, readable=True, resolve_path=True
    ),
    help="Path to the repository root directory.",
)
def main(utils_path_arg: str, repo_root_path_option: str):
    """Report which functions of a utils directory are used in the repository.

    Collects every function defined in the Python files below UTILS_PATH_ARG
    and looks for its name in the .py and .ipynb files of the repository root
    that lie outside the utils directory. Results are printed as two tables
    (used / potentially unused) followed by a summary.
    """
    # NOTE: click shows the docstring above as the command's --help text.
    utils_dir = Path(utils_path_arg)
    repo_root = Path(repo_root_path_option)
    console = Console()

    console.print("[bold blue]Starting utility function usage analysis...[/bold blue]")
    console.print(f"Scanning for functions in: [green]{utils_dir.resolve()}[/green]")
    console.print(f"Searching for usage in: [green]{repo_root.resolve()}[/green]")
    console.print(
        f"Excluding directories from search: [cyan]{', '.join(sorted(EXCLUDE_DIRS_SEARCH))}[/cyan]"
    )
    console.print(
        f"Excluding files from search: [cyan]{', '.join(sorted(EXCLUDE_FILES_SEARCH))}[/cyan]"
    )

    # __init__.py is skipped: it typically doesn't define many funcs directly.
    # Sorting makes "first definition wins" for duplicate names deterministic.
    utils_py_files = sorted(
        p
        for p in utils_dir.rglob("*.py")
        if p.is_file() and p.name != "__init__.py"
    )

    all_utils_functions_list = []  # Rows for the DataFrame
    # Function name -> file of its first definition, used to detect duplicates.
    seen_functions: Dict[str, str] = {}
    for py_file in utils_py_files:
        try:
            relative_file_path = str(py_file.relative_to(repo_root))
        except ValueError:
            # The utils directory is not located below the repo root; fall
            # back to the full path instead of crashing.
            relative_file_path = str(py_file)
        for func in get_functions_from_py_file(py_file):
            if func in seen_functions:
                console.print(
                    f"[yellow]Warning: Duplicate function definition for [bold]'{func}'[/bold]. "
                    f"Found in [magenta]{relative_file_path}[/magenta] and "
                    f"[magenta]{seen_functions[func]}[/magenta]. Keeping the first one found for analysis."
                )
                continue
            seen_functions[func] = relative_file_path
            all_utils_functions_list.append(
                {
                    "function_name": func,
                    "defined_in_file": relative_file_path,
                    "is_used": False,
                }
            )

    if not all_utils_functions_list:
        console.print(
            f"[red]No functions found in Python files within '{utils_dir}'. Exiting.[/red]"
        )
        return

    df_all_functions = pd.DataFrame(all_utils_functions_list)
    console.print(
        f"\nFound [bold]{len(df_all_functions)}[/bold] unique function(s) in [green]{utils_dir}[/green] directory (excluding __init__.py by default)."
    )

    # Files inside the utils directory are left out of the usage search:
    # each function's own `def` line would otherwise always count as a use,
    # and no function could ever be reported as unused.
    files_to_search = [
        candidate
        for candidate in find_files_to_search(
            repo_root, EXCLUDE_DIRS_SEARCH, EXCLUDE_FILES_SEARCH
        )
        if utils_dir not in candidate.parents
    ]
    console.print(f"Searching for usage in [bold]{len(files_to_search)}[/bold] files.")

    if not files_to_search:
        console.print(
            "[yellow]No files found to search for usage (after exclusions). All utility functions will be marked as unused.[/yellow]"
        )
    else:
        function_names = df_all_functions["function_name"].tolist()
        total = len(function_names)
        used_flags = []
        with console.status("[bold green]Analyzing function usage...") as status:
            for position, func_name in enumerate(function_names, start=1):
                status.update(
                    f"[bold green]Analyzing function usage... ({position}/{total}) [cyan]{func_name}[/cyan][/bold green]"
                )
                # any() stops at the first file that mentions the name.
                used_flags.append(
                    any(
                        search_function_in_file(search_file_path, func_name)
                        for search_file_path in files_to_search
                    )
                )
        df_all_functions["is_used"] = used_flags

    # --- Split into used / unused and group per defining file for display ---
    is_used = df_all_functions["is_used"].astype(bool)
    df_used = df_all_functions[is_used]
    df_unused = df_all_functions[~is_used]

    used_table = _build_results_table(
        "[bold green]Used Utility Functions[/bold green]",
        _group_function_names(df_used),
        "green",
    )
    unused_table = _build_results_table(
        "[bold red]Potentially Unused Utility Functions[/bold red]",
        _group_function_names(df_unused),
        "red",
    )

    console.print("\n--- Results ---")
    console.print(used_table)
    console.print(unused_table)

    console.print("\n[bold]Summary:[/bold]")
    console.print(
        f" - Total utility functions analyzed: [bold]{len(df_all_functions)}[/bold]"
    )
    console.print(
        f" - Functions found used elsewhere: [bold green]{len(df_used)}[/bold green]"
    )
    console.print(
        f" - Functions potentially unused: [bold yellow]{len(df_unused)}[/bold yellow]"
    )
    console.print(
        f"\n[italic]Note: 'Potentially Unused' means the function name was not found in .py or .ipynb files outside of the specified utils directory '{utils_dir.name}' (and other excluded paths).[/italic]"
    )
    # Explicit newlines: adjacent string literals are concatenated as-is, so
    # without them the caveats would be printed as one run-on line.
    console.print(
        "[italic]This script uses string matching for function names. Review results carefully, "
        "especially for very common or short function names.\n"
        "A function might be marked 'potentially unused' if it is:\n"
        " - Called indirectly (e.g., using `getattr`, through function aliases).\n"
        " - Used in complex metaprogramming or by decorators that obscure the direct call.\n"
        " - Only used in file types not scanned by this script (e.g., shell scripts, compiled extensions).\n"
        " - Only used in directories excluded from the search (e.g., other utility libraries not part of this analysis).\n"
        " - Part of conditional import/usage paths that are not easily detected statically.[/italic]"
    )
    console.print(
        f"\nTo run again, for example: [blue]python {Path(__file__).name} {str(utils_dir)} --repo-root {str(repo_root)}[/blue]"
    )


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment