Created
October 8, 2023 14:07
-
-
Save glenn-jocher/8cee2e8c48884f0c22867b8c5206b55f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
This script recursively scans Python files within a specified directory to identify functions and classes
that lack docstrings.

Features:
- Can navigate deeply nested directory structures to analyze all `.py` files.
- Allows for specific directories (like `venv` or `runs`) to be excluded from the scan.
- For each missing docstring, the script outputs the file path and the specific function or class declaration.
- Provides a summary count of total functions/classes analyzed, how many have docstrings, and how many are missing them.
- Offers an overview of the number of missing docstrings categorized by the top-level directory.

Usage:
- Define the starting directory and any directories to exclude, then run the script.
- Review the printed output for details on missing docstrings and the overall summary.

Note: This tool is beneficial for maintaining code documentation standards across projects and can
aid in identifying areas where documentation might be improved.
"""
from collections import defaultdict | |
from pathlib import Path | |
def find_missing_docstrings_in_directory(directory_path: Path, exclude_dirs: "list | None" = None) -> None:
    """
    Recursively scan all Python files in a directory for functions and classes without docstrings.

    Every ``.py`` file is parsed with ``ast``, so only a genuine docstring (a string literal that is the
    first statement of the body) counts, whatever its quote style, and ``async def`` functions are
    included. Each finding, a summary, and a per-directory breakdown are printed to stdout.

    Args:
        directory_path (Path): Directory to start the scan from.
        exclude_dirs (list | None): Substrings of directory paths to exclude. A file is skipped when the
            path of its parent directory, taken relative to ``directory_path``, contains any of them.
            ``None`` (the default) excludes nothing.
    """
    import ast  # Function-scope import: nothing else in the module needs it.

    # Normalise to a tuple so no mutable default is shared between calls.
    patterns = tuple(exclude_dirs or ())

    def should_exclude(file_path: Path) -> bool:
        """Check if the file sits in a directory matching one of the exclude patterns."""
        # Only the directory part below the scan root is inspected, so neither the file's own name
        # nor the location of the scan root itself can trigger an exclusion.
        relative_dir = str(file_path.relative_to(directory_path).parent)
        return any(pattern in relative_dir for pattern in patterns)

    total_functions = 0
    total_classes = 0
    missing_function_docstrings = 0
    missing_class_docstrings = 0
    missing_by_directory = defaultdict(int)
    definition_types = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)

    for py_file in directory_path.rglob('*.py'):
        # rglob can also yield directories whose name ends in ".py"; those cannot be read as source.
        if not py_file.is_file() or should_exclude(py_file):
            continue

        # 'utf-8-sig' drops a leading byte-order mark that would otherwise reach the parser as a
        # stray character; undecodable bytes are replaced rather than aborting the scan.
        source = py_file.read_text(encoding='utf-8-sig', errors='replace')
        try:
            tree = ast.parse(source, filename=str(py_file))
        except (SyntaxError, ValueError) as err:
            # Not parseable by this interpreter (or contains null bytes): report it and keep scanning.
            print(f"Skipping {py_file}: could not be parsed ({err})")
            continue

        # Text-mode reading already normalised line endings to "\n", so this split lines up with
        # the 1-based line numbers recorded on the AST nodes.
        lines = source.split('\n')

        # First path component below the scan root; for a file directly in the root this is the
        # file name itself.
        top_level = py_file.relative_to(directory_path).parts[0]

        # ast.walk is breadth-first; sort so findings are printed in source order.
        definitions = sorted(
            (node for node in ast.walk(tree) if isinstance(node, definition_types)),
            key=lambda node: (node.lineno, node.col_offset),
        )
        for node in definitions:
            is_class = isinstance(node, ast.ClassDef)
            if is_class:
                total_classes += 1
            else:
                total_functions += 1

            if ast.get_docstring(node, clean=False) is not None:
                continue

            declaration = lines[node.lineno - 1].strip()
            missing_by_directory[top_level] += 1
            if is_class:
                missing_class_docstrings += 1
                print(f"In {py_file}, missing docstring for class: {declaration}")
            else:
                missing_function_docstrings += 1
                print(f"In {py_file}, missing docstring for function: {declaration}")

    print("\nSummary:")
    print(
        f"Total functions: {total_functions}, With docstrings: {total_functions - missing_function_docstrings}, Missing docstrings: {missing_function_docstrings}")
    print(
        f"Total classes: {total_classes}, With docstrings: {total_classes - missing_class_docstrings}, Missing docstrings: {missing_class_docstrings}")
    print("\nMissing docstrings by directory:")
    for dir_name, count in missing_by_directory.items():
        print(f"{dir_name}: {count} missing docstrings")
if __name__ == "__main__":
    # Script entry point: scan from the current working directory (edit to start elsewhere),
    # leaving out virtual-environment and run-output directories.
    scan_root = Path.cwd().resolve()
    skipped_dirs = ['venv', 'runs']
    find_missing_docstrings_in_directory(scan_root, skipped_dirs)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment