Created
May 30, 2025 12:14
-
-
Save Urpagin/ecd301d16acb2d4dc9adc0e7760ef9d0 to your computer and use it in GitHub Desktop.
Small Python Filename ASCII-ification (similar to detox)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # Author: Urpagin | |
| # Date: 2025-05-30 | |
| # Description: Removes every non-ASCII character from the names of the files and folders passed as arguments. Replaces spaces with underscores. If a name would become empty, that path is skipped and an error is printed. | |
| import sys | |
| import argparse | |
| from glob import iglob | |
| from pathlib import Path | |
| from typing import Iterator | |
| import unicodedata | |
| # NOTE: directories are only processed when the -d/--directories flag is given (see parse_args). | |
def perr(*values: object) -> None:
    """Print *values* on standard error, formatted exactly as ``print`` would.

    Args:
        *values: Objects to print, separated by single spaces and followed
            by a newline.
    """
    # Looked up on every call so that a redirected sys.stderr is honoured.
    error_stream = sys.stderr
    print(*values, file=error_stream)
| def resolve_paths(parsed: argparse.Namespace) -> set[Path]: | |
| """Using globs, expands them to paths of existing files and directories.""" | |
| include_directories: bool = parsed.directories | |
| include_hidden: bool = parsed.hidden | |
| patterns: list[str] = parsed.files | |
| paths: set[Path] = set() | |
| for pattern in patterns: | |
| try: | |
| glob_iter: Iterator[str] = iglob( | |
| pattern, | |
| include_hidden=include_hidden | |
| ) | |
| for resolved in glob_iter: | |
| path: Path = Path(resolved) | |
| # Skip directories if not wanted | |
| if path.is_dir() and not include_directories: | |
| continue | |
| paths.add(path) | |
| except (OSError, ValueError) as e: | |
| # Handle invalid patterns or filesystem errors | |
| perr(f"Error processing pattern '{pattern}': {e}") | |
| return paths | |
def parse_args() -> argparse.Namespace:
    """Build the command-line parser and parse the process arguments.

    Returns:
        A namespace with ``directories`` (bool, ``-d``), ``hidden``
        (bool, ``-H``) and ``files`` (one or more names or glob patterns).
    """
    cli = argparse.ArgumentParser(
        description='Removes non-ASCII characters from file/directory names.'
    )

    # Both opt-in switches have the same shape, so register them from a table.
    switches = (
        ('-d', '--directories', 'Include directories'),
        ('-H', '--hidden', 'Include hidden files and directories'),
    )
    for short_flag, long_flag, help_text in switches:
        cli.add_argument(
            short_flag, long_flag,
            action='store_true',
            default=False,
            help=help_text,
        )

    cli.add_argument(
        'files',
        nargs='+',
        help='Filenames or glob patterns (e.g., *.txt or video_*.mp4) to process'
    )
    return cli.parse_args()
def sanitize(name: str) -> str:
    """Return an ASCII-only version of *name* that is safe as one file name.

    Accented letters are reduced to their base letter, every remaining
    non-ASCII character is dropped, and spaces are replaced by underscores.
    A ``/`` is also replaced by an underscore so the result can never span
    more than one path component.

    Args:
        name: A single file or directory name (not a full path).

    Returns:
        The sanitized name. It is empty when nothing ASCII remains.
    """
    # NFKD splits accented letters into base letter + combining mark and
    # expands compatibility characters into their plain equivalents.
    decomposed = unicodedata.normalize('NFKD', name)

    # Keeping only ASCII also discards the combining marks produced above,
    # since none of them are ASCII.
    ascii_only = ''.join(ch for ch in decomposed if ord(ch) < 128)

    # Some compatibility characters expand to text containing an ASCII
    # slash (U+2105 "care of" becomes "c/o"); left in place, the slash would
    # be interpreted as a path separator by the caller.
    return ascii_only.replace(' ', '_').replace('/', '_')
def process(paths: set[Path]) -> None:
    """Rename each path whose final component changes when sanitized.

    Paths are handled deepest first. A path is skipped, with a message on
    stderr, when its sanitized name is empty, is not usable as a single
    file name, or is already taken by another entry. A summary of how many
    files and directories were renamed is printed at the end.

    Args:
        paths: Files and directories to consider for renaming.
    """
    # Number of paths renamed.
    renamed_files_count: int = 0
    renamed_dir_count: int = 0

    # Sort by depth (deepest first) to avoid invalid paths after parent renames
    sorted_paths = sorted(paths, key=lambda p: len(p.parts), reverse=True)

    for path in sorted_paths:
        sanitized: str = sanitize(path.name)

        if not sanitized:
            perr(f"Skipping '{path}': name becomes empty when sanitized")
            continue

        # no-op
        if sanitized == path.name:
            continue

        # '.', '..' and anything containing a separator would make the join
        # below point somewhere other than a sibling of the original entry.
        if sanitized in ('.', '..') or '/' in sanitized:
            perr(f"Skipping '{path}': sanitized name '{sanitized}' is not a valid file name")
            continue

        new_path: Path = path.parent / sanitized

        # On POSIX, rename() silently replaces an existing file, so the
        # FileExistsError handler below is not enough to prevent data loss.
        # is_symlink() is checked too because exists() is False for a
        # dangling symlink, which would also be overwritten.
        if new_path.is_symlink() or new_path.exists():
            perr(f"Cannot rename '{path}' to '{new_path}': target already exists")
            continue

        try:
            path.rename(new_path)
        except FileExistsError:
            # Raised on Windows, or when the target appeared after the check.
            perr(f"Cannot rename '{path}' to '{new_path}': target already exists")
        except OSError as e:
            perr(f"Failed to rename '{path}' to '{new_path}': {e}")
        else:
            # Reported outside the try so a failing print is never mistaken
            # for a failed rename.
            print(f"Renamed '{path.name}' -> '{new_path.name}'")
            if new_path.is_file():
                renamed_files_count += 1
            elif new_path.is_dir():
                renamed_dir_count += 1

    print(f'Total scanned: {len(paths)}\nRenamed:\n\n\t* Files: {renamed_files_count}\n\t* Directories: {renamed_dir_count}')
def main() -> None:
    """Run the tool: parse arguments, expand the patterns, rename the matches."""
    # parse_args() yields the patterns and flags (e.g. "test.txt", "*" or
    # "video_*.txt"), resolve_paths() turns them into paths, and process()
    # performs the renames.
    process(resolve_paths(parse_args()))
# Run the command-line tool only when this file is executed as a script.
if "__main__" == __name__:
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment