Urpagin · May 30, 2025 12:14
diff --git a/detox_ascii.py b/detox_ascii.py
 #!/usr/bin/env python3

 # Author: Urpagin
 # Date: 2025-05-30
 # Description: Removes non-ASCII characters from the names of the files and folders passed as arguments and replaces spaces with underscores. A name that would become empty is skipped with an error message.

 import sys
 import argparse
 from glob import iglob
 from pathlib import Path
 from typing import Iterator
 import unicodedata


 # NOTE: "If true, directories will be resolved" describes the -d/--directories option (see resolve_paths), not the helper below.
 def perr(*values: object) -> None:
    """Write the given values to standard error.

    Formatting is delegated to print(): values are separated by a single
    space and followed by a newline.

    Args:
        *values: Objects to print.
    """
    error_stream = sys.stderr
    print(*values, file=error_stream)


 def resolve_paths(parsed: argparse.Namespace) -> set[Path]:
    """Using globs, expands them to paths of existing files and directories."""
    include_directories: bool = parsed.directories
    include_hidden: bool = parsed.hidden
    patterns: list[str] = parsed.files
    paths: set[Path] = set()
    
    for pattern in patterns:
        try:
            glob_iter: Iterator[str] = iglob(
                pattern, 
                include_hidden=include_hidden
            )
            
            for resolved in glob_iter:
                path: Path = Path(resolved)
                
                # Skip directories if not wanted
                if path.is_dir() and not include_directories:
                    continue
                    
                paths.add(path)
                
        except (OSError, ValueError) as e:
            # Handle invalid patterns or filesystem errors
            perr(f"Error processing pattern '{pattern}': {e}")
            
    return paths


 def parse_args() -> argparse.Namespace:
    """Build the command-line parser and parse ``sys.argv``.

    Returns:
        A namespace with ``directories`` and ``hidden`` (booleans, both
        False unless the flag is given) and ``files`` (one or more file
        names or glob patterns).
    """
    cli = argparse.ArgumentParser(
        description='Removes non-ASCII characters from file/directory names.'
    )

    # Boolean switches, registered in the order they appear in --help.
    switches = (
        ('-d', '--directories', 'Include directories'),
        ('-H', '--hidden', 'Include hidden files and directories'),
    )
    for short_flag, long_flag, help_text in switches:
        cli.add_argument(
            short_flag, long_flag,
            action='store_true',
            default=False,
            help=help_text
        )

    cli.add_argument(
        'files',
        nargs='+',
        help='Filenames or glob patterns (e.g., *.txt or video_*.mp4) to process'
    )

    return cli.parse_args()


 def sanitize(name: str) -> str:
    """Return an ASCII-only, space-free version of a file or directory name.

    The name is NFKD-normalized so accented letters decompose into a base
    letter plus combining marks. Every non-ASCII character (which includes
    all combining marks) is then dropped and spaces become underscores.
    Path separators are replaced with underscores as well: compatibility
    decomposition can introduce them (U+FF0F FULLWIDTH SOLIDUS becomes '/'),
    and the result must remain a single path component.

    Args:
        name: A single path component (no directory part).

    Returns:
        The sanitized name; empty when no character survives.
    """
    import os  # local import: only needed for the platform's separators

    # Characters that are turned into underscores rather than kept.
    to_underscore = {' ', '/', os.sep}
    if os.altsep:  # None on POSIX
        to_underscore.add(os.altsep)

    # Normalize so that diacritics split off as separate combining marks.
    normalized = unicodedata.normalize('NFKD', name)

    # Keep only ASCII characters; combining marks are all above U+007F.
    return ''.join(
        '_' if c in to_underscore else c
        for c in normalized
        if ord(c) < 128
    )


 def process(paths: set[Path]) -> None:
    """Rename every path whose name changes under sanitize() and print a summary.

    Paths are handled deepest-first so that renaming a directory never
    invalidates a not-yet-processed path beneath it. A path is skipped, with
    a message on stderr, when its sanitized name is empty, is not a valid
    single path component, or already exists. The existence check matters
    because ``Path.rename`` silently replaces an existing file on POSIX,
    which would lose data when two names collapse to the same ASCII form.

    Args:
        paths: Existing files and directories to rename in place.
    """

    # Number of paths renamed.
    renamed_files_count: int = 0
    renamed_dir_count: int = 0
    # Sort by depth (deepest first) to avoid invalid paths after parent renames
    sorted_paths = sorted(paths, key=lambda p: len(p.parts), reverse=True)

    for path in sorted_paths:
        sanitized: str = sanitize(path.name)
        if not sanitized:
            perr(f"Skipping '{path}': name becomes empty when sanitized")
            continue

        # no-op
        if sanitized == path.name:
            continue

        new_path: Path = path.parent / sanitized

        # The result must stay a single, real path component: '.', '..' or a
        # name containing a separator would point somewhere else entirely.
        if sanitized in ('.', '..') or new_path.name != sanitized:
            perr(f"Skipping '{path}': sanitized name '{sanitized}' is not a valid file name")
            continue

        # Refuse up front: FileExistsError is only guaranteed on Windows,
        # elsewhere an existing file would be overwritten without warning.
        if new_path.exists() or new_path.is_symlink():
            perr(f"Cannot rename '{path}' to '{new_path}': target already exists")
            continue

        try:
            path.rename(new_path)
        except FileExistsError:
            perr(f"Cannot rename '{path}' to '{new_path}': target already exists")
        except OSError as e:
            perr(f"Failed to rename '{path}' to '{new_path}': {e}")
        else:
            print(f"Renamed '{path.name}' -> '{new_path.name}'")

            if new_path.is_file():
                renamed_files_count += 1

            if new_path.is_dir():
                renamed_dir_count += 1


    print(f'Total scanned: {len(paths)}\nRenamed:\n\n\t* Files: {renamed_files_count}\n\t* Directories: {renamed_dir_count}')



 def main() -> None:
    """Entry point: parse the command line, expand the patterns, rename the matches."""
    # Arguments such as "test.txt" and "*" or "video_*.txt" are parsed first,
    # expanded to all EXISTING paths (files & directories), then renamed.
    process(resolve_paths(parse_args()))


 # Run the tool only when this file is executed as a script, not when it is imported.
 if __name__ == '__main__':
    main()
	#!/usr/bin/env python3

	# Author: Urpagin
	# Date: 2025-05-30
	# Description: Removes non-ASCII characters from the names of the files and folders passed as arguments and replaces spaces with underscores. A name that would become empty is skipped with an error message.

	import sys
	import argparse
	from glob import iglob
	from pathlib import Path
	from typing import Iterator
	import unicodedata


	# NOTE: "If true, directories will be resolved" describes the -d/--directories option (see resolve_paths), not the helper below.
	def perr(*values: object) -> None:
	    """Write the given values to standard error.

	    Formatting is delegated to print(): values are separated by a single
	    space and followed by a newline.

	    Args:
	        *values: Objects to print.
	    """
	    print(*values, file=sys.stderr)


	def resolve_paths(parsed: argparse.Namespace) -> set[Path]:
	"""Using globs, expands them to paths of existing files and directories."""
	include_directories: bool = parsed.directories
	include_hidden: bool = parsed.hidden
	patterns: list[str] = parsed.files
	paths: set[Path] = set()

	for pattern in patterns:
	try:
	glob_iter: Iterator[str] = iglob(
	pattern,
	include_hidden=include_hidden
	)

	for resolved in glob_iter:
	path: Path = Path(resolved)

	# Skip directories if not wanted
	if path.is_dir() and not include_directories:
	continue

	paths.add(path)

	except (OSError, ValueError) as e:
	# Handle invalid patterns or filesystem errors
	perr(f"Error processing pattern '{pattern}': {e}")

	return paths


	def parse_args() -> argparse.Namespace:
	    """Build the command-line parser and parse ``sys.argv``.

	    Returns:
	        A namespace with ``directories`` and ``hidden`` (booleans, both
	        False unless the flag is given) and ``files`` (one or more file
	        names or glob patterns).
	    """

	    parser = argparse.ArgumentParser(
	        description='Removes non-ASCII characters from file/directory names.'
	    )

	    parser.add_argument(
	        '-d', '--directories',
	        action='store_true',
	        default=False,
	        help='Include directories'
	    )

	    parser.add_argument(
	        '-H', '--hidden',
	        action='store_true',
	        default=False,
	        help='Include hidden files and directories'
	    )

	    parser.add_argument(
	        'files',
	        nargs='+',
	        help='Filenames or glob patterns (e.g., *.txt or video_*.mp4) to process'
	    )

	    return parser.parse_args()


	def sanitize(name: str) -> str:
	    """Return an ASCII-only, space-free version of a file or directory name.

	    The name is NFKD-normalized so accented letters decompose into a base
	    letter plus combining marks. Every non-ASCII character (which includes
	    all combining marks) is then dropped and spaces become underscores.
	    Path separators are replaced with underscores as well: compatibility
	    decomposition can introduce them (U+FF0F FULLWIDTH SOLIDUS becomes '/'),
	    and the result must remain a single path component.

	    Args:
	        name: A single path component (no directory part).

	    Returns:
	        The sanitized name; empty when no character survives.
	    """
	    import os  # local import: only needed for the platform's separators

	    # Characters that are turned into underscores rather than kept.
	    to_underscore = {' ', '/', os.sep}
	    if os.altsep:  # None on POSIX
	        to_underscore.add(os.altsep)

	    # Normalize so that diacritics split off as separate combining marks.
	    normalized = unicodedata.normalize('NFKD', name)

	    # Keep only ASCII characters; combining marks are all above U+007F.
	    return ''.join(
	        '_' if c in to_underscore else c
	        for c in normalized
	        if ord(c) < 128
	    )


	def process(paths: set[Path]) -> None:
	    """Rename every path whose name changes under sanitize() and print a summary.

	    Paths are handled deepest-first so that renaming a directory never
	    invalidates a not-yet-processed path beneath it. A path is skipped, with
	    a message on stderr, when its sanitized name is empty, is not a valid
	    single path component, or already exists. The existence check matters
	    because ``Path.rename`` silently replaces an existing file on POSIX,
	    which would lose data when two names collapse to the same ASCII form.

	    Args:
	        paths: Existing files and directories to rename in place.
	    """

	    # Number of paths renamed.
	    renamed_files_count: int = 0
	    renamed_dir_count: int = 0
	    # Sort by depth (deepest first) to avoid invalid paths after parent renames
	    sorted_paths = sorted(paths, key=lambda p: len(p.parts), reverse=True)

	    for path in sorted_paths:
	        sanitized: str = sanitize(path.name)
	        if not sanitized:
	            perr(f"Skipping '{path}': name becomes empty when sanitized")
	            continue

	        # no-op
	        if sanitized == path.name:
	            continue

	        new_path: Path = path.parent / sanitized

	        # The result must stay a single, real path component: '.', '..' or a
	        # name containing a separator would point somewhere else entirely.
	        if sanitized in ('.', '..') or new_path.name != sanitized:
	            perr(f"Skipping '{path}': sanitized name '{sanitized}' is not a valid file name")
	            continue

	        # Refuse up front: FileExistsError is only guaranteed on Windows,
	        # elsewhere an existing file would be overwritten without warning.
	        if new_path.exists() or new_path.is_symlink():
	            perr(f"Cannot rename '{path}' to '{new_path}': target already exists")
	            continue

	        try:
	            path.rename(new_path)
	        except FileExistsError:
	            perr(f"Cannot rename '{path}' to '{new_path}': target already exists")
	        except OSError as e:
	            perr(f"Failed to rename '{path}' to '{new_path}': {e}")
	        else:
	            print(f"Renamed '{path.name}' -> '{new_path.name}'")

	            if new_path.is_file():
	                renamed_files_count += 1

	            if new_path.is_dir():
	                renamed_dir_count += 1


	    print(f'Total scanned: {len(paths)}\nRenamed:\n\n\t* Files: {renamed_files_count}\n\t* Directories: {renamed_dir_count}')



	def main() -> None:
	    """Entry point: parse the command line, expand the patterns, rename the matches."""
	    # Arguments such as "test.txt" and "*" or "video_*.txt"
	    parsed: argparse.Namespace = parse_args()

	    # All EXISTING paths (files & directories)
	    paths: set[Path] = resolve_paths(parsed)

	    # Rename the paths.
	    process(paths)


	# Run the tool only when this file is executed as a script, not when it is imported.
	if __name__ == '__main__':
	    main()
No results found