Skip to content

Instantly share code, notes, and snippets.

@Urpagin
Created May 30, 2025 12:14
Show Gist options
  • Select an option

  • Save Urpagin/ecd301d16acb2d4dc9adc0e7760ef9d0 to your computer and use it in GitHub Desktop.

Select an option

Save Urpagin/ecd301d16acb2d4dc9adc0e7760ef9d0 to your computer and use it in GitHub Desktop.
Small Python Filename ASCII-ification (similar to detox)
#!/usr/bin/env python3
# Author: Urpagin
# Date: 2025-05-30
# Description: Removes non-ASCII characters from the names of the files and folders passed as arguments and replaces spaces with underscores. A name that would become empty is skipped with an error message.
import sys
import argparse
from glob import iglob
from pathlib import Path
from typing import Iterator
import unicodedata
# Directories are only processed when the -d/--directories flag is given.
def perr(*values: object) -> None:
    """Write ``values`` to standard error, formatted exactly like ``print``."""
    error_stream = sys.stderr
    print(*values, file=error_stream)
def resolve_paths(parsed: argparse.Namespace) -> set[Path]:
"""Using globs, expands them to paths of existing files and directories."""
include_directories: bool = parsed.directories
include_hidden: bool = parsed.hidden
patterns: list[str] = parsed.files
paths: set[Path] = set()
for pattern in patterns:
try:
glob_iter: Iterator[str] = iglob(
pattern,
include_hidden=include_hidden
)
for resolved in glob_iter:
path: Path = Path(resolved)
# Skip directories if not wanted
if path.is_dir() and not include_directories:
continue
paths.add(path)
except (OSError, ValueError) as e:
# Handle invalid patterns or filesystem errors
perr(f"Error processing pattern '{pattern}': {e}")
return paths
def parse_args() -> argparse.Namespace:
    """Build the command-line parser and parse ``sys.argv``.

    Returns:
        A namespace with the boolean flags ``directories`` and ``hidden`` and
        the list ``files`` holding one or more file names or glob patterns.
    """
    cli = argparse.ArgumentParser(
        description='Removes non-ASCII characters from file/directory names.'
    )

    # Both switches are plain on/off flags that default to off.
    switches = (
        ('-d', '--directories', 'Include directories'),
        ('-H', '--hidden', 'Include hidden files and directories'),
    )
    for short_flag, long_flag, help_text in switches:
        cli.add_argument(
            short_flag, long_flag,
            action='store_true',
            default=False,
            help=help_text,
        )

    # At least one positional pattern is required.
    cli.add_argument(
        'files',
        nargs='+',
        help='Filenames or glob patterns (e.g., *.txt or video_*.mp4) to process',
    )
    return cli.parse_args()
def sanitize(name: str) -> str:
    """Return an ASCII-only version of ``name`` with spaces as underscores.

    The name is NFKD-normalized so accented letters split into a base letter
    plus combining marks. Combining marks and every other non-ASCII character
    are then dropped, and each space becomes an underscore.
    """
    decomposed = unicodedata.normalize('NFKD', name)
    kept = [
        '_' if ch == ' ' else ch
        for ch in decomposed
        # Keep ASCII only; 'Mn' is the Unicode category of nonspacing marks.
        if ord(ch) < 128 and unicodedata.category(ch) != 'Mn'
    ]
    return ''.join(kept)
def process(paths: set[Path]) -> None:
    """Rename every path whose name changes under ``sanitize``.

    Paths are handled deepest-first so that renaming a directory never
    invalidates a not-yet-processed path inside it. A path is skipped, with a
    message on stderr, when its sanitized name is empty or when the target
    name is already taken. A summary of the renames is printed at the end.

    Args:
        paths: Existing files and directories to rename in place.
    """
    # Number of paths renamed.
    renamed_files_count: int = 0
    renamed_dir_count: int = 0

    # Sort by depth (deepest first) to avoid invalid paths after parent renames
    sorted_paths = sorted(paths, key=lambda p: len(p.parts), reverse=True)

    for path in sorted_paths:
        sanitized: str = sanitize(path.name)
        if not sanitized:
            perr(f"Skipping '{path}': name becomes empty when sanitized")
            continue

        new_path: Path = path.parent / sanitized

        # no-op
        if new_path.name == path.name:
            continue

        # On POSIX, Path.rename() silently replaces an existing file, so two
        # names that sanitize to the same result would destroy data. Refuse
        # up front; is_symlink() also catches dangling symlinks.
        if new_path.exists() or new_path.is_symlink():
            perr(f"Cannot rename '{path}' to '{new_path}': target already exists")
            continue

        try:
            path.rename(new_path)
        except FileExistsError:
            # Still reachable on Windows, or if the target appeared meanwhile.
            perr(f"Cannot rename '{path}' to '{new_path}': target already exists")
        except OSError as e:
            perr(f"Failed to rename '{path}' to '{new_path}': {e}")
        else:
            print(f"Renamed '{path.name}' -> '{new_path.name}'")
            if new_path.is_file():
                renamed_files_count += 1
            if new_path.is_dir():
                renamed_dir_count += 1

    print(f'Total scanned: {len(paths)}\nRenamed:\n\n\t* Files: {renamed_files_count}\n\t* Directories: {renamed_dir_count}')
def main() -> None:
    """Script entry point: parse the CLI, resolve the patterns, rename the matches."""
    # Raw arguments look like "test.txt", "*" or "video_*.txt"; resolving
    # them yields the paths that are then renamed.
    cli_args: argparse.Namespace = parse_args()
    process(resolve_paths(cli_args))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment