Skip to content

Instantly share code, notes, and snippets.

@majora2007
Created September 24, 2025 11:09
Show Gist options
  • Save majora2007/fd6c3fcc759bd5f1a71e4a6439f25e60 to your computer and use it in GitHub Desktop.
Save majora2007/fd6c3fcc759bd5f1a71e4a6439f25e60 to your computer and use it in GitHub Desktop.
Restore filenames post Diesel's WebP Conversion script
#!/usr/bin/env python3
"""
File Renamer Script
Matches files between source and output folders and renames output files
to match the cleaner naming convention from source files.
Usage:
python file_renamer.py [--source SOURCE_PATH] [--output OUTPUT_PATH] [--threshold RATIO] [--dry]
Note: Search for "Blackhole" to find the defaults for the cmd arguments
"""
import argparse
import os
import re
from pathlib import Path
from typing import List, Tuple, Optional
import difflib
def find_all_files(root_path: Path) -> List[Path]:
    """Collect every file located anywhere beneath ``root_path``.

    Directories are walked recursively; only entries for which
    ``Path.is_file()`` is true end up in the returned list.
    """
    return [entry for entry in root_path.rglob('*') if entry.is_file()]
def extract_base_name(filename: str) -> str:
    """
    Reduce a filename to its bare title for comparison purposes.

    The extension is dropped, parenthesised and bracketed groups are removed,
    a trailing "dash + word" suffix is cut off, and whitespace is collapsed.

    Examples:
        'BAKEMONOGATARI v01 (2019) (Digital) (F) (LuCaZ).cbz' -> 'BAKEMONOGATARI v01'
        'One Piece v001 (2018) (Digital).zip' -> 'One Piece v001'
    """
    # Applied one after another, in this order, ignoring case.
    noise_patterns = (
        r'\s*\(\d{4}\)',     # (2019)
        r'\s*\(Digital\)',   # (Digital)
        r'\s*\(F\)',         # (F)
        r'\s*\([^)]*\)',     # Any other parentheses content
        r'\s*\[[^\]]*\]',    # Any brackets content
        r'\s*-\s*\w+$',      # Trailing dash and word
    )

    cleaned = Path(filename).stem  # filename without its extension
    for noise in noise_patterns:
        cleaned = re.compile(noise, re.IGNORECASE).sub('', cleaned)

    # split()/join collapses whitespace runs and trims both ends in one go.
    return ' '.join(cleaned.split())
def find_best_match(source_base: str, output_files: List[Path], threshold: float = 0.6) -> Optional[Path]:
    """
    Find the best matching output file for a given source base name.

    Each candidate's name is cleaned with ``extract_base_name`` and compared
    case-insensitively against ``source_base`` using
    ``difflib.SequenceMatcher``.

    Args:
        source_base: Already-cleaned base name of the source file.
        output_files: Candidate files to compare against.
        threshold: Minimum similarity ratio (inclusive) a candidate must reach.

    Returns:
        The candidate with the highest ratio at or above ``threshold``, or
        ``None`` when no candidate qualifies. When several candidates share
        the top ratio, the earliest one in ``output_files`` is returned.
    """
    wanted = source_base.lower()  # loop-invariant, so lower-case it once
    best_match = None
    best_ratio = 0.0
    for output_file in output_files:
        output_base = extract_base_name(output_file.name).lower()
        ratio = difflib.SequenceMatcher(None, wanted, output_base).ratio()
        # The threshold bound is inclusive so that threshold=1.0 still accepts
        # identical names. The strict comparison against best_ratio keeps the
        # first of several equally good candidates and rejects a ratio of 0.
        if ratio >= threshold and ratio > best_ratio:
            best_ratio = ratio
            best_match = output_file
    return best_match
def create_new_filename(source_file: Path, output_file: Path) -> str:
    """
    Build the name the output file should receive.

    The result is the source file's name minus its extension, followed by the
    extension the output file currently has.
    """
    return source_file.stem + output_file.suffix
def preview_renames(renames: List[Tuple[Path, str]]) -> None:
    """Print what each pending rename would do, without touching any file."""
    count = len(renames)
    if count == 0:
        print("No files would be renamed.")
        return

    print(f"\nPreview: {count} files would be renamed:")
    print("-" * 80)
    for current, target_name in renames:
        # One entry per rename, followed by a blank separator line.
        print(
            f"OLD: {current.name}",
            f"NEW: {target_name}",
            f"PATH: {current.parent}",
            "",
            sep="\n",
        )
def perform_renames(renames: List[Tuple[Path, str]]) -> None:
    """
    Perform the actual file renames and print a per-file and summary report.

    Args:
        renames: Pairs of (existing file path, new file name). Each file is
            renamed in place, i.e. it stays in its current directory.

    A rename is skipped when a different file already occupies the target
    name. Filesystem errors are reported and counted as failures; they do not
    abort the remaining renames.
    """
    if not renames:
        print("No files to rename.")
        return
    successful = 0
    skipped = 0
    failed = 0
    print(f"\nRenaming {len(renames)} files...")
    print("-" * 80)
    for old_path, new_name in renames:
        new_path = old_path.parent / new_name
        try:
            # Never clobber an existing file that is not the one being renamed.
            if new_path.exists() and new_path != old_path:
                print(f"SKIP: {old_path.name} -> Target already exists: {new_name}")
                skipped += 1
                continue
            old_path.rename(new_path)
        except OSError as e:
            print(f"ERROR: Failed to rename {old_path.name}: {str(e)}")
            failed += 1
        else:
            print(f"SUCCESS: {old_path.name} -> {new_name}")
            successful += 1
    # Skips are reported separately so the three counts add up to len(renames).
    print(f"\nCompleted: {successful} successful, {skipped} skipped, {failed} failed")
def _name_similarity(source_file: Path, output_file: Path) -> float:
    """Return the similarity ratio of two files' cleaned, lower-cased base names."""
    return difflib.SequenceMatcher(
        None,
        extract_base_name(source_file.name).lower(),
        extract_base_name(output_file.name).lower(),
    ).ratio()


def main():
    """
    Command-line entry point: match source files to output files and rename.

    Parses the command-line options, scans both folders, pairs every source
    file with its best-matching output file, and then either previews the
    resulting renames (``--dry``) or performs them after a confirmation prompt.

    Returns:
        0 on normal completion (including a cancelled prompt), 1 when a folder
        does not exist or contains no files.
    """
    parser = argparse.ArgumentParser(
        description="Rename files in output folder to match cleaner names from source folder"
    )
    parser.add_argument(
        '--source',
        default=r'F:\Blackhole\Manga\_processing\!Finished',
        help='Source folder path (default: F:\\Blackhole\\Manga\\_processing\\!Finished)'
    )
    parser.add_argument(
        '--output',
        default=r'F:\Blackhole\Manga\_output',
        help='Output folder path (default: F:\\Blackhole\\Manga\\_output)'
    )
    parser.add_argument(
        '--dry',
        action='store_true',
        help='Preview mode - show what would be renamed without making changes'
    )
    parser.add_argument(
        '--threshold',
        type=float,
        default=0.6,
        help='Similarity threshold for matching files (0.0-1.0, default: 0.6)'
    )
    args = parser.parse_args()

    # Convert to Path objects
    source_path = Path(args.source)
    output_path = Path(args.output)

    # Validate paths
    if not source_path.exists():
        print(f"Error: Source path does not exist: {source_path}")
        return 1
    if not output_path.exists():
        print(f"Error: Output path does not exist: {output_path}")
        return 1

    print(f"Source folder: {source_path}")
    print(f"Output folder: {output_path}")
    print(f"Dry run: {'Yes' if args.dry else 'No'}")
    print(f"Similarity threshold: {args.threshold}")

    # Find all files
    print("\nScanning folders...")
    source_files = find_all_files(source_path)
    output_files = find_all_files(output_path)
    print(f"Found {len(source_files)} files in source folder")
    print(f"Found {len(output_files)} files in output folder")
    if not source_files:
        print("No source files found.")
        return 1
    if not output_files:
        print("No output files found.")
        return 1

    # Find matches. Several source files may pick the same output file as their
    # best match (e.g. neighbouring volumes when one output is missing). A file
    # can only be renamed once, so each output file is awarded to the source
    # with the highest similarity; an earlier claimant wins ties.
    unmatched_source = []
    contested_source = []
    claims = {}  # output file -> (similarity, source file) of the current winner
    print("\nMatching files...")
    for source_file in source_files:
        source_base = extract_base_name(source_file.name)
        match = find_best_match(source_base, output_files, args.threshold)
        if match is None:
            unmatched_source.append((source_file, source_base))
            continue
        score = _name_similarity(source_file, match)
        holder = claims.get(match)
        if holder is None:
            claims[match] = (score, source_file)
        elif score > holder[0]:
            contested_source.append((holder[1], match))
            claims[match] = (score, source_file)
        else:
            contested_source.append((source_file, match))

    # Turn the surviving claims into rename instructions.
    renames = []
    matched_pairs = []
    for match, (_, source_file) in claims.items():
        new_name = create_new_filename(source_file, match)
        matched_pairs.append((source_file, match, new_name))
        # Only add to renames if the name would actually change
        if match.name != new_name:
            renames.append((match, new_name))

    # Show matched pairs for debugging
    if matched_pairs and args.dry:
        print(f"\nFound {len(matched_pairs)} matched file pairs:")
        for source_file, output_file, new_name in matched_pairs:
            print(f"SOURCE: {source_file.name}")
            print(f"OUTPUT: {output_file.name}")
            print(f"WOULD BECOME: {new_name}")
            print(f"NEEDS RENAME: {'Yes' if output_file.name != new_name else 'No'}")
            print()

    # Show unmatched files
    if unmatched_source:
        print(f"\nFound {len(unmatched_source)} unmatched source files:")
        for source_file, source_base in unmatched_source[:10]:  # Show first 10
            print(f" {source_base}")
        if len(unmatched_source) > 10:
            print(f" ... and {len(unmatched_source) - 10} more")

    # Show source files that lost their best match to a closer source file
    if contested_source:
        print(f"\nSkipped {len(contested_source)} source files whose best match was claimed by a closer source:")
        for source_file, output_file in contested_source[:10]:  # Show first 10
            print(f" {source_file.name} -> {output_file.name}")
        if len(contested_source) > 10:
            print(f" ... and {len(contested_source) - 10} more")

    # Preview or perform renames
    if args.dry:
        preview_renames(renames)
    else:
        if renames:
            try:
                response = input(f"\nProceed with renaming {len(renames)} files? (y/N): ")
            except EOFError:
                # No interactive stdin (e.g. piped/closed): treat as "no".
                response = ''
            if response.lower().startswith('y'):
                perform_renames(renames)
            else:
                print("Operation cancelled.")
        else:
            print("No files need to be renamed.")
    return 0
# Use main()'s return value as the process exit status. SystemExit is raised
# directly because the bare exit() helper is installed by the site module for
# interactive use and is not available when Python runs with -S.
if __name__ == '__main__':
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment