majora2007 · September 24, 2025 11:09
diff --git a/rename.py b/rename.py
 #!/usr/bin/env python3
 """
 File Renamer Script

 Matches files between source and output folders and renames output files
 to match the cleaner naming convention from source files.

 Usage:
    python rename.py [--source SOURCE_PATH] [--output OUTPUT_PATH] [--dry] [--threshold RATIO]
    
 Note: Search for "Blackhole" to find the defaults for the cmd arguments
 """

 import argparse
 import os
 import re
 from pathlib import Path
 from typing import List, Tuple, Optional
 import difflib


 def find_all_files(root_path: Path) -> List[Path]:
    """Collect every regular file found anywhere beneath ``root_path``.

    Directories themselves are left out; entries keep the order in which
    ``Path.rglob`` yields them.
    """
    return [entry for entry in root_path.rglob('*') if entry.is_file()]


 def extract_base_name(filename: str) -> str:
    """
    Reduce a filename to the bare title/volume part used for matching.

    The extension is dropped, then parenthesised or bracketed tags and a
    trailing "-word" suffix are stripped, and whitespace runs are collapsed.

    Examples:
    'BAKEMONOGATARI v01 (2019) (Digital) (F) (LuCaZ).cbz' -> 'BAKEMONOGATARI v01'
    'One Piece v001 (2018) (Digital).zip' -> 'One Piece v001'
    """
    # Applied one after another, in this order, ignoring case.
    tag_patterns = (
        r'\s*\(\d{4}\)',           # (2019)
        r'\s*\(Digital\)',         # (Digital)
        r'\s*\(F\)',               # (F)
        r'\s*\([^)]*\)',           # Any other parentheses content
        r'\s*\[[^\]]*\]',          # Any brackets content
        r'\s*-\s*\w+$',            # Trailing dash and word
    )

    cleaned = Path(filename).stem
    for tag_pattern in tag_patterns:
        cleaned = re.sub(tag_pattern, '', cleaned, flags=re.IGNORECASE)

    # split() + join collapses inner whitespace and trims both ends in one step
    return ' '.join(cleaned.split())


 def find_best_match(source_base: str, output_files: List[Path], threshold: float = 0.6) -> Optional[Path]:
    """
    Pick the output file whose cleaned base name is most similar to ``source_base``.

    Similarity is the case-insensitive ``difflib.SequenceMatcher`` ratio of the
    two base names. A candidate must score strictly above ``threshold``; on
    equal scores the earliest file in ``output_files`` wins. Returns ``None``
    when no candidate qualifies.
    """
    wanted = source_base.lower()
    winner = None
    winning_score = 0.0

    for candidate in output_files:
        candidate_base = extract_base_name(candidate.name).lower()
        score = difflib.SequenceMatcher(None, wanted, candidate_base).ratio()

        # Must clear the threshold and strictly beat the current leader
        if score > threshold and score > winning_score:
            winner, winning_score = candidate, score

    return winner


 def create_new_filename(source_file: Path, output_file: Path) -> str:
    """
    Build the target file name: the source file's stem (name without its
    extension) followed by the output file's own extension.
    """
    return source_file.stem + output_file.suffix


 def preview_renames(renames: List[Tuple[Path, str]]) -> None:
    """Print a dry-run listing of each pending rename without touching any file."""
    if renames:
        print(f"\nPreview: {len(renames)} files would be renamed:")
        print("-" * 80)

        for current_path, target_name in renames:
            # One OLD/NEW/PATH triple per file, then a blank separator line
            print(f"OLD: {current_path.name}")
            print(f"NEW: {target_name}")
            print(f"PATH: {current_path.parent}")
            print()
    else:
        print("No files would be renamed.")


 def perform_renames(renames: List[Tuple[Path, str]]) -> None:
    """Rename each file in place and print a per-file result plus a final tally.

    Each entry is ``(existing path, new file name)``; the file stays in its
    current directory. A rename whose target already exists as a different
    path is skipped, and any exception raised for an entry is reported and
    counted as a failure instead of aborting the batch.
    """
    if not renames:
        print("No files to rename.")
        return

    print(f"\nRenaming {len(renames)} files...")
    print("-" * 80)

    succeeded = 0
    errors = 0

    for current_path, target_name in renames:
        try:
            destination = current_path.parent / target_name

            # Never overwrite a different file that already owns the target name
            target_taken = destination.exists() and destination != current_path
            if target_taken:
                print(f"SKIP: {current_path.name} -> Target already exists: {target_name}")
                continue

            current_path.rename(destination)
            print(f"SUCCESS: {current_path.name} -> {target_name}")
            succeeded += 1

        except Exception as exc:
            print(f"ERROR: Failed to rename {current_path.name}: {str(exc)}")
            errors += 1

    print(f"\nCompleted: {succeeded} successful, {errors} failed")


 def main():
    """Command-line entry point: pair source files with output files and rename the latter.

    Parses ``--source``, ``--output``, ``--dry`` and ``--threshold``, scans both
    folders recursively, pairs each source file with its most similar output
    file, and either previews the renames (``--dry``) or performs them after
    an interactive confirmation.

    An output file can only be renamed once, so when several source files
    pick the same output file, the most similar source keeps it (the earliest
    one on a tie) and the others are reported as unmatched.

    Returns:
        0 after a normal run (including a cancelled or empty one), 1 when a
        folder does not exist or contains no files.
    """
    parser = argparse.ArgumentParser(
        description="Rename files in output folder to match cleaner names from source folder"
    )
    parser.add_argument(
        '--source',
        default=r'F:\Blackhole\Manga\_processing\!Finished',
        help='Source folder path (default: F:\\Blackhole\\Manga\\_processing\\!Finished)'
    )
    parser.add_argument(
        '--output',
        default=r'F:\Blackhole\Manga\_output',
        help='Output folder path (default: F:\\Blackhole\\Manga\\_output)'
    )
    parser.add_argument(
        '--dry',
        action='store_true',
        help='Preview mode - show what would be renamed without making changes'
    )
    parser.add_argument(
        '--threshold',
        type=float,
        default=0.6,
        help='Similarity threshold for matching files (0.0-1.0, default: 0.6)'
    )

    args = parser.parse_args()

    # Convert to Path objects
    source_path = Path(args.source)
    output_path = Path(args.output)

    # Validate paths
    if not source_path.exists():
        print(f"Error: Source path does not exist: {source_path}")
        return 1

    if not output_path.exists():
        print(f"Error: Output path does not exist: {output_path}")
        return 1

    print(f"Source folder: {source_path}")
    print(f"Output folder: {output_path}")
    print(f"Dry run: {'Yes' if args.dry else 'No'}")
    print(f"Similarity threshold: {args.threshold}")

    # Find all files
    print("\nScanning folders...")
    source_files = find_all_files(source_path)
    output_files = find_all_files(output_path)

    print(f"Found {len(source_files)} files in source folder")
    print(f"Found {len(output_files)} files in output folder")

    if not source_files:
        print("No source files found.")
        return 1

    if not output_files:
        print("No output files found.")
        return 1

    # claims maps each output file to the best (score, source_file, source_base)
    # seen so far, so a single output file is never scheduled for two renames.
    claims = {}
    unmatched_source = []

    print("\nMatching files...")
    for source_file in source_files:
        source_base = extract_base_name(source_file.name)
        match = find_best_match(source_base, output_files, args.threshold)

        if match is None:
            unmatched_source.append((source_file, source_base))
            continue

        # Same similarity metric as find_best_match, recomputed here to rank
        # sources that compete for the same output file.
        score = difflib.SequenceMatcher(
            None, source_base.lower(), extract_base_name(match.name).lower()
        ).ratio()

        holder = claims.get(match)
        if holder is not None and holder[0] >= score:
            # An earlier source fits this output file at least as well
            unmatched_source.append((source_file, source_base))
            continue

        if holder is not None:
            # The newcomer fits better; the previous holder loses its claim
            unmatched_source.append((holder[1], holder[2]))
        claims[match] = (score, source_file, source_base)

    # Turn the surviving claims into matched pairs and pending renames
    renames = []
    matched_pairs = []
    for match, (_, source_file, _) in claims.items():
        new_name = create_new_filename(source_file, match)
        matched_pairs.append((source_file, match, new_name))
        # Only add to renames if the name would actually change
        if match.name != new_name:
            renames.append((match, new_name))

    # Show matched pairs for debugging
    if matched_pairs and args.dry:
        print(f"\nFound {len(matched_pairs)} matched file pairs:")
        for source_file, output_file, new_name in matched_pairs:
            print(f"SOURCE: {source_file.name}")
            print(f"OUTPUT: {output_file.name}")
            print(f"WOULD BECOME: {new_name}")
            print(f"NEEDS RENAME: {'Yes' if output_file.name != new_name else 'No'}")
            print()

    # Show unmatched files
    if unmatched_source:
        print(f"\nFound {len(unmatched_source)} unmatched source files:")
        for source_file, source_base in unmatched_source[:10]:  # Show first 10
            print(f"  {source_base}")
        if len(unmatched_source) > 10:
            print(f"  ... and {len(unmatched_source) - 10} more")

    # Preview or perform renames
    if args.dry:
        preview_renames(renames)
    else:
        if renames:
            response = input(f"\nProceed with renaming {len(renames)} files? (y/N): ")
            if response.lower().startswith('y'):
                perform_renames(renames)
            else:
                print("Operation cancelled.")
        else:
            print("No files need to be renamed.")

    return 0


 # Run only when executed as a script. SystemExit is raised directly because
 # the bare exit() helper is installed by the site module for interactive use
 # and is not guaranteed to exist (for example under "python -S").
 if __name__ == '__main__':
    raise SystemExit(main())
	#!/usr/bin/env python3
	"""
	File Renamer Script

	Matches files between source and output folders and renames output files
	to match the cleaner naming convention from source files.

	Usage:
	python rename.py [--source SOURCE_PATH] [--output OUTPUT_PATH] [--dry] [--threshold RATIO]

	Note: Search for "Blackhole" to find the defaults for the cmd arguments
	"""

	import argparse
	import os
	import re
	from pathlib import Path
	from typing import List, Tuple, Optional
	import difflib


	def find_all_files(root_path: Path) -> List[Path]:
	    """Collect every regular file found anywhere beneath ``root_path``.

	    Directories themselves are left out; entries keep the order in which
	    ``Path.rglob`` yields them.
	    """
	    return [entry for entry in root_path.rglob('*') if entry.is_file()]


	def extract_base_name(filename: str) -> str:
	    """
	    Reduce a filename to the bare title/volume part used for matching.

	    The extension is dropped, then parenthesised or bracketed tags and a
	    trailing "-word" suffix are stripped, and whitespace runs are collapsed.

	    Examples:
	    'BAKEMONOGATARI v01 (2019) (Digital) (F) (LuCaZ).cbz' -> 'BAKEMONOGATARI v01'
	    'One Piece v001 (2018) (Digital).zip' -> 'One Piece v001'
	    """
	    # Applied one after another, in this order, ignoring case. The "*"
	    # quantifiers matter: without them only single-character tags such as
	    # "(F)" would be removed and "(LuCaZ)" would survive.
	    tag_patterns = (
	        r'\s*\(\d{4}\)',           # (2019)
	        r'\s*\(Digital\)',         # (Digital)
	        r'\s*\(F\)',               # (F)
	        r'\s*\([^)]*\)',           # Any other parentheses content
	        r'\s*\[[^\]]*\]',          # Any brackets content
	        r'\s*-\s*\w+$',            # Trailing dash and word
	    )

	    cleaned = Path(filename).stem
	    for tag_pattern in tag_patterns:
	        cleaned = re.sub(tag_pattern, '', cleaned, flags=re.IGNORECASE)

	    # split() + join collapses inner whitespace and trims both ends in one step
	    return ' '.join(cleaned.split())


	def find_best_match(source_base: str, output_files: List[Path], threshold: float = 0.6) -> Optional[Path]:
	    """
	    Pick the output file whose cleaned base name is most similar to ``source_base``.

	    Similarity is the case-insensitive ``difflib.SequenceMatcher`` ratio of the
	    two base names. A candidate must score strictly above ``threshold``; on
	    equal scores the earliest file in ``output_files`` wins. Returns ``None``
	    when no candidate qualifies.
	    """
	    wanted = source_base.lower()
	    winner = None
	    winning_score = 0.0

	    for candidate in output_files:
	        candidate_base = extract_base_name(candidate.name).lower()
	        score = difflib.SequenceMatcher(None, wanted, candidate_base).ratio()

	        # Must clear the threshold and strictly beat the current leader
	        if score > threshold and score > winning_score:
	            winner, winning_score = candidate, score

	    return winner


	def create_new_filename(source_file: Path, output_file: Path) -> str:
	    """
	    Build the target file name: the source file's stem (name without its
	    extension) followed by the output file's own extension.
	    """
	    return source_file.stem + output_file.suffix


	def preview_renames(renames: List[Tuple[Path, str]]) -> None:
	    """Print a dry-run listing of each pending rename without touching any file."""
	    if not renames:
	        print("No files would be renamed.")
	        return

	    print(f"\nPreview: {len(renames)} files would be renamed:")
	    print("-" * 80)

	    for current_path, target_name in renames:
	        # One OLD/NEW/PATH triple per file, then a blank separator line
	        print(f"OLD: {current_path.name}")
	        print(f"NEW: {target_name}")
	        print(f"PATH: {current_path.parent}")
	        print()


	def perform_renames(renames: List[Tuple[Path, str]]) -> None:
	    """Rename each file in place and print a per-file result plus a final tally.

	    Each entry is ``(existing path, new file name)``; the file stays in its
	    current directory. A rename whose target already exists as a different
	    path is skipped, and any exception raised for an entry is reported and
	    counted as a failure instead of aborting the batch.
	    """
	    if not renames:
	        print("No files to rename.")
	        return

	    successful = 0
	    failed = 0

	    print(f"\nRenaming {len(renames)} files...")
	    print("-" * 80)

	    for old_path, new_name in renames:
	        try:
	            new_path = old_path.parent / new_name

	            # Never overwrite a different file that already owns the target name
	            if new_path.exists() and new_path != old_path:
	                print(f"SKIP: {old_path.name} -> Target already exists: {new_name}")
	                continue

	            # Perform the rename
	            old_path.rename(new_path)
	            print(f"SUCCESS: {old_path.name} -> {new_name}")
	            successful += 1

	        except Exception as e:
	            print(f"ERROR: Failed to rename {old_path.name}: {str(e)}")
	            failed += 1

	    print(f"\nCompleted: {successful} successful, {failed} failed")


	def main():
	    """Command-line entry point: pair source files with output files and rename the latter.

	    Parses ``--source``, ``--output``, ``--dry`` and ``--threshold``, scans both
	    folders recursively, pairs each source file with its most similar output
	    file, and either previews the renames (``--dry``) or performs them after
	    an interactive confirmation.

	    An output file can only be renamed once, so when several source files
	    pick the same output file, the most similar source keeps it (the earliest
	    one on a tie) and the others are reported as unmatched.

	    Returns:
	        0 after a normal run (including a cancelled or empty one), 1 when a
	        folder does not exist or contains no files.
	    """
	    parser = argparse.ArgumentParser(
	        description="Rename files in output folder to match cleaner names from source folder"
	    )
	    parser.add_argument(
	        '--source',
	        default=r'F:\Blackhole\Manga\_processing\!Finished',
	        help='Source folder path (default: F:\\Blackhole\\Manga\\_processing\\!Finished)'
	    )
	    parser.add_argument(
	        '--output',
	        default=r'F:\Blackhole\Manga\_output',
	        help='Output folder path (default: F:\\Blackhole\\Manga\\_output)'
	    )
	    parser.add_argument(
	        '--dry',
	        action='store_true',
	        help='Preview mode - show what would be renamed without making changes'
	    )
	    parser.add_argument(
	        '--threshold',
	        type=float,
	        default=0.6,
	        help='Similarity threshold for matching files (0.0-1.0, default: 0.6)'
	    )

	    args = parser.parse_args()

	    # Convert to Path objects
	    source_path = Path(args.source)
	    output_path = Path(args.output)

	    # Validate paths
	    if not source_path.exists():
	        print(f"Error: Source path does not exist: {source_path}")
	        return 1

	    if not output_path.exists():
	        print(f"Error: Output path does not exist: {output_path}")
	        return 1

	    print(f"Source folder: {source_path}")
	    print(f"Output folder: {output_path}")
	    print(f"Dry run: {'Yes' if args.dry else 'No'}")
	    print(f"Similarity threshold: {args.threshold}")

	    # Find all files
	    print("\nScanning folders...")
	    source_files = find_all_files(source_path)
	    output_files = find_all_files(output_path)

	    print(f"Found {len(source_files)} files in source folder")
	    print(f"Found {len(output_files)} files in output folder")

	    if not source_files:
	        print("No source files found.")
	        return 1

	    if not output_files:
	        print("No output files found.")
	        return 1

	    # claims maps each output file to the best (score, source_file, source_base)
	    # seen so far, so a single output file is never scheduled for two renames.
	    claims = {}
	    unmatched_source = []

	    print("\nMatching files...")
	    for source_file in source_files:
	        source_base = extract_base_name(source_file.name)
	        match = find_best_match(source_base, output_files, args.threshold)

	        if match is None:
	            unmatched_source.append((source_file, source_base))
	            continue

	        # Same similarity metric as find_best_match, recomputed here to rank
	        # sources that compete for the same output file.
	        score = difflib.SequenceMatcher(
	            None, source_base.lower(), extract_base_name(match.name).lower()
	        ).ratio()

	        holder = claims.get(match)
	        if holder is not None and holder[0] >= score:
	            # An earlier source fits this output file at least as well
	            unmatched_source.append((source_file, source_base))
	            continue

	        if holder is not None:
	            # The newcomer fits better; the previous holder loses its claim
	            unmatched_source.append((holder[1], holder[2]))
	        claims[match] = (score, source_file, source_base)

	    # Turn the surviving claims into matched pairs and pending renames
	    renames = []
	    matched_pairs = []
	    for match, (_, source_file, _) in claims.items():
	        new_name = create_new_filename(source_file, match)
	        matched_pairs.append((source_file, match, new_name))
	        # Only add to renames if the name would actually change
	        if match.name != new_name:
	            renames.append((match, new_name))

	    # Show matched pairs for debugging
	    if matched_pairs and args.dry:
	        print(f"\nFound {len(matched_pairs)} matched file pairs:")
	        for source_file, output_file, new_name in matched_pairs:
	            print(f"SOURCE: {source_file.name}")
	            print(f"OUTPUT: {output_file.name}")
	            print(f"WOULD BECOME: {new_name}")
	            print(f"NEEDS RENAME: {'Yes' if output_file.name != new_name else 'No'}")
	            print()

	    # Show unmatched files
	    if unmatched_source:
	        print(f"\nFound {len(unmatched_source)} unmatched source files:")
	        for source_file, source_base in unmatched_source[:10]: # Show first 10
	            print(f" {source_base}")
	        if len(unmatched_source) > 10:
	            print(f" ... and {len(unmatched_source) - 10} more")

	    # Preview or perform renames
	    if args.dry:
	        preview_renames(renames)
	    else:
	        if renames:
	            response = input(f"\nProceed with renaming {len(renames)} files? (y/N): ")
	            if response.lower().startswith('y'):
	                perform_renames(renames)
	            else:
	                print("Operation cancelled.")
	        else:
	            print("No files need to be renamed.")

	    return 0


	# Run only when executed as a script. SystemExit is raised directly because
	# the bare exit() helper is installed by the site module for interactive use
	# and is not guaranteed to exist (for example under "python -S").
	if __name__ == '__main__':
	    raise SystemExit(main())