Last active
August 17, 2025 02:02
-
-
Save yanli0303/1a63666f5835f80f50eac9d3aaaf7229 to your computer and use it in GitHub Desktop.
Image cleanup script that moves .jpg and .png files that don't match their directory pattern to a backup location.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Image cleanup script that moves .jpg, .jpeg, and .png files that don't match their directory pattern to a backup location. | |
| The pattern is derived from the directory name by lowercasing it and removing all whitespace, dots, hyphens, #, and @. | |
| A file matches when its name (without extension), normalized the same way, starts with the directory pattern. | |
| Instead of deleting files, they are moved to a backup directory with the same structure. | |
| """ | |
| import argparse | |
| import re | |
| import shutil | |
| from pathlib import Path | |
# Runs of these characters (whitespace, hyphen, dot, '#', '@') are stripped
# from directory and file names before the two are compared.
regex_del_chars = r"[\s\-\.\#\@]+"

# Lower-cased file suffixes that are treated as images.
image_extensions = {
    ".jpg",
    ".jpeg",
    ".png",
}
def normalize_name(name: str) -> str:
    """Return *name* lower-cased with all separator characters removed.

    The characters removed are those matched by the module-level
    ``regex_del_chars`` pattern.
    """
    separators = re.compile(regex_del_chars)
    return separators.sub("", name.lower())
def get_image_files(directory: Path) -> list[Path]:
    """List the image files that sit directly inside *directory*.

    An entry qualifies when it is a regular file whose lower-cased suffix is
    in ``image_extensions``. Subdirectories are not descended into.

    If a ``PermissionError`` is raised while scanning, a warning is printed
    and whatever was collected up to that point is returned.
    """
    found: list[Path] = []
    try:
        for entry in directory.iterdir():
            if not entry.is_file():
                continue
            if entry.suffix.lower() not in image_extensions:
                continue
            found.append(entry)
    except PermissionError:
        print(f"Warning: Permission denied accessing directory: {directory}")
    return found
def find_files_to_move(directory: Path) -> list[Path]:
    """
    Select the image files in a directory whose names do not match it.

    A file matches when its normalized stem (name without extension) starts
    with the normalized directory name.

    Args:
        directory: The directory to process

    Returns:
        List of file paths that should be moved to backup; empty when the
        directory name normalizes to an empty pattern
    """
    pattern = normalize_name(directory.name)
    if not pattern:
        print(f"Warning: Directory '{directory.name}' has no valid pattern after normalization")
        return []

    return [
        candidate
        for candidate in get_image_files(directory)
        if not normalize_name(candidate.stem).startswith(pattern)
    ]
def _unique_backup_path(destination: Path) -> Path:
    """Return *destination*, or a numbered sibling if that path is already taken."""
    counter = 0
    candidate = destination
    # is_symlink() also catches dangling links, which exists() reports as absent.
    while candidate.exists() or candidate.is_symlink():
        counter += 1
        candidate = destination.with_name(f"{destination.stem}-{counter}{destination.suffix}")
    return candidate


def process_directory(root_path: Path, dry_run: bool = True) -> tuple[int, int]:
    """
    Process the immediate subdirectories of a root directory.

    For every direct subdirectory of ``root_path``, image files whose names do
    not match the subdirectory's pattern are moved (or, in dry-run mode, only
    listed) to ``<root_path.parent>/<root_path.name>-to-remove/<subdirectory>``.
    Nested subdirectories and files located directly in ``root_path`` are not
    examined.

    An existing file in the backup location is never overwritten: on a name
    collision the moved file gets a ``-<n>`` suffix appended to its stem.

    Args:
        root_path: The root directory to process
        dry_run: If True, only show what would be moved without actually moving files

    Returns:
        Tuple of (total_files_processed, total_files_moved). In dry-run mode
        the second value counts the files that would be moved. (0, 0) is
        returned when the root is missing, is not a directory, cannot be
        listed, or has no subdirectories.
    """
    if not root_path.exists():
        print(f"Error: Directory '{root_path}' does not exist")
        return 0, 0
    if not root_path.is_dir():
        print(f"Error: '{root_path}' is not a directory")
        return 0, 0

    total_files_processed = 0
    total_files_moved = 0
    backup_dir = root_path.parent / f"{root_path.name}-to-remove"

    # Create backup directory
    if not dry_run:
        backup_dir.mkdir(exist_ok=True)
        print(f"Backup directory: {backup_dir}")
    else:
        print(f"Backup directory would be created: {backup_dir}")

    # Collect the subdirectories; sorted so that runs produce a stable order.
    try:
        subdirectories = sorted(p for p in root_path.iterdir() if p.is_dir())
    except PermissionError:
        print(f"Error: Permission denied accessing directory: {root_path}")
        return 0, 0
    if not subdirectories:
        print(f"No subdirectories found in: {root_path}")
        return 0, 0

    print(f"Processing {len(subdirectories)} subdirectories in: {root_path}")
    print("-" * 80)

    for subdirectory in subdirectories:
        print(f"\nProcessing subdirectory: {subdirectory.name}")
        directory_pattern = normalize_name(subdirectory.name)
        print(f"Directory pattern: '{directory_pattern}'")

        files_to_move = find_files_to_move(subdirectory)
        # Counted before anything is moved so the total covers every image seen.
        total_files_processed += len(get_image_files(subdirectory))
        if not files_to_move:
            print(f" No files to move in '{subdirectory.name}'")
            continue
        print(f" Found {len(files_to_move)} files to move:")

        # Create backup subdirectory
        backup_subdir = backup_dir / subdirectory.name
        if not dry_run:
            backup_subdir.mkdir(parents=True, exist_ok=True)

        for file_path in files_to_move:
            # shutil.move may silently replace an existing destination file,
            # so pick a free name instead of clobbering an earlier backup.
            backup_file_path = _unique_backup_path(backup_subdir / file_path.name)
            print(f" - {file_path.name} -> {backup_file_path}")
            if dry_run:
                total_files_moved += 1
                continue
            try:
                shutil.move(str(file_path), str(backup_file_path))
            except OSError as e:
                print(f" ✗ Failed to move: {e}")
            else:
                print(" ✓ Moved")
                total_files_moved += 1

    return total_files_processed, total_files_moved
def main():
    """Command-line entry point: parse arguments, confirm, run the cleanup, print a summary.

    Without ``--execute`` the script runs in dry-run mode and only reports
    what would be moved. With ``--execute`` the user is asked to confirm
    first; any answer other than "yes"/"y" (or a closed stdin) cancels.
    """
    parser = argparse.ArgumentParser(
        description="Clean up image files that don't match their directory pattern by moving them to backup",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Dry run (default) - shows what would be moved
python cleanup_images.py /path/to/directory
# Actually move the files to backup
python cleanup_images.py /path/to/directory --execute
""",
    )
    parser.add_argument("directory", type=str, help="Path to the directory containing subdirectories to process")
    parser.add_argument(
        "--execute", action="store_true", help="Actually move files to backup (default is dry-run mode)"
    )
    # NOTE: --verbose is accepted for CLI compatibility but is not read anywhere below.
    parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose output")
    args = parser.parse_args()

    # Convert to Path object
    root_path = Path(args.directory).resolve()
    dry_run = not args.execute

    print("Image Cleanup Script")
    print("=" * 50)
    print(f"Target directory: {root_path}")
    print(f"Mode: {'DRY RUN' if dry_run else 'EXECUTE'}")

    if dry_run:
        print("\n⚠️ DRY RUN MODE: No files will actually be moved")
        print("Use --execute flag to actually move files to backup")
    else:
        print("\n⚠️ EXECUTE MODE: Files will be moved to backup directory!")
        try:
            response = input("Are you sure you want to continue? (yes/no): ")
        except EOFError:
            # No interactive stdin (e.g. piped or closed): treat as "no".
            response = ""
        if response.strip().lower() not in ("yes", "y"):
            print("Operation cancelled.")
            return

    print("\nPattern matching rules:")
    # Keep this description in step with what the normalization actually strips.
    print("- Directory pattern: remove whitespace, dots, hyphens, # and @, convert to lowercase")
    print("- File matching: filename (without extension) must start with directory pattern")
    print("- Target files: .jpg, .jpeg, .png files only")
    print("- Non-matching files will be moved to backup directory with same structure")

    # Process the directory
    total_processed, total_moved = process_directory(root_path, dry_run=dry_run)

    # Summary
    print("\n" + "=" * 80)
    print("SUMMARY")
    print("=" * 80)
    print(f"Total image files processed: {total_processed}")
    if dry_run:
        print(f"Files that would be moved to backup: {total_moved}")
    else:
        print(f"Files actually moved to backup: {total_moved}")

    if dry_run and total_moved > 0:
        print("\nTo actually move these files to backup, run with --execute flag:")
        print(f"python {__file__} '{root_path}' --execute")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.