Skip to content

Instantly share code, notes, and snippets.

@yanli0303
Last active August 17, 2025 02:02
Show Gist options
  • Select an option

  • Save yanli0303/1a63666f5835f80f50eac9d3aaaf7229 to your computer and use it in GitHub Desktop.

Select an option

Save yanli0303/1a63666f5835f80f50eac9d3aaaf7229 to your computer and use it in GitHub Desktop.
Image cleanup script that moves .jpg and .png files that don't match their directory pattern to a backup location.
#!/usr/bin/env python3
"""
Image cleanup script that moves .jpg, .jpeg, and .png files that don't match their directory pattern to a backup location.
The pattern is derived from the directory name by lower-casing it and removing all whitespace, dots, hyphens, #, and @.
Files are matched by removing the same characters from their names (without the extension) as well.
Instead of deleting files, they are moved to a backup directory with the same structure.
"""
import argparse
import re
import shutil
from pathlib import Path
# Regex matching runs of characters that are ignored when comparing names:
# whitespace, hyphens, dots, '#' and '@'.
regex_del_chars = r"[\s\-\.\#\@]+"
# File suffixes (lower-case, including the leading dot) that count as images.
image_extensions = {".jpg", ".jpeg", ".png"}
def normalize_name(name: str) -> str:
    """Return *name* lower-cased with all ignorable characters stripped out.

    The characters to drop are whatever the module-level ``regex_del_chars``
    pattern matches.
    """
    lowered = name.lower()
    return re.sub(regex_del_chars, "", lowered)
def get_image_files(directory: Path) -> list[Path]:
    """List the image files that sit directly inside *directory*.

    An entry qualifies when it is a regular file whose lower-cased suffix is
    in ``image_extensions``. If a ``PermissionError`` interrupts the scan, a
    warning is printed and whatever was collected up to that point is
    returned.
    """
    found: list[Path] = []
    try:
        # A plain loop (not a comprehension) so that entries gathered before
        # a PermissionError are still returned.
        for entry in directory.iterdir():
            if not entry.is_file():
                continue
            if entry.suffix.lower() in image_extensions:
                found.append(entry)
    except PermissionError:
        print(f"Warning: Permission denied accessing directory: {directory}")
    return found
def find_files_to_move(directory: Path) -> list[Path]:
    """
    Collect the image files in *directory* whose names do not fit its pattern.

    The pattern is the normalized directory name. A file fits when its
    normalized stem (file name without extension) starts with that pattern.

    Args:
        directory: The directory to inspect (not recursive)

    Returns:
        Paths of the image files that should be moved to backup; an empty
        list when the directory name normalizes to an empty pattern
    """
    prefix = normalize_name(directory.name)
    if not prefix:
        # An empty pattern would match every file, so there is nothing
        # meaningful to compare against.
        print(f"Warning: Directory '{directory.name}' has no valid pattern after normalization")
        return []

    return [
        candidate
        for candidate in get_image_files(directory)
        if not normalize_name(candidate.stem).startswith(prefix)
    ]
def process_directory(root_path: Path, dry_run: bool = True) -> tuple[int, int]:
    """
    Scan the immediate subdirectories of *root_path* and move mismatched images.

    Each direct child directory of ``root_path`` is checked with
    ``find_files_to_move``; image files lying directly in ``root_path`` and
    nested sub-subdirectories are not examined. Mismatched files are moved to
    a sibling directory named ``<root name>-to-remove``, mirroring the
    subdirectory name. A file already present in the backup location is never
    overwritten: a numbered name (``name-1.ext``, ``name-2.ext``, ...) is
    used instead.

    Args:
        root_path: The root directory whose subdirectories are processed
        dry_run: If True, only show what would be moved without actually
            moving files or creating directories

    Returns:
        Tuple of (total_files_processed, total_files_moved). In dry-run mode
        the second value counts the files that would be moved. ``(0, 0)`` is
        returned when ``root_path`` is missing, is not a directory, cannot be
        listed, has no subdirectories, or the backup directory cannot be
        created.
    """
    if not root_path.exists():
        print(f"Error: Directory '{root_path}' does not exist")
        return 0, 0
    if not root_path.is_dir():
        print(f"Error: '{root_path}' is not a directory")
        return 0, 0

    total_files_processed = 0
    total_files_moved = 0
    # The backup lives next to the root, so it is never scanned itself.
    backup_dir = root_path.parent / f"{root_path.name}-to-remove"

    # Create backup directory
    if not dry_run:
        try:
            backup_dir.mkdir(exist_ok=True)
        except OSError as e:
            # e.g. permission denied, or a regular file already has that name.
            print(f"Error: Cannot create backup directory '{backup_dir}': {e}")
            return 0, 0
        print(f"Backup directory: {backup_dir}")
    else:
        print(f"Backup directory would be created: {backup_dir}")

    # Process each subdirectory (sorted so the output order is reproducible)
    try:
        subdirectories = sorted(p for p in root_path.iterdir() if p.is_dir())
    except PermissionError:
        print(f"Error: Permission denied accessing directory: {root_path}")
        return 0, 0
    if not subdirectories:
        print(f"No subdirectories found in: {root_path}")
        return 0, 0

    print(f"Processing {len(subdirectories)} subdirectories in: {root_path}")
    print("-" * 80)
    for subdirectory in subdirectories:
        print(f"\nProcessing subdirectory: {subdirectory.name}")
        directory_pattern = normalize_name(subdirectory.name)
        print(f"Directory pattern: '{directory_pattern}'")

        files_to_move = find_files_to_move(subdirectory)
        total_files_processed += len(get_image_files(subdirectory))
        if not files_to_move:
            print(f" No files to move in '{subdirectory.name}'")
            continue
        print(f" Found {len(files_to_move)} files to move:")

        # Create backup subdirectory
        backup_subdir = backup_dir / subdirectory.name
        if not dry_run:
            try:
                backup_subdir.mkdir(parents=True, exist_ok=True)
            except OSError as e:
                print(f" ✗ Failed to create backup subdirectory '{backup_subdir}': {e}")
                continue

        for file_path in files_to_move:
            # shutil.move may silently replace an existing destination file,
            # so pick a name that is still free in the backup location.
            backup_file_path = _unique_backup_path(backup_subdir / file_path.name)
            print(f" - {file_path.name} -> {backup_file_path}")
            if not dry_run:
                try:
                    shutil.move(str(file_path), str(backup_file_path))
                    print(" ✓ Moved")
                    total_files_moved += 1
                except OSError as e:
                    print(f" ✗ Failed to move: {e}")
            else:
                total_files_moved += 1
    return total_files_processed, total_files_moved


def _unique_backup_path(destination: Path) -> Path:
    """Return *destination*, or a numbered sibling if that path already exists."""
    if not destination.exists():
        return destination
    counter = 1
    while True:
        candidate = destination.with_name(f"{destination.stem}-{counter}{destination.suffix}")
        if not candidate.exists():
            return candidate
        counter += 1
def main():
    """Command-line entry point: parse arguments, confirm, run, and summarize.

    Runs in dry-run mode unless ``--execute`` is given. In execute mode the
    user must answer "yes" or "y" at an interactive prompt; any other answer,
    or a closed standard input, cancels the operation without moving
    anything.
    """
    parser = argparse.ArgumentParser(
        description="Clean up image files that don't match their directory pattern by moving them to backup",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Dry run (default) - shows what would be moved
  python cleanup_images.py /path/to/directory

  # Actually move the files to backup
  python cleanup_images.py /path/to/directory --execute
""",
    )
    parser.add_argument("directory", type=str, help="Path to the directory containing subdirectories to process")
    parser.add_argument(
        "--execute", action="store_true", help="Actually move files to backup (default is dry-run mode)"
    )
    # NOTE: --verbose is accepted but its value is not read anywhere in this
    # function; it is kept so existing command lines continue to parse.
    parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose output")
    args = parser.parse_args()

    # Convert to Path object
    root_path = Path(args.directory).resolve()
    dry_run = not args.execute

    print("Image Cleanup Script")
    print("=" * 50)
    print(f"Target directory: {root_path}")
    print(f"Mode: {'DRY RUN' if dry_run else 'EXECUTE'}")
    if dry_run:
        print("\n⚠️ DRY RUN MODE: No files will actually be moved")
        print("Use --execute flag to actually move files to backup")
    else:
        print("\n⚠️ EXECUTE MODE: Files will be moved to backup directory!")
        try:
            response = input("Are you sure you want to continue? (yes/no): ")
        except EOFError:
            # No interactive stdin (pipe closed, cron, ...): never proceed
            # without an explicit confirmation.
            response = ""
        if response.strip().lower() not in ["yes", "y"]:
            print("Operation cancelled.")
            return

    print("\nPattern matching rules:")
    print("- Directory pattern: remove whitespace, dots, hyphens, '#' and '@', convert to lowercase")
    print("- File matching: filename (without extension) must start with directory pattern")
    print("- Target files: .jpg, .jpeg, .png files only")
    print("- Non-matching files will be moved to backup directory with same structure")

    # Process the directory
    total_processed, total_moved = process_directory(root_path, dry_run=dry_run)

    # Summary
    print("\n" + "=" * 80)
    print("SUMMARY")
    print("=" * 80)
    print(f"Total image files processed: {total_processed}")
    if dry_run:
        print(f"Files that would be moved to backup: {total_moved}")
    else:
        print(f"Files actually moved to backup: {total_moved}")
    if dry_run and total_moved > 0:
        print("\nTo actually move these files to backup, run with --execute flag:")
        print(f"python {__file__} '{root_path}' --execute")
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
@yanli0303
Copy link
Copy Markdown
Author

yanli0303 commented Aug 15, 2025

uv run https://gist.github.com/yanli0303/1a63666f5835f80f50eac9d3aaaf7229/raw/ -h

usage: cleanup_images.py [-h] [--execute] [--verbose] directory

Clean up image files that don't match their directory pattern by moving them to backup

positional arguments:
  directory      Path to the directory containing subdirectories to process

options:
  -h, --help     show this help message and exit
  --execute      Actually move files to backup (default is dry-run mode)
  --verbose, -v  Enable verbose output

Examples:
  # Dry run (default) - shows what would be moved
  python cleanup_images.py /path/to/directory
  
  # Actually move the files to backup
  python cleanup_images.py /path/to/directory --execute

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment