#!/usr/bin/env python3
"""
MacBook Storage Cleanup Script

This script scans your Mac for potentially unwanted files that consume disk space
and allows you to delete them. It focuses on common space-wasters like:
- Large files
- Duplicate files
- Cache files
- Downloads folder items
- Trash items
- Application logs
- Temporary files

IMPORTANT: Please review files before deletion and back up important data.
"""
|
|
|
import os |
|
import shutil |
|
import subprocess |
|
import hashlib |
|
import time |
|
import sys |
|
from datetime import datetime |
|
from pathlib import Path |
|
import argparse |
|
|
|
# ANSI escape sequences used to style terminal output.
# 'END' is the reset sequence that switches styling back off.
COLORS = dict(
    RED='\033[91m',
    GREEN='\033[92m',
    YELLOW='\033[93m',
    BLUE='\033[94m',
    MAGENTA='\033[95m',
    CYAN='\033[96m',
    BOLD='\033[1m',
    END='\033[0m',
)
|
|
|
def colorize(text, color):
    """Return *text* wrapped in the ANSI sequence stored under *color* in COLORS.

    The 'END' reset sequence is appended so the styling does not carry over
    into whatever is printed next. A *color* that is not a key of COLORS
    raises KeyError.
    """
    start_code = COLORS[color]
    reset_code = COLORS['END']
    return start_code + f"{text}" + reset_code
|
|
|
def get_size_str(size_bytes):
    """Format a byte count as a short human-readable string.

    Values below 1024 are printed unchanged with a "B" suffix. Larger values
    are scaled by powers of 1024 to KB, MB or GB and shown with two decimals.
    GB is the largest unit, so anything bigger is still expressed in GB.
    """
    kib = 1024
    mib = kib * 1024
    gib = mib * 1024

    if size_bytes < kib:
        return f"{size_bytes} B"

    # (exclusive upper bound, divisor, unit label), smallest unit first.
    for upper_bound, divisor, unit in ((mib, kib, "KB"), (gib, mib, "MB")):
        if size_bytes < upper_bound:
            return f"{size_bytes / divisor:.2f} {unit}"

    return f"{size_bytes / gib:.2f} GB"
|
|
|
def get_file_hash(file_path, block_size=65536):
    """Return the hex MD5 digest of a file's contents, or None if unreadable.

    The digest is used only to detect identical content, not for security.

    Args:
        file_path: Path of the file to hash.
        block_size: Number of bytes read per chunk, so large files are never
            loaded into memory at once.

    Returns:
        The hexadecimal digest string, or None when the file cannot be opened
        or read.
    """
    md5 = hashlib.md5()
    try:
        with open(file_path, 'rb') as f:
            for block in iter(lambda: f.read(block_size), b''):
                md5.update(block)
    except OSError:
        # Covers PermissionError, FileNotFoundError and IsADirectoryError as
        # before, plus every other OS-level failure (NotADirectoryError, I/O
        # errors, over-long names) so one bad file cannot abort a whole scan.
        return None
    return md5.hexdigest()
|
|
|
def scan_large_files(min_size_mb=100, paths=None):
    """Find files larger than a size threshold.

    Args:
        min_size_mb: Only files strictly larger than this many megabytes
            (1 MB = 1024 * 1024 bytes) are reported.
        paths: Directories to walk. Defaults to the user's home directory.

    Returns:
        A list of (file_path, size_in_bytes) tuples, largest first.
        Symbolic links and files inside hidden (dot-prefixed) directories
        below each base path are skipped.
    """
    if paths is None:
        paths = [os.path.expanduser('~')]

    threshold = min_size_mb * 1024 * 1024
    large_files = []
    total_size = 0

    print(colorize("\nScanning for large files (this may take a while)...", 'BOLD'))

    for base_path in paths:
        for root, dirs, files in os.walk(base_path):
            # Prune hidden directories in place so os.walk never descends
            # into them. Only names *below* base_path are tested, so a base
            # path that itself has a dotted component (or is '.') is still
            # scanned in full.
            dirs[:] = [d for d in dirs if not d.startswith('.')]

            for file in files:
                file_path = os.path.join(root, file)
                try:
                    if os.path.islink(file_path):
                        continue
                    file_size = os.path.getsize(file_path)
                except OSError:
                    # Unreadable or vanished file: skip it, keep scanning.
                    continue

                if file_size > threshold:
                    large_files.append((file_path, file_size))
                    total_size += file_size

    # Sort by size (largest first)
    large_files.sort(key=lambda item: item[1], reverse=True)

    print(colorize(f"Found {len(large_files)} files larger than {min_size_mb} MB", 'GREEN'))
    print(f"Total size: {get_size_str(total_size)}")

    return large_files
|
|
|
def find_duplicate_files(paths=None):
    """Find groups of files with identical content.

    Only files of at least 1 MB are considered. Files are first grouped by
    size, and only sizes shared by two or more files are hashed, because a
    file with a unique size cannot have a duplicate.

    Args:
        paths: Directories to walk. Defaults to the user's home directory.

    Returns:
        A list of (file_paths, size_in_bytes) tuples, one per duplicate set.
        file_paths lists every copy and size_in_bytes is the size of ONE
        copy. Sets are ordered by their combined size, largest first.
        Symbolic links and files inside hidden (dot-prefixed) directories
        below each base path are skipped.
    """
    if paths is None:
        paths = [os.path.expanduser('~')]

    min_size = 1024 * 1024  # 1 MB; smaller files are skipped for speed
    files_by_size = {}
    seen_paths = set()  # guards against overlapping base paths

    print(colorize("\nScanning for duplicate files (this may take a while)...", 'BOLD'))

    for base_path in paths:
        for root, dirs, files in os.walk(base_path):
            # Prune hidden directories in place so os.walk never descends
            # into them; only names below base_path are tested.
            dirs[:] = [d for d in dirs if not d.startswith('.')]

            for file in files:
                file_path = os.path.join(root, file)
                if file_path in seen_paths:
                    continue
                try:
                    if os.path.islink(file_path):
                        continue
                    file_size = os.path.getsize(file_path)
                except OSError:
                    continue

                if file_size < min_size:
                    continue

                seen_paths.add(file_path)
                files_by_size.setdefault(file_size, []).append(file_path)

    # Hash only files whose size collides with at least one other file.
    files_by_hash = {}
    for file_size, candidates in files_by_size.items():
        if len(candidates) < 2:
            continue
        for file_path in candidates:
            try:
                file_hash = get_file_hash(file_path)
            except OSError:
                continue
            if file_hash:
                files_by_hash.setdefault((file_size, file_hash), []).append(file_path)

    # Keep only real duplicate sets and report the per-file size.
    duplicates = [
        (group, file_size)
        for (file_size, _), group in files_by_hash.items()
        if len(group) > 1
    ]

    # Largest combined footprint first.
    duplicates.sort(key=lambda entry: entry[1] * len(entry[0]), reverse=True)

    # Every copy beyond the first in a set is reclaimable.
    total_size = sum(file_size * (len(group) - 1) for group, file_size in duplicates)

    print(colorize(f"Found {len(duplicates)} sets of duplicate files", 'GREEN'))
    print(f"Potential space savings: {get_size_str(total_size)}")

    return duplicates
|
|
|
def scan_caches():
    """Measure a fixed list of cache-like directories.

    Returns:
        A list of (cache_type, path, size_in_bytes) tuples for every listed
        directory that exists and is non-empty, largest first.
    """
    # NOTE(review): Xcode Archives and iOS device backups look like user data
    # rather than regenerable caches -- confirm they belong in a list whose
    # entries may be deleted wholesale.
    cache_paths = [
        ('Browser Caches', os.path.expanduser('~/Library/Caches/Google/Chrome')),
        ('Browser Caches', os.path.expanduser('~/Library/Caches/com.apple.Safari')),
        ('Browser Caches', os.path.expanduser('~/Library/Caches/Firefox')),
        ('Application Caches', os.path.expanduser('~/Library/Caches')),
        ('System Caches', '/Library/Caches'),
        ('XCode Derived Data', os.path.expanduser('~/Library/Developer/Xcode/DerivedData')),
        ('XCode Archives', os.path.expanduser('~/Library/Developer/Xcode/Archives')),
        ('iOS Device Backups', os.path.expanduser('~/Library/Application Support/MobileSync/Backup')),
    ]

    cache_files = []

    print(colorize("\nScanning cache directories...", 'BOLD'))

    for cache_type, path in cache_paths:
        try:
            if os.path.exists(path):
                dir_size = get_directory_size(path)
                if dir_size > 0:
                    cache_files.append((cache_type, path, dir_size))
        except OSError:
            continue

    # Sort by size (largest first)
    cache_files.sort(key=lambda item: item[2], reverse=True)

    # Some entries are nested inside others (the browser caches live under
    # ~/Library/Caches). Count only outermost directories in the total so
    # the same bytes are not reported twice.
    found_paths = [path for _, path, _ in cache_files]
    total_size = sum(
        size
        for _, path, size in cache_files
        if not any(path.startswith(parent + os.sep) for parent in found_paths)
    )

    print(colorize(f"Found {len(cache_files)} cache directories", 'GREEN'))
    print(f"Total size: {get_size_str(total_size)}")

    return cache_files
|
|
|
def get_directory_size(path):
    """Return the total size in bytes of the regular files under *path*.

    Symbolic links found inside the tree are neither followed nor counted.
    This keeps files outside the tree out of the total and makes symlink
    loops harmless. Entries and directories that cannot be read are skipped.

    Args:
        path: Directory to measure.

    Returns:
        The summed st_size of all regular files below *path*; 0 when *path*
        is missing, unreadable or not a directory.
    """
    total_size = 0
    pending = [path]  # explicit stack: deep trees cannot hit the recursion limit

    while pending:
        current = pending.pop()
        try:
            with os.scandir(current) as it:
                for entry in it:
                    try:
                        if entry.is_dir(follow_symlinks=False):
                            pending.append(entry.path)
                        elif entry.is_file(follow_symlinks=False):
                            total_size += entry.stat(follow_symlinks=False).st_size
                    except OSError:
                        continue
        except OSError:
            # Missing, unreadable, or not a directory: contributes nothing.
            continue

    return total_size
|
|
|
def scan_downloads():
    """List top-level Downloads entries not accessed in the last 30 days.

    Age is measured from each entry's last-access time. Symbolic links are
    ignored. Directories are reported with their total size.

    Returns:
        A list of (path, size_in_bytes, days_since_last_access) tuples,
        largest first. The list is empty when ~/Downloads is missing or
        cannot be listed.
    """
    seconds_per_day = 24 * 60 * 60
    max_idle_seconds = 30 * seconds_per_day
    folder = os.path.expanduser('~/Downloads')
    stale = []

    print(colorize("\nScanning Downloads folder...", 'BOLD'))

    if not os.path.exists(folder):
        print("Downloads folder not found")
        return stale

    reference_time = time.time()

    try:
        names = os.listdir(folder)
    except (PermissionError, FileNotFoundError):
        print("Could not access Downloads folder")
        return stale

    for name in names:
        candidate = os.path.join(folder, name)
        try:
            if os.path.islink(candidate):
                continue

            idle_seconds = reference_time - os.path.getatime(candidate)
            if not idle_seconds > max_idle_seconds:
                continue

            if os.path.isfile(candidate):
                measured = os.path.getsize(candidate)
            elif os.path.isdir(candidate):
                measured = get_directory_size(candidate)
            else:
                # Neither a regular file nor a directory: nothing to report.
                continue

            stale.append((candidate, measured, idle_seconds / seconds_per_day))
        except (PermissionError, FileNotFoundError):
            continue

    # Biggest entries first.
    stale.sort(key=lambda record: record[1], reverse=True)
    reclaimable = sum(record[1] for record in stale)

    print(colorize(f"Found {len(stale)} files older than 30 days", 'GREEN'))
    print(f"Total size: {get_size_str(reclaimable)}")

    return stale
|
|
|
def empty_trash():
    """Report the size of the user's Trash folder.

    Despite its name this function deletes nothing. It measures ~/.Trash,
    prints the result and returns the size in bytes, or 0 when the folder
    does not exist.
    """
    print(colorize("\nChecking Trash...", 'BOLD'))

    trash_dir = os.path.expanduser('~/.Trash')

    if os.path.exists(trash_dir):
        measured = get_directory_size(trash_dir)
        print(f"Trash size: {get_size_str(measured)}")
        return measured

    print("Trash folder not found")
    return 0
|
|
|
def scan_temp_files():
    """Measure a fixed list of temporary-file and log directories.

    Returns:
        A list of (path, size_in_bytes) tuples for every listed directory
        that exists and is non-empty, largest first.
    """
    locations = ['/tmp'] + [
        os.path.expanduser(template)
        for template in (
            '~/Library/Logs',
            '~/Library/Application Support/CrashReporter',
        )
    ]

    print(colorize("\nScanning for temporary files...", 'BOLD'))

    found = []
    for location in locations:
        try:
            if not os.path.exists(location):
                continue
            measured = get_directory_size(location)
        except (PermissionError, FileNotFoundError):
            continue
        if measured > 0:
            found.append((location, measured))

    # Biggest directories first.
    found.sort(key=lambda pair: pair[1], reverse=True)
    combined = sum(measured for _, measured in found)

    print(colorize(f"Found {len(found)} temporary file directories", 'GREEN'))
    print(f"Total size: {get_size_str(combined)}")

    return found
|
|
|
def delete_files(files_to_delete):
    """Delete the given files and directories, reporting each result.

    Regular files are removed with os.remove and directories with
    shutil.rmtree. Paths that are neither are silently ignored. A failure on
    one path is printed and does not stop the remaining deletions.

    Args:
        files_to_delete: Iterable of file or directory paths.

    Returns:
        A (deleted_count, deleted_size) tuple: how many paths were removed
        and how many bytes they occupied, measured just before removal.
    """
    deleted_size = 0
    deleted_count = 0

    for file_path in files_to_delete:
        try:
            if os.path.isfile(file_path):
                file_size = os.path.getsize(file_path)
                os.remove(file_path)
                deleted_size += file_size
                deleted_count += 1
                print(f"Deleted: {file_path} ({get_size_str(file_size)})")
            elif os.path.isdir(file_path):
                dir_size = get_directory_size(file_path)
                shutil.rmtree(file_path)
                deleted_size += dir_size
                deleted_count += 1
                print(f"Deleted directory: {file_path} ({get_size_str(dir_size)})")
        except OSError as e:
            # Catch every OS-level failure, not just permission/not-found.
            # shutil.rmtree and os.remove raise other OSError subclasses too
            # (for example when given a symbolic link to a directory), and one
            # undeletable path must not abort the whole cleanup run.
            print(f"Error deleting {file_path}: {e}")

    return deleted_count, deleted_size
|
|
|
def _parse_selection(raw, item_count):
    """Turn text such as "1,3,5" into the in-range 1-based indices it names.

    Numbers outside 1..item_count are dropped. Raises ValueError when any
    comma-separated token is not an integer.
    """
    numbers = [int(token.strip()) for token in raw.split(',')]
    return [number for number in numbers if 1 <= number <= item_count]


def _prompt_and_delete(items, question, selection_prompt, path_of):
    """Ask which of *items* to delete, delete them, and return bytes freed.

    *path_of* extracts the filesystem path from one element of *items*.
    """
    if input(question).lower() != 'y':
        return 0
    raw = input(selection_prompt)
    try:
        chosen = _parse_selection(raw, len(items))
    except ValueError:
        print("Invalid input")
        return 0
    _, freed = delete_files([path_of(items[number - 1]) for number in chosen])
    return freed


def _prompt_duplicate_cleanup(duplicates):
    """Let the user open one duplicate set and choose the copy to keep.

    Every other copy in that set is deleted. Returns the bytes freed.
    """
    if input("\nDo you want to view and delete any duplicate sets? (y/n): ").lower() != 'y':
        return 0

    try:
        set_index = int(input("Enter the set number to view: "))
    except ValueError:
        print("Invalid input")
        return 0
    if not 1 <= set_index <= len(duplicates):
        return 0

    paths, size = duplicates[set_index - 1]
    print(f"\nAll duplicates in set {set_index} (each {get_size_str(size)}):")
    for j, path in enumerate(paths, 1):
        print(f"{j}. {path}")

    try:
        keep_index = int(input("\nEnter the number of the file to KEEP (all others will be deleted): "))
    except ValueError:
        print("Invalid input")
        return 0
    if not 1 <= keep_index <= len(paths):
        return 0

    _, freed = delete_files([path for j, path in enumerate(paths, 1) if j != keep_index])
    return freed


def _purge_trash(trash_path):
    """Delete every entry directly inside *trash_path*; return bytes freed.

    Entries are removed from Python rather than via `rm -rf <dir>/*`: without
    a shell the `*` is passed to rm literally, so nothing would be deleted.
    """
    try:
        with os.scandir(trash_path) as it:
            entries = list(it)
    except OSError:
        print("Error emptying trash")
        return 0

    freed = 0
    for entry in entries:
        try:
            if entry.is_dir(follow_symlinks=False):
                size = get_directory_size(entry.path)
                shutil.rmtree(entry.path)
            else:
                size = entry.stat(follow_symlinks=False).st_size
                os.remove(entry.path)
            freed += size
        except OSError as e:
            print(f"Error deleting {entry.path}: {e}")
    return freed


def main():
    """Parse command-line options, run the selected scans and offer deletion.

    In interactive mode every deletion is confirmed by the user. With --auto,
    large files and duplicates are only listed, while caches, old downloads,
    the Trash and temporary directories are deleted without prompting.
    """
    parser = argparse.ArgumentParser(description='MacBook Storage Cleanup Script')
    parser.add_argument('--large', action='store_true', help='Scan for large files')
    parser.add_argument('--dupes', action='store_true', help='Scan for duplicate files')
    parser.add_argument('--caches', action='store_true', help='Clean cache files')
    parser.add_argument('--downloads', action='store_true', help='Clean old downloads')
    parser.add_argument('--trash', action='store_true', help='Empty trash')
    parser.add_argument('--temp', action='store_true', help='Clean temporary files')
    parser.add_argument('--all', action='store_true', help='Run all cleanup options')
    parser.add_argument('--auto', action='store_true', help='Run in automatic mode (no prompts)')
    parser.add_argument('--min-size', type=int, default=100, help='Minimum file size in MB for large file scan (default: 100)')

    args = parser.parse_args()

    # If no scan is selected, show help. Only the scan switches are checked:
    # --min-size always has a truthy default, so testing every parsed value
    # would never trigger the help text.
    scan_flags = (args.all, args.large, args.dupes, args.caches,
                  args.downloads, args.trash, args.temp)
    if not any(scan_flags):
        parser.print_help()
        return

    print(colorize("\n===== MacBook Storage Cleanup Script =====", 'BOLD'))
    print(colorize("WARNING: Always back up important data before deleting files!", 'RED'))

    deleted_size = 0

    if args.all or args.large:
        large_files = scan_large_files(min_size_mb=args.min_size)
        if large_files:
            print("\nLarge files found:")
            for i, (file_path, size) in enumerate(large_files[:10], 1):
                print(f"{i}. {file_path} ({get_size_str(size)})")

            if len(large_files) > 10:
                print(f"...and {len(large_files) - 10} more files")

            if args.auto:
                print("Auto mode enabled. Skipping deletion.")
            else:
                deleted_size += _prompt_and_delete(
                    large_files,
                    "\nDo you want to delete any of these files? (y/n): ",
                    "Enter the numbers of files to delete (comma-separated, e.g., 1,3,5): ",
                    lambda item: item[0],
                )

    if args.all or args.dupes:
        duplicates = find_duplicate_files()
        if duplicates:
            print("\nDuplicate files found:")
            for i, (paths, size) in enumerate(duplicates[:5], 1):
                print(f"{i}. {len(paths)} copies, {get_size_str(size)} per file")
                for j, path in enumerate(paths[:2], 1):
                    print(f" {j}. {path}")
                if len(paths) > 2:
                    print(f" ...and {len(paths) - 2} more copies")

            if len(duplicates) > 5:
                print(f"...and {len(duplicates) - 5} more duplicate sets")

            if args.auto:
                print("Auto mode enabled. Skipping deletion.")
            else:
                deleted_size += _prompt_duplicate_cleanup(duplicates)

    if args.all or args.caches:
        cache_files = scan_caches()
        if cache_files:
            print("\nCache directories found:")
            for i, (cache_type, path, size) in enumerate(cache_files, 1):
                print(f"{i}. {cache_type}: {path} ({get_size_str(size)})")

            if args.auto:
                # Auto mode removes every directory the scan returned.
                print("Auto mode enabled. Cleaning all caches.")
                _, size = delete_files([path for _, path, _ in cache_files])
                deleted_size += size
            else:
                deleted_size += _prompt_and_delete(
                    cache_files,
                    "\nDo you want to clean any cache directories? (y/n): ",
                    "Enter the numbers of directories to clean (comma-separated, e.g., 1,3,5): ",
                    lambda item: item[1],
                )

    if args.all or args.downloads:
        old_downloads = scan_downloads()
        if old_downloads:
            print("\nOld files in Downloads folder:")
            for i, (path, size, days) in enumerate(old_downloads[:10], 1):
                print(f"{i}. {path} ({get_size_str(size)}, {days:.1f} days old)")

            if len(old_downloads) > 10:
                print(f"...and {len(old_downloads) - 10} more files")

            if args.auto:
                print("Auto mode enabled. Cleaning old downloads.")
                _, size = delete_files([path for path, _, _ in old_downloads])
                deleted_size += size
            else:
                deleted_size += _prompt_and_delete(
                    old_downloads,
                    "\nDo you want to delete any of these old downloads? (y/n): ",
                    "Enter the numbers of files to delete (comma-separated, e.g., 1,3,5): ",
                    lambda item: item[0],
                )

    if args.all or args.trash:
        trash_size = empty_trash()
        if trash_size > 0:
            if args.auto:
                print("Auto mode enabled. Emptying trash.")
                confirmed = True
            else:
                confirmed = input("\nDo you want to empty the trash? (y/n): ").lower() == 'y'

            if confirmed:
                # Report the bytes actually removed, not the pre-scan estimate.
                trash_size = _purge_trash(os.path.expanduser('~/.Trash'))
                print(f"Emptied trash ({get_size_str(trash_size)})")
                deleted_size += trash_size

    if args.all or args.temp:
        temp_files = scan_temp_files()
        if temp_files:
            print("\nTemporary file directories found:")
            for i, (path, size) in enumerate(temp_files, 1):
                print(f"{i}. {path} ({get_size_str(size)})")

            if args.auto:
                print("Auto mode enabled. Cleaning temporary files.")
                _, size = delete_files([path for path, _ in temp_files])
                deleted_size += size
            else:
                deleted_size += _prompt_and_delete(
                    temp_files,
                    "\nDo you want to clean any temporary file directories? (y/n): ",
                    "Enter the numbers of directories to clean (comma-separated, e.g., 1,3,5): ",
                    lambda item: item[0],
                )

    print(colorize(f"\nTotal space freed: {get_size_str(deleted_size)}", 'BOLD'))
    print(colorize("Cleanup completed!", 'GREEN'))
|
|
|
# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()