Skip to content

Instantly share code, notes, and snippets.

@iamEtornam
Created May 17, 2025 08:15
Show Gist options
  • Save iamEtornam/6d15d32404aff2df25585237aa255e96 to your computer and use it in GitHub Desktop.
Save iamEtornam/6d15d32404aff2df25585237aa255e96 to your computer and use it in GitHub Desktop.
A comprehensive Python script that can help you identify and delete unwanted files taking up space on your MacBook.

I've created a comprehensive Python script that can help you identify and delete unwanted files taking up space on your MacBook. This script includes several features to identify different types of space-wasting files:

  • Large files - Finds files over a specified size (default 100MB)
  • Duplicate files - Identifies identical files based on content hash
  • Cache files - Locates browser caches and application caches
  • Downloads folder - Finds old files in your Downloads folder
  • Trash - Checks and empties your trash
  • Temporary files - Locates temp files and logs

How to use the script:

  • Save the script to your computer (e.g., cleanup.py)
  • Make it executable: chmod +x cleanup.py
  • Run it with options to specify what you want to scan:
# Scan for all file types
./cleanup.py --all

# Scan only for specific file types
./cleanup.py --large --dupes

# Scan for large files over 500MB
./cleanup.py --large --min-size 500

# Run in automatic mode (no prompts)
./cleanup.py --all --auto

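The script shows you what it finds and asks for confirmation before deleting anything, so you can review files before removing them. With --auto there are no prompts: cache directories, old downloads, the trash, and temporary files are cleaned automatically, while large files and duplicates are only listed and never deleted.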

#!/usr/bin/env python3
"""
MacBook Storage Cleanup Script
This script scans your Mac for potentially unwanted files that consume disk space
and allows you to delete them. It focuses on common space-wasters like:
- Large files
- Duplicate files
- Cache files
- Downloads folder items
- Trash items
- Application logs
- Temporary files
IMPORTANT: Please review files before deletion and back up important data.
"""
import os
import shutil
import subprocess
import hashlib
import time
import sys
from datetime import datetime
from pathlib import Path
import argparse
# ANSI escape sequences for coloured / emphasised terminal output.
# 'END' resets all formatting and is appended after every coloured string.
COLORS = dict(
    RED='\033[91m',
    GREEN='\033[92m',
    YELLOW='\033[93m',
    BLUE='\033[94m',
    MAGENTA='\033[95m',
    CYAN='\033[96m',
    BOLD='\033[1m',
    END='\033[0m',
)
def colorize(text, color):
    """Return *text* wrapped in the ANSI sequence registered under *color*.

    *color* is looked up in the module-level ``COLORS`` table, so an unknown
    name raises ``KeyError``. The reset sequence is always appended so the
    formatting does not leak into later output.
    """
    start = COLORS[color]
    reset = COLORS['END']
    return f"{start}{text}{reset}"
def get_size_str(size_bytes):
    """Format a byte count as a short human-readable string.

    Values below 1024 are printed verbatim with a ``B`` suffix; larger values
    are scaled by powers of 1024 and shown with two decimals as KB, MB or GB
    (GB is the largest unit used).
    """
    if size_bytes < 1024:
        return f"{size_bytes} B"
    # Each unit applies while the value is still below the next power of 1024.
    for unit, factor in (("KB", 1024), ("MB", 1024 * 1024)):
        if size_bytes < factor * 1024:
            return f"{size_bytes / factor:.2f} {unit}"
    return f"{size_bytes / (1024 * 1024 * 1024):.2f} GB"
def get_file_hash(file_path, block_size=65536):
    """Return the hex MD5 digest of a file's contents, or None if unreadable.

    The digest is used only to recognise identical content during duplicate
    detection, not for any security purpose.

    Args:
        file_path: Path of the file to hash.
        block_size: Number of bytes read per chunk, so large files are never
            loaded into memory at once.

    Returns:
        The hexadecimal digest string, or ``None`` when the file cannot be
        opened or read (missing, not permitted, a directory, a path through a
        non-directory, an I/O error, ...).
    """
    md5 = hashlib.md5()
    try:
        with open(file_path, 'rb') as f:
            for block in iter(lambda: f.read(block_size), b''):
                md5.update(block)
    except OSError:
        # Every filesystem failure is an OSError subclass; a single unreadable
        # file must not abort the whole scan that is calling us.
        return None
    return md5.hexdigest()
def scan_large_files(min_size_mb=100, paths=None):
    """Find files larger than a size threshold.

    Args:
        min_size_mb: Threshold in megabytes; only files strictly larger than
            this are reported.
        paths: Directories to walk. Defaults to the user's home directory.

    Returns:
        A list of ``(file_path, size_in_bytes)`` tuples sorted largest first.
        Symbolic links and anything inside a hidden (dot-prefixed) directory
        below a base path are ignored; unreadable entries are skipped.
    """
    if paths is None:
        paths = [os.path.expanduser('~')]
    threshold = min_size_mb * 1024 * 1024
    large_files = []
    total_size = 0
    print(colorize("\nScanning for large files (this may take a while)...", 'BOLD'))
    for base_path in paths:
        for root, dirs, files in os.walk(base_path):
            # Prune hidden directories in place so os.walk never descends into
            # them. Only names *below* the base path are tested, so a relative
            # base path ('.') or one located under a dot-directory still works.
            dirs[:] = [d for d in dirs if not d.startswith('.')]
            for file in files:
                file_path = os.path.join(root, file)
                try:
                    if os.path.islink(file_path):
                        continue
                    file_size = os.path.getsize(file_path)
                except OSError:
                    # Vanished or unreadable file: skip it, keep scanning.
                    continue
                if file_size > threshold:
                    large_files.append((file_path, file_size))
                    total_size += file_size
    # Sort by size (largest first)
    large_files.sort(key=lambda x: x[1], reverse=True)
    print(colorize(f"Found {len(large_files)} files larger than {min_size_mb} MB", 'GREEN'))
    print(f"Total size: {get_size_str(total_size)}")
    return large_files
def find_duplicate_files(paths=None):
    """Find groups of files with identical content.

    Files smaller than 1 MB, symbolic links, and anything inside a hidden
    (dot-prefixed) directory below a base path are ignored. Candidates are
    first grouped by size and only files that share a size are hashed, since
    files of different sizes cannot be identical.

    Args:
        paths: Directories to walk. Defaults to the user's home directory.

    Returns:
        A list of ``(file_paths, size_per_file_in_bytes)`` tuples, one per set
        of two or more identical files, ordered by the combined size of the
        set (largest first).
    """
    if paths is None:
        paths = [os.path.expanduser('~')]
    min_size = 1024 * 1024  # 1 MB; smaller files are skipped for performance
    print(colorize("\nScanning for duplicate files (this may take a while)...", 'BOLD'))

    # Pass 1: bucket candidate files by size (cheap, metadata only).
    files_by_size = {}
    seen_paths = set()  # guards against overlapping base paths
    for base_path in paths:
        for root, dirs, files in os.walk(base_path):
            # Prune hidden directories in place so they are never walked.
            dirs[:] = [d for d in dirs if not d.startswith('.')]
            for file in files:
                file_path = os.path.join(root, file)
                if file_path in seen_paths:
                    continue
                try:
                    if os.path.islink(file_path):
                        continue
                    file_size = os.path.getsize(file_path)
                except OSError:
                    continue
                if file_size < min_size:
                    continue
                seen_paths.add(file_path)
                files_by_size.setdefault(file_size, []).append(file_path)

    # Pass 2: hash only files whose size is shared with at least one other.
    duplicates = []
    total_size = 0
    for file_size, candidates in files_by_size.items():
        if len(candidates) < 2:
            continue
        files_by_hash = {}
        for file_path in candidates:
            try:
                file_hash = get_file_hash(file_path)
            except OSError:
                continue
            if file_hash:
                files_by_hash.setdefault(file_hash, []).append(file_path)
        for same_content in files_by_hash.values():
            if len(same_content) > 1:
                # Report the size of ONE copy; callers display it "per file".
                duplicates.append((same_content, file_size))
                # Every copy beyond the first one is reclaimable space.
                total_size += file_size * (len(same_content) - 1)

    # Largest sets first, ranked by the combined size of all copies.
    duplicates.sort(key=lambda item: item[1] * len(item[0]), reverse=True)
    print(colorize(f"Found {len(duplicates)} sets of duplicate files", 'GREEN'))
    print(f"Potential space savings: {get_size_str(total_size)}")
    return duplicates
def scan_caches():
    """Measure well-known cache and developer-data directories.

    Returns:
        A list of ``(label, path, size_in_bytes)`` tuples for every listed
        directory that exists and is non-empty, sorted largest first.

    The printed total counts each byte once: directories nested inside another
    reported directory (the browser caches live under ``~/Library/Caches``)
    are left out of the sum.
    """
    # NOTE(review): 'XCode Archives' and 'iOS Device Backups' are not caches —
    # their contents cannot be regenerated once deleted. They are listed so the
    # user can see their size; callers should not remove them unattended.
    cache_paths = [
        ('Browser Caches', os.path.expanduser('~/Library/Caches/Google/Chrome')),
        ('Browser Caches', os.path.expanduser('~/Library/Caches/com.apple.Safari')),
        ('Browser Caches', os.path.expanduser('~/Library/Caches/Firefox')),
        ('Application Caches', os.path.expanduser('~/Library/Caches')),
        ('System Caches', '/Library/Caches'),
        ('XCode Derived Data', os.path.expanduser('~/Library/Developer/Xcode/DerivedData')),
        ('XCode Archives', os.path.expanduser('~/Library/Developer/Xcode/Archives')),
        ('iOS Device Backups', os.path.expanduser('~/Library/Application Support/MobileSync/Backup')),
    ]
    cache_files = []
    print(colorize("\nScanning cache directories...", 'BOLD'))
    for cache_type, path in cache_paths:
        try:
            if os.path.exists(path):
                dir_size = get_directory_size(path)
                if dir_size > 0:
                    cache_files.append((cache_type, path, dir_size))
        except OSError:
            continue

    found_paths = [path for _, path, _ in cache_files]

    def inside_other_entry(path):
        """Return True if *path* lies within another reported directory."""
        return any(
            path != other and path.startswith(other.rstrip(os.sep) + os.sep)
            for other in found_paths
        )

    # Nested entries are already included in their parent's size.
    total_size = sum(
        size for _, path, size in cache_files if not inside_other_entry(path)
    )
    # Sort by size (largest first)
    cache_files.sort(key=lambda x: x[2], reverse=True)
    print(colorize(f"Found {len(cache_files)} cache directories", 'GREEN'))
    print(f"Total size: {get_size_str(total_size)}")
    return cache_files
def get_directory_size(path):
    """Return the total size in bytes of the regular files under *path*.

    Symbolic links found inside the tree are neither followed nor counted, so
    data living outside the directory is not attributed to it and link cycles
    cannot cause endless recursion. *path* itself may be a symlink to a
    directory. Entries that cannot be read are skipped.

    Args:
        path: Directory to measure.

    Returns:
        The summed size of all regular files found; 0 when *path* is missing,
        unreadable, or not a directory.
    """
    total_size = 0
    pending = [path]  # explicit stack: deep trees cannot hit the recursion limit
    while pending:
        current = pending.pop()
        try:
            with os.scandir(current) as it:
                for entry in it:
                    try:
                        if entry.is_dir(follow_symlinks=False):
                            pending.append(entry.path)
                        elif entry.is_file(follow_symlinks=False):
                            total_size += entry.stat(follow_symlinks=False).st_size
                    except OSError:
                        # Entry vanished or is unreadable: ignore it.
                        continue
        except OSError:
            # Directory missing, not permitted, or not a directory at all.
            continue
    return total_size
def scan_downloads():
    """List items in ~/Downloads that have not been accessed for over 30 days.

    Only the top level of the folder is inspected; symbolic links are ignored.
    Age is derived from each item's last-access time.

    Returns:
        A list of ``(path, size_in_bytes, age_in_days)`` tuples sorted by size,
        largest first. Directories are reported with their total size. The list
        is empty when the folder is missing or cannot be listed.
    """
    seconds_per_day = 24 * 60 * 60
    max_age = 30 * seconds_per_day
    folder = os.path.expanduser('~/Downloads')
    stale_items = []
    stale_bytes = 0
    print(colorize("\nScanning Downloads folder...", 'BOLD'))
    if not os.path.exists(folder):
        print("Downloads folder not found")
        return stale_items
    now = time.time()
    try:
        names = os.listdir(folder)
    except (PermissionError, FileNotFoundError):
        print("Could not access Downloads folder")
        return stale_items
    for name in names:
        try:
            candidate = os.path.join(folder, name)
            if os.path.islink(candidate):
                continue
            age = now - os.path.getatime(candidate)
            if not age > max_age:
                continue
            if os.path.isfile(candidate):
                size = os.path.getsize(candidate)
            elif os.path.isdir(candidate):
                size = get_directory_size(candidate)
            else:
                # Neither a regular file nor a directory: not reported.
                continue
            stale_items.append((candidate, size, age / seconds_per_day))
            stale_bytes += size
        except (PermissionError, FileNotFoundError):
            continue
    # Biggest space consumers first.
    stale_items.sort(key=lambda entry: entry[1], reverse=True)
    print(colorize(f"Found {len(stale_items)} files older than 30 days", 'GREEN'))
    print(f"Total size: {get_size_str(stale_bytes)}")
    return stale_items
def empty_trash():
    """Report how much space the user's Trash occupies.

    Despite its name this function deletes nothing: it prints the size of
    ``~/.Trash`` and returns that size in bytes (0 when the folder does not
    exist). Actually emptying the Trash is left to the caller.
    """
    print(colorize("\nChecking Trash...", 'BOLD'))
    trash_dir = os.path.expanduser('~/.Trash')
    if os.path.exists(trash_dir):
        occupied = get_directory_size(trash_dir)
        print(f"Trash size: {get_size_str(occupied)}")
        return occupied
    print("Trash folder not found")
    return 0
def scan_temp_files():
    """Measure the temporary-file, log and crash-report directories.

    Returns:
        A list of ``(path, size_in_bytes)`` tuples for each listed directory
        that exists and is non-empty, sorted largest first.
    """
    locations = (
        '/tmp',
        os.path.expanduser('~/Library/Logs'),
        os.path.expanduser('~/Library/Application Support/CrashReporter'),
    )
    found = []
    print(colorize("\nScanning for temporary files...", 'BOLD'))
    for location in locations:
        try:
            if not os.path.exists(location):
                continue
            size = get_directory_size(location)
            if size > 0:
                found.append((location, size))
        except (PermissionError, FileNotFoundError):
            continue
    combined = sum(size for _, size in found)
    # Largest directory first.
    found.sort(key=lambda entry: entry[1], reverse=True)
    print(colorize(f"Found {len(found)} temporary file directories", 'GREEN'))
    print(f"Total size: {get_size_str(combined)}")
    return found
def delete_files(files_to_delete):
    """Delete the given files and directory trees, reporting each result.

    Args:
        files_to_delete: Iterable of paths. Regular files are unlinked and
            directories are removed recursively.

    Returns:
        ``(deleted_count, deleted_size)``: the number of paths removed and the
        number of bytes they occupied, measured just before deletion.

    A failure on one path is printed and does not stop the remaining paths
    from being processed.
    """
    deleted_size = 0
    deleted_count = 0
    for file_path in files_to_delete:
        try:
            if os.path.isfile(file_path):
                file_size = os.path.getsize(file_path)
                os.remove(file_path)
                deleted_size += file_size
                deleted_count += 1
                print(f"Deleted: {file_path} ({get_size_str(file_size)})")
            elif os.path.isdir(file_path):
                dir_size = get_directory_size(file_path)
                shutil.rmtree(file_path)
                deleted_size += dir_size
                deleted_count += 1
                print(f"Deleted directory: {file_path} ({get_size_str(dir_size)})")
            else:
                # Already gone (e.g. removed together with a parent directory
                # earlier in this run) or not a regular file/directory.
                print(f"Skipped (not found): {file_path}")
        except OSError as e:
            # Catch OSError rather than only PermissionError/FileNotFoundError:
            # shutil.rmtree raises a plain OSError for a symlinked directory
            # (such as /tmp on macOS), and busy or read-only targets raise
            # other OSError subclasses. None of them should abort the run.
            print(f"Error deleting {file_path}: {e}")
    return deleted_count, deleted_size
# Options that select at least one scan; used to decide whether to print help.
_SCAN_OPTIONS = ('large', 'dupes', 'caches', 'downloads', 'trash', 'temp', 'all')
# Number of entries shown in the large-file and Downloads listings.
_PREVIEW_LIMIT = 10
# scan_caches() labels whose contents cannot be regenerated after deletion;
# they are never removed without an explicit interactive choice.
_KEEP_IN_AUTO_MODE = ('XCode Archives', 'iOS Device Backups')


def _ask_yes(prompt):
    """Return True when the user answers 'y' (case-insensitive) to *prompt*."""
    return input(prompt).strip().lower() == 'y'


def _ask_selection(prompt, items):
    """Prompt for comma-separated 1-based numbers and return the chosen items.

    Numbers outside ``1..len(items)`` are ignored and repeats are used once.
    Non-numeric input prints "Invalid input" and selects nothing.
    """
    raw = input(prompt)
    try:
        numbers = [int(part.strip()) for part in raw.split(',')]
    except ValueError:
        print("Invalid input")
        return []
    chosen = []
    for number in numbers:
        if 1 <= number <= len(items) and items[number - 1] not in chosen:
            chosen.append(items[number - 1])
    return chosen


def _ask_number(prompt, upper):
    """Prompt for one integer in ``1..upper``; return it, or None if invalid."""
    try:
        number = int(input(prompt))
    except ValueError:
        print("Invalid input")
        return None
    if not 1 <= number <= upper:
        print("Invalid input")
        return None
    return number


def _clean_large_files(args):
    """List large files and interactively delete chosen ones; return bytes freed."""
    large_files = scan_large_files(min_size_mb=args.min_size)
    if not large_files:
        return 0
    shown = large_files[:_PREVIEW_LIMIT]
    print("\nLarge files found:")
    for i, (file_path, size) in enumerate(shown, 1):
        print(f"{i}. {file_path} ({get_size_str(size)})")
    if len(large_files) > len(shown):
        print(f"...and {len(large_files) - len(shown)} more files")
    if args.auto:
        print("Auto mode enabled. Skipping deletion.")
        return 0
    if not _ask_yes("\nDo you want to delete any of these files? (y/n): "):
        return 0
    # Only displayed entries are selectable, so nothing unseen can be deleted.
    targets = _ask_selection(
        "Enter the numbers of files to delete (comma-separated, e.g., 1,3,5): ",
        [file_path for file_path, _ in shown],
    )
    _, freed = delete_files(targets)
    return freed


def _clean_duplicates(args):
    """List duplicate sets and interactively prune one set; return bytes freed."""
    duplicates = find_duplicate_files()
    if not duplicates:
        return 0
    print("\nDuplicate files found:")
    for i, (paths, size) in enumerate(duplicates[:5], 1):
        print(f"{i}. {len(paths)} copies, {get_size_str(size)} per file")
        for j, path in enumerate(paths[:2], 1):
            print(f" {j}. {path}")
        if len(paths) > 2:
            print(f" ...and {len(paths) - 2} more copies")
    if len(duplicates) > 5:
        print(f"...and {len(duplicates) - 5} more duplicate sets")
    if args.auto:
        print("Auto mode enabled. Skipping deletion.")
        return 0
    if not _ask_yes("\nDo you want to view and delete any duplicate sets? (y/n): "):
        return 0
    set_index = _ask_number("Enter the set number to view: ", len(duplicates))
    if set_index is None:
        return 0
    paths, size = duplicates[set_index - 1]
    # The full set is printed before anything is deleted.
    print(f"\nAll duplicates in set {set_index} (each {get_size_str(size)}):")
    for j, path in enumerate(paths, 1):
        print(f"{j}. {path}")
    keep_index = _ask_number(
        "\nEnter the number of the file to KEEP (all others will be deleted): ",
        len(paths),
    )
    if keep_index is None:
        return 0
    targets = [path for j, path in enumerate(paths, 1) if j != keep_index]
    _, freed = delete_files(targets)
    return freed


def _clean_caches(args):
    """List cache directories and delete them (auto) or chosen ones; return bytes freed."""
    cache_files = scan_caches()
    if not cache_files:
        return 0
    print("\nCache directories found:")
    for i, (cache_type, path, size) in enumerate(cache_files, 1):
        print(f"{i}. {cache_type}: {path} ({get_size_str(size)})")
    if args.auto:
        print("Auto mode enabled. Cleaning all caches.")
        targets = []
        for cache_type, path, _ in cache_files:
            if cache_type in _KEEP_IN_AUTO_MODE:
                # Archives and device backups are user data, not caches.
                print(f"Skipping {cache_type} in auto mode (cannot be regenerated): {path}")
            else:
                targets.append(path)
    elif _ask_yes("\nDo you want to clean any cache directories? (y/n): "):
        targets = _ask_selection(
            "Enter the numbers of directories to clean (comma-separated, e.g., 1,3,5): ",
            [path for _, path, _ in cache_files],
        )
    else:
        return 0
    _, freed = delete_files(targets)
    return freed


def _clean_downloads(args):
    """List old Downloads items and delete them (auto) or chosen ones; return bytes freed."""
    old_downloads = scan_downloads()
    if not old_downloads:
        return 0
    shown = old_downloads[:_PREVIEW_LIMIT]
    print("\nOld files in Downloads folder:")
    for i, (path, size, days) in enumerate(shown, 1):
        print(f"{i}. {path} ({get_size_str(size)}, {days:.1f} days old)")
    if len(old_downloads) > len(shown):
        print(f"...and {len(old_downloads) - len(shown)} more files")
    if args.auto:
        print("Auto mode enabled. Cleaning old downloads.")
        targets = [path for path, _, _ in old_downloads]
    elif _ask_yes("\nDo you want to delete any of these old downloads? (y/n): "):
        # Only displayed entries are selectable.
        targets = _ask_selection(
            "Enter the numbers of files to delete (comma-separated, e.g., 1,3,5): ",
            [path for path, _, _ in shown],
        )
    else:
        return 0
    _, freed = delete_files(targets)
    return freed


def _remove_trash_contents(trash_path):
    """Delete every entry inside *trash_path*; return ``(freed_bytes, failures)``."""
    try:
        with os.scandir(trash_path) as it:
            entries = list(it)
    except OSError as e:
        print(f"Could not read {trash_path}: {e}")
        return 0, 1
    freed = 0
    failures = 0
    for entry in entries:
        try:
            if entry.is_dir(follow_symlinks=False):
                size = get_directory_size(entry.path)
                shutil.rmtree(entry.path)
            else:
                # Files and symlinks: remove the entry itself, never its target.
                size = entry.stat(follow_symlinks=False).st_size
                os.remove(entry.path)
            freed += size
        except OSError as e:
            print(f"Error deleting {entry.path}: {e}")
            failures += 1
    return freed, failures


def _clean_trash(args):
    """Report the Trash size and empty it after confirmation; return bytes freed."""
    trash_size = empty_trash()
    if trash_size <= 0:
        return 0
    if args.auto:
        print("Auto mode enabled. Emptying trash.")
    elif not _ask_yes("\nDo you want to empty the trash? (y/n): "):
        return 0
    # The entries are removed from Python: passing '~/.Trash/*' to `rm` in an
    # argument list (no shell) is never glob-expanded, so `rm -rf` would delete
    # nothing and still exit successfully.
    freed, failures = _remove_trash_contents(os.path.expanduser('~/.Trash'))
    if failures:
        print("Error emptying trash")
    else:
        print(f"Emptied trash ({get_size_str(freed)})")
    return freed


def _clean_temp_files(args):
    """List temp/log directories and delete them (auto) or chosen ones; return bytes freed."""
    temp_files = scan_temp_files()
    if not temp_files:
        return 0
    print("\nTemporary file directories found:")
    for i, (path, size) in enumerate(temp_files, 1):
        print(f"{i}. {path} ({get_size_str(size)})")
    if args.auto:
        print("Auto mode enabled. Cleaning temporary files.")
        targets = [path for path, _ in temp_files]
    elif _ask_yes("\nDo you want to clean any temporary file directories? (y/n): "):
        targets = _ask_selection(
            "Enter the numbers of directories to clean (comma-separated, e.g., 1,3,5): ",
            [path for path, _ in temp_files],
        )
    else:
        return 0
    _, freed = delete_files(targets)
    return freed


def main():
    """Parse the command line and run the selected cleanup steps.

    Each of ``--large``, ``--dupes``, ``--caches``, ``--downloads``,
    ``--trash`` and ``--temp`` enables one step and ``--all`` enables all of
    them. When no step is selected the help text is printed and nothing else
    happens. With ``--auto`` no questions are asked: caches, old downloads,
    the trash and temporary files are cleaned, while large files and
    duplicates are only listed. The total space freed is printed at the end.
    """
    parser = argparse.ArgumentParser(description='MacBook Storage Cleanup Script')
    parser.add_argument('--large', action='store_true', help='Scan for large files')
    parser.add_argument('--dupes', action='store_true', help='Scan for duplicate files')
    parser.add_argument('--caches', action='store_true', help='Clean cache files')
    parser.add_argument('--downloads', action='store_true', help='Clean old downloads')
    parser.add_argument('--trash', action='store_true', help='Empty trash')
    parser.add_argument('--temp', action='store_true', help='Clean temporary files')
    parser.add_argument('--all', action='store_true', help='Run all cleanup options')
    parser.add_argument('--auto', action='store_true', help='Run in automatic mode (no prompts)')
    parser.add_argument('--min-size', type=int, default=100, help='Minimum file size in MB for large file scan (default: 100)')
    args = parser.parse_args()
    # Only the scan switches count here: --min-size always has a truthy
    # default, so testing every parsed value would never show the help.
    if not any(getattr(args, name) for name in _SCAN_OPTIONS):
        parser.print_help()
        return
    print(colorize("\n===== MacBook Storage Cleanup Script =====", 'BOLD'))
    print(colorize("WARNING: Always back up important data before deleting files!", 'RED'))
    steps = (
        ('large', _clean_large_files),
        ('dupes', _clean_duplicates),
        ('caches', _clean_caches),
        ('downloads', _clean_downloads),
        ('trash', _clean_trash),
        ('temp', _clean_temp_files),
    )
    deleted_size = 0
    for option, step in steps:
        if args.all or getattr(args, option):
            deleted_size += step(args)
    print(colorize(f"\nTotal space freed: {get_size_str(deleted_size)}", 'BOLD'))
    print(colorize("Cleanup completed!", 'GREEN'))
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment