Skip to content

Instantly share code, notes, and snippets.

@twobob
Created July 22, 2023 23:09
Show Gist options
  • Save twobob/f8f47f27bc05842b62bf59f6f5e85d40 to your computer and use it in GitHub Desktop.
Save twobob/f8f47f27bc05842b62bf59f6f5e85d40 to your computer and use it in GitHub Desktop.
Delete all subdirectories in the specified directory, except for the largest n subdirectories.
import os
import heapq
import shutil
import argparse
def get_directory_size(directory):
    """
    Calculate the size of the directory.

    Walks the tree rooted at 'directory' and adds up the size of every file
    it lists. Entries whose size cannot be read (for example a dangling
    symlink, a file removed while the scan is running, or a permission
    error) are skipped instead of aborting the scan.

    Parameters:
    directory (str): Path of the directory.

    Returns:
    int: Total size of the directory in bytes (0 if nothing could be read).
    """
    total_size = 0
    for dirpath, _, filenames in os.walk(directory):
        for filename in filenames:
            file_path = os.path.join(dirpath, filename)
            try:
                total_size += os.path.getsize(file_path)
            except OSError:
                # getsize() stats the target, so a dangling symlink or a
                # vanished/unreadable file raises; ignore that one entry.
                continue
    return total_size
def delete_subdirectories(directory, num_to_keep=500):
    """
    Delete all subdirectories in the given directory, keeping only the largest
    'num_to_keep' directories based on size.

    Every directory found by walking below 'directory' (at any depth) is a
    candidate; 'directory' itself is never a candidate and is never deleted.
    A directory is also never deleted when its path has a component named
    'tagged' or when a directory named 'tagged' exists somewhere beneath it.
    The counts are printed and the user must answer 'y' before anything is
    removed.

    Parameters:
    directory (str): Path of the directory.
    num_to_keep (int): Number of largest directories to retain.

    Returns:
    None

    Raises:
    ValueError: If 'num_to_keep' is negative.
    """
    if num_to_keep < 0:
        # heapq.nlargest() returns nothing for a negative count, which would
        # silently mark every subdirectory for deletion.
        raise ValueError("num_to_keep must be zero or greater")

    walked_paths = []
    tagged_parents = []
    for root, dirs, _ in os.walk(directory):
        if 'tagged' in dirs:
            tagged_parents.append(root)
        if root == directory:
            continue  # The first root is the top directory, not a subdirectory
        if 'tagged' in root.split(os.path.sep):
            continue  # Skip the 'tagged' subdirectory
        walked_paths.append(root)

    def contains_tagged(path):
        # True when a 'tagged' directory lives directly or deeper inside path;
        # removing path would take that 'tagged' directory with it.
        prefix = path + os.path.sep
        return any(
            parent == path or parent.startswith(prefix)
            for parent in tagged_parents
        )

    directory_sizes = [
        (get_directory_size(path), path)
        for path in walked_paths
        if not contains_tagged(path)
    ]

    def rank(entry):
        # On equal size the shallower path wins. A parent is at least as large
        # as any directory inside it (assuming the tree does not change during
        # the scan), so it always ranks above its descendants and is never
        # deleted while one of them is being kept.
        size, path = entry
        return (size, -path.count(os.path.sep), path)

    # Get the 'num_to_keep' largest directories based on their sizes
    largest_directories = heapq.nlargest(num_to_keep, directory_sizes, key=rank)
    kept_paths = {path for _, path in largest_directories}
    # Get the set of directories to delete (a set keeps lookups O(1))
    directories_to_delete = {
        path for _, path in directory_sizes if path not in kept_paths
    }

    # Confirmation before deleting directories
    num_directories_to_delete = len(directories_to_delete)
    num_largest_directories = len(largest_directories)
    print(f"Total subdirectories: {num_directories_to_delete + num_largest_directories}")
    print(f"Top {num_to_keep} subdirectories to keep: {num_largest_directories}")
    print(f"Subdirectories to be deleted: {num_directories_to_delete}")
    user_confirmation = input("Do you want to proceed with the deletion? (y/n): ")

    if user_confirmation.strip().lower() != 'y':
        print("Deletion cancelled.")
        return

    # Delete all subdirectories that are in the deletion set
    for root, dirs, _ in os.walk(directory, topdown=True):
        remaining = []
        for name in dirs:
            directory_path = os.path.join(root, name)
            if directory_path in directories_to_delete and name != 'tagged':
                print(f"Deleting directory: {directory_path}")
                shutil.rmtree(directory_path)
            else:
                print(f"Directory not in the deletion list: {directory_path}")
                remaining.append(name)
        # Prune removed directories so the walk does not try to enter them
        dirs[:] = remaining
    print("Deletion completed.")
def main():
    """
    Command-line entry point.

    Reads the target directory and the optional '--num_to_keep' count from
    the command line and hands them to delete_subdirectories().
    """
    cli = argparse.ArgumentParser(
        description='Delete all subdirectories in the specified directory, except for the largest n subdirectories.',
        usage='python delete_dirs.py /path/to/directory --num_to_keep 250',
    )
    cli.add_argument(
        'dir',
        type=str,
        help='Path to the directory where subdirectories are to be deleted',
    )
    cli.add_argument(
        '--num_to_keep',
        type=int,
        default=500,
        help='Number of largest subdirectories to keep (default: 500)',
    )
    options = cli.parse_args()
    delete_subdirectories(options.dir, options.num_to_keep)


# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment