|
import argparse
import hashlib
import os
import sys
from collections import defaultdict
from typing import List
|
|
|
|
|
def calculate_file_hash(file_path: str, algorithm: str = 'sha256') -> str:
    """
    Compute the hex digest of a file's contents.

    The file is opened in binary mode and consumed in 8192-byte chunks, so
    the whole file is never held in memory at once.

    Args:
        file_path (str): Location of the file to hash.
        algorithm (str, optional): Algorithm name passed to ``hashlib.new``.
            Defaults to 'sha256'.

    Returns:
        str: Hexadecimal digest of the file's bytes.
    """
    digest = hashlib.new(algorithm)
    with open(file_path, 'rb') as stream:
        # iter() with a sentinel keeps calling read() until it returns b''.
        for chunk in iter(lambda: stream.read(8192), b''):
            digest.update(chunk)
    return digest.hexdigest()
|
|
|
|
|
def replace_last_substring(s: str, old: str, new: str) -> str:
    """
    Swap the right-most occurrence of a substring for another string.

    Args:
        s (str): The text to search.
        old (str): The substring whose last occurrence is replaced.
        new (str): The text inserted in place of that occurrence.

    Returns:
        str: ``s`` with the last occurrence of ``old`` replaced by ``new``,
            or ``s`` unchanged when ``old`` does not occur.
    """
    # rpartition cuts the string around the right-most match; the middle
    # element is empty when there is no match at all.
    head, match, tail = s.rpartition(old)
    if not match:
        return s
    return head + new + tail
|
|
|
|
|
def rename_files_in_directory(target_path: str, preview: bool = False) -> None:
    """
    Move files into a directory whose name merges their last two path levels.

    For every regular file below ``target_path`` whose name does not start
    with '.', the final path separator of its directory is replaced by a
    space, so ``a/b/c/file`` is moved to ``a/b c/file``. The destination
    directory is created when missing.

    Files located directly in ``target_path`` are always left untouched:
    merging the top directory with its parent would move them out of the
    tree being processed.

    NOTE(review): for a first-level subdirectory the merged directory is a
    sibling of ``target_path`` (e.g. ``top/sub`` -> ``top sub``); confirm
    this is intended.

    Args:
        target_path (str): The path to the directory.
        preview (bool, optional): When True, only print the planned renames.
            Defaults to False.
    """
    top = os.fspath(target_path)
    for root, _dirs, files in os.walk(top):
        if root == top:
            # os.walk yields the top directory exactly as it was passed in;
            # its files stay where they are.
            continue

        # Every other root is os.path.join(parent, name), so split() always
        # yields a non-empty leaf here.
        parent, leaf = os.path.split(root)
        new_root = f"{parent} {leaf}"

        for file in files:
            old_path = os.path.join(root, file)
            if file.startswith('.') or not os.path.isfile(old_path):
                continue

            new_path = os.path.join(new_root, file)

            if preview:
                print(f"Preview: Rename {old_path} to {new_path}")
                continue

            try:
                # create the new directory if it doesn't exist
                os.makedirs(new_root, exist_ok=True)
                os.rename(old_path, new_path)
                print(f"Renamed {old_path} to {new_path}")
            except OSError as e:
                print(f"Error renaming {old_path} to {new_path}: {e}")
|
|
|
|
|
def delete_empty_directories(target_path: str, preview: bool = False) -> None:
    """
    Delete empty directories in a directory and its subdirectories.

    A directory counts as empty when it has no entries, or when its only
    entry is a '.DS_Store' file (which is removed together with it). The
    tree is walked bottom-up so a parent is examined after its children.
    ``target_path`` itself is never removed. Symbolic links to directories
    are skipped so nothing is deleted through a link.

    Args:
        target_path (str): The path to the directory.
        preview (bool, optional): When True, only print what would be
            deleted. Defaults to False.
    """
    # In preview mode nothing is removed from disk, so remember which
    # directories would have been deleted; otherwise a parent containing
    # only empty children would wrongly look non-empty.
    would_delete = set()

    for root, dirs, _files in os.walk(target_path, topdown=False):
        for directory in dirs:
            dir_path = os.path.join(root, directory)
            if os.path.islink(dir_path):
                continue

            try:
                entries = os.listdir(dir_path)
            except OSError as e:
                print(f"Error deleting {dir_path}: {e}")
                continue

            remaining = [
                name for name in entries
                if os.path.join(dir_path, name) not in would_delete
            ]
            if any(name != '.DS_Store' for name in remaining):
                continue

            if preview:
                would_delete.add(dir_path)
                print(f"Preview: Delete empty directory: {dir_path}")
                continue

            try:
                # If .DS_Store file exists, delete it
                if '.DS_Store' in remaining:
                    ds_store_path = os.path.join(dir_path, '.DS_Store')
                    os.remove(ds_store_path)
                    print(f"Deleted: {ds_store_path}")
                os.rmdir(dir_path)
                print(f"Deleted empty directory: {dir_path}")
            except OSError as e:
                print(f"Error deleting {dir_path}: {e}")
|
|
|
|
|
def delete_duplicate_files(target_path: str, preview: bool = False) -> None:
    """
    Find and delete duplicate files in a directory and its subdirectories.

    Files are grouped by the hash returned by ``calculate_file_hash``. For
    every group with more than one file the user is asked (via
    ``ask_user_to_choose_file``) which copy to keep; the others are deleted,
    or only listed when ``preview`` is True. Files that cannot be read or
    deleted are reported and skipped instead of aborting the run.

    Args:
        target_path (str): The path to the directory.
        preview (bool, optional): When True, only print what would be
            deleted. Defaults to False.
    """
    # hash -> every path seen with that content
    paths_by_hash = defaultdict(list)
    for root, _dirs, filenames in os.walk(target_path):
        for filename in filenames:
            file_path = os.path.join(root, filename)
            try:
                file_hash = calculate_file_hash(file_path)
            except OSError as e:
                # e.g. a broken symlink or a file without read permission
                print(f"Error reading {file_path}: {e}")
                continue
            paths_by_hash[file_hash].append(file_path)

    for duplicates in paths_by_hash.values():
        if len(duplicates) < 2:
            # content is unique, nothing to prune
            continue

        # Pass a copy so the chooser cannot affect the list iterated below.
        file_to_keep = ask_user_to_choose_file(list(duplicates))
        for file_to_delete in duplicates:
            if file_to_delete == file_to_keep:
                continue
            if preview:
                print(f"Would delete: {file_to_delete}")
                continue
            try:
                os.remove(file_to_delete)
                print(f"Deleted: {file_to_delete}")
            except OSError as e:
                print(f"Error deleting {file_to_delete}: {e}")
|
|
|
|
|
def ask_user_to_choose_file(files: List[str]) -> str:
    """
    Ask the user to choose a file to keep.

    The paths are listed in alphabetical order, numbered from 1. Pressing
    enter without typing anything selects the default, which is the
    shortest path. The prompt repeats until a valid answer is given. The
    list passed in is not modified.

    Args:
        files (List[str]): A non-empty list of file paths.

    Returns:
        str: The file path chosen by the user.

    Raises:
        ValueError: If ``files`` is empty.
    """
    if not files:
        raise ValueError("files must contain at least one path")

    ordered = sorted(files)  # sorted copy: leave the caller's list untouched
    shortest_file = min(ordered, key=len)  # default answer
    default_choice = ordered.index(shortest_file) + 1  # 1-based number of the default

    while True:
        for i, file in enumerate(ordered, start=1):
            print(f"{i}: {file}")
        choice = input(
            f"Which file do you want to keep? (1-{len(ordered)}, default is {default_choice}): "
        ).strip()

        if not choice:  # the user pressed enter with no input
            return shortest_file

        # isdecimal() only accepts characters int() can parse; isdigit()
        # also accepts e.g. superscript digits, which make int() raise.
        if choice.isdecimal() and 1 <= int(choice) <= len(ordered):
            return ordered[int(choice) - 1]

        print(
            f"Invalid input. Please enter a number between 1 and {len(ordered)} or press enter to choose the default.")
|
|
|
|
|
# Early validation of the command line. It is guarded so that it only runs
# when the file is executed as a script; importing the module must not read
# sys.argv or terminate the interpreter.
# NOTE(review): main() repeats these checks through argparse, and this
# pre-check rejects option flags such as --help (not a directory) before
# argparse sees them -- consider removing it.
if __name__ == '__main__':
    # Check if the folder path is provided as a command line argument
    if len(sys.argv) < 2:
        print("Usage: python script.py folder_path")
        sys.exit(1)

    # Get the folder path from the command line argument
    folder_path = sys.argv[1]

    # Check if the specified directory exists
    if not os.path.isdir(folder_path):
        print("Error: The specified directory does not exist.")
        sys.exit(1)
|
|
|
|
|
def main():
    """
    Command-line entry point: preview the cleanup, then apply it on request.

    Parses the ``folder_path`` argument, exits with status 1 when it is not
    an existing directory, runs the duplicate and empty-directory steps in
    preview mode, and then asks for confirmation before running them for
    real. Answering 'no', or closing the input stream, leaves the folder
    untouched.

    NOTE: the rename step (rename_files_in_directory) is currently disabled
    in both the preview and the apply pass.
    NOTE(review): delete_duplicate_files is called once per pass, so the
    user is asked which duplicate to keep in the preview and again when
    applying.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(description='Rename files, prune duplicates, and delete empty directories.')
    parser.add_argument('folder_path', help='the path to the folder to process')
    args = parser.parse_args()

    # Get the folder path from the command line argument
    target_path = args.folder_path

    # Check if the specified directory exists
    if not os.path.isdir(target_path):
        print("Error: The specified directory does not exist.")
        sys.exit(1)

    # Preview pass: report what would change without touching the disk
    delete_duplicate_files(target_path, preview=True)
    delete_empty_directories(target_path, preview=True)

    while True:
        # Prompt for confirmation before making changes
        try:
            confirm_changes = input("Do you want to apply the changes? (yes/no): ")
        except EOFError:
            # Input stream closed: treat it as a refusal instead of crashing.
            print("\nNo changes will be applied.")
            return

        answer = confirm_changes.strip().lower()
        if answer == 'yes':
            # Apply pass: prune duplicates first, then remove the
            # directories that became empty.
            delete_duplicate_files(target_path, preview=False)
            delete_empty_directories(target_path, preview=False)
            return
        if answer == 'no':
            print("No changes will be applied.")
            return

        # Anything else: explain what is expected and ask again.
        print("Please answer 'yes' or 'no'.")
|
|
|
|
|
# Run the command-line workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()