Last active
October 15, 2024 18:41
-
-
Save alisterburt/4060daf7864f591e4aafc6e8dece42c8 to your computer and use it in GitHub Desktop.
find relion directories
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from concurrent.futures import ThreadPoolExecutor, as_completed | |
from pathlib import Path | |
def find_relion_dirs(root_dir):
    """
    Find the top-most directories below ``root_dir`` that hold RELION output.

    A directory matches when it directly contains a 'Refine3D' or a 'Class3D'
    subdirectory. Once a directory matches, nothing beneath it is searched, so
    nested copies (trash folders etc.) are never reported.

    Args:
        root_dir (str or os.PathLike): The root directory to start the search.

    Returns:
        List of directory paths (as yielded by ``os.walk``) that contain a
        'Refine3D' or 'Class3D' subdirectory, shallowest first. No returned
        path lies inside another returned path.
    """
    relion_dirs = []
    # Walk top-down so a parent is always seen before its children
    for current_dir, subdirs, _ in os.walk(root_dir):
        if 'Refine3D' in subdirs or 'Class3D' in subdirs:
            relion_dirs.append(current_dir)
            # only keep the highest level dirs, subdirs are trash etc.
            # Emptying the list in place stops os.walk from descending any
            # further, which avoids scanning the whole project tree.
            subdirs.clear()
    # Shallowest first; the sort is stable so equal depths keep walk order
    relion_dirs.sort(key=lambda path: path.count(os.sep))
    return relion_dirs
def prune_directories(dir_list):
    """
    Reduce a list of directory paths to the highest-level ones.

    A path is dropped when it starts with ``kept + '/'`` for some path that
    has already been kept, i.e. when it lies below a kept directory.

    Args:
        dir_list (list of str): '/'-separated directory paths. The list is
            left untouched.

    Returns:
        A new list with the surviving paths, ordered by the number of '/'
        they contain (paths with the same count keep their input order).
    """
    # Order by depth (number of separators, not string length) so that a
    # parent is always examined before anything nested inside it. sorted()
    # is used instead of list.sort() so the caller's list is not reordered.
    by_depth = sorted(dir_list, key=lambda path: path.count('/'))
    pruned_list = []
    for directory in by_depth:
        # Skip the directory if it sits below one we have already kept
        if any(directory.startswith(kept + '/') for kept in pruned_list):
            continue
        pruned_list.append(directory)
    return pruned_list
def get_directory_size(directory):
    """
    Add up the sizes of all files found below ``directory``.

    Args:
        directory (str or os.PathLike): Directory to walk with ``os.walk``.

    Returns:
        Total size in bytes (``os.path.getsize``) of every file entry that is
        not a symbolic link; 0 when no such file is found.
    """
    def non_link_files():
        # Yield the full path of each file entry, leaving out symbolic links
        for parent, _, names in os.walk(directory):
            for name in names:
                candidate = os.path.join(parent, name)
                if os.path.islink(candidate):
                    continue
                yield candidate

    return sum(os.path.getsize(path) for path in non_link_files())
# Script body: for every immediate subdirectory of the current working
# directory, find the RELION directories inside it and report their size.
root_directory = Path()
directories = [
    directory
    for directory
    in root_directory.glob('*')
    if directory.is_dir()
]

# Divisor used to report byte counts in GiB
BYTES_PER_GIB = 1024 ** 3

# Search the top-level directories in parallel with a pool of worker threads
with ThreadPoolExecutor(max_workers=8) as executor:
    # Submit one search per top-level directory, remembering which future
    # belongs to which directory
    future_to_directory = {
        executor.submit(find_relion_dirs, directory): directory
        for directory in directories
    }
    total_size_bytes = 0
    # Measure directory sizes as the searches complete (sizes are computed
    # here, one after another, not in the worker threads)
    for future in as_completed(future_to_directory):
        directory = future_to_directory[future]
        new_directories = future.result()
        print(f'found {len(new_directories)} RELION directories for {directory}')
        relion_dirs_size = sum(
            get_directory_size(relion_dir) for relion_dir in new_directories
        )
        total_size_bytes += relion_dirs_size
        print(f'total size of RELION dirs for {directory} is {relion_dirs_size / BYTES_PER_GIB:.2f}GiB')
        print(f'total up to now: {total_size_bytes / BYTES_PER_GIB:.2f}GiB')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment