Skip to content

Instantly share code, notes, and snippets.

@alisterburt
Last active October 15, 2024 18:41
Show Gist options
  • Save alisterburt/4060daf7864f591e4aafc6e8dece42c8 to your computer and use it in GitHub Desktop.
Save alisterburt/4060daf7864f591e4aafc6e8dece42c8 to your computer and use it in GitHub Desktop.
find relion directories
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
def find_relion_dirs(root_dir):
    """
    Find the top-most directories under root_dir that contain a 'Refine3D' or 'Class3D' subdirectory.

    Args:
        root_dir (str): The root directory to start the search.

    Returns:
        List of directory paths (as produced by os.walk) that directly contain
        a 'Refine3D' or 'Class3D' subdirectory and are not nested inside
        another such directory.
    """
    relion_dirs = []
    # os.walk is top-down by default, so editing `subdirs` in place controls
    # which subdirectories are visited next.
    for current_dir, subdirs, _ in os.walk(root_dir):
        if 'Refine3D' in subdirs or 'Class3D' in subdirs:
            relion_dirs.append(current_dir)
            # Anything below a match would be discarded by the pruning step
            # anyway (subdirs are trash etc), so skip walking the whole
            # project tree underneath it.
            subdirs[:] = []
    # only keep the highest level dirs; this also fixes the output order
    return prune_directories(relion_dirs)
def prune_directories(dir_list):
    """
    Drop every directory that lies inside another directory of the list.

    Args:
        dir_list (list of str): '/'-separated directory paths.

    Returns:
        New list holding only the highest-level directories, ordered by
        number of '/' separators (ties keep their input order). Exact
        duplicates are returned once. The input list is not modified.
    """
    # Sort a copy by path depth (number of '/') so that every potential
    # parent is examined before its children; sorted() leaves the caller's
    # list untouched.
    by_depth = sorted(dir_list, key=lambda path: path.count('/'))
    pruned_list = []
    seen = set()
    for directory in by_depth:
        # An exact duplicate is not a "subdirectory", so the prefix test
        # below would let it through a second time.
        if directory in seen:
            continue
        # The trailing '/' ensures that 'a/bc' is not treated as a child of 'a/b'
        if any(directory.startswith(kept + '/') for kept in pruned_list):
            continue
        seen.add(directory)
        pruned_list.append(directory)
    return pruned_list
def get_directory_size(directory):
    """
    Return the combined size in bytes of all files found below directory.

    The tree is traversed with os.walk; entries in the file lists that are
    symbolic links are left out of the total.
    """
    file_paths = (
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(directory)
        for name in names
    )
    # Symbolic links are skipped so link targets are not counted
    return sum(
        os.path.getsize(path)
        for path in file_paths
        if not os.path.islink(path)
    )
# Search every immediate subdirectory of the current working directory.
root_directory = Path()
directories = [entry for entry in root_directory.glob('*') if entry.is_dir()]
results = []

# Run one search per subdirectory on a pool of worker threads
with ThreadPoolExecutor(max_workers=8) as executor:
    relion_directories = []
    # Remember which subdirectory each submitted search belongs to
    future_to_directory = {}
    for directory in directories:
        future_to_directory[executor.submit(find_relion_dirs, directory)] = directory
    total_size_bytes = 0

    # Measure and report sizes in the order the searches finish
    for future in as_completed(future_to_directory):
        directory = future_to_directory[future]
        new_directories = future.result()
        print(f'found {len(new_directories)} RELION directories for {directory}')
        relion_dirs_size = sum(map(get_directory_size, new_directories))
        total_size_bytes += relion_dirs_size
        print(f'total size of RELION dirs for {directory} is {relion_dirs_size / (1024 ** 3):.2f}GiB')
        print(f'total up to now: {total_size_bytes / (1024 ** 3):.2f}GiB')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment