Last active
January 20, 2025 18:03
-
-
Save shahpnmlab/7bcbbf4078c4625db7d1073dd1e615fd to your computer and use it in GitHub Desktop.
summarise m refinement progress
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import typer | |
import xml.etree.ElementTree as ET | |
from pathlib import Path | |
from typing import Dict, List, Optional, Tuple | |
from dataclasses import dataclass | |
from rich.console import Console | |
from rich.table import Table | |
from collections import defaultdict | |
# Typer application object; commands are registered on it via ``@app.command()``.
app = typer.Typer()

# Shared rich console used for every status, warning and error message.
console = Console()
@dataclass
class SpeciesData:
    """Values extracted from a single ``.species`` file.

    Field order is part of the interface: instances may be built
    positionally, so fields must not be reordered.
    """

    # Label of the species directory this record belongs to.
    species_dir: str
    # Location of the ``.species`` file the values were read from.
    file_path: Path
    # Value of the file's ``GUID`` parameter.
    guid: str
    # Value of the file's ``Version`` parameter; used as the lookup key
    # when versions are linked into a chain.
    version: str
    # ``GlobalResolution`` parameter converted to float.
    global_resolution: float
    # ``GlobalBFactor`` parameter converted to float.
    global_bfactor: float
    # ``PreviousVersion`` parameter, or None when the file has none.
    previous_version: Optional[str]
    # 1-based position in the version chain, counted from the oldest entry.
    iteration: int
def find_species_files(directory: Path) -> Dict[str, List[Path]]:
    """Recursively find all .species files grouped by species directory.

    Looks at ``directory / 'species'`` and, for each immediate
    sub-directory, collects every ``*.species`` file found anywhere below
    it.

    Args:
        directory: Directory that is expected to contain a ``species``
            folder.

    Returns:
        Mapping of sub-directory name to the sorted list of ``.species``
        files found beneath it. Sub-directories without any match are
        omitted. The mapping is empty when the ``species`` folder is
        missing or cannot be read.
    """
    species_files = defaultdict(list)
    species_dir = directory / 'species'

    # is_dir() (rather than exists()) also rejects a plain file named "species".
    if not species_dir.is_dir():
        console.print(f"[yellow]Warning: Species directory not found at {species_dir}[/yellow]")
        return species_files

    try:
        # iterdir()/rglob() yield entries in filesystem order, which differs
        # between machines; sorting keeps downstream results reproducible.
        for species_path in sorted(species_dir.iterdir()):
            if not species_path.is_dir():
                continue
            matches = sorted(species_path.rglob("*.species"))
            if matches:
                species_files[species_path.name].extend(matches)
    except OSError as e:
        # Best effort: report the problem and return whatever was collected.
        console.print(f"[yellow]Warning: Error searching directory {directory}: {e}[/yellow]")

    return species_files
def parse_species_file(file_path: Path) -> Dict:
    """Read a species XML file into a ``{Name: Value}`` dictionary.

    Only ``Param`` elements that are direct children of the document root
    are considered. Each one contributes its ``Name`` attribute as key and
    its ``Value`` attribute as value (``None`` when the attribute is
    absent). When a name occurs more than once the last occurrence wins.
    """
    document_root = ET.parse(file_path).getroot()
    return {
        node.get('Name'): node.get('Value')
        for node in document_root.findall('Param')
    }
def build_version_chain(version_map: Dict[str, "SpeciesData"]) -> List[str]:
    """Build a chain of versions ordered from the latest back to the first.

    A "latest" version is one that no other entry names as its
    ``previous_version``. From each of them the ``previous_version`` links
    are followed backwards, and the longest resulting chain is returned
    (the first one found when several share the maximum length).

    A walk stops when an entry has no previous version, when the previous
    version is not present in ``version_map`` (for example because its
    file is missing or failed to parse), or when a version would be
    visited twice (a reference cycle).

    Args:
        version_map: Mapping of version identifier to an object exposing a
            ``previous_version`` attribute.

    Returns:
        Version identifiers from latest to oldest; empty when
        ``version_map`` is empty or every version is referenced by another.
    """
    # Versions that are never referenced as a predecessor are chain heads.
    referenced = {
        entry.previous_version
        for entry in version_map.values()
        if entry.previous_version
    }
    heads = [version for version in version_map if version not in referenced]

    longest: List[str] = []
    for head in heads:
        chain: List[str] = []
        visited = set()
        current = head
        # Guard against dangling references and cycles so a single broken
        # link truncates the chain instead of raising or looping forever.
        while current and current in version_map and current not in visited:
            visited.add(current)
            chain.append(current)
            current = version_map[current].previous_version
        # Strict comparison keeps the first chain on ties.
        if len(chain) > len(longest):
            longest = chain
    return longest
def build_species_chain(species_dir: str, species_files: List[Path]) -> Tuple[List[SpeciesData], List[str]]:
    """Parse the given species files and arrange them as one version chain.

    Each file is parsed into a ``SpeciesData`` keyed by its ``Version``
    parameter; a file that cannot be processed is reported on the console
    and skipped. The version order comes from ``build_version_chain``.

    Returns:
        A pair ``(records, versions)``: the ``SpeciesData`` objects in
        chain order and the matching version identifiers. Each record's
        ``iteration`` counts down from the chain length (first record) to
        1 (last record).
    """
    by_version = {}

    # Pass 1: turn every readable file into a SpeciesData record.
    for path in species_files:
        try:
            fields = parse_species_file(path)
            key = fields['Version']
            record = SpeciesData(
                species_dir=species_dir,
                file_path=path,
                guid=fields['GUID'],
                version=key,
                global_resolution=float(fields['GlobalResolution']),
                global_bfactor=float(fields['GlobalBFactor']),
                previous_version=fields.get('PreviousVersion'),
                iteration=1,  # placeholder, overwritten once the order is known
            )
            by_version[key] = record
        except Exception as exc:
            console.print(f"[red]Error processing file {path}: {str(exc)}[/red]")

    # Pass 2: order the versions and pick the records in that order.
    ordered_versions = build_version_chain(by_version)
    ordered_records = [by_version[name] for name in ordered_versions]

    # Pass 3: number the records, highest number first.
    total = len(ordered_records)
    for number, record in zip(range(total, 0, -1), ordered_records):
        record.iteration = number

    return ordered_records, ordered_versions
def create_summary_table(species_data_map: Dict[str, Tuple[List[SpeciesData], List[str]]]) -> Table:
    """Render one summary row per species directory as a rich table.

    Rows are emitted in sorted order of the species directory name, and
    entries whose record list is empty are skipped. The "current" and
    "latest" values are taken from the first record of each list.
    """
    summary = Table(title="Species Analysis Summary")

    # (header, keyword options) for every column, in display order.
    column_specs = [
        ("Species Directory", {"style": "cyan"}),
        ("Version Count", {}),
        ("Best Resolution", {"justify": "right"}),
        ("Current Resolution", {"justify": "right"}),
        ("Worst Resolution", {"justify": "right"}),
        ("Latest B-Factor", {"justify": "right"}),
        ("Versions (Latest → First)", {"style": "dim"}),
    ]
    for header, options in column_specs:
        summary.add_column(header, **options)

    for name in sorted(species_data_map):
        records, versions = species_data_map[name]
        if not records:
            continue

        # The first record supplies the current resolution and B-factor.
        newest = records[0]
        all_resolutions = [record.global_resolution for record in records]

        summary.add_row(
            name,
            str(len(records)),
            f"{min(all_resolutions):.6f}",   # best = numerically smallest
            f"{newest.global_resolution:.6f}",
            f"{max(all_resolutions):.6f}",   # worst = numerically largest
            f"{newest.global_bfactor:.0f}",
            " → ".join(versions),
        )

    return summary
def save_detailed_table(species_data_map: Dict[str, Tuple[List["SpeciesData"], List[str]]], output_path: Path):
    """Save detailed information grouped by species directory.

    Writes a tab-separated text file with a header line followed by one
    line per record, grouped by species directory in sorted name order.

    Args:
        species_data_map: Mapping of species directory name to a
            ``(records, versions)`` pair; only the records are written.
        output_path: Destination file; it is created or overwritten.
    """
    headers = ["Species_Directory", "Iteration", "Version", "Resolution", "B_Factor", "Previous_Version", "File_Path"]
    lines = ['\t'.join(headers) + '\n']

    for species_dir in sorted(species_data_map):
        data_list, _ = species_data_map[species_dir]
        for data in data_list:
            row = [
                species_dir,
                str(data.iteration),
                data.version,
                f"{data.global_resolution:.6f}",
                f"{data.global_bfactor:.0f}",
                # A missing/empty predecessor is written as the text "None".
                data.previous_version or "None",
                str(data.file_path),
            ]
            lines.append('\t'.join(row) + '\n')

    # Explicit UTF-8: the platform default encoding may be unable to
    # represent non-ASCII characters in directory names or file paths.
    with output_path.open('w', encoding='utf-8') as f:
        f.writelines(lines)
@app.command()
def analyze(
    search_path: Path = typer.Argument(..., help="Directory containing the species folder"),
    output: Path = typer.Option("species_analysis.txt", help="Output file path for the detailed table")
):
    """
    Analyze species files organized by species directory.
    Creates a summary table and saves detailed information to a file.
    """
    # NOTE: the docstring above is kept verbatim because Typer shows a
    # command's docstring as its --help text.
    #
    # Flow: locate the .species files, build one version chain per species
    # directory, print the summary table, then write the detailed table to
    # `output`. Every failure ends the command with exit code 1.
    if not search_path.exists():
        console.print(f"[red]Error: Directory {search_path} does not exist[/red]")
        raise typer.Exit(1)

    try:
        species_files_map = find_species_files(search_path)
        if not species_files_map:
            console.print(f"[yellow]No species directories found in {search_path}/species[/yellow]")
            raise typer.Exit(1)

        species_data_map = {}
        console.print(f"\nAnalyzing species in: {search_path}/species")
        for species_dir, files in species_files_map.items():
            console.print(f"Processing {species_dir}...")
            chain_data, versions = build_species_chain(species_dir, files)
            # Species for which no chain could be built are left out.
            if chain_data:
                species_data_map[species_dir] = (chain_data, versions)

        summary_table = create_summary_table(species_data_map)
        console.print("\nSpecies Analysis Summary:")
        console.print(summary_table)

        save_detailed_table(species_data_map, output)
        console.print(f"\n[green]Detailed analysis saved to {output}[/green]")
    except typer.Exit:
        # An intentional exit raised above is an Exception subclass too;
        # let it through so it is not reported a second time as an error.
        raise
    except Exception as e:
        console.print(f"[red]Error: {str(e)}[/red]")
        raise typer.Exit(1)
# Run the Typer application only when the file is executed as a script.
if __name__ == "__main__":
    app()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment