Skip to content

Instantly share code, notes, and snippets.

@shahpnmlab
Last active January 20, 2025 18:03
Show Gist options
  • Save shahpnmlab/7bcbbf4078c4625db7d1073dd1e615fd to your computer and use it in GitHub Desktop.
Save shahpnmlab/7bcbbf4078c4625db7d1073dd1e615fd to your computer and use it in GitHub Desktop.
Summarise M refinement progress
import typer
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from dataclasses import dataclass
from rich.console import Console
from rich.table import Table
from collections import defaultdict
# Typer application object; commands are registered on it via @app.command().
app = typer.Typer()
# Shared rich console used for all status, warning, and error output.
console = Console()
@dataclass
class SpeciesData:
    """Values extracted from a single ``.species`` file plus chain bookkeeping."""

    # Name of the species directory the file was found under.
    species_dir: str
    # Location of the ``.species`` file these values were read from.
    file_path: Path
    # Value of the file's ``GUID`` parameter.
    guid: str
    # Value of the file's ``Version`` parameter; used as the key when linking versions.
    version: str
    # ``GlobalResolution`` parameter converted to float.
    global_resolution: float
    # ``GlobalBFactor`` parameter converted to float.
    global_bfactor: float
    # ``PreviousVersion`` parameter, or None when the file does not define it.
    previous_version: Optional[str]
    # 1-based position in the version chain, counted from the oldest version.
    iteration: int
def find_species_files(directory: Path) -> Dict[str, List[Path]]:
    """Collect every ``*.species`` file below ``directory/species``.

    Each immediate subdirectory of ``directory/species`` is searched
    recursively, and the result maps the subdirectory's name to the files
    found beneath it. Subdirectories without a match get no entry.

    A missing ``species`` folder, or any error raised while searching, is
    reported as a console warning; whatever was collected up to that point
    is returned.
    """
    found = defaultdict(list)
    root = directory / 'species'

    if root.exists():
        try:
            subdirs = (entry for entry in root.iterdir() if entry.is_dir())
            for subdir in subdirs:
                # Append one by one: touching found[subdir.name] only when a
                # match exists keeps empty subdirectories out of the result.
                for match in subdir.rglob("*.species"):
                    found[subdir.name].append(match)
        except Exception as e:
            console.print(f"[yellow]Warning: Error searching directory {directory}: {str(e)}[/yellow]")
    else:
        console.print(f"[yellow]Warning: Species directory not found at {root}[/yellow]")

    return found
def parse_species_file(file_path: Path) -> Dict:
    """Read a species XML file into a parameter-name -> value mapping.

    Only ``Param`` elements that are direct children of the document root
    are considered. Each contributes its ``Name`` attribute as the key and
    its ``Value`` attribute (a string, or None when absent) as the value;
    when a name repeats, the last occurrence wins.
    """
    document_root = ET.parse(file_path).getroot()
    return {
        node.get('Name'): node.get('Value')
        for node in document_root.findall('Param')
    }
def build_version_chain(version_map: Dict[str, "SpeciesData"]) -> List[str]:
    """Return version identifiers ordered from the latest back to the first.

    A version that no other entry names as its ``previous_version`` is
    treated as a chain head (a "latest" version). From each head the
    ``previous_version`` links are followed backwards; when several heads
    exist, the longest resulting chain is returned (the first one on ties).

    A chain ends when an entry has no (or an empty) ``previous_version``,
    when the link points to a version that is not in ``version_map`` (for
    example because its file was missing or failed to parse), or when the
    link would revisit a version already in the chain.

    Args:
        version_map: Mapping of version identifier to an object exposing a
            ``previous_version`` attribute.

    Returns:
        The version identifiers of the longest chain, latest first, or an
        empty list when there is no chain head (empty input, or every
        version is referenced by another one).
    """
    referenced = {
        data.previous_version
        for data in version_map.values()
        if data.previous_version
    }
    heads = [version for version in version_map if version not in referenced]

    chains = []
    for head in heads:
        chain = []
        visited = set()
        current = head
        # The membership test stops at dangling references instead of raising
        # KeyError; the visited set stops cyclic links from looping forever.
        while current and current in version_map and current not in visited:
            visited.add(current)
            chain.append(current)
            current = version_map[current].previous_version
        chains.append(chain)

    # Longest chain is taken as the main development line.
    return max(chains, key=len) if chains else []
def build_species_chain(species_dir: str, species_files: List[Path]) -> Tuple[List[SpeciesData], List[str]]:
    """Load all files of one species and order them along the version chain.

    Every file is parsed into a ``SpeciesData`` record keyed by its
    ``Version`` parameter (a later file with the same version replaces an
    earlier one). Files that cannot be parsed or converted are reported on
    the console and skipped. The records are then ordered with
    ``build_version_chain`` and numbered so that the last element of the
    chain gets iteration 1 and the first element gets the highest number.

    Returns:
        A pair ``(records, versions)``: the ordered ``SpeciesData`` records
        and the matching list of version identifiers.
    """
    by_version = {}
    for path in species_files:
        try:
            fields = parse_species_file(path)
            # Look up 'Version' first so a file missing several parameters
            # reports the same missing key as before.
            version_id = fields['Version']
            record = SpeciesData(
                species_dir=species_dir,
                file_path=path,
                guid=fields['GUID'],
                version=version_id,
                global_resolution=float(fields['GlobalResolution']),
                global_bfactor=float(fields['GlobalBFactor']),
                previous_version=fields.get('PreviousVersion'),
                # Placeholder; the real value is assigned once the chain is known.
                iteration=1,
            )
        except Exception as e:
            console.print(f"[red]Error processing file {path}: {str(e)}[/red]")
        else:
            by_version[version_id] = record

    ordered_versions = build_version_chain(by_version)
    ordered_records = [by_version[version_id] for version_id in ordered_versions]

    # Count down from the chain length so the chain's final element is iteration 1.
    countdown = range(len(ordered_records), 0, -1)
    for number, record in zip(countdown, ordered_records):
        record.iteration = number

    return ordered_records, ordered_versions
def create_summary_table(species_data_map: Dict[str, Tuple[List[SpeciesData], List[str]]]) -> Table:
    """Build a rich table with one summary row per species directory.

    Args:
        species_data_map: Maps a species directory name to a pair
            ``(data_list, versions)``. The first element of ``data_list`` is
            taken as the current (latest) version; ``versions`` is shown
            verbatim, joined with arrows.

    Returns:
        A ``Table`` whose rows are sorted by species directory name. Entries
        with an empty ``data_list`` are skipped. "Best" and "Worst"
        resolution are the minimum and maximum ``global_resolution`` over
        the entry's records.
    """
    table = Table(title="Species Analysis Summary")
    table.add_column("Species Directory", style="cyan")
    table.add_column("Version Count")
    table.add_column("Best Resolution", justify="right")
    table.add_column("Current Resolution", justify="right")
    table.add_column("Worst Resolution", justify="right")
    table.add_column("Latest B-Factor", justify="right")
    table.add_column("Versions (Latest → First)", style="dim")

    for species_dir, (data_list, versions) in sorted(species_data_map.items()):
        if not data_list:
            continue

        resolutions = [d.global_resolution for d in data_list]
        # data_list is non-empty here, so index 0 is always valid; it holds
        # the latest version because the list is ordered latest-first.
        latest = data_list[0]

        table.add_row(
            species_dir,
            str(len(data_list)),
            f"{min(resolutions):.6f}",
            f"{latest.global_resolution:.6f}",
            f"{max(resolutions):.6f}",
            f"{latest.global_bfactor:.0f}",
            # Same latest-first order as the column header announces.
            " → ".join(versions),
        )

    return table
def save_detailed_table(species_data_map: Dict[str, Tuple[List["SpeciesData"], List[str]]], output_path: Path) -> None:
    """Write one tab-separated row per species version to ``output_path``.

    The file starts with a header line, followed by the records of every
    species directory (sorted by directory name, records in the order given).
    Resolution is written with six decimals, the B-factor rounded to an
    integer, and a missing previous version as the literal ``None``.

    The file is always written as UTF-8 so that non-ASCII directory names or
    paths neither fail nor depend on the platform's default encoding.

    Args:
        species_data_map: Maps a species directory name to a pair
            ``(data_list, versions)``; only ``data_list`` is used.
        output_path: Destination file; overwritten if it exists.
    """
    headers = ["Species_Directory", "Iteration", "Version", "Resolution", "B_Factor", "Previous_Version", "File_Path"]

    # Format everything before opening the file so that a formatting error
    # cannot leave a truncated output file behind.
    lines = ['\t'.join(headers) + '\n']
    for species_dir, (data_list, _) in sorted(species_data_map.items()):
        for data in data_list:
            row = [
                species_dir,
                str(data.iteration),
                data.version,
                f"{data.global_resolution:.6f}",
                f"{data.global_bfactor:.0f}",
                data.previous_version or "None",
                str(data.file_path),
            ]
            lines.append('\t'.join(row) + '\n')

    with output_path.open('w', encoding='utf-8') as f:
        f.writelines(lines)
@app.command()
def analyze(
    search_path: Path = typer.Argument(..., help="Directory containing the species folder"),
    output: Path = typer.Option("species_analysis.txt", help="Output file path for the detailed table")
):
    """
    Analyze species files organized by species directory.
    Creates a summary table and saves detailed information to a file.
    """
    # NOTE: the docstring above doubles as the CLI help text, so developer
    # notes live in comments instead.
    #
    # Flow: locate the .species files, build one version chain per species
    # directory, print the summary table, then write the detailed TSV.
    # Every failure path ends the command with exit code 1.
    if not search_path.exists():
        console.print(f"[red]Error: Directory {search_path} does not exist[/red]")
        raise typer.Exit(1)

    try:
        species_files_map = find_species_files(search_path)
        if not species_files_map:
            console.print(f"[yellow]No species directories found in {search_path}/species[/yellow]")
            raise typer.Exit(1)

        species_data_map = {}
        console.print(f"\nAnalyzing species in: {search_path}/species")
        for species_dir, files in species_files_map.items():
            console.print(f"Processing {species_dir}...")
            chain_data, versions = build_species_chain(species_dir, files)
            # Species for which no chain could be built are left out of the report.
            if chain_data:
                species_data_map[species_dir] = (chain_data, versions)

        summary_table = create_summary_table(species_data_map)
        console.print("\nSpecies Analysis Summary:")
        console.print(summary_table)

        save_detailed_table(species_data_map, output)
        console.print(f"\n[green]Detailed analysis saved to {output}[/green]")
    except typer.Exit:
        # typer.Exit is an ordinary Exception subclass (via RuntimeError);
        # without this clause the deliberate exit above would fall into the
        # generic handler and print a spurious red error line.
        raise
    except Exception as e:
        console.print(f"[red]Error: {str(e)}[/red]")
        raise typer.Exit(1) from e
# Start the Typer CLI only when the file is executed as a script, not on import.
if __name__ == "__main__":
    app()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment