shahpnmlab · January 20, 2025 18:03
diff --git a/m_refine_summary.py b/m_refine_summary.py
 import typer
 import xml.etree.ElementTree as ET
 from pathlib import Path
 from typing import Dict, List, Optional, Tuple
 from dataclasses import dataclass
 from rich.console import Console
 from rich.table import Table
 from collections import defaultdict

 # Typer application object; the CLI command below is registered on it via @app.command().
 app = typer.Typer()
 # Shared rich Console used for every status, warning and error message in this file.
 console = Console()

 @dataclass
 class SpeciesData:
    """Parsed metadata for one version of a species, read from a single .species file."""
    # Name of the species directory the file was found under.
    species_dir: str
    # Location of the .species file these values were read from.
    file_path: Path
    # Value of the file's 'GUID' parameter.
    guid: str
    # Value of the 'Version' parameter; used as the key when linking versions.
    version: str
    # 'GlobalResolution' parameter converted to float.
    global_resolution: float
    # 'GlobalBFactor' parameter converted to float.
    global_bfactor: float
    # 'PreviousVersion' parameter, or None when the file does not define it.
    previous_version: Optional[str]
    # Position in the version chain: 1 is the oldest entry, the highest number the latest.
    iteration: int

 def find_species_files(directory: Path) -> Dict[str, List[Path]]:
    """Recursively find all .species files grouped by species directory.

    Looks in ``directory / 'species'``; every immediate sub-directory of that
    folder is searched recursively for ``*.species`` files.

    Args:
        directory: Directory expected to contain a ``species`` folder.

    Returns:
        A plain dict mapping each sub-directory name to the sorted list of
        ``.species`` files found beneath it. Sub-directories without any
        match are omitted. The dict is empty when the ``species`` folder is
        missing and may be partial when the search fails part-way; both
        cases print a warning instead of raising.
    """
    species_root = directory / 'species'
    grouped: Dict[str, List[Path]] = {}

    if not species_root.exists():
        console.print(f"[yellow]Warning: Species directory not found at {species_root}[/yellow]")
        return grouped

    try:
        # iterdir()/rglob() yield entries in arbitrary filesystem order;
        # sorting keeps the result (and everything derived from it) stable
        # between runs and machines.
        for entry in sorted(species_root.iterdir()):
            if not entry.is_dir():
                continue
            matches = sorted(entry.rglob("*.species"))
            if matches:
                grouped[entry.name] = matches
    except Exception as e:
        # Best effort: report the problem and return whatever was collected.
        console.print(f"[yellow]Warning: Error searching directory {directory}: {str(e)}[/yellow]")

    return grouped

 def parse_species_file(file_path: Path) -> Dict:
    """Read a species XML file and return its parameters as a dict.

    Only ``Param`` elements that are direct children of the document root are
    considered. Each contributes one entry mapping its ``Name`` attribute to
    its ``Value`` attribute; when a name occurs more than once the last
    occurrence wins. Parsing errors from ElementTree propagate to the caller.
    """
    document_root = ET.parse(file_path).getroot()
    return {
        node.get('Name'): node.get('Value')
        for node in document_root.findall('Param')
    }

 def build_version_chain(version_map: Dict[str, SpeciesData]) -> List[str]:
    """Build a chronological chain of versions starting from the latest.

    A "latest" version is one that no other entry names as its previous
    version. From each of them the ``previous_version`` links are followed
    backwards, and the longest resulting chain is returned (the first one
    found wins a tie).

    Args:
        version_map: Mapping of version identifier to its record; only the
            ``previous_version`` attribute of each record is read.

    Returns:
        Version identifiers ordered latest first, oldest last. Empty when
        the map is empty or no starting point exists.

    A walk stops, instead of raising ``KeyError`` or looping forever, when
    the link points to a version that is not in ``version_map`` or to one
    that was already visited.
    """
    # Versions nobody points back to are the heads of the candidate chains.
    referenced = {
        record.previous_version
        for record in version_map.values()
        if record.previous_version
    }
    heads = [version for version in version_map if version not in referenced]

    longest: List[str] = []
    for head in heads:
        chain: List[str] = []
        visited = set()
        current = head
        # Guard against dangling references (file missing / failed to parse)
        # and against chains that run into a cycle.
        while current and current in version_map and current not in visited:
            visited.add(current)
            chain.append(current)
            current = version_map[current].previous_version
        # Strict comparison keeps the first chain on ties.
        if len(chain) > len(longest):
            longest = chain

    return longest

 def build_species_chain(species_dir: str, species_files: List[Path]) -> Tuple[List[SpeciesData], List[str]]:
    """Load every file of one species and order the versions latest-first.

    Each file is parsed into a SpeciesData record keyed by its 'Version'
    parameter (a later file with the same version replaces an earlier one).
    Files that cannot be parsed or lack a required parameter are reported on
    the console and skipped. The records are then ordered with
    build_version_chain and numbered so that the oldest entry of the chain
    has iteration 1.

    Returns:
        A tuple ``(records, versions)``: the SpeciesData objects of the
        selected chain and the matching version identifiers, both ordered
        latest first. Records outside that chain are not returned.
    """
    by_version = {}

    for path in species_files:
        try:
            fields = parse_species_file(path)
            version_key = fields['Version']
            by_version[version_key] = SpeciesData(
                species_dir=species_dir,
                file_path=path,
                guid=fields['GUID'],
                version=version_key,
                global_resolution=float(fields['GlobalResolution']),
                global_bfactor=float(fields['GlobalBFactor']),
                previous_version=fields.get('PreviousVersion'),
                # Placeholder; the real number is assigned once the chain is known.
                iteration=1
            )
        except Exception as e:
            console.print(f"[red]Error processing file {path}: {str(e)}[/red]")

    ordered_versions = build_version_chain(by_version)
    ordered_records = [by_version[key] for key in ordered_versions]

    # Count down from the chain length so the last (oldest) record gets 1.
    countdown = range(len(ordered_records), 0, -1)
    for number, record in zip(countdown, ordered_records):
        record.iteration = number

    return ordered_records, ordered_versions

 def create_summary_table(species_data_map: Dict[str, Tuple[List[SpeciesData], List[str]]]) -> Table:
    """Create a rich table summarizing species data grouped by species directory.

    One row is produced per species directory, in sorted directory-name
    order; entries whose record list is empty are skipped. The first record
    of each list is treated as the latest version, so it supplies the
    "Current Resolution" and "Latest B-Factor" cells. Best and worst
    resolution are the minimum and maximum over all records of the entry.
    """
    summary = Table(title="Species Analysis Summary")

    column_layout = (
        ("Species Directory", {"style": "cyan"}),
        ("Version Count", {}),
        ("Best Resolution", {"justify": "right"}),
        ("Current Resolution", {"justify": "right"}),
        ("Worst Resolution", {"justify": "right"}),
        ("Latest B-Factor", {"justify": "right"}),
        ("Versions (Latest → First)", {"style": "dim"}),
    )
    for heading, options in column_layout:
        summary.add_column(heading, **options)

    # Dict keys are unique, so sorting the keys gives the same order as
    # sorting the items.
    for species_dir in sorted(species_data_map):
        records, version_order = species_data_map[species_dir]
        if not records:
            continue

        newest = records[0]
        all_resolutions = [record.global_resolution for record in records]

        summary.add_row(
            species_dir,
            str(len(records)),
            f"{min(all_resolutions):.6f}",
            f"{newest.global_resolution:.6f}",
            f"{max(all_resolutions):.6f}",
            f"{newest.global_bfactor:.0f}",
            " → ".join(version_order),
        )

    return summary

 def save_detailed_table(species_data_map: Dict[str, Tuple[List[SpeciesData], List[str]]], output_path: Path):
    """Save detailed information grouped by species directory.

    Writes a tab-separated text file with one header line followed by one
    line per record, grouped by species directory in sorted name order and
    keeping each entry's record order.

    Args:
        species_data_map: Mapping of species directory name to a
            ``(records, versions)`` tuple; only the records are used.
        output_path: File to create or overwrite.

    The file is always written as UTF-8 so that non-ASCII characters in
    paths or names do not depend on the platform's locale encoding. All
    lines are formatted before the file is opened, so a formatting error
    cannot leave a truncated file behind.
    """
    headers = ["Species_Directory", "Iteration", "Version", "Resolution", "B_Factor", "Previous_Version", "File_Path"]
    lines = ['\t'.join(headers) + '\n']

    for species_dir in sorted(species_data_map):
        data_list, _ = species_data_map[species_dir]
        for data in data_list:
            row = [
                species_dir,
                str(data.iteration),
                data.version,
                f"{data.global_resolution:.6f}",
                f"{data.global_bfactor:.0f}",
                # A missing/empty previous version is written as the text "None".
                data.previous_version or "None",
                str(data.file_path),
            ]
            lines.append('\t'.join(row) + '\n')

    with output_path.open('w', encoding='utf-8') as f:
        f.writelines(lines)

 @app.command()
 def analyze(
    search_path: Path = typer.Argument(..., help="Directory containing the species folder"),
    output: Path = typer.Option("species_analysis.txt", help="Output file path for the detailed table")
 ):
    """
    Analyze species files organized by species directory.
    Creates a summary table and saves detailed information to a file.
    """
    # NOTE: the docstring above is shown by Typer as the command's help text,
    # so implementation notes live in comments instead.
    #
    # Flow: locate the .species files under <search_path>/species, build the
    # version chain of every species directory, print the summary table and
    # write the detailed table to `output`. Any failure exits with status 1.
    if not search_path.exists():
        console.print(f"[red]Error: Directory {search_path} does not exist[/red]")
        raise typer.Exit(1)

    try:
        species_files_map = find_species_files(search_path)
        if not species_files_map:
            console.print(f"[yellow]No species directories found in {search_path}/species[/yellow]")
            raise typer.Exit(1)

        species_data_map = {}
        console.print(f"\nAnalyzing species in: {search_path}/species")

        for species_dir, files in species_files_map.items():
            console.print(f"Processing {species_dir}...")
            chain_data, versions = build_species_chain(species_dir, files)
            # Species for which no usable chain could be built are left out.
            if chain_data:
                species_data_map[species_dir] = (chain_data, versions)

        summary_table = create_summary_table(species_data_map)
        console.print("\nSpecies Analysis Summary:")
        console.print(summary_table)

        save_detailed_table(species_data_map, output)
        console.print(f"\n[green]Detailed analysis saved to {output}[/green]")

    except typer.Exit:
        # typer.Exit is an ordinary Exception subclass; without this clause
        # the deliberate exit above would be caught by the generic handler
        # and reported as a spurious "Error: 1".
        raise
    except Exception as e:
        console.print(f"[red]Error: {str(e)}[/red]")
        raise typer.Exit(1)

 # Run the Typer CLI only when this file is executed as a script, not when imported.
 if __name__ == "__main__":
    app()
	import typer
	import xml.etree.ElementTree as ET
	from pathlib import Path
	from typing import Dict, List, Optional, Tuple
	from dataclasses import dataclass
	from rich.console import Console
	from rich.table import Table
	from collections import defaultdict

	# Typer application object; the CLI command below is registered on it via @app.command().
	app = typer.Typer()
	# Shared rich Console used for every status, warning and error message in this file.
	console = Console()

	@dataclass
	class SpeciesData:
	    """Parsed metadata for one version of a species, read from a single .species file."""
	    # Name of the species directory the file was found under.
	    species_dir: str
	    # Location of the .species file these values were read from.
	    file_path: Path
	    # Value of the file's 'GUID' parameter.
	    guid: str
	    # Value of the 'Version' parameter; used as the key when linking versions.
	    version: str
	    # 'GlobalResolution' parameter converted to float.
	    global_resolution: float
	    # 'GlobalBFactor' parameter converted to float.
	    global_bfactor: float
	    # 'PreviousVersion' parameter, or None when the file does not define it.
	    previous_version: Optional[str]
	    # Position in the version chain: 1 is the oldest entry, the highest number the latest.
	    iteration: int

	def find_species_files(directory: Path) -> Dict[str, List[Path]]:
	    """Recursively find all .species files grouped by species directory.

	    Looks in ``directory / 'species'``; every immediate sub-directory of that
	    folder is searched recursively for ``*.species`` files.

	    Args:
	        directory: Directory expected to contain a ``species`` folder.

	    Returns:
	        A plain dict mapping each sub-directory name to the sorted list of
	        ``.species`` files found beneath it. Sub-directories without any
	        match are omitted. The dict is empty when the ``species`` folder is
	        missing and may be partial when the search fails part-way; both
	        cases print a warning instead of raising.
	    """
	    species_root = directory / 'species'
	    grouped: Dict[str, List[Path]] = {}

	    if not species_root.exists():
	        console.print(f"[yellow]Warning: Species directory not found at {species_root}[/yellow]")
	        return grouped

	    try:
	        # iterdir()/rglob() yield entries in arbitrary filesystem order;
	        # sorting keeps the result (and everything derived from it) stable
	        # between runs and machines.
	        for entry in sorted(species_root.iterdir()):
	            if not entry.is_dir():
	                continue
	            matches = sorted(entry.rglob("*.species"))
	            if matches:
	                grouped[entry.name] = matches
	    except Exception as e:
	        # Best effort: report the problem and return whatever was collected.
	        console.print(f"[yellow]Warning: Error searching directory {directory}: {str(e)}[/yellow]")

	    return grouped

	def parse_species_file(file_path: Path) -> Dict:
	    """Read a species XML file and return its parameters as a dict.

	    Only ``Param`` elements that are direct children of the document root are
	    considered. Each contributes one entry mapping its ``Name`` attribute to
	    its ``Value`` attribute; when a name occurs more than once the last
	    occurrence wins. Parsing errors from ElementTree propagate to the caller.
	    """
	    document_root = ET.parse(file_path).getroot()
	    return {
	        node.get('Name'): node.get('Value')
	        for node in document_root.findall('Param')
	    }

	def build_version_chain(version_map: Dict[str, SpeciesData]) -> List[str]:
	    """Build a chronological chain of versions starting from the latest.

	    A "latest" version is one that no other entry names as its previous
	    version. From each of them the ``previous_version`` links are followed
	    backwards, and the longest resulting chain is returned (the first one
	    found wins a tie).

	    Args:
	        version_map: Mapping of version identifier to its record; only the
	            ``previous_version`` attribute of each record is read.

	    Returns:
	        Version identifiers ordered latest first, oldest last. Empty when
	        the map is empty or no starting point exists.

	    A walk stops, instead of raising ``KeyError`` or looping forever, when
	    the link points to a version that is not in ``version_map`` or to one
	    that was already visited.
	    """
	    # Versions nobody points back to are the heads of the candidate chains.
	    referenced = {
	        record.previous_version
	        for record in version_map.values()
	        if record.previous_version
	    }
	    heads = [version for version in version_map if version not in referenced]

	    longest: List[str] = []
	    for head in heads:
	        chain: List[str] = []
	        visited = set()
	        current = head
	        # Guard against dangling references (file missing / failed to parse)
	        # and against chains that run into a cycle.
	        while current and current in version_map and current not in visited:
	            visited.add(current)
	            chain.append(current)
	            current = version_map[current].previous_version
	        # Strict comparison keeps the first chain on ties.
	        if len(chain) > len(longest):
	            longest = chain

	    return longest

	def build_species_chain(species_dir: str, species_files: List[Path]) -> Tuple[List[SpeciesData], List[str]]:
	    """Load every file of one species and order the versions latest-first.

	    Each file is parsed into a SpeciesData record keyed by its 'Version'
	    parameter (a later file with the same version replaces an earlier one).
	    Files that cannot be parsed or lack a required parameter are reported on
	    the console and skipped. The records are then ordered with
	    build_version_chain and numbered so that the oldest entry of the chain
	    has iteration 1.

	    Returns:
	        A tuple ``(records, versions)``: the SpeciesData objects of the
	        selected chain and the matching version identifiers, both ordered
	        latest first. Records outside that chain are not returned.
	    """
	    by_version = {}

	    for path in species_files:
	        try:
	            fields = parse_species_file(path)
	            version_key = fields['Version']
	            by_version[version_key] = SpeciesData(
	                species_dir=species_dir,
	                file_path=path,
	                guid=fields['GUID'],
	                version=version_key,
	                global_resolution=float(fields['GlobalResolution']),
	                global_bfactor=float(fields['GlobalBFactor']),
	                previous_version=fields.get('PreviousVersion'),
	                # Placeholder; the real number is assigned once the chain is known.
	                iteration=1
	            )
	        except Exception as e:
	            console.print(f"[red]Error processing file {path}: {str(e)}[/red]")

	    ordered_versions = build_version_chain(by_version)
	    ordered_records = [by_version[key] for key in ordered_versions]

	    # Count down from the chain length so the last (oldest) record gets 1.
	    countdown = range(len(ordered_records), 0, -1)
	    for number, record in zip(countdown, ordered_records):
	        record.iteration = number

	    return ordered_records, ordered_versions

	def create_summary_table(species_data_map: Dict[str, Tuple[List[SpeciesData], List[str]]]) -> Table:
	    """Create a rich table summarizing species data grouped by species directory.

	    One row is produced per species directory, in sorted directory-name
	    order; entries whose record list is empty are skipped. The first record
	    of each list is treated as the latest version, so it supplies the
	    "Current Resolution" and "Latest B-Factor" cells. Best and worst
	    resolution are the minimum and maximum over all records of the entry.
	    """
	    summary = Table(title="Species Analysis Summary")

	    column_layout = (
	        ("Species Directory", {"style": "cyan"}),
	        ("Version Count", {}),
	        ("Best Resolution", {"justify": "right"}),
	        ("Current Resolution", {"justify": "right"}),
	        ("Worst Resolution", {"justify": "right"}),
	        ("Latest B-Factor", {"justify": "right"}),
	        ("Versions (Latest → First)", {"style": "dim"}),
	    )
	    for heading, options in column_layout:
	        summary.add_column(heading, **options)

	    # Dict keys are unique, so sorting the keys gives the same order as
	    # sorting the items.
	    for species_dir in sorted(species_data_map):
	        records, version_order = species_data_map[species_dir]
	        if not records:
	            continue

	        newest = records[0]
	        all_resolutions = [record.global_resolution for record in records]

	        summary.add_row(
	            species_dir,
	            str(len(records)),
	            f"{min(all_resolutions):.6f}",
	            f"{newest.global_resolution:.6f}",
	            f"{max(all_resolutions):.6f}",
	            f"{newest.global_bfactor:.0f}",
	            " → ".join(version_order),
	        )

	    return summary

	def save_detailed_table(species_data_map: Dict[str, Tuple[List[SpeciesData], List[str]]], output_path: Path):
	    """Save detailed information grouped by species directory.

	    Writes a tab-separated text file with one header line followed by one
	    line per record, grouped by species directory in sorted name order and
	    keeping each entry's record order.

	    Args:
	        species_data_map: Mapping of species directory name to a
	            ``(records, versions)`` tuple; only the records are used.
	        output_path: File to create or overwrite.

	    The file is always written as UTF-8 so that non-ASCII characters in
	    paths or names do not depend on the platform's locale encoding. All
	    lines are formatted before the file is opened, so a formatting error
	    cannot leave a truncated file behind.
	    """
	    headers = ["Species_Directory", "Iteration", "Version", "Resolution", "B_Factor", "Previous_Version", "File_Path"]
	    lines = ['\t'.join(headers) + '\n']

	    for species_dir in sorted(species_data_map):
	        data_list, _ = species_data_map[species_dir]
	        for data in data_list:
	            row = [
	                species_dir,
	                str(data.iteration),
	                data.version,
	                f"{data.global_resolution:.6f}",
	                f"{data.global_bfactor:.0f}",
	                # A missing/empty previous version is written as the text "None".
	                data.previous_version or "None",
	                str(data.file_path),
	            ]
	            lines.append('\t'.join(row) + '\n')

	    with output_path.open('w', encoding='utf-8') as f:
	        f.writelines(lines)

	@app.command()
	def analyze(
	    search_path: Path = typer.Argument(..., help="Directory containing the species folder"),
	    output: Path = typer.Option("species_analysis.txt", help="Output file path for the detailed table")
	):
	    """
	    Analyze species files organized by species directory.
	    Creates a summary table and saves detailed information to a file.
	    """
	    # NOTE: the docstring above is shown by Typer as the command's help text,
	    # so implementation notes live in comments instead.
	    #
	    # Flow: locate the .species files under <search_path>/species, build the
	    # version chain of every species directory, print the summary table and
	    # write the detailed table to `output`. Any failure exits with status 1.
	    if not search_path.exists():
	        console.print(f"[red]Error: Directory {search_path} does not exist[/red]")
	        raise typer.Exit(1)

	    try:
	        species_files_map = find_species_files(search_path)
	        if not species_files_map:
	            console.print(f"[yellow]No species directories found in {search_path}/species[/yellow]")
	            raise typer.Exit(1)

	        species_data_map = {}
	        console.print(f"\nAnalyzing species in: {search_path}/species")

	        for species_dir, files in species_files_map.items():
	            console.print(f"Processing {species_dir}...")
	            chain_data, versions = build_species_chain(species_dir, files)
	            # Species for which no usable chain could be built are left out.
	            if chain_data:
	                species_data_map[species_dir] = (chain_data, versions)

	        summary_table = create_summary_table(species_data_map)
	        console.print("\nSpecies Analysis Summary:")
	        console.print(summary_table)

	        save_detailed_table(species_data_map, output)
	        console.print(f"\n[green]Detailed analysis saved to {output}[/green]")

	    except typer.Exit:
	        # typer.Exit is an ordinary Exception subclass; without this clause
	        # the deliberate exit above would be caught by the generic handler
	        # and reported as a spurious "Error: 1".
	        raise
	    except Exception as e:
	        console.print(f"[red]Error: {str(e)}[/red]")
	        raise typer.Exit(1)

	# Run the Typer CLI only when this file is executed as a script, not when imported.
	if __name__ == "__main__":
	    app()