Skip to content

Instantly share code, notes, and snippets.

@shahpnmlab
Last active January 21, 2025 11:24
Show Gist options
  • Save shahpnmlab/a06fb2f2199c9a831518c6f3c65a1e77 to your computer and use it in GitHub Desktop.
Save shahpnmlab/a06fb2f2199c9a831518c6f3c65a1e77 to your computer and use it in GitHub Desktop.
Script to re-extract particles from tilt series after refinement in M.
import starfile
import pandas as pd
import numpy as np
import typer
from pathlib import Path
from typing import Optional
def _select_particle_table(star_data):
    """Return the particle table from the object produced by ``starfile.read``.

    ``starfile.read`` yields a single DataFrame for a one-block file but a
    dict when the file holds several data blocks; in the latter case the first
    DataFrame block that carries a ``wrpSourceName`` column is used.

    Raises:
        ValueError: If no such table can be found.
    """
    if isinstance(star_data, pd.DataFrame):
        return star_data
    if isinstance(star_data, dict):
        for block in star_data.values():
            if isinstance(block, pd.DataFrame) and 'wrpSourceName' in block.columns:
                return block
    raise ValueError("No data block containing a 'wrpSourceName' column was found")


def process_star_file(input_path: str, output_path: Optional[str] = None) -> Optional[pd.DataFrame]:
    """
    Process a star file to average temporal samples and rename columns to Relion convention.

    Every column whose name starts with ``wrpCoordinateX`` (likewise ``Y``/``Z``)
    or ``wrpAngleRot`` (likewise ``Tilt``/``Psi``) is averaged row-wise into a
    single ``rlnCoordinate*`` / ``rlnAngle*`` column, and ``wrpSourceName`` is
    copied to ``rlnMicrographName``.

    Args:
        input_path (str): Path to input star file
        output_path (str, optional): Path to save processed star file. If None, returns DataFrame

    Returns:
        pandas.DataFrame if output_path is None, else None

    Raises:
        ValueError: If the file has no usable particle table, or if any of the
            expected ``wrp*`` columns is absent.
    """
    # Source-column prefix -> Relion column name. Dict order is also the
    # output column order (after rlnMicrographName).
    prefix_to_relion = {
        'wrpCoordinateX': 'rlnCoordinateX',
        'wrpCoordinateY': 'rlnCoordinateY',
        'wrpCoordinateZ': 'rlnCoordinateZ',
        'wrpAngleRot': 'rlnAngleRot',
        'wrpAngleTilt': 'rlnAngleTilt',
        'wrpAnglePsi': 'rlnAnglePsi',
    }

    particles = _select_particle_table(starfile.read(input_path))
    columns = particles.columns.tolist()

    # Collect, per output column, every input column sharing the prefix.
    grouped = {
        relion_name: [col for col in columns if col.startswith(prefix)]
        for prefix, relion_name in prefix_to_relion.items()
    }

    # Fail loudly instead of writing all-NaN columns: the mean over an empty
    # column selection is NaN for every row.
    missing = [
        f'{prefix}*'
        for prefix, relion_name in prefix_to_relion.items()
        if not grouped[relion_name]
    ]
    if 'wrpSourceName' not in columns:
        missing.append('wrpSourceName')
    if missing:
        raise ValueError(
            f"Input star file is missing expected column(s): {', '.join(missing)}"
        )

    # Start from the micrograph name so it is the first output column.
    processed_df = pd.DataFrame({'rlnMicrographName': particles['wrpSourceName']})

    # NOTE: angles are averaged arithmetically, exactly like coordinates;
    # samples that straddle an angular wrap-around are not treated specially.
    for relion_name, cols in grouped.items():
        processed_df[relion_name] = particles[cols].mean(axis=1)

    if output_path:
        # Write the processed table as a single-block star file.
        starfile.write({'data_particles': processed_df}, output_path)
        return None
    return processed_df
# Typer application object; the text below is the CLI's top-level help.
app = typer.Typer(
    help="Process Warp star files to Relion format",
)
@app.command()
def convert(
    input_path: Path = typer.Option(..., "--i", help="Input star file path", exists=True),
    output_path: Optional[Path] = typer.Option(None, "--output", "--o", help="Output star file path. If not provided, will use input_path with '_processed.star' suffix"),
    overwrite: bool = typer.Option(False, "--overwrite", help="Overwrite output file if it exists"),
) -> None:
    """
    Convert a Warp star file to Relion format, averaging temporal samples.
    """
    # NOTE: the docstring above is what Typer prints as the command's --help
    # text, so implementation notes live in comments instead.
    #
    # Exits with status 1 (message on stderr) when the output file already
    # exists without --overwrite, or when processing fails.

    # If no output path provided, create one next to the input file.
    if output_path is None:
        typer.echo("Use |WarpTools ts_particle_extract --coords_angpix 1 --other-args| to re-extract particles")
        output_path = input_path.parent / f"{input_path.stem}_processed.star"

    # Refuse to clobber an existing file unless explicitly allowed.
    if output_path.exists() and not overwrite:
        typer.echo(
            f"Error: Output file {output_path} already exists. Use --overwrite to force.",
            err=True,
        )
        raise typer.Exit(1)

    # Top-level CLI boundary: report any failure and exit non-zero, keeping
    # the original exception chained for debugging.
    try:
        process_star_file(str(input_path), str(output_path))
    except Exception as e:
        typer.echo(f"Error processing file: {e}", err=True)
        raise typer.Exit(1) from e

    typer.echo(f"Successfully processed {input_path} to {output_path}")
# Invoke the Typer CLI only when this file is executed as a script.
if __name__ == "__main__":
    app()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment