Created
April 4, 2022 19:48
-
-
Save sminot/1294d4f93c3b4e45f1de56ab95d9a8ce to your computer and use it in GitHub Desktop.
Extract the gene annotation table from the geneshot results HDF5
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import click | |
import os | |
import pandas as pd | |
# Set up the command line processor
@click.command()
@click.option("--input_fp", help="Provide the path to the geneshot output file containing all gene abundances (*.results.hdf5)")
def geneshot_extract_gene_annot(input_fp):
    """Write the geneshot gene annotation table out as a CSV file.

    Reads the table stored under ``/annot/gene/all`` in the HDF5 file at
    ``input_fp`` and writes it (without the index) to a path formed by
    replacing the trailing ``.results.hdf5`` of ``input_fp`` with
    ``.gene_annot.csv``.

    Args:
        input_fp: Path to the geneshot results file (``*.results.hdf5``).

    Raises:
        click.UsageError: If ``--input_fp`` was not provided.
        click.BadParameter: If the path does not exist or is not named
            ``*.results.hdf5``.
    """
    suffix = ".results.hdf5"

    # Validate with explicit exceptions rather than `assert`, which is
    # stripped when Python runs with -O.
    if input_fp is None:
        raise click.UsageError(
            "Must provide path to the geneshot output file containing all gene abundances (*.results.hdf5)"
        )
    if not os.path.exists(input_fp):
        raise click.BadParameter(
            f"Cannot find file: {input_fp}", param_hint="--input_fp"
        )
    if not input_fp.endswith(suffix):
        raise click.BadParameter(
            "Input file must be named *.results.hdf5", param_hint="--input_fp"
        )

    print(f"Reading {input_fp}")

    # Strip only the trailing suffix; str.replace would also remove any
    # earlier occurrence of ".results.hdf5" (e.g. in a directory name).
    output_fp = input_fp[: -len(suffix)] + ".gene_annot.csv"

    # Open the file
    with pd.HDFStore(input_fp, "r") as store:
        # Write out the gene annotation table
        gene_annot = pd.read_hdf(store, "/annot/gene/all")
        gene_annot.to_csv(output_fp, index=False)
# Run the command line interface only when this file is executed as a script
if __name__ == "__main__":
    geneshot_extract_gene_annot()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment