Skip to content

Instantly share code, notes, and snippets.

@soh-i
Created June 23, 2015 07:54
Show Gist options
  • Select an option

  • Save soh-i/0de55a2a62ceb0221da0 to your computer and use it in GitHub Desktop.

Select an option

Save soh-i/0de55a2a62ceb0221da0 to your computer and use it in GitHub Desktop.
Pretty simple CLI tool for generating fasta record from genomic region defined by GFF record
from pyfasta import Fasta
import HTSeq
import argparse
import re
def __mk_rna(*seq):
return re.sub(r"[tT]", "U", "".join(seq)).upper()
def __query_builder(rec):
if not isinstance(rec, HTSeq.GenomicFeature):
raise TypeError()
else:
return {"chr": rec.iv.chrom, "start": rec.iv.start, "stop": rec.iv.end, "strand": rec .iv.strand}
def __extract_sequence(q, genome):
fa = Fasta(genome)
return fa.sequence(q)
def __to_fasta(header, seq):
return ">%s\n%s" % (header, seq)
def GFF_to_Fasta(args):
for rec in HTSeq.GFF_Reader(args.gff):
query = __query_builder(rec)
seq = __extract_sequence(query, args.genome)
header = rec.attr["Name"] + ":" + rec.attr["transcript_id"]
print __to_fasta(header, __mk_rna(seq))
if __name__ == "__main__":
parse = argparse.ArgumentParser(description="Generate fasta sequence from GFF record")
parse.add_argument("--genome", type=str, dest="genome", help="Genome sequence", required=True)
parse.add_argument("--gff", type=str, dest="gff", help="GFF file", required=True)
args = parse.parse_args()
GFF_to_Fasta(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment