Last active
December 20, 2018 19:57
-
-
Save bluegenes/c70daf6fbfea8e2f569f0c6f8261048b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Author: Tessa Pierce
import argparse | |
import screed | |
import re | |
def extract_contigs(in_fasta, pattern, outF):
    """Copy the records whose name matches ``pattern`` into a new FASTA file.

    Each record read from ``in_fasta`` is written to ``outF`` as a two-line
    FASTA entry (``>name`` followed by the sequence) when ``pattern`` is found
    anywhere in the record name, ignoring case.

    Args:
        in_fasta: Path of the input sequence file; it is read with
            ``screed.open``.
        pattern: Regular expression searched for in each record name.
        outF: Path of the FASTA file to write. When falsy, the name
            ``<input stem>_<pattern>.fa`` is used, placed in the same
            directory as the input file.

    Raises:
        re.error: If ``pattern`` is not a valid regular expression. This is
            raised before any file is opened, so no empty output is left
            behind.
    """
    import os  # function-scope import keeps this block self-contained

    if not outF:
        # Derive the stem from the file name only, cutting at the LAST ".fa".
        # Splitting the whole path at the first ".fa" would truncate inputs
        # such as "runs.fastq_dir/contigs.fa" or "sample.family.fa".
        directory, filename = os.path.split(in_fasta)
        stem, found, _ = filename.rpartition('.fa')
        if not found:
            # No ".fa" in the file name: keep the name unchanged.
            stem = filename
        # NOTE(review): ``pattern`` goes into the file name verbatim; a
        # pattern containing a path separator needs an explicit ``outF``.
        outF = os.path.join(directory, stem + '_' + pattern + '.fa')

    # Compile once instead of re-resolving the pattern for every record.
    matcher = re.compile(pattern, re.IGNORECASE)

    with screed.open(in_fasta) as seqF, open(outF, 'w') as o:
        for read in seqF:
            if matcher.search(read.name):
                o.write('>' + read.name + '\n' + read.sequence + '\n')
if __name__ == '__main__':
    # Command-line entry point: parse the arguments and run the extraction.
    parser = argparse.ArgumentParser(
        description="Extract fasta records whose names match a pattern")
    parser.add_argument('fasta', help='input fasta file')
    # The pattern is mandatory: without it extract_contigs would receive None
    # and fail with a TypeError instead of a clear usage message.
    parser.add_argument('--pattern', required=True, help='pattern to match')
    # When --out is omitted, extract_contigs derives the output name itself.
    parser.add_argument('--out', help='output fasta file', default=None)
    args = parser.parse_args()
    extract_contigs(args.fasta, args.pattern, args.out)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment