Skip to content

Instantly share code, notes, and snippets.

@philippbayer
Created December 17, 2019 12:55
Show Gist options
  • Select an option

  • Save philippbayer/2a7bc2a8275086945d028a80abaf3341 to your computer and use it in GitHub Desktop.

Select an option

Save philippbayer/2a7bc2a8275086945d028a80abaf3341 to your computer and use it in GitHub Desktop.
trims fastas of several sequences/genes using a single known sequence/gene
import subprocess
from Bio.Seq import Seq
from Bio import SeqIO
trimal = '/path/to/trimal'
muscle = '/path/to/muscle3.8.31_i86linux64'
# this is the fasta containing the single reference gene we use for trimming all of our seqeunces
ref_gene = '/path/to/single/ref.fasta'
all_out = open('file_to_fix.Trimmed.fasta', 'w')
# this is the fasta containing all the sequences we want to trim
for s in SeqIO.parse('file_to_fix.fasta', 'fasta'):
# concatenate with single gene, align using muscle, trim beguinning and endnig gaps with trimAl
with open('Both.fasta', 'w') as out:
out.write(s.format('fasta'))
cmd0 = 'cat %s >> Both.fasta'%ref_gene
p = subprocess.Popen(cmd0, stdout=subprocess.PIPE, shell=True)
(output, err) = p.communicate()
p_status = p.wait()
cmd1 = '%s -in Both.fasta -out Result.fasta'%muscle
p = subprocess.Popen(cmd1, stdout=subprocess.PIPE, shell=True)
(output, err) = p.communicate()
p_status = p.wait()
cmd2 = '%s -in Result.fasta -nogaps -keepheader -terminalonly > Result_Trimmed.fasta'%trimal
p = subprocess.Popen(cmd2, stdout=subprocess.PIPE, shell=True)
(output, err) = p.communicate()
p_status = p.wait()
x = [y for y in SeqIO.parse('Result_Trimmed.fasta', 'fasta')]
x = x[0]
x.seq = Seq(str(x.seq).replace("-", ""))
all_out.write(x.format('fasta'))
all_out.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment