Last active
November 25, 2016 14:33
-
-
Save wdecoster/5782affc0753c9b89a05306fe942a021 to your computer and use it in GitHub Desktop.
Written for https://www.biostars.org/p/223495
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import sys, os, glob | |
| from Bio import SeqIO | |
def getfiles():
    """Return the ``*.csv`` and ``*.tfa`` files in the current directory.

    Prints how many files were found before returning them.

    Returns:
        A list of file names (relative to the current working directory),
        ``*.csv`` matches first, followed by ``*.tfa`` matches.
    """
    # Both patterns need the "*" wildcard: a bare ".tfa" would only match a
    # file whose entire name is ".tfa".
    files = glob.glob("*.csv") + glob.glob("*.tfa")
    print("Found {} files to filter".format(len(files)))
    return files
def filter(inputfile, min_length=20):
    """Write the records of a FASTA file that are longer than ``min_length``.

    The retained records are written, in FASTA format, to a new file named
    ``<inputfile>.filtered.fasta`` next to the input. A one-line summary of
    how many records were kept is printed.

    NOTE: this function shadows the builtin ``filter``; the name is kept
    because callers in this script rely on it.

    Args:
        inputfile: Path of the FASTA file to read.
        min_length: Records are kept only when their sequence length is
            strictly greater than this value (default 20).
    """
    # The suffix deliberately differs from the "*.csv"/"*.tfa" input
    # patterns so that output files are not picked up as input on a re-run.
    filtered_file = "{}.filtered.fasta".format(inputfile)
    countAll = 0
    goodproteins = []
    # Parse the file passed to this function, not sys.argv[1], so that the
    # function works for every file when a whole directory is processed.
    for record in SeqIO.parse(inputfile, 'fasta'):
        countAll += 1
        if len(record.seq) > min_length:
            goodproteins.append(record)
    countGood = len(goodproteins)
    SeqIO.write(goodproteins, filtered_file, "fasta")
    print("Filtering retained {} out of {} sequences in this file".format(countGood, countAll))
def main():
    """Filter the file(s) selected by the first command-line argument.

    The argument is either the literal string ``dir`` (process every file
    returned by ``getfiles()``) or the path of a single existing file.

    Raises:
        SystemExit: When no argument is given, or when the argument is
            neither ``dir`` nor the path of an existing file.
    """
    # Guard against a missing argument, which would otherwise surface as an
    # unhelpful IndexError.
    if len(sys.argv) < 2:
        sys.exit("Usage: {} <fasta-file | dir>".format(sys.argv[0]))

    target = sys.argv[1]
    if target == 'dir':
        files = getfiles()
    elif os.path.isfile(target):
        files = [target]
    else:
        sys.exit("Invalid input!")

    for inputfile in files:
        filter(inputfile)


# Only run when executed as a script, so importing this module has no
# side effects.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment