Last active
April 22, 2017 22:42
-
-
Save pollin14/1afb42deddd98314a6e1a3a6b8ab9a1e to your computer and use it in GitHub Desktop.
Read a FASTA format and remove all except the ID and the sequence and write a new FASTA file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Tested with Python 2.7.12
# Instructions
#
# 1. Put the FASTA file to clean somewhere this script can read it
#    (for example, as blast.fasta in the same directory as this file).
# 2. Run the script with the source file name and the destination file name.
#
# Example: python clean_blast.py file1.fasta my_file.fasta
# Used to read arguments of the command line. | |
import sys | |
def _append_record(cleaned, header, sequence_parts):
    """Append one record to ``cleaned`` if it has both an ID and a sequence.

    Records missing either part are skipped: sequence lines seen before the
    first header, and headers that are not followed by any sequence line.
    """
    sequence = "".join(sequence_parts)
    if header and sequence:
        cleaned.append(header)
        cleaned.append(sequence)


def clean_fasta_lines(lines):
    """Return FASTA text that keeps only each record's ID and its sequence.

    ``lines`` is any iterable of text lines (for example an open file).
    A line starting with ">" begins a new record; everything after the first
    space on that line is discarded. All other lines are copied unchanged as
    sequence data of the current record.
    """
    cleaned = []          # Output pieces, joined once at the end.
    header = ""           # ID line of the record being read ("" = none yet).
    sequence_parts = []   # Sequence lines of the record being read.

    for line in lines:
        if line[:1] == ">":
            # A new header closes the previous record.
            _append_record(cleaned, header, sequence_parts)
            # Strip the line ending before splitting so a header without any
            # description does not end up with two newlines.
            header = line.rstrip("\r\n").split(" ")[0] + "\n"
            sequence_parts = []
        else:
            sequence_parts.append(line)

    # The last record has no following header to trigger the flush above,
    # so it must be stored explicitly or it would be lost.
    _append_record(cleaned, header, sequence_parts)

    return "".join(cleaned)


def main(argv):
    """Run the cleaner using command-line style arguments.

    ``argv[1]`` is the FASTA file to read and ``argv[2]`` the file to write.
    Returns the process exit status: 0 on success, 1 when an argument is
    missing or empty.
    """
    # Validation of the command line arguments
    if len(argv) < 2 or argv[1] == '':
        print("You need to specific the fasta file to read.")
        print("")
        print("Example: python script_name.py file1.fasta my_file.fasta")
        return 1

    if len(argv) < 3 or argv[2] == '':
        print("You need to specific the destination file name.")
        print("")
        print("Example: python script_name.py file1.fasta my_file.fasta")
        return 1

    print("Start program")

    sourcePathName = argv[1]
    destinationPathName = argv[2]

    print("Reading file")

    # The source is read completely before the destination is opened, so
    # using the same path for both still works.
    with open(sourcePathName, "r") as lines:
        cleaned = clean_fasta_lines(lines)

    print("Writing file")

    with open(destinationPathName, 'w') as f:
        f.write(cleaned)

    print("File wrote in " + destinationPathName)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment