Skip to content

Instantly share code, notes, and snippets.

@joowkim
Created November 10, 2016 12:34
Show Gist options
  • Save joowkim/5000ba4371b0fc906ae00bfba740dcb4 to your computer and use it in GitHub Desktop.
Save joowkim/5000ba4371b0fc906ae00bfba740dcb4 to your computer and use it in GitHub Desktop.
import sys
# Module-level store of parsed records: sequence ID -> upper-cased sequence.
# fastaParser() fills it in place; other functions in this file read from it.
fastaDict = {}


def fastaParser(infile):
    """Parse a FASTA file into the module-level ``fastaDict``.

    Each record's key is the header text after ``>`` up to the first space.
    Sequence lines are stripped, upper-cased and concatenated. A record whose
    ID is already present in ``fastaDict`` is overwritten.

    Args:
        infile: Path of the FASTA file to read.

    Returns:
        The module-level ``fastaDict`` (the same object, updated in place).
    """
    currentGene = None
    chunks = {}
    with open(infile) as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines carry no sequence data.
                continue
            if line.startswith(">"):
                currentGene = line[1:].split(" ")[0]
                chunks[currentGene] = []
            elif currentGene is not None:
                # Lines seen before the first header belong to no record and
                # are skipped instead of raising KeyError.
                chunks[currentGene].append(line.upper())
    # Join once per record rather than growing a string line by line.
    for gene, parts in chunks.items():
        fastaDict[gene] = "".join(parts)
    return fastaDict
def complement(seq):
    """Return the base-by-base complement of an upper-case DNA sequence.

    The order of the bases is kept (this is a complement, not a reverse
    complement). Besides A/T/G/C, the IUPAC ambiguity codes are supported so
    that sequences containing e.g. ``N`` do not fail.

    Args:
        seq: Upper-case nucleotide string.

    Returns:
        A string of the same length with every base complemented.

    Raises:
        KeyError: If ``seq`` contains a symbol that is not in the table
            (including lower-case letters).
    """
    ntDict = {
        "A": "T", "T": "A", "G": "C", "C": "G",
        # IUPAC ambiguity codes: each maps to the code for the complementary
        # set of bases.
        "R": "Y", "Y": "R", "K": "M", "M": "K",
        "B": "V", "V": "B", "D": "H", "H": "D",
        "S": "S", "W": "W", "N": "N",
    }
    return "".join(ntDict[nt] for nt in seq)
def selectReverseSeq(seq, pre, post):
    """Return positions ``pre``..``post`` of ``seq`` in reverse order, upper-cased.

    Positions are 1-based and inclusive, with ``pre <= post``.

    Args:
        seq: Sequence string to slice.
        pre: First position of the range (int or numeric string).
        post: Last position of the range (int or numeric string).

    Returns:
        The selected region reversed and upper-cased; an empty string when
        ``pre`` is greater than ``post``.
    """
    start = int(pre)
    end = int(post)
    # Slice forwards and then reverse. A backwards slice needs the stop index
    # ``start - 2``, which becomes -1 when start == 1; Python reads -1 as
    # "the last element", so the result would be empty.
    return seq[start - 1:end][::-1].upper()
def runSeqSelect(infile, geneName, pre, post):
    """Extract a region of one record from a FASTA file.

    Coordinates are 1-based and inclusive. When ``post`` is greater than
    ``pre`` the region is returned as stored. Otherwise the coordinates are
    treated as reversed and the reverse complement of positions
    ``post``..``pre`` is returned.

    Args:
        infile: Path of the FASTA file; it is parsed with ``fastaParser``.
        geneName: Record ID to look up in ``fastaDict``.
        pre: Start position (int or numeric string).
        post: End position (int or numeric string).

    Returns:
        The selected sequence, or an empty string when ``geneName`` is not
        present in the parsed file.
    """
    pre = int(pre)
    post = int(post)

    fastaParser(infile)
    # An unknown record ID yields an empty sequence rather than an error.
    geneSeq = fastaDict.get(geneName, "")

    if post > pre:
        return geneSeq[pre - 1:post]

    # NOTE(review): pre == post also lands here, so a one-base request returns
    # the complement of that base - confirm this is intended.
    # Slice forwards and then reverse: a backwards slice would need the stop
    # index ``post - 2``, which is -1 when post == 1 and is then read by
    # Python as "the last element", giving an empty result.
    return complement(geneSeq[post - 1:pre][::-1])
if __name__ == "__main__":
    # Command-line entry point: expects exactly four arguments after the
    # script name (FASTA file, record ID, start position, end position).
    if len(sys.argv) != 5:
        # Wrong argument count: show the usage line and echo what was
        # received. print() must be called as a function; a bare `print`
        # followed by an expression on the next line outputs nothing.
        print("python [seqSelect] [fastaFile] [geneName] [startPostion] [endPosition]")
        print(sys.argv)
    else:
        print(runSeqSelect(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment