Last active
December 14, 2016 12:20
-
-
Save klprint/233fd8a03dc6a615524a69672ee5ae38 to your computer and use it in GitHub Desktop.
This script parses TMHMM short output to a full-length topology output.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################# | |
######################## TMHMM Parser ########################### | |
################################################################# | |
# Created by: Kevin Leiss | |
# Last Updated: 14.12.2016 | |
# | |
# License: Feel free to use the script, but please cite me if
# you use it in a publication.
# | |
# Usage: Scroll down to the bottom and specify the file path to your
# short TMHMM output. Don't forget to redirect stdout to a file.
# | |
# Example: python3 TMHMM_parser.py > Output.txt | |
# | |
# Output info: i = loop inside | |
# o = loop outside | |
# T = transmembrane helix | |
def _expand_topology(topology, length):
    """Expand a TMHMM short topology string to one character per residue.

    ``topology`` is the value of the ``Topology=`` field, e.g.
    ``'i7-29o44-66i'``: a loop marker (``i`` = inside, ``o`` = outside)
    followed by zero or more ``start-end`` helix ranges, each followed by
    the marker of the next loop.  ``length`` is the protein length.

    Returns a string using ``i``/``o`` for loop residues and ``T`` for
    transmembrane-helix residues.  The loop after the last helix (or the
    single loop of a helix-free protein) is extended up to ``length``.
    """
    digits = '0123456789'
    pieces = []
    location = ''       # marker of the loop currently being read
    number = ''         # digits of the coordinate currently being read
    helix_start = None  # set once the '-' of a 'start-end' range is seen
    prev_end = 0        # last residue of the previous helix (0 = none yet)

    for char in topology:
        if char == 'i' or char == 'o':
            # A loop marker closes the helix range that precedes it.
            if helix_start is not None and number:
                helix_end = int(number)
                # Loop residues lie strictly between the previous helix
                # end and this helix start.
                pieces.append(location * (helix_start - prev_end - 1))
                # Helix coordinates are inclusive on both ends, hence +1.
                pieces.append('T' * (helix_end - helix_start + 1))
                prev_end = helix_end
            location = char
            number = ''
            helix_start = None
        elif char in digits:
            number += char
        elif char == '-' and number:
            helix_start = int(number)
            number = ''

    expanded = ''.join(pieces)
    # Pad with the final loop marker; a non-positive count adds nothing.
    return expanded + location * (length - len(expanded))


def readTMHMMfile(file_path):
    """Print a TMHMM short-format file with full-length topologies.

    Each input line is expected to hold six tab-separated fields: the
    sequence identifier followed by five ``key=value`` fields (length,
    ExpAA, First60, PredHel, Topology).  A header line and then one
    tab-separated line per record are written to stdout; the identifier
    is cut at the first ``:`` and the topology is expanded to one
    character per residue (``i``, ``o`` or ``T``).

    Blank lines are skipped.  A line with fewer than six fields raises
    ``IndexError``.
    """
    print('GeneID\tAAlength\texpAA\tFirst60\tpredHel\tTopology')
    with open(file_path, 'r') as tmhFile:
        for line in tmhFile:
            if not line.strip():
                continue
            fields = line.split('\t')
            gene_id = fields[0].split(':')[0]
            length = fields[1].split('=')[1]
            exp_aa = fields[2].split('=')[1]
            first_sixty = fields[3].split('=')[1]
            pred_hel = fields[4].split('=')[1]
            # The last field carries the line terminator.
            short_topology = fields[5].split('=')[1].rstrip()
            topology = _expand_topology(short_topology, int(length))
            print('\t'.join(
                (gene_id, length, exp_aa, first_sixty, pred_hel, topology)
            ))
if __name__ == '__main__':
    import sys

    # Path to the TMHMM short-format output.  It is taken from the first
    # command-line argument when one is given; otherwise the default
    # below is used (edit it to point at your own file).
    tmhmm_path = sys.argv[1] if len(sys.argv) > 1 else '../Data/TMHMM-predictions.txt'
    readTMHMMfile(tmhmm_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment