Skip to content

Instantly share code, notes, and snippets.

@klprint
Last active December 14, 2016 12:20
Show Gist options
  • Save klprint/233fd8a03dc6a615524a69672ee5ae38 to your computer and use it in GitHub Desktop.
Save klprint/233fd8a03dc6a615524a69672ee5ae38 to your computer and use it in GitHub Desktop.
This script parses TMHMM short output to a full-length topology output.
#################################################################
######################## TMHMM Parser ###########################
#################################################################
# Created by: Kevin Leiss
# Last Updated: 14.12.2016
#
# License: Feel free to use the script, but please refer to me if
# you used it for publication.
#
# Usage: Scroll down to the bottom and specify the file path to your
# short TMHMM output. Don't forget to redirect StdOut to a file.
#
# Example: python3 TMHMM_parser.py > Output.txt
#
# Output info: i = loop inside
# o = loop outside
# T = transmembrane helix
def _expand_topology(topology, length):
    """Expand a short TMHMM topology string into one character per residue.

    ``topology`` looks like ``i7-29o44-66i``: a loop side (``i`` or ``o``)
    followed by the inclusive ``start-end`` range of the next helix, repeated,
    and terminated by the side of the final loop.  The result uses ``i``/``o``
    for loop residues and ``T`` for helix residues and is padded with the last
    loop side up to ``length`` characters.

    Raises ValueError if a helix range does not contain two integers.
    """
    # No helix present (or nothing at all): the whole protein is one loop.
    if len(topology) <= 1:
        return topology * length

    pieces = []
    side = ''            # side of the loop currently being read
    start_digits = ''    # digits before the '-' of the current helix
    end_digits = ''      # digits after the '-' of the current helix
    reading_end = False  # True once the '-' of the current range was seen
    prev_end = 0         # last residue of the previous helix (0 = none yet)

    for char in topology:
        if char == 'i' or char == 'o':
            if reading_end:
                helix_start = int(start_digits)
                helix_end = int(end_digits)
                # Loop residues between the previous helix and this one.
                pieces.append(side * (helix_start - prev_end - 1))
                # Helix ranges are inclusive on both ends, hence the +1.
                pieces.append('T' * (helix_end - helix_start + 1))
                prev_end = helix_end
            side = char
            start_digits = ''
            end_digits = ''
            reading_end = False
        elif char in '0123456789':
            if reading_end:
                end_digits += char
            else:
                start_digits += char
        elif char == '-':
            reading_end = True

    expanded = ''.join(pieces)
    # Everything after the last helix belongs to the final loop side.
    return expanded + side * (length - len(expanded))


def readTMHMMfile(file_path) :
    """Print a TMHMM short-format file as a table with full-length topologies.

    Each non-blank input line must hold six tab-separated fields: an
    identifier (only the part before the first ':' is kept) followed by five
    ``key=value`` fields, read in order as length, expAA, First60, PredHel and
    Topology.  A header line and one tab-separated row per input line are
    written to standard output; the topology column is expanded to one
    character per residue (i = loop inside, o = loop outside,
    T = transmembrane helix).

    Raises IndexError if a line has too few fields or a field lacks '=', and
    ValueError if the length or a helix coordinate is not an integer.
    """
    print('GeneID\tAAlength\texpAA\tFirst60\tpredHel\tTopology')
    with open(file_path, 'r') as tmhFile:
        for line in tmhFile:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            fields = line.split('\t')
            gene_id = fields[0].split(':')[0]
            length = fields[1].split('=')[1]
            exp_aa = fields[2].split('=')[1]
            first_sixty = fields[3].split('=')[1]
            pred_hel = fields[4].split('=')[1]
            topology = fields[5].split('=')[1].rstrip()
            full_topology = _expand_topology(topology, int(length))
            print('\t'.join(
                [gene_id, length, exp_aa, first_sixty, pred_hel, full_topology]
            ))
if __name__ == '__main__':
    import sys

    # Path to the short TMHMM output. It can be given as the first
    # command-line argument; without one the default below is used, so the
    # path can still be changed by editing this line.
    default_path = '../Data/TMHMM-predictions.txt'
    readTMHMMfile(sys.argv[1] if len(sys.argv) > 1 else default_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment