Created
June 30, 2010 18:03
-
-
Save bryan-lunt/459005 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Created on Jun 29, 2010 | |
@author: [email protected] | |
''' | |
from Bio.Seq import Seq | |
import Bio.PDB as PDB | |
import Bio.PDB.Polypeptide as PP | |
from itertools import izip, count | |
def get_seqres_from_structfile(structfile): | |
"""Expects an opened handle to the pdbXXXX.ent file to be read | |
Returns a dict of ChainID -> Bio.Seq.Seq objects representing the seqres entries of the pdb file. | |
""" | |
lines = list() | |
#get all SEQRES entries | |
for line in structfile: | |
if line.startswith("SEQRES"): | |
lines.append(line) | |
threeDict = dict() | |
for line in lines: | |
chainID = line[11] | |
ress = line[19:] | |
resList = threeDict.setdefault(chainID,list()) | |
resList.extend(ress.split()) | |
#TODO Infer for each sequences what its alphabet should be. | |
retdict = dict() | |
for key,value in threeDict.iteritems(): | |
try: | |
mySeq = Seq("".join([PP.to_one_letter_code[i] for i in value])) | |
retdict[key] = mySeq | |
except: | |
pass | |
return retdict | |
def get_chainSeqs_from_structure(structure): | |
"""Expects a Bio.PDB.Structure | |
Returns a dictionary mapping chain IDs -> lists of polypeptides for that chain | |
""" | |
myBuilder = PP.PPBuilder() | |
retval = dict([(aChain.id,myBuilder.build_peptides(aChain)) for aChain in structure.get_chains()]) | |
return retval | |
def mapSeqResToATOM(seqres,polypeplist): | |
"""Returns a list of (singleLetter, <residue> or None) tuples for every residue in the seqres, mapping seqres position to a residue in the model (via provided polypeptide list) | |
""" | |
#we could guard against overlap here by specifying the subsection to find in, but is that necessary? | |
offsetList = [(seqres.find(polypep.get_sequence().tostring()),polypep) for polypep in polypeplist] | |
retlist = [(i,None) for i in seqres] | |
#This is much easier to read and cleaner than the old version | |
#BUT, it assigns areas of memory twice, not the most super-efficient code, but who cares? | |
for startPos, polypep in offsetList: | |
end=startPos+len(polypep) | |
retlist[startPos:end] = zip(seqres[startPos:end],polypep) | |
return retlist | |
def mapAll(structfile,structure): | |
"""Returns a map of chainID -> list of (singleLetter, <residue> or None) tuples for every residue in the seqres | |
""" | |
seqRes = get_seqres_from_structfile(structfile) | |
chainSeqs = get_chainSeqs_from_structure(structure) | |
retDict = dict() | |
for key in seqRes.keys(): | |
if not key in chainSeqs.keys(): | |
continue | |
retDict[key] = mapSeqResToATOM(seqRes[key],chainSeqs[key]) | |
return retDict | |
if __name__ == "__main__": | |
#do some tests | |
plist = PDB.PDBList() | |
modelfile = plist.retrieve_pdb_file("1CO0",pdir="/tmp/foobar/") | |
aparser = PDB.PDBParser() | |
structure = aparser.get_structure("X", modelfile) | |
foomaps = mapAll(open(modelfile),structure) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment