Skip to content

Instantly share code, notes, and snippets.

@mgalardini
Last active August 29, 2015 14:07
Show Gist options
  • Save mgalardini/63aa3a9fc7bdd8acfe68 to your computer and use it in GitHub Desktop.
Save mgalardini/63aa3a9fc7bdd8acfe68 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import sys
if len(sys.argv) < 4:
sys.stderr.write('USAGE: map2PDB PDB_FILE ALIG_FASTA POS_AA [AA_POS, [AA_POS, ...]]')
sys.stderr.write('\n')
sys.exit(65)
pdb = sys.argv[1]
align = sys.argv[2]
retrieve = sys.argv[3:]
from Bio import AlignIO
from Bio.PDB import PDBParser
from Bio.SeqUtils import seq1
def check_residue(pdb_struct, alignment, position, aa):
'''
Given a pdb structure and a corresponding alignment with the original seq.
and a position in the sequence, returns the actual position in the
PDB file of the desired residue and the residue, if exists
'''
a = alignment
# First sequence is the PDB target
# Second sequence is the original sequence
# Check the given Aa
if a[1][position - 1] != aa:
# Sometimes there are Aa shifts in protherm entries (updates in the proteomes?)
# Try to fix it
# This of course brings some nasty problems when homopolymers are present
found = False
for i in range(1, 5):
if a[1][position -1 + i] == aa:
position = position + 1 +i
found = True
break
if a[1][position -1 - i] == aa:
position = position + 1 -i
found = True
if not found:
raise ValueError('Could not find Aa %s in sequence window %d - %d'
%(aa, position-5, position+5))
# Start Aa (may be different)
pdb_aa = a[0][position - 1]
# If gap it means no luck with this structure
# But it could also mean that something went wrong with the alignment
# even though this should be rare
if pdb_aa == '-':
return None
# Derive the relative position in the pdb file
gaps = a[0][:position].seq.count('-')
rel_pos = position - gaps
# Get the residue number from the pdb file
chain = a[0].id.split('_')[-1]
if chain == '':
chain = ' '
c = pdb_struct[0][chain]
residue = c.get_list()[rel_pos - 1]
# Sanity check
if seq1(residue.get_resname()) != pdb_aa:
raise KeyError('Expecting Aa %s in the PDB file, found %s'%(pdb_aa,
seq1(residue.get_resname())))
return pdb_aa, residue.id[1]
# Parse the PDB just once, save some time
p = PDBParser()
s = p.get_structure('x', pdb)
# Parse the alignment
alignment = AlignIO.read(align, 'fasta')
# Here check the positions
for retr in retrieve:
aa = retr[0]
pos = int(retr[1:])
try:
result = check_residue(s,
alignment,
pos, aa)
if result is None:
sys.stderr.write('Position %d corresponds to a gap in PDB file'%pos)
sys.stderr.write('\n')
continue
print(result[0], result[1])
except Exception as e:
sys.stderr.write('ERROR: %s (%s)'%(e, retr))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment