Skip to content

Instantly share code, notes, and snippets.

@JoaoRodrigues
Created October 29, 2014 00:27
Show Gist options
  • Save JoaoRodrigues/02181f993b0a48a213dd to your computer and use it in GitHub Desktop.
Save JoaoRodrigues/02181f993b0a48a213dd to your computer and use it in GitHub Desktop.
Sampling Analysis in BioPython
#!/usr/bin/env python
# -*- coding: utf-8 -*
"""
Utility script to analyze several docking poses of a protein complex.
Produces PDB file with color-coded ligand center-of-mass positions
that reflect the population density (i.e. number of neighbor models).
Works with multiple chain models by considering as "ligand" all chains
defined as NOT part of the receptor. Default, chain A is the only receptor.
João Rodrigues @ Utrecht, 2014
"""
import copy
import os
import sys
try:
from Bio.PDB import PDBParser
from Bio.PDB import PDBIO
from Bio.PDB import Superimposer
from Bio.PDB import StructureBuilder
from Bio.PDB import NeighborSearch
except ImportError:
raise Exception('Could not import Biopython PDB modules')
# Main Code
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser(description='Analyse sampling of docking runs.')
# Define args
parser.add_argument('structures', metavar='pdb_file(s)', type=str, nargs='+',
help='Docking models containing all chains')
# Define options
parser.add_argument('--output', metavar='output.pdb', type=str,
default='sampling.pdb',
help='Name of the output analysis PDB file')
parser.add_argument('--receptor', metavar='A', type=str, nargs='+',
default='A',
help='Chains to consider as receptor')
parser.add_argument('--radius', metavar='N Å', type=float,
default=10.0,
help='Radius in Å to consider neighboring ligand COMs')
ui = parser.parse_args() # user input
pdb_parser = PDBParser(QUIET=1, PERMISSIVE=1)
# Start analysis
print "= Starting ..."
lst_models = []
for pdb_model in ui.structures:
fpath = os.path.abspath(pdb_model)
if not fpath.endswith('.pdb'):
print >>sys.stderr, "'{0}' is not a PDB file. Ignoring...".format(fpath)
continue
lst_models.append(fpath)
print "\t Read {0} models".format(len(lst_models))
# Fit on receptor chains of the first model
# Select only CA or P atoms
print "= Extracting reference atoms from first model"
receptor_chains = set(ui.receptor)
print "\tUsing chains as receptor: {0}".format(','.join(receptor_chains))
refe_atoms = []
refe_path = lst_models[0]
refe_model = pdb_parser.get_structure(os.path.basename(refe_path), refe_path)
for residue in refe_model.get_residues():
if residue.parent.id in receptor_chains:
if residue.id[0] == ' ' and 'CA' in residue:
refe_atoms.append(residue['CA'])
elif residue.id[0] == ' ' and 'P' in residue:
refe_atoms.append(residue['P'])
if not refe_atoms:
msg = '\tNo "CA" or "P" atoms found in first model for the specified chain(s)'
print >>sys.stderr, msg
sys.exit(1)
# Create 'sampling' structure with receptor chains of first model
sb = StructureBuilder.StructureBuilder()
sb.init_structure('sampling')
sb.init_model(0)
sb.init_seg('')
for chain in refe_model.get_chains():
if chain.id in receptor_chains:
sb.model.add(chain)
# Free space?
del refe_model
# Fit to reference
si = Superimposer()
print "= Fitting the remaining models"
ligand_atoms = []
for model_path in lst_models:
model = pdb_parser.get_structure(os.path.basename(model_path), model_path)
mobi_atoms = []
ligand_atoms.append([])
for residue in model.get_residues():
if residue.parent.id in receptor_chains:
if residue.id[0] == ' ' and 'CA' in residue:
mobi_atoms.append(residue['CA'])
elif residue.id[0] == ' ' and 'P' in residue:
mobi_atoms.append(residue['P'])
else: # collect ligand atoms
for atom in residue:
ligand_atoms[-1].append(atom)
if not mobi_atoms:
msg = '\tNo "CA" or "P" atoms found in "{0}" for the specified chain(s). Skipping.'
print >>sys.stderr, msg.format(model.id)
continue
si.set_atoms(refe_atoms, mobi_atoms)
si.apply(model.get_atoms())
# Free Space?
del model
# C. Calculate rotated ligand center-of-mass
# D. Append COM as a residue of chain X for the sampling.pdb structure.
print "= Calculating ligand center-of-mass"
sb.init_chain('X')
ligand_coms = []
modifier = 0 # id modifier for >9999 models
for imodel, atom_lst in enumerate(ligand_atoms, start=1):
coord_lst = [a.coord for a in atom_lst]
ligand_com = sum(coord_lst)/len(coord_lst)
# Create 'virtual' atom with COM coordinates
# Show as oxygen since red color is nice to visualize.
# Avoid numbering issues..
# Im leaving this like this.. who wants to analyze more than 20k positions..
if imodel >= 9999:
modifier = -9998
sb.init_chain('Y')
sb.init_residue('COM', ' ', imodel+modifier, ' ')
sb.init_atom('XYZ', ligand_com, 0.0, 0.0, ' ', 'XYZ', 1, 'O')
ligand_coms.append(sb.atom)
# Analyze COM distances
print "= Analyzing COM distances"
nb = NeighborSearch(ligand_coms)
nneighbors = []
for com in ligand_coms:
nn = nb.search(com.coord, ui.radius, level='R')
# minimum 1 (itself)
nneighbors.append(len(nn))
# Normalize neighbor counts
max_nn = float(max(nneighbors))
min_nn = min(nneighbors)
nneighbors = [(n-min_nn)/max_nn for n in nneighbors]
print '\tLarger Cluster: {0} elements'.format(int(max_nn))
# Add to bfactor
for com, nn in zip(ligand_coms, nneighbors):
com.bfactor = nn
# Write sampling.pdb
print "= Writing analysis file to: {0}".format(ui.output)
io = PDBIO()
output_structure = sb.get_structure()
io.set_structure(output_structure)
io.save(ui.output)
print "= Writing Pymol visualization file"
with open('sampling.pml', 'w') as handle:
print >>handle, 'color white'
print >>handle, 'spectrum b, rainbow, chain X+Y'
print >>handle, 'as spheres, chain X+Y'
print >>handle, 'as surface, not chain X+Y'
print >>handle, 'center not chain X+Y'
print >>handle, 'zoom'
print "= End"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment